/*
%function [ranks,rho,F,prho,pi,piu]=mvrsample(A, varargin)
% MVRSAMPLE samples the minimum violation rankings for a directed network.
% Source: http://santafe.edu/~aaronc/facultyhiring/
%
% [ranks,rho,F,prho,pi,piu]=mvrsample(A, varargin)
%
% MVRSAMPLE(A) samples the minimum violation rankings for a directed
% network, based method described in Clauset, Arbesman, Larremore (2015).
% A is a matrix of size N x N representing a directed network. Each
% element A(i,j) should be a natural number. Self-loops are allowed.
% MVRSAMPLE automatically detects whether A meets the requirements of
% the method.
%
% The MVR sampling procedure works as follows:
% 1) Initially, nodes are ranked in descreasing order of their out-
% degree. The score of this ranking 'pi' is the number of directed
% edges (u,v) for which pi(v) is better than pi(v).
% 2) We then search over rankings to minimize the number of such
% violations. The algorithm used is a zero-temperature Markov chain
% Monte Carlo (MCMC) sampler, which has several user-definable
% parameters that specify length of burn in, number of samples to
% take, and spacing between samples.
% 3) By default, this procedure is performed once. Optionally, it can be
% performed multiple times and the results averaged. Each repetition
% can be run on a bootstrap of the edges, which would better capture
% natural variability in the edge generating process (assuming edges
% can be treated iid).
% 4) The algorithm returns several outputs that capture the results of
% the sampler: the average ranking and rho across all repetitions,
% and optionally the individual rankings and rhos for each
% repetition.
%
%
% Example:
% A = rand(100,100)<0.1;
% names = (1:n)';
% [ranks,rho,F] = mvrsample(A,'names',names);
% names(ranks(:,1))
%
% [ranks,rho,F] = mvrsample(A,'names',names,'bootstrap','reps',10);
%
%
% Outputs:
% 'ranks' is a N x 2 matrix whose rows give the (index, mean pi
% score) for each node in A, in descending order of mean pi,
% across repetitions.
% 'rho' is a scalar that gives the fraction of edges in A that violate
% the ranks ordering.
% 'F' is a N x N matrix that is equivalent to A under the ranking given
% by ranks.
% 'prho' is a 1 x N vector containing the rho values for each rep
% 'pi' is a reps x N matrix containing the pi vectors for each rep
% 'piu' is a reps x N matrix containing the stddev of pi for each rep
%
%
% For more information, try 'type mvrsample'
% Version 1.0 (2015 February)
% Copyright (C) 2015 Aaron Clauset (University of Colorado Boulder)
% Distributed under GPL 2.0
% http://www.gnu.org/licenses/gpl-2.0.html
% MVRSAMPLE comes with ABSOLUTELY NO WARRANTY
%
% Notes:
%
% 1. The default 'move' in the MCMC is to choose a pair of nodes and
% propose a ranking in which they are 'rotated'. The number of nodes in
% the set to be rotated is an adjustable parameter. For instance, this
% uses 3-node rotations:
%
% [ranks,rho,F] = mvrsample(A,'rotate',3);
%
% 2. Each repetition can be performed on a simple bootstrap of the edges.
% The adjacency matrix is interpreted as an unweighted, directed
% multigraph, in which A(i,j) = g indicates that there are g unweighted
% edges (i,j) in the network. The bootstrap chooses m=sum(sum(A)) edges
% with replacement from the set of all such edges (i,j) and builds a new
% network from these. The default setting is no bootstrap. To turn it
% on, simply call
%
% [ranks,rho,F] = mvrsample(A,'bootstrap');
%
% 3. The parameters of the MCMC can also be changed: the number of samples
% to take before stopping, the number of swaps to successfully perform
% between each sampled ranking, and the number of steps used for 'burn
% in' before the sampling begins. These can be changed like so
%
% [ranks,rho,F] = mvrsample(A,'samples',1000,'spacing',10000,'burnin',10^6);
%
% Any subset of these can be omitted, which will leave them at their
% default settings.
%
% 4. The algorithm writes updates about the progress of the MCMC to stdout.
% This can be silenced like so
%
% [ranks,rho,F] = mvrsample(A,'quiet');
%
The following code is the mvrsample function in C. It's slightly different to the MATLAB version considering the input format.
Here we use a mvrsample_input.csv file as the input file. The first row contains the running parameters. Then, the adjacent
matrix starts from the second row. ( names = (1:n)' as default. ) By Huanshen Wei (huanshenwei@gmail.com)
Example: mvrsample_input.csv
bootstrap,reps,20,spacing,100,samples,20,burn_in,10000
1,2,1
0,3,1
1,0,1
*/
#include <time.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <stack>
#include <ctime>
#include <string.h>
#include <vector>
#include <sstream>
#include <math.h>
using namespace std;
//translate string to other type, eg.:int or double
template <class Type2>
Type2 string2num(const string &str){
istringstream iss(str);
Type2 num;
iss >> num;
return num;
}
//for time recording
std::stack<clock_t> tictoc_stack;
void tic(){
tictoc_stack.push(clock());
}
int toc(){
int a = int(((double)(clock() - tictoc_stack.top())) / CLOCKS_PER_SEC);
return(a);
}
//for datainput
enum string_code{
failed,
Rotate,
reps,
samples,
spacing,
burn_in,
Bootstrap,
Nowarn,
Quiet,
Names,
};
string_code hashit(std::string const& inString){
if (inString == "rotate") return Rotate;
if (inString == "reps") return reps;
if (inString == "samples") return samples;
if (inString == "spacing") return spacing;
if (inString == "burn_in") return burn_in;
if (inString == "bootstrap") return Bootstrap;
if (inString == "nowarn") return Nowarn;
if (inString == "quiet") return Quiet;
if (inString == "names") return Names;
return(failed);
}
void Input(int &n, int &m, int* names, bool &bstrap, int &nr, int &nreps, int &s_rate, int &n_samp, int &Tb, bool &nowarn, bool &quiet, int** &A, double* &prho, double** &pi, double** &piu){
vector<vector<string>> InputData;
ifstream inFile("mvrsample_input.csv", ios::in);
string lineStr;
while (getline(inFile, lineStr)){
stringstream ss(lineStr);
string str;
vector<string> lineArray;
while (getline(ss, str, ','))
lineArray.push_back(str);
InputData.push_back(lineArray);
}
inFile.close();
n = InputData[1].size(); // n either comes from names.size or A[1][].size
// default settings
nr = 2; // rotation size = pairwise swaps
for (int i=0; i<n; i++) names[i] = i; // names = indices
bstrap = false; // bootstrap = false
nreps = 1; // 1 repetition of sampler
s_rate = n; // n steps per sampled state
n_samp = n; // n samples stored
Tb = n * n; // n^2 steps for burn-in
bool inputnames = false; // whether provide names?
int Startln = 1; // Start line for matrix-A
// parse command-line parameters; trap for bad input
int i = 0, argok;
vector<string> lineArray = InputData[0];
while (i < int(lineArray.size())){
argok = 1;
string str = lineArray[i];
if (str.length() == 0){
i++;
continue;
}
if (!((string2num<char>(lineArray[i].substr(0,1)) >=48) && (string2num<char>(lineArray[i].substr(0,1)) <=57))){
switch (hashit(str)){
case Rotate: {
nr = string2num<int>(lineArray[i+1]);
i++;
break;
}
case reps: {
nreps = string2num<int>(lineArray[i+1]);
i++;
break;
}
case samples: {
n_samp = string2num<int>(lineArray[i+1]);
i++;
break;
}
case spacing: {
s_rate = string2num<int>(lineArray[i+1]);
i++;
break;
}
case burn_in: {
Tb = string2num<int>(lineArray[i+1]);
i++;
break;
}
case Bootstrap: {
bstrap = true;
break;
}
case Nowarn: {
nowarn = true;
break;
}
case Quiet: {
quiet = true;
break;
}
case Names: {
inputnames = true;
break;
}
default: argok = 0;
}
}
if (!argok)
printf("(MVRSAMPLE) Ignoring invalid argument #%d\n",i+1);
i++;
}
// load names
if (inputnames){
for (int i=0; i<int(InputData[1].size()); i++)
names[i] = string2num<int>(InputData[1][i]);
Startln = 2;
}
// load adjacentmatrix A
m = 0;
A = (int**) malloc(sizeof(int*) * n);
for (int i=0; i<n; i++) A[i] = (int*) malloc(sizeof(int) * n);
for (int i=0; i<n; i++)
for (int j=0; j<n; j++){
A[i][j] = string2num<int>(InputData[i + Startln][j]);
m += A[i][j];
}
vector<string> tstr; tstr.push_back("off"); tstr.push_back("on");
if (!quiet){
printf("Minimum violation ranking sampler\n");
printf(" Copyright 2015 Aaron Clauset | Ports to C by Huanshen.Wei\n");
printf(" Warning: This can be a very slow calculation; please be patient.\n");
printf(" nodes, n = %i\n edges, m = %i\n reps = %i\n",n,m,nreps);
cout << " bootstrap of edges = " << tstr[bstrap] << endl;
printf(" number of nodes to swap = %i\n",nr);
printf(" steps between samples = %i\n",s_rate);
printf(" target sample count = %i\n",n_samp);
printf(" burn-in step = %i\n", Tb);
}
printf("\nPress any key to start!\n"); getchar();
prho = (double*) malloc(sizeof(double) * nreps);
for (int i=0; i<reps; i++) prho[i] = 0.0;
pi = (double**) malloc(sizeof(double*) * n);
for (int i=0; i<n; i++) pi[i] = (double*) malloc(sizeof(double) * nreps);
for (int i=0; i<n; i++)
for (int j=0; j<nreps; j++)
pi[i][j] = 0.0;
piu = (double**) malloc(sizeof(double*) * n);
for (int i=0; i<n; i++) piu[i] = (double*) malloc(sizeof(double) * nreps);
for (int i=0; i<n; i++)
for (int j=0; j<nreps; j++)
piu[i][j] = 0.0;
}
int dfs(int** arr, int n, int* visited, int type, int i){
visited[i] = type;
int tt = 1;
for (int j=0; j<n; j++)
if ((i != j) && (arr[i][j] || arr[j][i]) && (!visited[j])){
visited[j] = type;
tt += dfs(arr,n,visited,type,j);
}
return tt;
}
int SplitDetect(int** arr, int n){
int* visited;
visited = (int*) malloc(sizeof(int) * n);
memset(visited, 0, sizeof(int) * n);
int temp = 0;
for (int i=0; i<n; i++){
if (!visited[i]){
temp++;
dfs(arr,n,visited,temp,i);
}
}
free(visited);
visited = NULL;
return (temp);
}
void PrintAdj(int** arr, int a, int b){
for (int i=0; i<a; i++){
for (int j=0; j<b; j++)
printf("%d ", arr[i][j]);
printf("\n");
}
printf("\n");
}
void swap(int &a, int &b){
a = a ^ b;
b = a ^ b;
a = a ^ b;
}
int ScoreCalc(int** arr, int n){
int sum = 0;
for (int i=1; i<n; i++)
for (int j=0; j<i; j++)
sum += arr[i][j];
return(sum);
}
void mvrsample(){
int n; // number of vertices
int m; // number of edges
int** A = NULL; // AdjacentMatrix of network
int names[1300]; // names of the nodes
int nr; // size of a rotation
bool bstrap; // boostrap the edges?
int nreps; // number of repititions of sampler to run
int s_rate; // take sample every s_rate steps
int n_samp; // number of samples to store
int Tb; // burn-in steps before sampling
bool nowarn = false; // default: don't display warnings
bool quiet = false; // default: display messages
double *prho = NULL; // output: fraction of edges that violate MVR (by rep)
double **pi = NULL; // output: mean of ranks across MVR samples (by rep)
double **piu = NULL; // output: std of ranks across MVR samples (by rep)
Input(n,m,names,bstrap,nr,nreps,s_rate,n_samp,Tb,nowarn,quiet,A,prho,pi,piu);
tic();
int** B;
B = (int**) malloc(sizeof(int*) * n);
for (int i=0; i<n; i++) B[i] = (int*) malloc(sizeof(int) * n);
int* kout; kout = (int*) malloc(sizeof(int) * n);
int* I; I = (int*) malloc(sizeof(int) * n);
int* h; h = (int*) malloc(sizeof(int) * n);
int** F;
F = (int**) malloc(sizeof(int*) * n);
for (int i=0; i<n; i++) F[i] = (int*) malloc(sizeof(int) * n);
double** rs = (double**) malloc(sizeof(double*) * n); // stored samples
for (int i=0; i<n; i++) rs[i] = (double*) malloc(sizeof(double) * n_samp);
int* pr = (int*) malloc(sizeof(int) * nr);
srand((unsigned)time(NULL)); // random number generator
// int ttm = m;
// int mtemp = int(m * 5.044 / (m / double(n)));
for (int ijk=1; ijk<=nreps; ijk++){
// m = ttm;
// 1. if necessary, bootstrap the set of edges by sampling them
// uniformly at random with replacement. turning this feature off
// will reduce the sampler's ability to accurately estimate the
// uncertainty of the MVR score.
if (bstrap){
int stemp = 0;
int* u;
int* v;
u = (int*) malloc(sizeof(int) * m);
v = (int*) malloc(sizeof(int) * m);
for (int i=0; i<n; i++)
for (int j=0; j<n; j++){
if (A[i][j]){
for (int k=0; k<A[i][j]; k++){
stemp++;
u[stemp-1] = i;
v[stemp-1] = j;
}
}
}
for (int i=0; i<n; i++)
for (int j=0; j<n; j++)
B[i][j] = 0;
int ntemp = 0;
// printf("real edges are %d\n", mtemp);
for (int i=0; i<m; i++){
ntemp = rand() % m;
B[u[ntemp]][v[ntemp]] ++;
}
}
// 1b. don't bootstrap the edges
else{
for (int i=0; i<n; i++)
for (int j=0; j<n; j++)
B[i][j] = A[i][j];
}
// m = mtemp;
printf("Seperated components = %d\n",SplitDetect(B,n));
int temp = SplitDetect(B,n);
if (temp > 0) printf("reps %d:The network splits into %5d part!\n", ijk, temp);
// 2a. initialize the ranking out-degree, in decreasing order
memset(kout, 0, sizeof(int) * n);
for (int i=0; i<n; i++)
for (int j=0; j<n; j++)
kout[i] += A[i][j];
for (int i=0; i<n; i++) I[i] = i;
for (int i=0; i<n; i++)
for (int j=i+1; j<n; j++)
if (kout[i] < kout[j]){
swap(kout[i],kout[j]);
swap(I[i],I[j]);
}
for (int i=0; i<n; i++)
h[I[i]] = i;
// 2b. initialize the MVR score
for (int i=0; i<n; i++)
for (int j=0; j<n; j++)
F[h[i]][h[j]] = B[i][j];
int score = ScoreCalc(F,n);
if (!quiet) printf("[rep=%d][t=%8d] violations = %4i (%4.2f%%)\tconverging: %d\t(%4.2fm done)\n",ijk,1,score,100*score/double(m),Tb,toc()/60.0);
int mins = score;
// 2c. initialize the zero-temperature MCMC sampler
for (int i=0; i<n; i++)
for (int j=0; j<n_samp; j++)
rs[i][j] = 0.0;
int k = 1; // index of sample
int T = Tb+n_samp*s_rate; // total runtimes needed
bool f_stop = 0; // stop flag
int cnt = 0; // neutral counter
int t = 1; // time
// 3. Run the zero-temperature MCMC to sample the minimum violation
// rankings. The proposal step of the MCMC chooses a uniformly random
// group of vertices of size r and then tries rotating them to create
// a new ordering. If that ordering is no worse than the current
// ordering, it accepts the move (Metropolis-Hastings rule) and
// repeats. Otherwise, it discards that proposal, and chooses a new
// one. The MCMC runs for Tb "burn in" steps before beginning the
// sampling of MVRs. Some information is written to stdout as the
// MCMC progresses.
while (true){
t++;
// 3a. check stopping criteria
if (t > T) f_stop = 1;
if (f_stop > 0) break;
// 3b. choose r positions to swap
int r = 0, temp;
while (r < nr){
while (true){
temp = rand() % n;
bool flag = 1;
for (int pos=0; pos<r; pos++)
if (pr[pos] == temp){
flag = 0;
break;
}
if (flag) break;
}
pr[r] = temp;
r++;
}
// 3c.& 3d. "rotate" them & tabulate proposed block matrix
int a, b, delta = 0;
for (int i=0; i<nr-1; i++){
a = h[pr[i]]; b = h[pr[i+1]];
if (a > b) swap(a,b);
for (int j=a+1; j<b; j++) delta += F[a][j] - F[b][j] + F[j][b] - F[j][a];
delta += F[a][b] - F[b][a];
for (int j=0; j<n; j++) swap(F[a][j],F[b][j]);
for (int j=0; j<n; j++) swap(F[j][a],F[j][b]);
swap(h[pr[i]],h[pr[i+1]]);
}
// 3e. compute F2's score
int tt = score;
score = tt + delta;
if (score <= mins){
if (score < mins){
mins = score;
if (!quiet && (t >= Tb))
printf("[rep=%d][t=%8d] violations = %4d (%4.2f%%)\tfound a better MVR; restarting sampling\n",ijk,t,score,100*score/double(m));
if (t > Tb){
k = 1;
cnt = 0;
t = Tb + 1;
}
}
cnt++;
}
else{
// reverse rotation
for (int i = nr - 1; i>0; i--){
a = h[pr[i]]; b = h[pr[i-1]];
for (int j=0; j<n; j++) swap(F[a][j],F[b][j]);
for (int j=0; j<n; j++) swap(F[j][a],F[j][b]);
swap(h[pr[i]],h[pr[i-1]]);
}
score = tt;
}
// sampling to record the rankings
if ((t > Tb) && (t % s_rate == 0)){
for (int i=0; i<n; i++)
rs[i][k] = h[i];
k++;
cnt = 0;
}
// 3f. update the user on the progress (stdout)
if (t % 1000 == 0){
if (!quiet){
if (t <= Tb)
printf("[rep=%i][t=%8i] violations = %4i (%4.2f%%)\tconverging: %i\t(%4.2fm done | %4.2fm to go)\n",ijk,t,score,100*score/double(m),Tb-t,toc()/60.0,((T*nreps)/(t+(ijk-1)*t-1))*(toc()/60.0));
else
printf("[rep=%i][t=%8i] violations = %4i (%4.2f%%)\tsamples: %i (%4.2f%%)\t(%4.2fm done | %4.2fm to go)\n",ijk,t,score,100*score/double(m),k,100*k/double(n_samp),toc()/60.0,((T*nreps)/(t+(ijk-1)*t-1))*(toc()/60.0));
}
}
// 3g. recheck stopping criteria
if (t > T) f_stop = 1;
if (f_stop > 0) break;
}
// store the results of this rep
prho[ijk - 1] = mins/double(m);
for (int i=0; i<n; i++){
double sum = 0;
for (int j=0; j<n_samp; j++)
sum += rs[i][j];
pi[i][ijk - 1] = sum / double(n_samp);
}
for (int i=0; i<n; i++){
double sum = 0;
for (int j=0; j<n_samp; j++)
sum += (rs[i][j] - pi[i][ijk - 1]) * (rs[i][j] - pi[i][ijk - 1]) / double(n_samp);
piu[i][ijk - 1] = sqrt(sum);
}
}
// compute the mean results and return them
double* ranks; ranks = (double*) malloc(sizeof(double) * n);
for (int i=0; i<n; i++){
double sum = 0;
for (int j=0; j<nreps; j++)
sum += pi[i][j];
ranks[i] = sum / double(nreps);
}
for (int i=0; i<n; i++) I[i] = i;
for (int i=0; i<n; i++)
for (int j=i+1; j<n; j++)
if (ranks[i] > ranks[j]){
double c; c = ranks[i]; ranks[i] = ranks[j]; ranks[j] = c;
swap(I[i],I[j]);
}
for (int i=0; i<n; i++)
h[I[i]] = i;
// the reordered adjacency matrix
for (int i=0; i<n; i++)
for (int j=0; j<n; j++)
F[h[i]][h[j]] = B[i][j];
// fraction of edges that violate the ranking
double rho = ScoreCalc(F,n);
// print the results to .csv files
ofstream outFile0("mvrsample_output_prho.csv", ios::out);
outFile0 << "rho" << ',' << rho/double(m) << endl;
outFile0 << prho[0];
for (int i=1; i<nreps; i++)
outFile0 << ',' << prho[i];
outFile0 << endl;
outFile0.close();
ofstream outFile1("mvrsample_output_ranks.csv", ios::out);
outFile1 << I[0] + 1 << ',' << ranks[0] << endl;
for (int i=1; i<n; i++)
outFile1 << I[i] + 1 << ',' << ranks[i] << endl;
outFile1.close();
ofstream outFile2("mvrsample_output_pi.csv", ios::out);
for (int i=0; i<n; i++){
outFile2 << pi[i][0];
for (int j=1; j<nreps; j++)
outFile2 << ',' << pi[i][j];
outFile2 << endl;
}
outFile2.close();
ofstream outFile3("mvrsample_output_piu.csv", ios::out);
for (int i=0; i<n; i++){
outFile3 << piu[i][0];
for (int j=1; j<nreps; j++)
outFile3 << ',' << piu[i][j];
outFile3 << endl;
}
outFile3.close();
}
int main(){
mvrsample();
return 0;
}