/*

%function [ranks,rho,F,prho,pi,piu]=mvrsample(A, varargin)

% MVRSAMPLE samples the minimum violation rankings for a directed network.

%    Source: http://santafe.edu/~aaronc/facultyhiring/

%   

%    [ranks,rho,F,prho,pi,piu]=mvrsample(A, varargin)

%

%    MVRSAMPLE(A) samples the minimum violation rankings for a directed

%    network, based method described in Clauset, Arbesman, Larremore (2015).

%    A is a matrix of size N x N representing a directed network. Each

%    element A(i,j) should be a natural number. Self-loops are allowed.

%    MVRSAMPLE automatically detects whether A meets the requirements of

%    the method.

%  

%    The MVR sampling procedure works as follows:

%    1) Initially, nodes are ranked in descreasing order of their out-

%       degree. The score of this ranking 'pi' is the number of directed

%       edges (u,v) for which pi(v) is better than pi(v).

%    2) We then search over rankings to minimize the number of such

%       violations. The algorithm used is a zero-temperature Markov chain

%       Monte Carlo (MCMC) sampler, which has several user-definable

%       parameters that specify length of burn in, number of samples to

%       take, and spacing between samples.

%    3) By default, this procedure is performed once. Optionally, it can be

%       performed multiple times and the results averaged. Each repetition

%       can be run on a bootstrap of the edges, which would better capture

%       natural variability in the edge generating process (assuming edges

%       can be treated iid).

%    4) The algorithm returns several outputs that capture the results of

%       the sampler: the average ranking and rho across all repetitions,

%       and optionally the individual rankings and rhos for each

%       repetition.

%

%

%    Example:

%       A = rand(100,100)<0.1;

%       names = (1:n)';

%       [ranks,rho,F] = mvrsample(A,'names',names);

%       names(ranks(:,1))

%

%       [ranks,rho,F] = mvrsample(A,'names',names,'bootstrap','reps',10);

%      

%

%    Outputs:

%    'ranks' is a N x 2 matrix whose rows give the (index, mean pi

%            score) for each node in A, in descending order of mean pi,

%            across repetitions.

%    'rho' is a scalar that gives the fraction of edges in A that violate

%          the ranks ordering.

%    'F' is a N x N matrix that is equivalent to A under the ranking given

%        by ranks.

%    'prho' is a 1 x N vector containing the rho values for each rep

%    'pi' is a reps x N matrix containing the pi vectors for each rep

%    'piu' is a reps x N matrix containing the stddev of pi for each rep

%   

%   

%    For more information, try 'type mvrsample'

 

% Version 1.0    (2015 February)

% Copyright (C) 2015 Aaron Clauset (University of Colorado Boulder)

% Distributed under GPL 2.0

% http://www.gnu.org/licenses/gpl-2.0.html

% MVRSAMPLE comes with ABSOLUTELY NO WARRANTY

%

% Notes:

%

% 1. The default 'move' in the MCMC is to choose a pair of nodes and

%    propose a ranking in which they are 'rotated'. The number of nodes in

%    the set to be rotated is an adjustable parameter. For instance, this

%    uses 3-node rotations:

%   

%       [ranks,rho,F] = mvrsample(A,'rotate',3);

%   

% 2. Each repetition can be performed on a simple bootstrap of the edges.

%    The adjacency matrix is interpreted as an unweighted, directed

%    multigraph, in which A(i,j) = g indicates that there are g unweighted

%    edges (i,j) in the network. The bootstrap chooses m=sum(sum(A)) edges

%    with replacement from the set of all such edges (i,j) and builds a new

%    network from these. The default setting is no bootstrap. To turn it

%    on, simply call

%   

%       [ranks,rho,F] = mvrsample(A,'bootstrap');

%

% 3. The parameters of the MCMC can also be changed: the number of samples

%    to take before stopping, the number of swaps to successfully perform

%    between each sampled ranking, and the number of steps used for 'burn

%    in' before the sampling begins. These can be changed like so

%   

%       [ranks,rho,F] = mvrsample(A,'samples',1000,'spacing',10000,'burnin',10^6);

%

%    Any subset of these can be omitted, which will leave them at their

%    default settings.

%

% 4. The algorithm writes updates about the progress of the MCMC to stdout.

%    This can be silenced like so

%   

%       [ranks,rho,F] = mvrsample(A,'quiet');

%

 

       The following code is the mvrsample function in C. It's slightly different to the MATLAB version considering the input format.

       Here we use a mvrsample_input.csv file as the input file. The first row contains the running parameters. Then, the adjacent

       matrix starts from the second row. ( names = (1:n)' as default. ) By Huanshen Wei (huanshenwei@gmail.com)

 

       Example: mvrsample_input.csv

       bootstrap,reps,20,spacing,100,samples,20,burn_in,10000

       1,2,1

       0,3,1

       1,0,1

 

*/

 

#include <time.h>

#include <stdlib.h>

#include <iostream>

#include <fstream> 

#include <stack>

#include <ctime>

#include <string.h>

#include <vector>

#include <sstream>

#include <math.h>

using namespace std;

 

//translate string to other type, eg.:int or double

template <class Type2>

Type2 string2num(const string &str){

       istringstream iss(str);

       Type2 num;

       iss >> num;

       return num;

}

 

//for time recording

std::stack<clock_t> tictoc_stack;

 

void tic(){

    tictoc_stack.push(clock());

}

 

int toc(){

       int a = int(((double)(clock() - tictoc_stack.top())) / CLOCKS_PER_SEC);

    return(a);

}

 

//for datainput

enum string_code{

       failed,

       Rotate,  

       reps,     

       samples,

       spacing, 

       burn_in, 

       Bootstrap,     

       Nowarn,

       Quiet,

       Names,

};

 

string_code hashit(std::string const& inString){

       if (inString == "rotate") return Rotate;

       if (inString == "reps") return reps;

       if (inString == "samples") return samples;

       if (inString == "spacing") return spacing;

       if (inString == "burn_in") return burn_in;

       if (inString == "bootstrap") return Bootstrap;

       if (inString == "nowarn") return Nowarn;

       if (inString == "quiet") return Quiet;

       if (inString == "names") return Names;

       return(failed);

}

 

void Input(int &n, int &m, int* names, bool &bstrap, int &nr, int &nreps, int &s_rate, int &n_samp, int &Tb, bool &nowarn, bool &quiet, int** &A, double* &prho, double** &pi, double** &piu){

       vector<vector<string>> InputData;

       ifstream inFile("mvrsample_input.csv", ios::in);

       string lineStr;

       while (getline(inFile, lineStr)){

              stringstream ss(lineStr);

              string str;

              vector<string> lineArray;

              while (getline(ss, str, ','))

                     lineArray.push_back(str);

              InputData.push_back(lineArray);

       }

       inFile.close();

 

       n = InputData[1].size(); // n either comes from names.size or A[1][].size

      

       // default settings

       nr = 2;           // rotation size = pairwise swaps

     for (int i=0; i<n; i++) names[i] = i;             // names = indices

       bstrap = false;              // bootstrap = false

       nreps = 1;                // 1 repetition of sampler

       s_rate = n;            // n steps per sampled state

       n_samp = n;                 // n samples stored

       Tb = n * n;                   // n^2 steps for burn-in

 

       bool inputnames = false; // whether provide names?

       int Startln = 1;             // Start line for matrix-A

 

       // parse command-line parameters; trap for bad input

       int i = 0, argok;           

       vector<string> lineArray = InputData[0];

       while (i < int(lineArray.size())){

              argok = 1;

           string str = lineArray[i];

           if (str.length() == 0){

                  i++;

                  continue;

           }

            if (!((string2num<char>(lineArray[i].substr(0,1)) >=48) && (string2num<char>(lineArray[i].substr(0,1)) <=57))){

                  switch (hashit(str)){

                      case Rotate: {

                             nr = string2num<int>(lineArray[i+1]);

                             i++;

                             break;

                      }

                      case reps: {

                               nreps = string2num<int>(lineArray[i+1]);

                               i++;

                               break;

                        }  

                        case samples: {

                               n_samp = string2num<int>(lineArray[i+1]);

                               i++;

                               break;

                        }     

                        case spacing: {

                             s_rate = string2num<int>(lineArray[i+1]);

                               i++;

                               break;

                        }    

                        case burn_in: {

                            Tb = string2num<int>(lineArray[i+1]);

                               i++;

                               break;

                        }

                        case Bootstrap:     {

                               bstrap  = true;

                               break;

                        }

                        case Nowarn: {

                               nowarn  = true;

                             break;

                        } 

                        case Quiet:  {

                                   quiet   = true;    

                             break;

                        }

                        case Names:  {

                               inputnames = true;

                               break;

                        }     

                        default: argok = 0;

                  }

           }

           if (!argok)

                  printf("(MVRSAMPLE) Ignoring invalid argument #%d\n",i+1);

           i++;

       }

 

       // load names

       if (inputnames){

              for (int i=0; i<int(InputData[1].size()); i++)

                     names[i] = string2num<int>(InputData[1][i]);

              Startln = 2;

       }

 

       // load adjacentmatrix A

       m = 0;

       A = (int**) malloc(sizeof(int*) * n);

       for (int i=0; i<n; i++) A[i] = (int*) malloc(sizeof(int) * n);

       for (int i=0; i<n; i++)

              for (int j=0; j<n; j++){

                     A[i][j] = string2num<int>(InputData[i + Startln][j]);

                     m += A[i][j];

              }

 

       vector<string> tstr; tstr.push_back("off"); tstr.push_back("on");

       if (!quiet){

           printf("Minimum violation ranking sampler\n");

           printf("   Copyright 2015 Aaron Clauset | Ports to C by Huanshen.Wei\n");

           printf("   Warning: This can be a very slow calculation; please be patient.\n");

           printf("   nodes, n = %i\n   edges, m = %i\n   reps     = %i\n",n,m,nreps);

           cout << "   bootstrap of edges      = " << tstr[bstrap] << endl;

           printf("   number of nodes to swap = %i\n",nr);

           printf("   steps between samples   = %i\n",s_rate);

           printf("   target sample count     = %i\n",n_samp);           

              printf("   burn-in step            = %i\n", Tb);

       }

       printf("\nPress any key to start!\n"); getchar();

      

       prho = (double*) malloc(sizeof(double) * nreps);

       for (int i=0; i<reps; i++) prho[i] = 0.0;

 

       pi = (double**) malloc(sizeof(double*) * n);

       for (int i=0; i<n; i++) pi[i] = (double*) malloc(sizeof(double) * nreps);

       for (int i=0; i<n; i++)

              for (int j=0; j<nreps; j++)

                     pi[i][j] = 0.0;

 

       piu = (double**) malloc(sizeof(double*) * n);

       for (int i=0; i<n; i++) piu[i] = (double*) malloc(sizeof(double) * nreps);

       for (int i=0; i<n; i++)

              for (int j=0; j<nreps; j++)

                     piu[i][j] = 0.0;

}

 

int dfs(int** arr, int n, int* visited, int type, int i){

       visited[i] = type;

       int tt = 1;

       for (int j=0; j<n; j++)

              if ((i != j) && (arr[i][j] || arr[j][i]) && (!visited[j])){

                     visited[j] = type;

                     tt += dfs(arr,n,visited,type,j);

              }

       return tt;

}

 

int SplitDetect(int** arr, int n){

       int* visited;

       visited = (int*) malloc(sizeof(int) * n);

       memset(visited, 0, sizeof(int) * n);

 

       int temp = 0;

       for (int i=0; i<n; i++){

              if (!visited[i]){

                     temp++;

                     dfs(arr,n,visited,temp,i);

              }

       }

 

       free(visited);

       visited = NULL;

       return (temp);

}

 

void PrintAdj(int** arr, int a, int b){

       for (int i=0; i<a; i++){

              for (int j=0; j<b; j++)

                     printf("%d ", arr[i][j]);

              printf("\n");

       }

       printf("\n");

}

 

void swap(int &a, int &b){

       a = a ^ b;

       b = a ^ b;

       a = a ^ b;

}

 

int ScoreCalc(int** arr, int n){

       int sum = 0;

       for (int i=1; i<n; i++)

              for (int j=0; j<i; j++)

                     sum += arr[i][j];

       return(sum);

}

 

void mvrsample(){

       int n;                                   // number of vertices

       int m;                                  // number of edges

       int** A = NULL;                   // AdjacentMatrix of network

       int names[1300];           // names of the nodes

       int nr;                                 // size of a rotation

       bool bstrap;                 // boostrap the edges?

       int nreps;                     // number of repititions of sampler to run

       int s_rate;                            // take sample every s_rate steps

       int n_samp;                         // number of samples to store

       int Tb;                                 // burn-in steps before sampling

       bool nowarn = false;    // default: don't display warnings

       bool quiet = false;        // default: display messages

       double *prho = NULL;        // output: fraction of edges that violate MVR (by rep)

       double **pi = NULL;          // output: mean of ranks across MVR samples (by rep)

       double **piu = NULL;        // output: std of ranks across MVR samples (by rep)

      

       Input(n,m,names,bstrap,nr,nreps,s_rate,n_samp,Tb,nowarn,quiet,A,prho,pi,piu);

 

       tic();

 

       int** B;

       B = (int**) malloc(sizeof(int*) * n);

       for (int i=0; i<n; i++) B[i] = (int*) malloc(sizeof(int) * n);

 

       int* kout; kout = (int*) malloc(sizeof(int) * n);

       int* I;  I = (int*) malloc(sizeof(int) * n);

       int* h;  h = (int*) malloc(sizeof(int) * n);

      

       int** F;

       F = (int**) malloc(sizeof(int*) * n);

       for (int i=0; i<n; i++) F[i] = (int*) malloc(sizeof(int) * n);

   

    double** rs = (double**) malloc(sizeof(double*) * n);      // stored samples

       for (int i=0; i<n; i++) rs[i] = (double*) malloc(sizeof(double) * n_samp);

      

       int* pr = (int*) malloc(sizeof(int) * nr);

       srand((unsigned)time(NULL)); // random number generator

 

       // int ttm = m;

       // int mtemp = int(m * 5.044 / (m / double(n)));

 

       for (int ijk=1; ijk<=nreps; ijk++){

 

              // m = ttm;

           // 1. if necessary, bootstrap the set of edges by sampling them

           //    uniformly at random with replacement. turning this feature off

           //    will reduce the sampler's ability to accurately estimate the

           //    uncertainty of the MVR score.

              if (bstrap){

                     int stemp = 0;

                     int* u;

                     int* v;

                     u = (int*) malloc(sizeof(int) * m);

                     v = (int*) malloc(sizeof(int) * m);

                     for (int i=0; i<n; i++)

                            for (int j=0; j<n; j++){

                                   if (A[i][j]){

                                          for (int k=0; k<A[i][j]; k++){

                                                 stemp++;

                                                 u[stemp-1] = i;

                                                 v[stemp-1] = j;

                                          }

                                   }

 

                            }     

                     for (int i=0; i<n; i++)

                            for (int j=0; j<n; j++)

                                   B[i][j] = 0;

                     int ntemp = 0;

 

                     // printf("real edges are %d\n", mtemp);

                     for (int i=0; i<m; i++){

                            ntemp = rand() % m;

                            B[u[ntemp]][v[ntemp]] ++;

                     }

              }

              // 1b. don't bootstrap the edges

              else{

                     for (int i=0; i<n; i++)

                            for (int j=0; j<n; j++)

                                   B[i][j] = A[i][j];

              }

              // m = mtemp;

              printf("Seperated components = %d\n",SplitDetect(B,n));

 

 

              int temp = SplitDetect(B,n);

              if (temp > 0) printf("reps %d:The network splits into %5d part!\n", ijk, temp);

 

              // 2a. initialize the ranking out-degree, in decreasing order

              memset(kout, 0, sizeof(int) * n);

              for (int i=0; i<n; i++)

                     for (int j=0; j<n; j++)

                            kout[i] += A[i][j];

              for (int i=0; i<n; i++) I[i] = i;

              for (int i=0; i<n; i++)

                     for (int j=i+1; j<n; j++)

                            if (kout[i] < kout[j]){

                                   swap(kout[i],kout[j]);

                                   swap(I[i],I[j]);

                            }

              for (int i=0; i<n; i++)

                     h[I[i]] = i;

 

              // 2b. initialize the MVR score

              for (int i=0; i<n; i++)

                     for (int j=0; j<n; j++)

                            F[h[i]][h[j]] = B[i][j];

 

              int score = ScoreCalc(F,n);

              if (!quiet) printf("[rep=%d][t=%8d] violations = %4i (%4.2f%%)\tconverging: %d\t(%4.2fm done)\n",ijk,1,score,100*score/double(m),Tb,toc()/60.0);

              int mins = score;

 

              // 2c. initialize the zero-temperature MCMC sampler

              for (int i=0; i<n; i++)

                     for (int j=0; j<n_samp; j++)

                            rs[i][j] = 0.0;

           int k = 1;                // index of sample

           int T = Tb+n_samp*s_rate; // total runtimes needed

           bool f_stop = 0;                   // stop flag

           int cnt    = 0;                           // neutral counter

           int t      = 1;                           // time

 

              // 3. Run the zero-temperature MCMC to sample the minimum violation

           //    rankings. The proposal step of the MCMC chooses a uniformly random

           //    group of vertices of size r and then tries rotating them to create

           //    a new ordering. If that ordering is no worse than the current

           //    ordering, it accepts the move (Metropolis-Hastings rule) and

           //    repeats. Otherwise, it discards that proposal, and chooses a new

           //    one. The MCMC runs for Tb "burn in" steps before beginning the

           //    sampling of MVRs. Some information is written to stdout as the

           //    MCMC progresses.

           while (true){

                  t++;

                  // 3a. check stopping criteria

               if (t > T) f_stop = 1;

               if (f_stop > 0) break;

               // 3b. choose r positions to swap

               int r = 0, temp;

               while (r < nr){

                      while (true){

                             temp = rand() % n;

                             bool flag = 1;

                             for (int pos=0; pos<r; pos++)

                                    if (pr[pos] == temp){

                                           flag = 0;

                                           break;

                             }

                             if (flag) break;

                      }

                      pr[r] = temp;

                      r++;

               }

 

               // 3c.& 3d. "rotate" them & tabulate proposed block matrix

               int a, b, delta = 0;

               for (int i=0; i<nr-1; i++){

                      a = h[pr[i]]; b = h[pr[i+1]];

                      if (a > b) swap(a,b);

                      for (int j=a+1; j<b; j++) delta += F[a][j] - F[b][j] + F[j][b] - F[j][a];

                      delta += F[a][b] - F[b][a];

                      for (int j=0; j<n; j++) swap(F[a][j],F[b][j]);                          

                      for (int j=0; j<n; j++) swap(F[j][a],F[j][b]);

                      swap(h[pr[i]],h[pr[i+1]]);

               }

               // 3e. compute F2's score

               int tt = score;

               score = tt + delta;

               if (score <= mins){

                      if (score < mins){

                             mins = score;

                             if (!quiet && (t >= Tb))

                                    printf("[rep=%d][t=%8d] violations = %4d (%4.2f%%)\tfound a better MVR; restarting sampling\n",ijk,t,score,100*score/double(m));

                             if (t > Tb){

                                    k = 1;

                                    cnt = 0;

                                    t = Tb + 1;

                             }

                      }

                      cnt++;

               }

               else{

                      // reverse rotation

                      for (int i = nr - 1; i>0; i--){

                             a = h[pr[i]]; b = h[pr[i-1]];

                             for (int j=0; j<n; j++) swap(F[a][j],F[b][j]);

                             for (int j=0; j<n; j++) swap(F[j][a],F[j][b]);

                             swap(h[pr[i]],h[pr[i-1]]);

                      }

                      score = tt;

               }

 

               // sampling to record the rankings

               if ((t > Tb) && (t % s_rate == 0)){

                      for (int i=0; i<n; i++)

                             rs[i][k] = h[i];

                      k++;

                      cnt = 0;

               }

 

               // 3f. update the user on the progress (stdout)

               if (t % 1000 == 0){

                      if (!quiet){

                       if (t <= Tb)

                           printf("[rep=%i][t=%8i] violations = %4i (%4.2f%%)\tconverging: %i\t(%4.2fm done | %4.2fm to go)\n",ijk,t,score,100*score/double(m),Tb-t,toc()/60.0,((T*nreps)/(t+(ijk-1)*t-1))*(toc()/60.0));

                       else

                           printf("[rep=%i][t=%8i] violations = %4i (%4.2f%%)\tsamples: %i (%4.2f%%)\t(%4.2fm done | %4.2fm to go)\n",ijk,t,score,100*score/double(m),k,100*k/double(n_samp),toc()/60.0,((T*nreps)/(t+(ijk-1)*t-1))*(toc()/60.0));

                      }

               }

 

               // 3g. recheck stopping criteria

               if (t > T) f_stop = 1;

               if (f_stop > 0) break;

 

           }

                 

              // store the results of this rep

              prho[ijk - 1] = mins/double(m);

             

              for (int i=0; i<n; i++){

                     double sum = 0;

                     for (int j=0; j<n_samp; j++)

                            sum += rs[i][j];

                     pi[i][ijk - 1] = sum / double(n_samp);

              }

 

              for (int i=0; i<n; i++){

                     double sum = 0;

                     for (int j=0; j<n_samp; j++)

                            sum += (rs[i][j] - pi[i][ijk - 1]) * (rs[i][j] - pi[i][ijk - 1]) / double(n_samp);

                     piu[i][ijk - 1] = sqrt(sum);

              }

       }     

 

       // compute the mean results and return them

       double* ranks; ranks = (double*) malloc(sizeof(double) * n);

       for (int i=0; i<n; i++){

              double sum = 0;

              for (int j=0; j<nreps; j++)

                     sum += pi[i][j];

              ranks[i] = sum / double(nreps);

       }

 

       for (int i=0; i<n; i++) I[i] = i;

       for (int i=0; i<n; i++)

              for (int j=i+1; j<n; j++)

                     if (ranks[i] > ranks[j]){

                            double c; c = ranks[i]; ranks[i] = ranks[j]; ranks[j] = c;

                            swap(I[i],I[j]);

                     }

       for (int i=0; i<n; i++)

              h[I[i]] = i;

       // the reordered adjacency matrix

       for (int i=0; i<n; i++)

              for (int j=0; j<n; j++)

                     F[h[i]][h[j]] = B[i][j];

       // fraction of edges that violate the ranking

       double rho = ScoreCalc(F,n);

 

       // print the results to .csv files

       ofstream outFile0("mvrsample_output_prho.csv", ios::out);

       outFile0 << "rho" << ',' << rho/double(m) << endl;

       outFile0 << prho[0];

       for (int i=1; i<nreps; i++)

              outFile0 << ',' << prho[i];

       outFile0 << endl;

       outFile0.close();

 

       ofstream outFile1("mvrsample_output_ranks.csv", ios::out);

       outFile1 << I[0] + 1 << ',' << ranks[0] << endl;

       for (int i=1; i<n; i++)

              outFile1 << I[i] + 1 << ',' << ranks[i] << endl;

       outFile1.close();

 

       ofstream outFile2("mvrsample_output_pi.csv", ios::out);

       for (int i=0; i<n; i++){

              outFile2 << pi[i][0];

              for (int j=1; j<nreps; j++)

                     outFile2  << ',' << pi[i][j];

              outFile2 << endl;

       }

       outFile2.close();

 

       ofstream outFile3("mvrsample_output_piu.csv", ios::out);

       for (int i=0; i<n; i++){

              outFile3 << piu[i][0];

              for (int j=1; j<nreps; j++)

                     outFile3  << ',' << piu[i][j];

              outFile3 << endl;

       }

       outFile3.close();

}

 

int main(){

       mvrsample();

       return 0;

}