R/MEMS_ergm_nonparam.R
In netmediate: Micro-Macro Analysis for Social Networks

#
# This function estimates the MEMS using ERGM with nonparametric algorithm
#
#
# model is the ERGM object
#micro_process is the micro process of interest, provided as a character string
#macro_function is the function to be calculated on the network object. NOTE: currently only supports statistics calculated on network and igraph objects.
#object type is the type of object used in the function to calculate macro statistics. Currently only supports igraph and network objects. If left NULL, the object is assumed to be a network object.
#the interval provides the values over which to calculate the MEMS. It defaults to 0 and 1
#nsim is the number of bootstrap resamples
# silent tells R whether to provide updates on the algorithm's progress
#full_output tells R whether to return simulated distribution in addition to summary statistic



MEMS_ergm_nonparam <- function(model,
                               micro_process,
                               macro_function,
                               object_type=object_type,
                               interval=interval,
                               nsim=nsim,
                               silent=silent,
                               full_output=full_output,
                               mediator=mediator,
                               link_id=link_id,
                               controls=controls,
                               control_functions=control_functions,
                               sensitivity_ev=sensitivity_ev) {


  if(class(model)[1]=="btergm"){

    btergm_formula<-btergm::getformula(model)
    offset_mat<-btergm::mtergm(btergm_formula,returndata = TRUE,verbose=FALSE)$offsmat
  }


  interval<-sort(interval) #order from lowest to highest
  ergm_mat<-btergm::edgeprob(model)
  dyad_mat<-ergm_mat
  start.drops<-ncol(dyad_mat)-5
  dyad_mat<-dyad_mat[,-c(2,start.drops:ncol(dyad_mat))]

    #include offset matrix
  if(class(model)[1]=="btergm"){

    lag_t<-ergm_mat[,c("tie","i","j","t")]
    lag_t$t<-lag_t$t+1
    colnames(lag_t)[1]<-"lag_tie"
    ergm_mat<-plyr::join(ergm_mat,lag_t)
    ergm_mat$lag_tie[is.na(ergm_mat$lag_tie)]<-0
    dyad_mat$lag_tie<-ergm_mat$lag_tie
  }




  message("Computing MEMS over ",interval[1],"-",interval[length(interval)]," interval.")
  message("Getting bootstrap parameter vector.")

  #start bootstrap with pseudolikelihood
  theta<-matrix(NA,nrow=nsim,ncol=length(btergm::coef(model)))
  colnames(theta)<-names(btergm::coef(model))

  for(j in 1:nsim){

    bs_mat<-dyad_mat[sample(nrow(dyad_mat), replace=TRUE), ]
    if(class(model)[1]=="btergm"){
      bs_model<-glm(bs_mat$tie ~  1+.+offset(bs_mat$lag_tie), data = data.frame(bs_mat[,!colnames(bs_mat)%in%c("tie","lag_tie")]),family=binomial)
    }else{
      bs_model<-glm(bs_mat$tie ~  1+., data = data.frame(bs_mat[,!colnames(bs_mat)%in%c("tie")]),family=binomial)

    }
    theta[j,]<-coef(bs_model)

    if(silent==FALSE){

       message("Getting bootstrap parameters ", j, " of ", nsim, " complete.")

    }

  }

  #create list of values to predict over
  mat_list<-list()
  for(i in 1:length(interval)){
    mat_list[[i]]<-ergm_mat

  }

  #create lists of data for each link_id and control. These lists provide the data for AMME function
  if(!is.null(link_id)){

    link_list_data<-list()
    for(i in 1:nsim){
      link_list_data[[i]]<-matrix(NA,nrow=length(unique(link_id[[1]])),ncol=length(interval))
      rownames(link_list_data[[i]])<-unique(link_id[[1]])
    }

    if(length(controls)>0){
      controls_list<-as.list(controls)
      for(i in 1:length(controls)){
        sim_list<-list()
        for(j in 1:nsim){
          sim_list[[j]]<-matrix(NA,nrow=length(unique(link_id[[i+1]])),ncol=length(interval))
          rownames(sim_list[[j]])<-unique(link_id[[i+1]])
        }
        controls_list[[i]]<-sim_list
        names(controls_list)<-controls

      }
    }else{
      controls_list<-NULL
    }
  }


  output_data<-matrix(NA,nrow=nsim,ncol=length(interval))
  aMEMS_tracer<-0

  #main loop
  for(j in 1:nrow(theta)){

    #create networks for k intervals

    net_list<-list()
    for(i in 1:length(mat_list)){

      pred_mat<-mat_list[[i]]
      if(class(model)[1]=="btergm"){
        start.drops<-ncol(pred_mat)-6
      }else{
        start.drops<-ncol(pred_mat)-5
      }
      pred_mat<-pred_mat[,-c(1,start.drops:ncol(pred_mat))]
      cbcoef<-theta[j,]
      cbcoef[micro_process]<-cbcoef[micro_process]*interval[i]

      #predict ties
      lp <- as.matrix(pred_mat)%*%cbcoef
      result <- c(1/(1 + exp(-lp)))
      pred_mat<-mat_list[[i]]
      pred_mat$y <- rbinom(nrow(mat_list[[i]]),1,result)      # create predicted ties

    #create network, assign vertex attributes, by creating empty network and adding new edges
      if(class(model)[1]=="btergm"){
        el<-pred_mat[,c("i","j","y","t")]
        el_list<-list()

        for(t in unique(el$t)){
          el_list[[t]]<-el[which(el$t==t &el$y==1),]
        }

        net_list[[i]]<-model@data$networks

        if("matrix"%in%class(model@data$networks[[1]])){
          warning("Model fit treating adjacency matrix as dependent variable. This may limit the range of statistics that can be calculated on the network. Please respecify model using a network object if the desired values can't be estimated.")
             for(t in 1:length(net_list[[i]])){
                 net_list[[i]][[t]]<-as.network(net_list[[i]][[t]])
            }
        }

        for(t in 1:length(net_list[[i]])){
          net_list[[i]][[t]][,]<-0
          net_list[[i]][[t]]<-network::add.edges(net_list[[i]][[t]],tail=el_list[[t]][,1],head=el_list[[t]][,2])

        }

      }else{
        el<-pred_mat[,c("i.name","j.name","y")]
        el<-el[el$y==1,-c(3)]
        el<-as.matrix(el)
        net_list[[i]]<-model$network
        net_list[[i]][,]<-0
        net_list[[i]]<-network::add.edges(net_list[[i]],tail=el[,1],head=el[,2])


      }

    }


    if(class(model)[1]=="btergm"){
      if(object_type[1]%in%c("network")){

        for(i in 1:length(net_list)){
          temporal_out<-list()
          for(t in 1:length(net_list[[i]])){
            temporal_out[[t]]<-macro_function(net_list[[i]][[t]])
          }
          b<-unlist(temporal_out)
          if(!is.null(link_id)){
            link_list_data[[j]][,i]<-b
          } ##store vector of output when calling AMME

          aMEMS_tracer<-1 ##handle node and multiple obs characteristics
          output_data[j,i]<-mean(b,na.rm=TRUE)
        }

      }else{
        #for igraph functions

        for(i in 1:length(net_list)){
          temporal_out<-list()
          for(t in 1:length(net_list[[i]])){
            net_list[[i]][[t]]<-intergraph::asIgraph(net_list[[i]][[t]])
            temporal_out[[t]]<-macro_function(net_list[[i]][[t]])
            }
          b<-unlist(temporal_out)
          if(!is.null(link_id)){
            link_list_data[[j]][,i]<-b
          } ##store vector of output when calling AMME

          aMEMS_tracer<-1 ##handle node and multiple obs characteristics
          output_data[j,i]<-mean(b,na.rm=TRUE)
          if(length(controls)>0){
            net_list[[i]][[t]]<-intergraph::asNetwork(net_list[[i]][[t]]) # convert back to network for control operations
          }
        }


         }

      #close btergm
      }else{

      if(object_type[1]%in%c("network")){

       for(i in 1:length(net_list)){
          b<-macro_function(net_list[[i]])
         if(length(b)>1){
            aMEMS_tracer<-1} ##handle node and multiple obs characteristics
          if(!is.null(link_id)){
            link_list_data[[j]][,i]<-b
          }
          output_data[j,i]<-mean(b,na.rm=TRUE)
        }

      }else{
      #for igraph functions
        for(i in 1:length(net_list)){
          net_list[[i]]<-intergraph::asIgraph(net_list[[i]])
          b<-macro_function(net_list[[i]])
          if(length(b)>1){
            aMEMS_tracer<-1} ##handle node and multiple obs characteristics
          if(!is.null(link_id)){
            link_list_data[[j]][,i]<-b
          }
          output_data[j,i]<-mean(b,na.rm=TRUE)
          if(length(controls)>0){
            net_list[[i]]<-intergraph::asNetwork(net_list[[i]]) # convert back to network for control operations
          }
        }

      }

    }#close ifelse btergm statement


    ##get controls

    if(length(controls)>0){

      for(control in 1:length(controls_list)){
        if(class(model)[1]=="btergm"){##handling for btergm

          if(object_type[control+1]%in%c("network")){

           for(i in 1:length(net_list)){
            for(t in 1:length(net_list[[i]])){
              temporal_out[[t]]<-control_functions[[control]](net_list[[i]][[t]])
            }
            b<-unlist(temporal_out)
            controls_list[[control]][[j]][,i]<-b
           }

          }else{
          #for igraph functions

          for(i in 1:length(net_list)){
            temporal_out<-list()
            for(i in 1:length(net_list)){
              for(t in 1:length(net_list[[i]])){
                net_list[[i]][[t]]<-intergraph::asIgraph(net_list[[i]][[t]])
                temporal_out[[t]]<-control_functions[[control]](net_list[[i]][[t]])
              }
              b<-unlist(temporal_out)
              controls_list[[control]][[j]][,i]<-b
              net_list[[i]][[t]]<-intergraph::asNetwork(net_list[[i]][[t]]) #convert back to network object for further loops

            }

          }


        }

        }else{
          if(object_type[control+1]%in%c("network")){

             for(i in 1:length(net_list)){
                b<-control_functions[[control]](net_list[[i]])
                controls_list[[control]][[j]][,i]<-b
                }

           }else{
          #for igraph functions
          for(i in 1:length(net_list)){
            net_list[[i]]<-intergraph::asIgraph(net_list[[i]])
            b<-control_functions[[control]](net_list[[i]])
            controls_list[[control]][[j]][,i]<-b
            net_list[[i]]<-intergraph::asNetwork(net_list[[i]]) #convert back to network object for further loops

           }

          }


        }#close ifelse btergm statement
      }#closes for controls loop
  } #closes if controls statement


  if(silent==FALSE){
      print(paste("Simulation ",j," out of", nsim," complete"))
    }

  }#close bootstrap loop


  ###calculate summary statistics

  if(aMEMS_tracer==1 & is.null(link_id)){
    message("More than one macro statistic is being calculated. Reporting the aMEMS.")
  }


  diff_data<-matrix(NA,nrow=nrow(output_data),ncol=ncol(output_data)-1)

  for(i in 1:ncol(diff_data)){
    k<-i+1
    diff_data[,i]<-output_data[,k]-output_data[,i]
  }

  summary_dat<-matrix(NA,nrow=2,ncol=5)
  rownames(summary_dat)<-c("MEMS","Prop. Change in M")
  colnames(summary_dat)<-c("Estimate","Std. Dev.","lower 95% CI","Upper 95% CI","BS p-val")

  summary_dat[1,1]<-mean(diff_data,na.rm=TRUE)
  summary_dat[1,2]<-sd(diff_data,na.rm=TRUE)

  if(summary_dat[1,1]<0){
    summary_dat[1,5]<-length(diff_data[which(diff_data>0)])/nsim
  }else{
    summary_dat[1,5]<-length(diff_data[which(diff_data<0)])/nsim

  }
  summary_dat[1,3]<-quantile(diff_data,.025,na.rm=TRUE)
  summary_dat[1,4]<-quantile(diff_data,.975,na.rm=TRUE)



  prop_change<-matrix(NA,nrow=nrow(output_data),ncol=ncol(output_data)-1)

  for(i in 1:ncol(prop_change)){
    k<-i+1
    prop_change[,i]<-(diff_data[,i]/output_data[,k])
  }

  prop_change<-prop_change[!is.infinite(prop_change)]

  summary_dat[2,1]<-mean(prop_change,na.rm=TRUE)

  ev_data<-NULL

  if(sensitivity_ev==TRUE){
    M_T1 <- mean(output_data[,2], na.rm=TRUE)  # E[M | T=1]
    M_T0 <- mean(output_data[,1], na.rm=TRUE)  # E[M | T=0]
    n_boot <- 1000

    RR_M_given_T <- M_T1 / M_T0

    N1 <- sum(!is.na(output_data[,2]))
    N0 <- sum(!is.na(output_data[,1]))

    boot_samples <- replicate(n_boot, {
      sample_T1 <- sample(output_data[,2], N1, replace = TRUE)
      sample_T0 <- sample(output_data[,1], N0, replace = TRUE)
      mean(sample_T1, na.rm=TRUE) / mean(sample_T0, na.rm=TRUE)
    })
    CI_lower_boot <- quantile(boot_samples, 0.025, na.rm=TRUE)
    CI_upper_boot <- quantile(boot_samples, 0.975, na.rm=TRUE)

    # Compute E-value
    E_fun<-function(x){
      if (x < 1) {
        RR_inv <- 1 / x
        ev <- RR_inv + sqrt(RR_inv * (RR_inv - 1))
      } else {
        ev <- x + sqrt(x * (x - 1))
      }
      return(ev)
    }

    E_value<-E_fun(RR_M_given_T)
    CI_lower_boot<-E_fun(CI_lower_boot)
    CI_upper_boot<-E_fun(CI_lower_boot)


    message(sprintf("95%% CI (Bootstrap): [%.3f, %.3f]", CI_lower_boot, CI_upper_boot))
    message(sprintf("E-value: %.3f", E_value))

    ev_data <- (list(
      RR_M_given_T = RR_M_given_T,
      CI_bootstrap = c(CI_lower_boot, CI_upper_boot),
      E_value = E_value
    ))

  }

  if(full_output==FALSE){
    return(summary_dat)
  }else{
    out_dat<-list(summary_dat=summary_dat,
                  output_data=output_data,
                  mems_samples=diff_data,
                  ev_data=ev_data)

    if(is.null(link_id)){

      return(out_dat) #return data no controls

    }else{

      #return data for AMME call
      out_dat<-list(out_dat_main=link_list_data,
                    out_dat_controls=controls_list)

      return(out_dat)


    }
  }


}#close function