R/hysteresis.R

Defines functions hysteresis

Documented in hysteresis

#' @title A function to automatically generate hysteresis plots for all
#' structural parameter estimates.
#' @description Hysteresis plots were introduced by: Snijders, Tom AB, et al.
#' "New specifications for exponential random graph models." Sociological
#' methodology 36.1 (2006): 99-153. They can tell the user about sensitivity of the parameter estimates and whether they should worry about degeneracy.
#'
#' @param GERGM_Object A GERGM object returned by the gergm() estimation
#' function.
#' @param networks_to_simulate Number of simulations per unique parameter value
#' used in the hysteresis plots. Default is 1000.
#' @param burnin Number of samples from the MCMC simulation procedure that
#' will be discarded before drawing the samples used for hysteresis plots.
#' Default is 500.
#' @param range The magnitude of the interval over which theta parameter
#' values will be varied for the hysteresis plots. The actual range will be
#' vary from a minimum of theta_value - range * theta_std_error to a maximum of
#' theta_value + range * theta_std_error so the actual parameter ranges will be
#' scaled to the magnitude of the parameter estimate standard errors. Defaults to 4.
#' @param steps The number of theta values to simulate above and below the
#' estimated theta value within the given range. The total number of simulations
#' is then = 2 * steps + 1. Defaults to 20.
#' @param initial_density The initial network density used in simulations, can
#' range from 0 to 1. Defaults to 0.2.
#' @param simulation_method Simulation method for MCMC estimation. Default is
#' "Gibbs" but can also be set to "Metropolis".
#' @param proposal_variance The variance specified for the Metropolis Hastings
#' simulation method. This parameter is inversely proportional to the average
#' acceptance rate of the M-H sampler and should be adjusted so that the average
#' acceptance rate is approximately 0.25. Default is 0.1.
#' @param seed Seed used for reproducibility. Default is 123.
#' @param thin The proportion of samples that are kept from each simulation. For
#' example, thin = 1/200 will keep every 200th network in the overall simulated
#' sample. Default is 1.
#' @param output_directory The directory where you would like output generated
#' by this function to be saved. If NULL, then the current working directory will
#' be used. Defaults to NULL.
#' @param output_name The common name stem you would like to assign to all
#' plots generated by this function. If NULL, then no output will be saved to pdf
#' and plots will only be plotted to the graphics device.
#' @param parallel Logical indicating whether hysteresis plots for each theta
#' parameter should be simulated in parallel. Can greatly reduce runtime, but
#' the computer must have at least as many cores as theta parameters.
#' Defaults to FALSE.
#' @return A list object containing network densities for simulated networks.
#' @examples
#' \dontrun{
#' set.seed(12345)
#' net <- matrix(rnorm(100,0,20),10,10)
#' colnames(net) <- rownames(net) <- letters[1:10]
#' formula <- net ~  mutual + ttriads
#'
#' test <- gergm(formula,
#'               normalization_type = "division",
#'               network_is_directed = TRUE,
#'               use_MPLE_only = FALSE,
#'               estimation_method = "Metropolis",
#'               number_of_networks_to_simulate = 40000,
#'               thin = 1/10,
#'               proposal_variance = 0.5,
#'               downweight_statistics_together = TRUE,
#'               MCMC_burnin = 10000,
#'               seed = 456,
#'               convergence_tolerance = 0.01,
#'               MPLE_gain_factor = 0,
#'               force_x_theta_updates = 4)
#'
#' hysteresis_results <- hysteresis(
#'     test,
#'     networks_to_simulate = 1000,
#'     burnin = 500,
#'     range = 2,
#'     steps = 20,
#'     simulation_method = "Metropolis",
#'     proposal_variance = 0.5)
#' }
#' @export
hysteresis <- function(GERGM_Object,
                       networks_to_simulate = 1000,
                       burnin = 500,
                       range = 4,
                       steps = 20,
                       initial_density = 0.2,
                       simulation_method = c("Gibbs", "Metropolis"),
                       proposal_variance = 0.1,
                       seed = 12345,
                       thin = 1,
                       output_directory = NULL,
                       output_name = NULL,
                       parallel = FALSE){

  # preliminaries
  possible_structural_terms <- c("out2stars",
                                 "in2stars",
                                 "ctriads",
                                 "mutual",
                                 "ttriads",
                                 "edges",
                                 "diagonal")
  currentwd <- getwd()

  if (!is.null(output_directory)) {
    setwd(output_directory)
  }



  simulation_method <- simulation_method[1]
  GERGM_Object@proposal_variance <- proposal_variance
  GERGM_Object@estimation_method <- simulation_method
  GERGM_Object@number_of_simulations <- networks_to_simulate
  GERGM_Object@thin <- thin
  GERGM_Object@burnin <- burnin

  # figure out how many statistics we need to simulate values for
  num_network_terms <- length(GERGM_Object@theta.par)
  Hysteresis_Results <- vector(mode = "list", length = num_network_terms)
  cat("Observed transformed network has density:",
      mean(GERGM_Object@bounded.network),"\n")
  observed_density <- mean(GERGM_Object@bounded.network)
  # check initial density adjustment
  if (initial_density > 1) {
    initial_density = 1
  }
  if (initial_density < 0) {
    initial_density = 0
  }
  cat("Setting initial network density to:",initial_density,"\n")

  # if there is only one network term, do not run in parallel
  cores <- 1
  if(parallel){
    if(num_network_terms == 1){
      parallel <- FALSE
    }
    cores <- num_network_terms
  }

  # if we are doing things in parallel, start a cluster
  if(parallel){
    vec <- 1:num_network_terms
    cat("Simulating networks in parallel on",length(vec),
        "cores. This may take a while...\n")
    cl <- parallel::makeCluster(getOption("cl.cores", cores))

    results <- parallel::clusterApplyLB(cl = cl,
      x = vec,
      fun = hysteresis_parallel,
      GERGM_Object = GERGM_Object,
      initial_density = initial_density,
      possible_structural_terms = possible_structural_terms,
      seed = seed,
      steps = steps,
      observed_density = observed_density,
      range = range)

    # stop the cluster when we are done
    parallel::stopCluster(cl)

    for(k in 1:length(results)){
      cur_term <- GERGM_Object@stats_to_use[k]
      Hysteresis_Results[[k]] <- results[[k]]
      hysteresis_plot(Hysteresis_Results[k])
      if(!is.null(output_name)){
        try({
          pdf(file = paste(output_name,"_hysteresis_",cur_term,".pdf",sep = ""),
              height = 5,
              width = 8)
          hysteresis_plot(Hysteresis_Results[k])
          dev.off()
        })
      }
    }

  }else{
    #else do things serially
    for (i in 1:num_network_terms) {
      # figure out the range of values for each parameter
      current_theta <- GERGM_Object@theta.par[i]
      theta_se <- GERGM_Object@theta.coef[2,i]
      min_val <- current_theta - range * theta_se
      max_val <- current_theta + range * theta_se
      hysteresis_values <- seq(min_val, max_val, length.out = 2 * steps + 1)
      last_network <- floor(networks_to_simulate*thin)
      network_densities <- matrix(0,
                                  nrow = ceiling(networks_to_simulate*thin),
                                  ncol = 2*length(hysteresis_values))
      #GERGM_Object@bounded.network <- bounded_network
      n_nodes <- nrow(GERGM_Object@bounded.network)
      zero_net <- matrix(initial_density,n_nodes,n_nodes)
      GERGM_Object@bounded.network <- zero_net
      cur_term <- GERGM_Object@stats_to_use[i]
      # tell the user what is going on
      cat("Currently simulating networks while varying the",
          cur_term,"parameter from:",min_val,"to",max_val,"for a total of",
          length(hysteresis_values),"simulations...\n")

      # loop over values for theta
      column_counter <- 1
      for(j in 1:length(hysteresis_values)){

        # set the current value
        GERGM_Object@theta.par[i] <- hysteresis_values[j]
        cat("Current theta values:",GERGM_Object@theta.par,"\n")
        # simulate networks
        GERGM_Object <- Simulate_GERGM(
          GERGM_Object,
          seed1 = seed,
          possible.stats = possible_structural_terms)

        # assign the last simulated network as the starting network for the next
        # simulation
        GERGM_Object@bounded.network <- GERGM_Object@MCMC_output$Networks[,,last_network]
        # save the densities
        nr <- nrow(GERGM_Object@network)
        normalizer <- nr * (nr - 1)
        network_densities[,column_counter] <- GERGM_Object@MCMC_output$Statistics$edges/normalizer
        column_counter <- column_counter + 1
      }
      # now back down
      for(j in length(hysteresis_values):1){

        # set the current value
        GERGM_Object@theta.par[i] <- hysteresis_values[j]
        cat("Current theta values:",GERGM_Object@theta.par,"\n")
        # simulate networks
        GERGM_Object <- Simulate_GERGM(
          GERGM_Object,
          seed1 = seed,
          possible.stats = possible_structural_terms)

        # assign the last simulated network as the starting network for the next
        # simulation
        GERGM_Object@bounded.network <- GERGM_Object@MCMC_output$Networks[,,last_network]

        # save the densities
        nr <- nrow(GERGM_Object@network)
        normalizer <- nr * (nr - 1)
        network_densities[,column_counter] <- GERGM_Object@MCMC_output$Statistics$edges/normalizer
        column_counter <- column_counter + 1
      }

      # reset the theta value
      GERGM_Object@theta.par[i] <- current_theta
      mean_densities <- apply(network_densities,2,mean)
      thetas <- c(hysteresis_values, rev(hysteresis_values))

      hysteresis_dataframe <- data.frame(theta_values = thetas,
                                         mean_densities = mean_densities)

      Hysteresis_Results[[i]] <- list(network_densities = network_densities,
                                      mean_densities = mean_densities,
                                      theta_values = hysteresis_values,
                                      hysteresis_dataframe = hysteresis_dataframe,
                                      observed_density = observed_density,
                                      term = cur_term)

      # now make plots
      hysteresis_plot(Hysteresis_Results[i])
      if(!is.null(output_name)){
        try({
          pdf(file = paste(output_name,"_hysteresis_",cur_term,".pdf",sep = ""),
              height = 5,
              width = 8)
          hysteresis_plot(Hysteresis_Results[i])
          dev.off()
        })
      }

    }# end loop over network terms
  }# end parallel conditional

  # clean up a return everything
  setwd(currentwd)
  return(Hysteresis_Results)
}

Try the GERGM package in your browser

Any scripts or data that you put into this service are public.

GERGM documentation built on May 2, 2019, 5:14 a.m.