R/Simulations.R

Defines functions make_model_compare_plot make_ws_plot make_er_plot make_disag_pmax_plot run_all_simulations

Documented in make_er_plot make_model_compare_plot make_ws_plot run_all_simulations

# Load functions
# source("GM_functions.R")

#############################################################
###### Run simulations
#############################################################

# run <- function(){
# k = number of shuffled vertices
# 

# This function runs one simulation instance example 
# example_simulation_instance <- simulation_instance_ER(k_grid = k_grid[1],seed = 1)

# # NOTE: simulations take several hours to finish. Uncomment at your own risk

# # Set up clusters

#' Run all simulations from the paper.
#' 
#' This replicates the results from the paper.
#' 
#' @param ask Ask about running big simulations.
#' 
#' @seealso \code{\link{simulation_instance}},
#'  \code{\link{simulation_instance_ER}},
#'  \code{\link{simulation_instance_WS}}
#' 
#' @export
run_all_simulations <- function(ask = TRUE){

  if (!are_you_sure(ask)) {
    return();
  }

  k_grid <- c(2, 3, 4, 5, 10, 25, 50, 100, 200, 400)
  cl <-  makeCluster(7)
  clusterEvalQ(cl = cl, library(Matrix))
  clusterEvalQ(cl = cl, library(igraph))
  clusterExport(cl, 
    varlist = list("simulation_instance_ER",
      "simulation_instance_WS", "simulation_instance",
      "k_grid", "kshuffle_mean_sd"))

  # ER models
  parsim_er <- parallel::parLapply(cl = cl, 1:35,
    function(i) simulation_instance_ER(k_grid = k_grid, seed = i, ask = FALSE))
  #save(parsim_er, file = "Results/parsim_ER.RData")

  ## WS models
  parsim_ws <- parallel::parLapply(cl = cl, 1:35,
    function(i) simulation_instance_WS(k_grid = k_grid, seed = i, ask = FALSE))
  #save(parsim_ws, file = "Results/parsim_WS.RData")

  ## all models
  parsim5 <- parallel::parLapply(cl = cl, 1:35,
    function(i) simulation_instance(d = 5, k_grid = k_grid, seed = i, ask = FALSE))
  #save(parsim5, file = "Results/parsim5.RData")
  parsim50 <- parallel::parLapply(cl = cl, 1:35,
    function(i) simulation_instance(d = 50, k_grid = k_grid, seed = i, ask = FALSE))
  #save(parsim50, file = "Results/parsim50.RData")

  stopCluster(cl)
  list(parsim_er = parsim_er, parsim_ws = parsim_ws, 
    parsim5 = parsim5, parsim50 = parsim50)
}

make_disag_pmax_plot <- function(parsim, colors, labs, flabel){
  sum_df <- parsim %>% dplyr::bind_rows() %>%
    tidyr::pivot_longer(c("p.max", "mean")) %>%
    dplyr::group_by(modparam, K, name) %>%
    dplyr::summarize(
      val = mean(value),
      sd = stats::sd(value),
      nmc = n(),
      se = sd / sqrt(nmc)) %>%
    dplyr::mutate(val_normalized = ifelse(name == "p.max",
      val, val / (K * log(500)))) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(K = K / 500)

  ggplot(sum_df, aes(y = val_normalized, x= (K) )) + 
    geom_line(aes((K), val_normalized, color=modparam, linetype=modparam)) +
    geom_point(aes((K), val_normalized, color=modparam,  shape=modparam))+
    scale_x_log10() + 
    xlab("Fraction of shuffled vertices") +
    scale_color_manual(name = "Model", 
                       labels = labs,
                       values = rev(colors)) +
    scale_linetype_manual(name = "Model", 
                          labels = labs,
                          values = c(1, 2, 3, 4, 5))+
    scale_shape_manual(name = "Model", 
                       labels = labs,
                       values = c(19, 4, 20, 17, 0, 19))+
    facet_grid(rows = dplyr::vars(name), scales = "free_y", labeller = as_labeller(flabel)) + 
    ylab("")   +
    theme_bw() 

}


#############################################################

#' Simulation 1: ER models
#' 
#' Make plot from simulations generated by simulation_instance_ER.
#' 
#' @param data List of data frames created by simulation_instance_ER.
#'  NULL for data from the manuscript.
#' 
#' @examples
#'  make_er_plot()
#' 
#' @export
make_er_plot <- function(data = NULL){
  if(!is.null(data)){
    parsim_er <- data
  }


  flabel <- c("p.max" = "Probability bound", "mean"= "Normalized mean disag.")
  labs <- c("G(n, 0.1)", "G(n, 0.2)","G(n, 0.3)","G(n, 0.4)","G(n, 0.5)")
  colors <- c("#F8766D", "#F8986D", "#F8BA6D", "#F8CD6D", "#F8DD6D")
  
  # pdf("Simulations-ER.pdf", width = 4, height = 4)
  make_disag_pmax_plot(parsim_er, colors, 
    labs, flabel)
  # dev.off()
}


###########################################################################
###########################################################################

#' Simulation 2: WS models
#' 
#' Make plot from simulations generated by simulation_instance_WS.
#' 
#' @param data List of data frames created by simulation_instance_WS.
#'  NULL for data from the manuscript.
#' 
#' @examples
#'  make_ws_plot()
#' 
#' @export
make_ws_plot <- function(data = NULL){
  if(!is.null(data)){
    parsim_ws <- data
  }
  flabel <- c("p.max" = "Probability bound", "mean"= "Normalized mean disag.")
  labs <- c("WS(n, d, 0)", "WS(n, d, 0.05)","WS(n, d, 0.1)","WS(n, d, 0.25)","WS(n, d, 1)")
  colors <- c("#766DFF", "#986DFF", "#BA6DFF", "#CD6DFF", "#DD6DFF")

  # pdf("Simulations-WS.pdf", width = 4, height = 4)
  make_disag_pmax_plot(parsim_ws, colors, 
    labs, flabel)
  # dev.off()
}



#########################################################################################
#########################################################################################

#' Simulation 3: comparing different models for degree = 5 and 50
#' 
#' Make plot from simulations generated by simulation_instance
#' 
#' 
#' @param data5 List of data frames created by simulation_instance for d=5.
#'  NULL for data from the manuscript.
#' @param data50 List of data frames created by simulation_instance for d=50.
#'  NULL for data from the manuscript.
#' 
#' @examples
#'  make_model_compare_plot()
#' 
#' @export
make_model_compare_plot <- function(data5 = NULL, data50 = NULL){
  if(!is.null(data5)){
    parsim5 <- data5
  }
  if(!is.null(data50)){
    parsim50 <- data50
  }
  binded_simulations <- dplyr::bind_rows(
      parsim5 %>% dplyr::bind_rows() %>% dplyr::mutate(d = "d=5"),
      parsim50 %>% dplyr::bind_rows() %>% dplyr::mutate(d = "d=50")
    ) %>%
    tidyr::pivot_longer(c("p.max", "mean"), names_to = "variable") %>%
    dplyr::group_by(modparam, K, variable, d) %>%
    dplyr::summarize(
      val = mean(value),
      sd = stats::sd(value),
      nmc = n(),
      se = sd / sqrt(nmc)) %>%
    dplyr::mutate(val_normalized = ifelse(variable == "p.max",
      val, val / (K * log(500)))) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(K = K / 500)

  labsmodels <- c("G(n, dn)", "WS(n, d, 0.05)", "WS(n, d, 0.75)", "PA(n, 1, d)", "PA(n, 2, d)")

  flabel <- c("p.max" = "Probability bound", "mean"= "Normalized mean disag.", 
              "d=5" = "d=5","d=50" = "d=50")

  p_model_compare <- 
    ggplot(binded_simulations) +
    aes(x = K, y = val_normalized,
        color = modparam, linetype = modparam, shape = modparam) + 
    geom_line() +
    geom_point()+
    scale_x_log10() + 
    facet_grid(cols = dplyr::vars(d), rows = dplyr::vars(variable),
               scales = "free_y", labeller = as_labeller(flabel)) + ylab("") +
    theme_bw() +
    xlab("Fraction of shuffled vertices") +
    scale_color_manual(name = "Model", 
                       labels = labsmodels,
                       values = c("red", "#3399FF", "#339900", "#FF9933","violet","#00BFC4")) +
    scale_linetype_manual(name = "Model", 
                          labels = labsmodels,
                          values = c(1, 3, 1, 2, 1, 3))+
    scale_shape_manual(name = "Model", 
                       labels = labsmodels,
                       values = c(19, 17, 5, 4, 18))

  # pdf("simulations-models.pdf", width = 6, height = 5)
  p_model_compare
  # dev.off()
}
dpmcsuss/gmmle documentation built on July 2, 2020, 6:24 p.m.