R/esci_simulations.R

Defines functions: validate_mdiff_contrast_bs, validate_mdiff_one, esci_simulator, sim_mdiff_contrast_bs, sim_mdiff_one

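#' Simulation check of estimate_mdiff_one
#'
#' Repeatedly draws samples of size comparison_n from a normal population
#' (mean comparison_m; sd population_s if known, otherwise comparison_s),
#' analyzes each sample with estimate_mdiff_one, and tracks CI capture rate,
#' average CI width, bias, root-mean-square error, and the ratio of intervals
#' falling entirely above versus entirely below the population value, for both
#' the raw and standardized effect sizes.
#'
#' @return A one-row data frame recording the simulation parameters and the
#'   tracked outcomes.
#'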
#' @export
sim_mdiff_one <- function(
  comparison_m,
  comparison_s,
  comparison_n,
  population_m,
  population_s = NULL,
  conf_level = 0.95,
  runs = 1000,
  verbose = FALSE
) {
  
  # Labels
  labels <- c("Comparison", "Reference", "Difference", "SMD")
  
  
  # Population info -------------------------------------------------
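  # Run the estimator once on the population parameters themselves; the
  # resulting effect sizes serve as the "true" values each simulated CI
  # should capture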
  population <- estimate_mdiff_one(
    comparison_m = comparison_m,
    comparison_s = comparison_s,
    comparison_n = comparison_n,
    population_m = population_m,
    population_s = population_s,
    conf_level = conf_level
  )
  
  
  population_es <- c(
    population$effect_sizes$effect_size, 
    population$standardized_effect_sizes$d_biased
  )
  population_se <- c(
    population$effect_sizes$se,
    population$standardized_effect_sizes$se
  )
  
  names(population_es) <- paste("pop.es.", labels, sep = "")
  names(population_se) <- paste("pop.se.", labels, sep = "")
  
  
  # Initialize trackers ------------------------------------------
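  # capture, overs, and unders count CI outcomes per run; bias, se, and
  # widths accumulate running sums that are averaged after the loop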
  capture <- c(0, 0, 0, 0)
  bias <- c(0, 0, 0, 0)
  se <- c(0, 0, 0, 0)
  widths <- c(0, 0, 0, 0)
  overs <- c(0, 0, 0, 0)
  unders <- c(0, 0, 0, 0)
  names(capture) <- paste("capture.", labels, sep = "")
  names(bias) <- paste("bias.", labels, sep = "")
  names(se) <- paste("se.", labels, sep = "")
  names(widths) <- paste("width.", labels, sep = "")
  names(overs) <- paste("overs.", labels, sep = "")
  names(unders) <- paste("unders.", labels, sep = "")
  run_check <- runs/5
  
  for (x in 1:runs) {
    # Generate data ------------------------------------------------
    # Draw one sample of size comparison_n; when the population sd is known,
    # generate with that sd, otherwise with the comparison sd
    this_data <- data.frame(
      outcome_variable = rnorm(
        n = comparison_n,
        mean = comparison_m,
        sd = if (is.null(population_s)) comparison_s else population_s
      )
    )
    
    # Analyze and prep sample data --------------------------------------------
    sample <- estimate_mdiff_one(
      data = this_data,
      outcome_variable = outcome_variable,
      population_m = population_m,
      population_s = population_s,
      conf_level = conf_level,
      save_raw_data = FALSE
    )
    
    sample_low <- c(
      sample$effect_sizes$lower,
      sample$standardized_effect_sizes$lower
    )
    sample_high <- c(
      sample$effect_sizes$upper,
      sample$standardized_effect_sizes$upper
    )
    sample_es <- c(
      sample$effect_sizes$effect_size,
      sample$standardized_effect_sizes$effect_size
    )
    
    
    # Track outcomes ------------------------------------------------------
    capture <- capture + as.numeric(
      (population_es >= sample_low) & (population_es <= sample_high)
    )
    overs <- overs + as.numeric((population_es < sample_low))
    unders <- unders + as.numeric((population_es > sample_high))
    
    bias <- bias + (sample_es - population_es)
    se <- se + (sample_es - population_es)^2
    widths <- widths + (sample_high - sample_low)
    
    if (x > run_check && verbose) {
      print(paste0(
        "Current batch: ",
        x / runs * 100,
        "% of runs complete at ",
        Sys.time()
      ))
      run_check <- run_check + runs / 5
    }
    
  }
  
  # Final prep to return results ----------------------------------------
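  # Convert running sums to rates and averages; se becomes the
  # root-mean-square deviation of the estimates from the population value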
  capture <- capture / runs
  bias <- bias/runs
  widths <- widths/runs
  se <- sqrt(se/runs)
  over_under_ratio <- overs / unders
  names(over_under_ratio) <- paste("over/under.", labels, sep = "")
  
  # Return data
  return( 
    data.frame(
      comparison_m = comparison_m,
      comparison_s = comparison_s,
      comparison_n = comparison_n,
      population_m = population_m,
      population_s = if(is.null(population_s)) "NULL" else population_s,
      conf_level = conf_level,
      runs = runs,
      t(population_es),
      t(population_se),
      t(capture),
      t(bias),
      t(se),
      t(widths),
      t(overs),
      t(unders),
      t(over_under_ratio)
    )
  )
  
  
}


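#' Simulation check of estimate_mdiff_contrast_bs
#'
#' Repeatedly simulates between-subjects data from normal populations with the
#' supplied means, sds, and ns, analyzes each data set with
#' estimate_mdiff_contrast_bs using the supplied contrast, and tracks CI
#' capture rate, average CI width, bias, and root-mean-square error for the
#' raw and standardized contrast effect sizes.
#'
#' @return A one-row data frame recording the simulation parameters and the
#'   tracked outcomes.
#'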
#' @export
sim_mdiff_contrast_bs <- function(
  means,
  sds, 
  ns,
  contrast,
  conf_level,
  assume_equal_variance,
  runs = 1000,
  verbose = FALSE
) {

  # Labels
  labels <- c("Comparison", "Reference", "Difference", "SMD")
  
  
  # Population info -------------------------------------------------
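  # Run the estimator once on the population means, sds, and ns; the
  # resulting contrast effect sizes serve as the "true" values each
  # simulated CI should capture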
  population <- estimate_mdiff_contrast_bs(
    means = means,
    sds = sds,
    ns = ns,
    contrast = contrast,
    group_labels = paste("group_labels - ", 1:length(means)),
    conf_level = conf_level,
    assume_equal_variance = assume_equal_variance
  )
  
  population_es <- c(
    population$effect_sizes$effect_size, 
    population$standardized_effect_sizes$d_biased
  )
  population_se <- c(
    population$effect_sizes$se,
    population$standardized_effect_sizes$se
  )
  
  names(population_es) <- paste("pop.es.", labels, sep = "")
  names(population_se) <- paste("pop.se.", labels, sep = "")

  
  # Initialize trackers ------------------------------------------
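  # capture, overs, and unders count CI outcomes per run; bias, se, and
  # widths accumulate running sums that are averaged after the loop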
  capture <- c(0, 0, 0, 0)
  bias <- c(0, 0, 0, 0)
  se <- c(0, 0, 0, 0)
  widths <- c(0, 0, 0, 0)
  overs <- c(0, 0, 0, 0)
  unders <- c(0, 0, 0, 0)
  names(capture) <- paste("capture.", labels, sep = "")
  names(bias) <- paste("bias.", labels, sep = "")
  names(se) <- paste("se.", labels, sep = "")
  names(widths) <- paste("width.", labels, sep = "")
  run_check <- runs/5

  for(x in 1:runs) {
    # Generate data ------------------------------------------------
    # Empty data frame
    this_data <- data.frame(
      grouping_variable = character(), outcome_variable = double()
    )
    # Create data, group by groups
    for (y in 1:length(means)) {
      this_data <- rbind(
        this_data,
        data.frame(
          grouping_variable = paste("Group", y),
          outcome_variable = rnorm(n = ns[y], mean = means[y], sd = sds[y])
        )
      )
    }
    # Make grouping variable a factor
    this_data$grouping_variable <- as.factor(this_data$grouping_variable)
    
    
    # Analyze and prep sample data --------------------------------------------
    sample <- estimate_mdiff_contrast_bs(
      data = this_data,
      grouping_variable = grouping_variable,
      outcome_variable = outcome_variable,
      contrast = contrast,
      conf_level = conf_level,
      assume_equal_variance = assume_equal_variance,
      save_raw_data = FALSE
    )
    
    sample_low <- c(
      sample$effect_sizes$lower,
      sample$standardized_effect_sizes$lower
    )
    sample_high <- c(
      sample$effect_sizes$upper,
      sample$standardized_effect_sizes$upper
    )
    sample_es <- c(
      sample$effect_sizes$effect_size,
      sample$standardized_effect_sizes$effect_size
    )
    
    
    # Track outcomes ------------------------------------------------------
    capture <- capture + as.numeric(
      (population_es >= sample_low) & (population_es <= sample_high)
    )
    overs <- overs + as.numeric((population_es < sample_low))
    unders <- unders + as.numeric((population_es > sample_high))
    
    bias <- bias + (sample_es - population_es)
    se <- se + (sample_es - population_es)^2
    widths <- widths + (sample_high - sample_low)
    
    if (x > run_check && verbose) {
      print(paste0(
        "Current batch: ",
        x / runs * 100,
        "% of runs complete at ",
        Sys.time()
      ))
      run_check <- run_check + runs / 5
    }
   
  }
  
  # Final prep to return results ----------------------------------------
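  # Convert running sums to rates and averages; se becomes the
  # root-mean-square deviation of the estimates from the population value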
  capture <- capture / runs
  bias <- bias/runs
  widths <- widths/runs
  se <- sqrt(se/runs)
  over_under_ratio <- overs / unders
  names(over_under_ratio) <- paste("over/under.", labels, sep = "")
  
  # Return data
  return( 
    data.frame(
      means = paste("(", means, ")", collapse = ", "),
      sds = paste("(", sds, ")", collapse = ", "),
      ns = paste("(", ns, ")", collapse = ", "),
      contrast = paste("(", contrast, ")", collapse = ", "),
      conf_level = conf_level,
      assume_equal_variance = assume_equal_variance,
      runs = runs,
      t(population_es),
      t(population_se),
      t(capture),
      t(bias),
      t(se),
      t(widths),
      t(over_under_ratio)
    )
  )
  
}

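#' Run a simulation function over a grid of parameter values
#'
#' Each element of params is a list of candidate values for one argument of
#' sim_function. The simulator calls sim_function once for every combination
#' of those values and row-binds the one-row results into a single data frame.
#'
#' @param params Named list; each element is a list of values to cross.
#' @param sim_function Simulation function to call, e.g. sim_mdiff_one or
#'   sim_mdiff_contrast_bs.
#' @param verbose If TRUE, print progress as batches complete.
#'
#' @return A data frame with one row per parameter combination.
#'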
#' @export
esci_simulator <- function(params, sim_function, verbose = TRUE) {

  # Determine problem space to explore ------------------------------
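  # The total number of batches is the product of the number of candidate
  # values supplied for each parameter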
  param_count <- 0
  batches_needed <- 1
  for (param in names(params)) {
    param_count <- param_count + 1
    batches_needed <- batches_needed * length(params[[param]])
  }
  print(paste(batches_needed, "batches to run"))
  
  # Initialization
  indexes <- rep(1, times = param_count)
  batches_run <- 0
  batches_report <- batches_needed / 20
  result <- NULL
  
  while (batches_run < batches_needed) {
    # Set arguments for this batch; single-bracket list assignment keeps
    # NULL parameter values (e.g. population_s = NULL) instead of dropping them
    args <- list()
    for (x in 1:length(params)) {
      args[names(params)[[x]]] <- list(params[[x]][[indexes[[x]]]])
    }
    
    # Call the simulator function
    result <- rbind(
      result,
      do.call(
        what = sim_function,
        args = args
      )
    )
    
    # Update indexes
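    # Odometer-style increment: bump the first index and carry into later
    # parameters whenever an index runs past the end of its value list, so
    # every combination of parameter values is visited exactly once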
    indexes[1] <- indexes[1] + 1
    for (y in 1:length(indexes)) {
      if (indexes[[y]] > length(params[[y]])) {
        indexes[[y]] <- 1
        if(y+1 <= length(indexes))
          indexes[[y+1]] <- indexes[[y+1]] + 1
      }
    }
    
    # Progress updates
    batches_run <- batches_run + 1
    if (verbose && batches_run > batches_report) {
      print(paste0(
        batches_run / batches_needed * 100,
        "% completed @ ",
        Sys.time()
      ))
      batches_report <- batches_report + batches_needed / 20
    }
    
  }
  
  return(result)
}
 

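#' Validation runs for estimate_mdiff_one
#'
#' Runs sim_mdiff_one over a grid of one-sample scenarios, first with a known
#' population sd and then with the sd estimated from the sample, writing the
#' results to mdiff_one_pop_s_known.csv and mdiff_one_sample_s.csv in the
#' working directory.
#'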
#' @export
validate_mdiff_one <- function() {
  runs <- 20000
  
  print("One-sample z-test, known population sd")
  params <- list()
  params$comparison_m <- list(0, 0.25, 0.5, 1, 2)
  params$comparison_s <- list(2)
  params$comparison_n <- list(5, 10, 20, 40, 80)
  params$population_m <- list(0)
  params$population_s <- list(1, 2)
  params$runs <- list(runs)
  params$verbose <- list(TRUE)
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_one
  )
  
  write.csv(result, "mdiff_one_pop_s_known.csv")
  
  
  print("One-sample z-test, sample sd")
  params <- list()
  params$comparison_m = list(0, 0.25, 0.5, 1, 2)
  params$comparison_s = list(1, 2)
  params$comparison_n = list(5, 10, 20, 40, 80)
  params$population_m = list(0)
  params$population_s = list(NULL)
  params$runs <- list(runs)
  params$verbose <- list(TRUE)
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_one
  )
  
  write.csv(result, "mdiff_one_sample_s.csv")
  
}


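#' Validation runs for estimate_mdiff_contrast_bs
#'
#' Runs sim_mdiff_contrast_bs over two-group and four-group scenarios, with
#' and without the assumption of equal variance, writing each set of results
#' to a csv file in the working directory.
#'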
#' @export
validate_mdiff_contrast_bs <- function() { 
  
  runs <- 20000
  
  # Two groups, equal variance assumed --------------------------------------
  print("two_groups_equal_variance")
  params <- list()
  params$means <- list(
    c(0, 0),
    c(0, .10),
    c(0, .20),
    c(0, .50), 
    c(0, 1),
    c(0, 2)
  )
  params$sds <- list(
    c(1, 1)
  )
  params$ns <- list(
    c(5, 5), 
    c(10, 10),
    c(20, 20),
    c(40, 40),
    c(80, 80),
    c(10, 20),
    c(10, 30)
  )
  params$contrast <- list(
    c(-1, 1)
  )
  params$conf_level <- list(
    c(0.95)
  )
  params$assume_equal_variance <- list(
    c(TRUE)
  )
  params$runs <- list(
    c(runs)
  )
  params$verbose <- list(
    c(TRUE)
  )
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_contrast_bs
  )
  
  write.csv(result, "two_groups_equal_variance.csv")
  
  
  # Two groups, no assumption of equal variance ----------------------------
  print("two_groups_no_equal_variance")
  params <- list()
  params$means <- list(
    c(0, 0),
    c(0, .10),
    c(0, .20),
    c(0, .50), 
    c(0, 1),
    c(0, 2)
  )
  params$sds <- list(
    c(1, 1),
    c(1, 1.5),
    c(1, 2)
  )
  params$ns <- list(
    c(5, 5), 
    c(10, 10),
    c(20, 20),
    c(40, 40),
    c(80, 80),
    c(10, 30),
    c(30, 10)
  )
  params$contrast <- list(
    c(-1, 1)
  )
  params$conf_level <- list(
    c(0.95)
  )
  params$assume_equal_variance <- list(
    c(FALSE)
  )
  params$runs <- list(
    c(runs)
  )
  params$verbose <- list(
    c(TRUE)
  )
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_contrast_bs
  )
  
  write.csv(result, "two_groups_no_equal_variance.csv")
  

  # Four groups, equal variance assumed----------------------------
  print("four_groups_equal_variance")
  params <- list()
  params$means <- list(
    c(0, 0, 0, 0),
    c(-.125, .125, .125, .625),
    c(-.50, 0.50, 0, 1),
    c(0, 0, .750, 1.25),
    c(-1, 1, 1.5, 2.5)
  )
  params$sds <- list(
    c(1, 1, 1, 1)
  )
  params$ns <- list(
    c(5, 5, 5, 5), 
    c(10, 10, 10, 10),
    c(20, 20, 20, 20),
    c(40, 40, 40, 40),
    c(80, 80, 80, 80),
    c(30, 10, 10, 10),
    c(10, 10, 10, 30)
  )
  params$contrast <- list(
    c(-1/2, -1/2, 1/2, 1/2),
    c(-1, 1/3, 1/3, 1/3)
  )
  params$conf_level <- list(
    c(0.95)
  )
  params$assume_equal_variance <- list(
    c(TRUE)
  )
  params$runs <- list(
    c(runs)
  )
  params$verbose <- list(
    c(TRUE)
  )
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_contrast_bs
  )
  
  write.csv(result, "four_groups_equal_variance.csv")

  
  # Four groups, no equal variance assumed----------------------------
  print("four_groups_no_equal_variance")
  params <- list()
  params$means <- list(
    c(0, 0, 0, 0),
    c(-.125, .125, .125, .625),
    c(-.50, 0.50, 0, 1),
    c(0, 0, .750, 1.25),
    c(-1, 1, 1.5, 2.5)
  )
  params$sds <- list(
    c(1, 1, 1.5, 1.5),
    c(1, 1, 2, 2),
    c(1, 2, 1, 2)
  )
  params$ns <- list(
    c(5, 5, 5, 5), 
    c(10, 10, 10, 10),
    c(20, 20, 20, 20),
    c(40, 40, 40, 40),
    c(80, 80, 80, 80),
    c(30, 10, 10, 10),
    c(10, 10, 10, 30)
  )
  params$contrast <- list(
    c(-1/2, -1/2, 1/2, 1/2),
    c(-1, 1/3, 1/3, 1/3)
  )
  params$conf_level <- list(
    c(0.95)
  )
  params$assume_equal_variance <- list(
    c(FALSE)
  )
  params$runs <- list(
    c(runs)
  )
  params$verbose <- list(
    c(TRUE)
  )
  
  result <- esci_simulator(
    params = params,
    sim_function = sim_mdiff_contrast_bs
  )
  
  write.csv(result, "four_groups_no_equal_variance.csv")
    
}