R/simulation.R

Defines functions simulate_potential_outcomes simulate_simple_experiment calc_naive_difference_in_means simulate_observational_data make_data_for_figure_4_1 calc_average_causal_effect_estimand calc_conditional_difference_in_means

Documented in calc_average_causal_effect_estimand calc_conditional_difference_in_means calc_naive_difference_in_means make_data_for_figure_4_1 simulate_observational_data simulate_potential_outcomes simulate_simple_experiment

#' Simple simulation about potential outcomes and causal estimation
#'
#' Make a binary causal state, a binary valued potential outcome
#' @param n number of individuals
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_potential_outcomes <- function(n,seed = sample.int(.Machine$integer.max, 1))
{
  set.seed(seed)
  outcome_values <- c(1, 0)
  prob_y1 <- .55
  prob_y0 <- .45
  DATA <- data.table(
    y0 = 1 * (runif(n) < prob_y0),
    y1 = 1 * (runif(n) < prob_y1))
  attr(DATA, "seed") <- seed
  DATA
}

#' Simulates potential outcomes and adds a treatment variable
#' @param n number of individuals
#' @param prob_treatment probability of treatment
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_simple_experiment <- function(n, prob_treatment = .5,
                                       seed = sample.int(.Machine$integer.max, 1))
{
  set.seed(seed)
  DATA <- simulate_potential_outcomes(n, seed = seed)
  DATA[, prob := prob_treatment]
  DATA[, d := 1 * (runif(n) < prob)]
  DATA[, y := d * y1 + (1 - d) * y0]
  DATA[, `:=`(y1 = NULL, y0 = NULL)]
  attr(DATA, "seed") <- seed
  DATA
}

#' Calculate naive difference in means
#'
#' Calculate naive difference in means
#' @param DATA data.table generated by simulate_simple_experiment
#' @return estimated average
#' @import data.table
#' @export
calc_naive_difference_in_means <- function(DATA)
{
  DATA[, mean(y[d == 1]) - mean(y[d == 0])]
}

#' Simulate an observational study
#'
#' This study will all individuals to select their treatment value
#' based on their potential outcomes
#' @param n number of individuals
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_observational_data <- function(n,
                                        seed = sample.int(.Machine$integer.max, 1))
{
  set.seed(seed)
  DATA <- simulate_potential_outcomes(n, seed = seed)
  DATA[, prob := plogis(-2 + 4 * (y1 - y0))]
  DATA[, d := 1 * (runif(n) < prob)]
  DATA[, y := d * y1 + (1 - d) * y0]
  DATA[, `:=`(y1 = NULL, y0 = NULL, prob = NULL)]
  attr(DATA, "seed") <- seed
  DATA
}

#' Simulate data that corresponds to Figure 4.1 from Morgan & Winship (2007)
#'

#' Data generation
#' @param n number of observations
#' @param seed RNG seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
make_data_for_figure_4_1 <- function(n,
                                     seed = sample.int(.Machine$integer.max, 1))
{
  set.seed(seed)
  u <- rbinom(n, 1, prob = .5)
  s <- rbinom(n, 1, prob = plogis(-1 + 2 * u))
  x <- rbinom(n, 1, prob = plogis(1 - 2 * u))
  d <- rbinom(n, 1, prob = plogis(-3  + 6 * s))
  y1 <- rbinom(n, 1, prob = plogis(-1 + 2 * 1 - 2 * x))
  y0 <- rbinom(n, 1, prob = plogis(-1 + 2 * 0 - 2 * x))
  y <- d * y1 + (1 - d) * y0
  DATA <- data.table(y1, y0, y, u, s, x, d)
  attr(DATA, "seed") <- seed
  DATA
}


#' Calculate average causal effect estimand
#'
#' DETAIL
#' @param DATA a dataset with potential outcomes named y0 and y1
#' @return number
#' @import data.table
#' @export
calc_average_causal_effect_estimand <- function(DATA, conditioning_variable)
{
  DATA[, mean(y1 - y0)]
}


#' Calculate the naive difference in means
#' @param DATA a dataset, can't have a variable named "Z" in it
#' @param conditioning_variable character object for variable name in DATA;
#' assumed to be binary 0-1
#' @return number
#' @import data.table
#' @export
calc_conditional_difference_in_means <- function(DATA, conditioning_variable)
{
  Z <- DATA[, conditioning_variable, with = FALSE]
  p_Z_1 <- DATA[, mean(Z == 1)]
  p_Z_0 <- DATA[, mean(Z == 0)]
  mean_y_D_1_Z_1 <- DATA[, mean(y[d == 1 & Z == 1])]
  mean_y_D_1_Z_0 <- DATA[, mean(y[d == 1 & Z == 0])]
  mean_y_D_0_Z_1 <- DATA[, mean(y[d == 0 & Z == 1])]
  mean_y_D_0_Z_0 <- DATA[, mean(y[d == 0 & Z == 0])]
  (p_Z_1 * mean_y_D_1_Z_1 + p_Z_0 * mean_y_D_1_Z_0) -
    (p_Z_1 * mean_y_D_0_Z_1 + p_Z_0 * mean_y_D_0_Z_0)
}
jianzih/causalA16 documentation built on May 19, 2019, 9:29 a.m.