#' Simple simulation about potential outcomes and causal estimation
#'
#' Make a binary causal state, a binary valued potential outcome
#' @param n number of individuals
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_potential_outcomes <- function(n,seed = sample.int(.Machine$integer.max, 1))
{
set.seed(seed)
outcome_values <- c(1, 0)
prob_y1 <- .55
prob_y0 <- .45
DATA <- data.table(
y0 = 1 * (runif(n) < prob_y0),
y1 = 1 * (runif(n) < prob_y1))
attr(DATA, "seed") <- seed
DATA
}
#' Simulates potential outcomes and adds a treatment variable
#' @param n number of individuals
#' @param prob_treatment probability of treatment
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_simple_experiment <- function(n, prob_treatment = .5,
seed = sample.int(.Machine$integer.max, 1))
{
set.seed(seed)
DATA <- simulate_potential_outcomes(n, seed = seed)
DATA[, prob := prob_treatment]
DATA[, d := 1 * (runif(n) < prob)]
DATA[, y := d * y1 + (1 - d) * y0]
DATA[, `:=`(y1 = NULL, y0 = NULL)]
attr(DATA, "seed") <- seed
DATA
}
#' Calculate naive difference in means
#'
#' Calculate naive difference in means
#' @param DATA data.table generated by simulate_simple_experiment
#' @return estimated average
#' @import data.table
#' @export
calc_naive_difference_in_means <- function(DATA)
{
DATA[, mean(y[d == 1]) - mean(y[d == 0])]
}
#' Simulate an observational study
#'
#' This study will all individuals to select their treatment value
#' based on their potential outcomes
#' @param n number of individuals
#' @param seed integer value for random seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
simulate_observational_data <- function(n,
seed = sample.int(.Machine$integer.max, 1))
{
set.seed(seed)
DATA <- simulate_potential_outcomes(n, seed = seed)
DATA[, prob := plogis(-2 + 4 * (y1 - y0))]
DATA[, d := 1 * (runif(n) < prob)]
DATA[, y := d * y1 + (1 - d) * y0]
DATA[, `:=`(y1 = NULL, y0 = NULL, prob = NULL)]
attr(DATA, "seed") <- seed
DATA
}
#' Simulate data that corresponds to Figure 4.1 from Morgan & Winship (2007)
#'
#' Data generation
#' @param n number of observations
#' @param seed RNG seed
#' @return data.table with potential outcomes
#' @import data.table
#' @export
make_data_for_figure_4_1 <- function(n,
seed = sample.int(.Machine$integer.max, 1))
{
set.seed(seed)
u <- rbinom(n, 1, prob = .5)
s <- rbinom(n, 1, prob = plogis(-1 + 2 * u))
x <- rbinom(n, 1, prob = plogis(1 - 2 * u))
d <- rbinom(n, 1, prob = plogis(-3 + 6 * s))
y1 <- rbinom(n, 1, prob = plogis(-1 + 2 * 1 - 2 * x))
y0 <- rbinom(n, 1, prob = plogis(-1 + 2 * 0 - 2 * x))
y <- d * y1 + (1 - d) * y0
DATA <- data.table(y1, y0, y, u, s, x, d)
attr(DATA, "seed") <- seed
DATA
}
#' Calculate average causal effect estimand
#'
#' DETAIL
#' @param DATA a dataset with potential outcomes named y0 and y1
#' @return number
#' @import data.table
#' @export
calc_average_causal_effect_estimand <- function(DATA, conditioning_variable)
{
DATA[, mean(y1 - y0)]
}
#' Calculate the naive difference in means
#' @param DATA a dataset, can't have a variable named "Z" in it
#' @param conditioning_variable character object for variable name in DATA;
#' assumed to be binary 0-1
#' @return number
#' @import data.table
#' @export
calc_conditional_difference_in_means <- function(DATA, conditioning_variable)
{
Z <- DATA[, conditioning_variable, with = FALSE]
p_Z_1 <- DATA[, mean(Z == 1)]
p_Z_0 <- DATA[, mean(Z == 0)]
mean_y_D_1_Z_1 <- DATA[, mean(y[d == 1 & Z == 1])]
mean_y_D_1_Z_0 <- DATA[, mean(y[d == 1 & Z == 0])]
mean_y_D_0_Z_1 <- DATA[, mean(y[d == 0 & Z == 1])]
mean_y_D_0_Z_0 <- DATA[, mean(y[d == 0 & Z == 0])]
(p_Z_1 * mean_y_D_1_Z_1 + p_Z_0 * mean_y_D_1_Z_0) -
(p_Z_1 * mean_y_D_0_Z_1 + p_Z_0 * mean_y_D_0_Z_0)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.