Nothing
make_sim_data <- function(n_samp = 1000, dgp_type) {
# Simulation helper for generating data under a variety of data-generating
# processes. In each case, the treatment mechanism is normally distributed
# with the mean (and, sometimes, variance) being functions of the baseline
# covariates, while the outcome mechanism is a nontrivial function of the
# exposure and baseline covariates.
# Scenario definitions
# - number indicates _same_ outcome mechanism, similar treatment mechanisms
# - usually, treatment mechanism complexity differs within scenarios
# SCENARIO 1a: exposure density is N(mu = f(W), sigma2 = c)
if (dgp_type == "1a") {
# define treatment mechanism
g0 <- function(W1, W2, W3) {
mu <- 2 * W1 + W2 - 2 * (1 - W1) * W2
sigma2 <- 2
return(list(mu = mu, sigma2 = sigma2))
}
# define outcome mechanism
Q0 <- function(A, W1, W2, W3) {
plogis(3 * (A - 1) + W1 + 1.5 * W2^2 - W3 - 3 * (1 - W1) * W2)
}
# simulate data
W1 <- rbinom(n_samp, 1, 0.6)
W2 <- runif(n_samp, 0.5, 1.5)
W3 <- rpois(n_samp, 2)
g_obs <- g0(W1, W2, W3)
A <- rnorm(n_samp, g_obs$mu, sqrt(g_obs$sigma2))
Y <- rbinom(n_samp, 1, Q0(A, W1, W2, W3))
data_obs <- as_tibble(
list(W1 = W1, W2 = W2, W3 = W3, A = A, Y = Y)
)
}
# SCENARIO 1b: exposure density is N(mu = f_1(W), sigma2 = f_2(W))
if (dgp_type == "1b") {
# define treatment mechanism
g0 <- function(W1, W2, W3) {
mu <- W1 + 2 * W2 - 2 * (1 - W1) * W2
sigma2 <- (W1 * 2) + ((1 - W1) * 0.5) + W2
return(list(mu = mu, sigma2 = sigma2))
}
# define outcome mechanism
Q0 <- function(A, W1, W2, W3) {
plogis(5 * (A - 2) + W1 + 3 * W2^4 - 2 * W3 - 4 * (1 - W1) * W2)
}
# simulate data
W1 <- rbinom(n_samp, 1, 0.6)
W2 <- runif(n_samp, 0.5, 1.5)
W3 <- rpois(n_samp, 2)
g_obs <- g0(W1, W2, W3)
A <- rnorm(n_samp, g_obs$mu, sqrt(g_obs$sigma2))
Y <- rbinom(n_samp, 1, Q0(A, W1, W2, W3))
data_obs <- as_tibble(
list(W1 = W1, W2 = W2, W3 = W3, A = A, Y = Y)
)
}
# SCENARIO 2a: exposure density is Poisson(lambda = f(W))
if (dgp_type == "2a") {
# define treatment mechanism
g0 <- function(W1, W2, W3) {
lambda <- (1 - W1) + 0.25 * W2^3 + 2 * W1 * W2 + 4
return(list(rate = lambda))
}
# define outcome mechanism
Q0 <- function(A, W1, W2, W3) {
plogis(A + 2 * (1 - W1) + 0.5 * W2 + 0.5 * W3 + 2 * W1 * W2 - 7)
}
# simulate data
W1 <- rbinom(n_samp, 1, 0.6)
W2 <- runif(n_samp, 0.5, 1.5)
W3 <- rpois(n_samp, 2)
g_obs <- g0(W1, W2, W3)
A <- rpois(n_samp, lambda = g_obs$rate)
Y <- rbinom(n_samp, 1, Q0(A, W1, W2, W3))
data_obs <- as_tibble(
list(W1 = W1, W2 = W2, W3 = W3, A = A, Y = Y)
)
}
# output
dgp_funs <- list(g0 = g0, Q0 = Q0)
return(list(data_obs = data_obs, dgp_funs = dgp_funs))
}
# get truth for a given data-generating mechanism
# NOTE: g0 and Q0 are functions of {A, W1, W2, W3} only
get_truth <- function(n_samp = 1e7,
delta,
dgp_type = c("1a", "1b", "2a"),
param_type = c("pie", "tsm")) {
# input check
dgp_type <- match.arg(dgp_type)
param_type <- match.arg(param_type)
# compute large data set from data-generating mechanism
dgp <- make_sim_data(n_samp = n_samp, dgp_type = dgp_type)
wow_so_much_data <- dgp$data_obs
# extract helper functions
g0 <- dgp$dgp_funs$g0
Q0 <- dgp$dgp_funs$Q0
# compute treatment mechanism in large sample
gn_mech <- with(wow_so_much_data, g0(W1, W2, W3))
# compute likelihood factors
if (dgp_type %in% c("1a", "1b")) {
gn_Aminusdelta <- dnorm(wow_so_much_data$A - delta,
mean = gn_mech$mu,
sd = sqrt(gn_mech$sigma2)
)
gn_Anat <- dnorm(wow_so_much_data$A,
mean = gn_mech$mu,
sd = sqrt(gn_mech$sigma2)
)
} else if (dgp_type == "2a") {
gn_Aminusdelta <- dpois(wow_so_much_data$A - delta,
lambda = gn_mech$rate
)
gn_Anat <- dpois(wow_so_much_data$A,
lambda = gn_mech$rate
)
}
Qbar_noshift <- with(wow_so_much_data, Q0(A, W1, W2, W3))
Qbar_shift <- with(wow_so_much_data, Q0(A + delta, W1, W2, W3))
# EIF for shift parameter
Hn <- gn_Aminusdelta / gn_Anat
eif_shift <- Hn * (wow_so_much_data$Y - Qbar_noshift) +
(Qbar_shift - mean(Qbar_shift))
if (param_type == "pie") {
# truth in terms of PIE = EY - EY_delta
EY <- mean(wow_so_much_data$Y)
eif_EY <- wow_so_much_data$Y - EY
eif_pie <- eif_EY - eif_shift
true_psi <- EY - mean(Qbar_shift)
eff_bound <- var(eif_pie)
} else if (param_type == "tsm") {
# truth in terms of TSM EY_delta
true_psi <- mean(Qbar_shift)
eff_bound <- var(eif_shift)
}
# output
return(list(
psi = true_psi,
eff_bound = eff_bound
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.