utils::globalVariables(c("beta_est", "se_eif"))
#' Hypothesis test of direct effect with mediated stochastic interventions
#' using the multiplier bootstrap
#'
#' @param W A \code{matrix}, \code{data.frame}, or similar corresponding to a
#' set of baseline covariates.
#' @param A A \code{numeric} vector corresponding to a treatment variable. The
#' parameter of interest is defined as a location shift of this quantity.
#' @param Z A \code{numeric} vector, \code{matrix}, \code{data.frame}, or
#' similar corresponding to a set of mediators (on the causal pathway between
#' the intervention A and the outcome Y).
#' @param Y A \code{numeric} vector corresponding to an outcome variable.
#' @param ids A \code{numeric} vector of observation-level IDs, allowing for
#' observational units to be related through a hierarchical structure. The
#' default is to assume all units are IID. When repeated IDs are included,
#' both the cross-validation procedures used for estimation and the
#' inferential procedures respect these IDs.
#' @param delta_grid A \code{numeric} vector of values giving the various degrees of
#' shift in the intervention to be used in defining the causal quantity of
#' interest. In the case of binary interventions, this takes the form of an
#' incremental propensity score shift, acting as a multiplier of the odds with
#' which a given observational unit receives the intervention (EH Kennedy,
#' 2018, JASA; <doi:10.1080/01621459.2017.1422737>).
#' @param mult_type A \code{character} identifying the type of multipliers to
#' be used in the multiplier bootstrap. Choices are \code{"rademacher"} or
#' \code{"gaussian"}, with the default being the former.
#' @param ci_level A \code{numeric} indicating the (1 - alpha) level of the
#' simultaneous confidence band to be computed around the estimates of the
#' direct effect. The error level of the test reported in the returned p-value
#' is simply alpha, i.e., one minus this quantity.
#' @param g_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the propensity
#' score, i.e., g = P(A | W).
#' @param e_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting a propensity
#' score that conditions on the mediators, i.e., e = P(A | Z, W).
#' @param m_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the outcome
#' regression, i.e., m(A, Z, W).
#' @param phi_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in a regression of a
#' pseudo-outcome on the baseline covariates, i.e.,
#' phi(W) = E[m(A = 1, Z, W) - m(A = 0, Z, W) | W].
#' @param cv_folds A \code{numeric} specifying the number of folds to be
#' created for cross-validation. Use of cross-validation / cross-fitting
#' allows for entropy conditions on the AIPW estimator to be relaxed. Note:
#' for compatibility with \code{\link[origami]{make_folds}}, this value must
#' be greater than or equal to 2; the default is to create 10 folds.
#' @param n_mult A \code{numeric} scalar giving the number of repetitions of
#' the multipliers to be used in computing the multiplier bootstrap.
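#'
#' @return A \code{list} with two components: \code{est_de}, a \code{tibble}
#' containing, for each value in \code{delta_grid}, the estimated direct
#' effect, its standard error, the corresponding test statistic, and the
#' lower and upper limits of the simultaneous confidence band; and
#' \code{pval_de}, the p-value of the hypothesis test of no direct effect.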
#'
#' @importFrom stats var rbinom rnorm quantile
#' @importFrom data.table as.data.table setnames rbindlist
#' @importFrom dplyr "%>%" transmute between
#' @importFrom tibble as_tibble
#' @importFrom assertthat assert_that
#'
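#' @examples
#' \donttest{
#' # A minimal sketch of a call to test_de(), using simulated data and simple
#' # sl3 learners (Lrnr_glm, Lrnr_mean); the learners, shift grid, number of
#' # folds, and number of multiplier draws are illustrative placeholders only.
#' library(sl3)
#' n_obs <- 200
#' W <- replicate(2, rbinom(n_obs, 1, 0.5))
#' A <- rbinom(n_obs, 1, plogis(rowSums(W) - 1))
#' Z <- rbinom(n_obs, 1, plogis(A + W[, 1]))
#' Y <- rnorm(n_obs, mean = A + Z + rowSums(W))
#' de_test <- test_de(
#'   W = W, A = A, Z = Z, Y = Y,
#'   delta_grid = c(0.5, 1.5, 3.0),
#'   g_learners = Lrnr_glm$new(),
#'   e_learners = Lrnr_glm$new(),
#'   m_learners = Lrnr_glm$new(),
#'   phi_learners = Lrnr_mean$new(),
#'   cv_folds = 2,
#'   n_mult = 100
#' )
#' de_test$est_de
#' de_test$pval_de
#' }
#'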
#' @export
test_de <- function(W,
                    A,
                    Z,
                    Y,
                    ids = seq(1, length(Y)),
                    delta_grid = seq(from = 0.5, to = 5.0, by = 0.9),
                    mult_type = c("rademacher", "gaussian"),
                    ci_level = 0.95,
                    g_learners,
                    e_learners,
                    m_learners,
                    phi_learners,
                    cv_folds = 10,
                    n_mult = 10000) {
  # set default arguments
  mult_type <- match.arg(mult_type)
  # NOTE: procedure does _not_ support static interventions currently
  assertthat::assert_that(all(delta_grid > 0) && all(delta_grid < Inf))
  # construct input data structure
  data_in <- data.table::as.data.table(cbind(Y, Z, A, W, ids))
  w_names <- paste("W", seq_len(dim(data.table::as.data.table(W))[2]),
    sep = "_"
  )
  z_names <- paste("Z", seq_len(dim(data.table::as.data.table(Z))[2]),
    sep = "_"
  )
  data.table::setnames(data_in, c("Y", z_names, "A", w_names, "ids"))
  # compute E[Y] for half of direct effect and size of observed data
  EY <- mean(Y)
  n_obs <- length(Y)
  # generate multipliers
  if (mult_type == "rademacher") {
    mult_boot <- lapply(seq_len(n_mult), function(iter) {
      mults <- (2 * stats::rbinom(n_obs, 1, 0.5)) - 1
      return(mults)
    })
  } else if (mult_type == "gaussian") {
    mult_boot <- lapply(seq_len(n_mult), function(iter) {
      mults <- stats::rnorm(n_obs)
      return(mults)
    })
  }
  mult_boot_mat <- do.call(cbind, mult_boot)
  # estimate via one-step for other half of direct effect
  theta_est <- est_onestep(
    data = data_in,
    delta = delta_grid,
    g_learners = g_learners,
    e_learners = e_learners,
    m_learners = m_learners,
    phi_learners = phi_learners,
    w_names = w_names,
    z_names = z_names,
    cv_folds = cv_folds
  )
  # perform multiplier bootstrap to construct ingredients for simultaneous CI
  rho_est <- apply(mult_boot_mat, 2, function(mults) {
    # NOTE: there ought to be a better way than looping over the grid of deltas
    m_process_out <- lapply(seq_along(delta_grid), function(iter) {
      # compute estimate of the direct effect
      beta_est <- EY - theta_est[[iter]]$theta
      # compute corresponding uncentered influence function
      s_eif_est <- Y - (theta_est[[iter]]$eif + theta_est[[iter]]$theta)
      # difference in estimated influence function and direct effect
      eif_de_diff <- s_eif_est - beta_est
      # estimated variance for current shift
      se_eif <- sqrt(stats::var(eif_de_diff) / n_obs)
      # compute process M(delta) using multipliers
      m_process <- mean((mults * eif_de_diff) / se_eif)
      # construct output
      return(list(beta_est = beta_est, se_eif = se_eif, m_est = m_process))
    })
    m_process_out <- data.table::rbindlist(m_process_out)
    sup_m_over_delta <- max(abs(m_process_out$m_est))
    return(list(est = m_process_out[, -3], sup_m = sup_m_over_delta))
  })
  # NOTE: this is _very_ inefficient as we store the parameter and standard
  # error estimates for each of the bootstrap iterations even though they
  # are the same, i.e., we save n_mult data frames when we need just 1
  param_est <- lapply(rho_est, `[[`, "est")[[1]]
  # construct simultaneous CI using quantiles from supremum of M(delta)
  sup_m_process <- do.call(c, lapply(rho_est, `[[`, "sup_m"))
  c_alpha <- unname(stats::quantile(sup_m_process, ci_level))
  est_with_cis <- param_est %>%
    dplyr::transmute(
      lwr_ci = beta_est - c_alpha * se_eif,
      beta_est = I(beta_est),
      upr_ci = beta_est + c_alpha * se_eif,
      se_eif = I(se_eif),
      test_stat = beta_est / se_eif,
      delta = delta_grid
    ) %>%
    tibble::as_tibble()
  # for p-value, evaluate 1-rho(t) at observed supremum test statistic
  pval_rho <- 1 - mean(sup_m_process < max(abs(est_with_cis$test_stat)))
  # output
  out <- list(est_de = est_with_cis, pval_de = pval_rho)
  return(out)
}
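
# Informal sketch of the inference implemented above (comments only): writing
# D_i(delta) for the estimated, centered influence function of the direct
# effect beta(delta) = E[Y] - theta(delta), with se(delta) = sqrt(var(D) / n),
# each multiplier draw xi_1, ..., xi_n yields the process
#   M(delta) = mean(xi * D(delta)) / se(delta),
# and c_alpha is the ci_level quantile of sup_delta |M(delta)| across draws.
# The simultaneous band is beta(delta) +/- c_alpha * se(delta), and the
# p-value is the bootstrap proportion of draws in which sup_delta |M(delta)|
# is at least the observed supremum of |beta(delta) / se(delta)|.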