utils::globalVariables(c("..eif_component_names", "..w_names"))
#' Efficient One-Step Estimator
#'
#' @param data A \code{data.table} containing the observed data, with columns
#' in the order specified by the NPSEM (Y, Z, A, W), with column names set
#' appropriately based on the original input data. Such a structure is merely
#' a convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param delta A \code{numeric} value indicating the degree of shift in the
#' intervention to be used in defining the causal quantity of interest. In the
#' case of binary interventions, this takes the form of an incremental
#' propensity score shift, acting as a multiplier of the probability with
#' which a given observational unit receives the intervention (EH Kennedy,
#' 2018, JASA; <doi:10.1080/01621459.2017.1422737>).
#' @param g_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the propensity
#' score, i.e., g = P(A | W).
#' @param e_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting a propensity
#' score that conditions on the mediators, i.e., e = P(A | Z, W).
#' @param m_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the outcome
#' regression, i.e., m(A, Z, W).
#' @param phi_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in a regression of a
#' pseudo-outcome on the baseline covariates, i.e.,
#' phi(W) = E[m(A = 1, Z, W) - m(A = 0, Z, W) | W).
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by \code{\link{medshift}}.
#' @param z_names A \code{character} vector of the names of the columns that
#' correspond to mediators (Z). The input for this argument is automatically
#' generated by \code{\link{medshift}}.
#' @param cv_folds A \code{numeric} specifying the number of folds to be
#' created for cross-validation. Use of cross-validation / cross-fitting
#' allows for entropy conditions on the AIPW estimator to be relaxed. Note:
#' for compatibility with \code{\link[origami]{make_folds}}, this value must
#' be greater than or equal to 2; the default is to create 10 folds.
#'
#' @importFrom stats var
#' @importFrom origami make_folds cross_validate folds_vfold
est_onestep <- function(data,
delta,
g_learners,
e_learners,
m_learners,
phi_learners,
w_names,
z_names,
cv_folds = 10) {
# use origami to perform CV-SL, fitting/evaluating EIF components per fold
eif_component_names <- c("Dy", "Da", "Dzw")
# create folds for use with origami::cross_validate
folds <- origami::make_folds(data,
fold_fun = origami::folds_vfold,
V = cv_folds,
cluster_ids = data[["ids"]]
)
# perform the cv_eif procedure on a per-fold basis
cv_eif_results <- origami::cross_validate(
cv_fun = cv_eif,
folds = folds,
data = data,
delta = delta,
g_learners = g_learners,
e_learners = e_learners,
m_learners = m_learners,
phi_learners = phi_learners,
w_names = w_names,
z_names = z_names,
use_future = FALSE,
.combine = FALSE
)
# combine results of EIF components for full EIF
est_over_delta <- lapply(seq_along(delta), function(iter) {
# compute influence function, parameter, variance for given delta
D_obs <- lapply(cv_eif_results[[iter]], function(x) {
D_obs_fold <- rowSums(x[, ..eif_component_names])
return(D_obs_fold)
})
# get estimated observation-level values of EIF
estim_eif <- do.call(c, D_obs)
# compute one-step estimate of parameter and variance from EIF
estim_onestep_param <- mean(estim_eif)
# combine repeated EIF estimates if IDs are non-unique
if (length(unique(data[["ids"]])) < nrow(data)) {
estim_eif_combined <- by(estim_eif, as.numeric(data[["ids"]]), mean,
simplify = FALSE
)
estim_eif_reduced <- unname(do.call(c, estim_eif_combined))
estim_onestep_var <- stats::var(estim_eif_reduced) /
length(estim_eif_reduced)
} else {
estim_onestep_var <- stats::var(estim_eif) / length(estim_eif)
}
# output for a given delta
estim_onestep_out <- list(
theta = estim_onestep_param,
var = estim_onestep_var,
eif = (estim_eif - estim_onestep_param),
type = "onestep"
)
return(estim_onestep_out)
})
# output
if (length(delta) == 1) {
return(unlist(est_over_delta, recursive = FALSE))
} else {
return(est_over_delta)
}
}
###############################################################################
#' Cross-validated Evaluation of Efficient Influence Function Components
#'
#' @param fold Object specifying cross-validation folds as generated by a call
#' to \code{origami::make_folds}.
#' @param data A \code{data.table} containing the observed data, with columns
#' in the order specified by the NPSEM (Y, Z, A, W), with column names set
#' appropriately based on the original input data. Such a structure is merely
#' a convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param delta A \code{numeric} value indicating the degree of shift in the
#' intervention to be used in defining the causal quantity of interest. In the
#' case of binary interventions, this takes the form of an incremental
#' propensity score shift, acting as a multiplier of the odds with which a
#' given observational unit receives the intervention (EH Kennedy, 2018,
#' JASA; <doi:10.1080/01621459.2017.1422737>).
#' @param g_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the propensity
#' score, i.e., g = P(A | W).
#' @param e_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting a propensity
#' score that conditions on the mediators, i.e., e = P(A | Z, W).
#' @param m_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the outcome
#' regression, i.e., m(A, Z, W).
#' @param phi_learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in a regression of a
#' pseudo-outcome on the baseline covariates, i.e.,
#' phi(W) = E[m(A = 1, Z, W) - m(A = 0, Z, W) | W).
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by a call to the wrapper function \code{medshift}.
#' @param z_names A \code{character} vector of the names of the columns that
#' correspond to mediators (Z). The input for this argument is automatically
#' generated by a call to the wrapper function \code{medshift}.
#'
#' @importFrom data.table data.table
#' @importFrom origami training validation fold_index
#'
#' @keywords internal
cv_eif <- function(fold,
data,
delta,
g_learners,
e_learners,
m_learners,
phi_learners,
w_names,
z_names) {
# make training and validation data
train_data <- origami::training(data)
valid_data <- origami::validation(data)
# compute nuisance parameters eta = (g, m, e, phi)
## 1) fit regression for incremental propensity score intervention
g_out <- lapply(delta, function(delta) {
# NOTE: even this is _repeated_ computation since delta is a multiplier
# ...worth fixing later if a bottleneck.
g_est_delta <- fit_g_mech(
data = train_data, valid_data = valid_data,
delta = delta,
learners = g_learners, w_names = w_names
)
return(g_est_delta)
})
## 2) fit clever regression for treatment, conditional on mediators
e_out <- fit_e_mech(
data = train_data, valid_data = valid_data,
learners = e_learners,
z_names = z_names, w_names = w_names
)
## 3) fit regression for incremental propensity score intervention
m_out <- fit_m_mech(
data = train_data, valid_data = valid_data,
learners = m_learners,
z_names = z_names, w_names = w_names
)
## 4) difference-reduced dimension regression with pseudo-outcome
phi_est <- fit_phi_mech(
train_data = train_data, valid_data = valid_data,
learners = phi_learners,
m_output = m_out, w_names = w_names
)
# get indices of treated and control units in validation data
idx_A1 <- which(valid_data$A == 1)
idx_A0 <- which(valid_data$A == 0)
# loop over each delta-shift in grid to assemble EIF components
eif_over_delta <- lapply(seq_along(delta), function(iter) {
# compute component Dzw from nuisance parameters
Dzw_groupwise <- compute_Dzw(g_output = g_out[[iter]], m_output = m_out)
D_ZW <- Dzw_groupwise$dzw_cntrl + Dzw_groupwise$dzw_treat
# compute component Da from nuisance parameters
g_pred_A1 <- g_out[[iter]]$g_est$g_pred_A1
g_pred_A0 <- g_out[[iter]]$g_est$g_pred_A0
Da_numerator <- delta[iter] * phi_est * (valid_data$A - g_pred_A1)
Da_denominator <- (delta[iter] * g_pred_A1 + g_pred_A0)^2
D_A <- Da_numerator / Da_denominator
# compute component Dy from nuisance parameters
ipw_groupwise <- compute_ipw(
g_output = g_out[[iter]], e_output = e_out,
idx_treat = idx_A1, idx_cntrl = idx_A0
)
m_pred_obs <- rep(NA, nrow(valid_data))
m_pred_obs[idx_A1] <- m_out$m_est$m_pred_A1[idx_A1]
m_pred_obs[idx_A0] <- m_out$m_est$m_pred_A0[idx_A0]
# stabilize weights in AIPW by dividing by sample average, n.b., E[g/e] = 1
mean_aipw <- ipw_groupwise$mean_aipw
g_shifted <- ipw_groupwise$g_shifted
e_pred <- ipw_groupwise$e_pred
D_Y <- ((g_shifted / e_pred) / mean_aipw) * (valid_data$Y - m_pred_obs)
# output table of EIF results for given shift
eif_out <- data.table::data.table(
Dy = D_Y, Da = D_A, Dzw = D_ZW,
fold = origami::fold_index()
)
return(eif_out)
})
# output
return(eif_over_delta)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.