utils::globalVariables(c("A", ".N", "..w_names"))
#' Fit propensity score with incremental stochastic shift intervention
#'
#' @param data A \code{data.table} containing the observed data, with columns
#' in the order specified by the NPSEM (Y, Z, A, W), with column names set
#' appropriately based on the original input data. Such a structure is merely
#' a convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param valid_data A holdout data set, with columns exactly matching those
#' appearing in the preceding argument \code{data}, to be used for estimation
#' via cross-fitting. Optional, defaulting to \code{NULL}.
#' @param delta A \code{numeric} value indicating the degree of shift in the
#' intervention to be used in defining the causal quantity of interest. In the
#' case of binary interventions, this takes the form of an incremental
#' propensity score shift, acting as a multiplier of the odds with which a
#' given observational unit receives the intervention (EH Kennedy, 2018, JASA;
#' <doi:10.1080/01621459.2017.1422737>).
#' @param learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the propensity
#' score, i.e., g = P(A | W).
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by \code{\link{medshift}}.
#'
#' @importFrom data.table as.data.table copy ":="
#' @importFrom sl3 sl3_Task
fit_g_mech <- function(data, valid_data = NULL,
delta, learners, w_names) {
# construct task for propensity score fit
g_task <- sl3::sl3_Task$new(
data = data,
covariates = w_names,
outcome = "A",
id = "ids"
)
# fit and predict
g_fit_stack <- learners$train(g_task)
# use full data for counterfactual prediction if no validation data provided
if (is.null(valid_data)) {
# copy full data
data_pred <- data.table::copy(data)
} else {
# copy only validation data
data_pred <- data.table::copy(valid_data)
}
# create task for estimating propensity score P(A = 1 | W)
g_task_pred <- sl3::sl3_Task$new(
data = data_pred,
covariates = w_names,
outcome = "A",
id = "ids"
)
g_pred_A1 <- g_fit_stack$predict(g_task_pred)
# compute A = 0 case by symmetry
g_pred_A0 <- 1 - g_pred_A1
# directly computed the shifted propensity score
g_pred_shifted_A1 <- (delta * g_pred_A1) /
(delta * g_pred_A1 + (1 - g_pred_A1))
# compute shifted propensity score for A = 0 by symmetry
g_pred_shifted_A0 <- 1 - g_pred_shifted_A1
# bounding to numerical precision
g_pred_A1 <- bound_precision(g_pred_A1)
g_pred_A0 <- bound_precision(g_pred_A0)
g_pred_shifted_A1 <- bound_precision(g_pred_shifted_A1)
g_pred_shifted_A0 <- bound_precision(g_pred_shifted_A0)
# bounding for potential positivity issues
g_pred_A1 <- bound_propensity(g_pred_A1)
g_pred_A0 <- bound_propensity(g_pred_A0)
g_pred_shifted_A1 <- bound_propensity(g_pred_shifted_A1)
g_pred_shifted_A0 <- bound_propensity(g_pred_shifted_A0)
# output
out <- list(
g_est = data.table::data.table(cbind(
g_pred_A1 = g_pred_A1,
g_pred_A0 = g_pred_A0,
g_pred_shifted_A1 = g_pred_shifted_A1,
g_pred_shifted_A0 = g_pred_shifted_A0
)),
g_fit = g_fit_stack
)
return(out)
}
###############################################################################
#' Fit propensity score regression while conditioning on mediators
#'
#' @param data A \code{data.table} containing the observed data, with columns
#' in the order specified by the NPSEM (Y, Z, A, W), with column names set
#' appropriately based on the original input data. Such a structure is merely
#' a convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param valid_data A holdout data set, with columns exactly matching those
#' appearing in the preceding argument \code{data}, to be used for estimation
#' via cross-fitting. Optional, defaulting to \code{NULL}.
#' @param learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting a propensity
#' score that conditions on the mediators, i.e., e = P(A | Z, W).
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by a call to the wrapper function \code{medshift}.
#' @param z_names A \code{character} vector of the names of the columns that
#' correspond to mediators (Z). The input for this argument is automatically
#' generated by \code{\link{medshift}}.
#'
#' @importFrom data.table as.data.table copy
#' @importFrom sl3 sl3_Task
fit_e_mech <- function(data, valid_data = NULL,
learners, z_names, w_names) {
# construct task for nuisance parameter fit
e_task <- sl3::sl3_Task$new(
data = data,
covariates = c(z_names, w_names),
outcome = "A",
id = "ids"
)
# fit and predict
e_fit_stack <- learners$train(e_task)
# use full data for counterfactual prediction if no validation data provided
if (is.null(valid_data)) {
# copy full data
data_pred <- data.table::copy(data)
} else {
# copy only validation data
data_pred <- data.table::copy(valid_data)
}
# create task for estimating propensity score P(A = 1 | W)
e_task_pred <- sl3::sl3_Task$new(
data = data_pred,
covariates = c(z_names, w_names),
outcome = "A",
id = "ids"
)
# predict from trained model on counterfactual data
e_pred_A1 <- e_fit_stack$predict(e_task_pred)
# get values of nuisance parameter E for A = 0 by symmetry with A = 1 case
e_pred_A0 <- 1 - e_pred_A1
# bounding to numerical precision
e_pred_A1 <- bound_precision(e_pred_A1)
e_pred_A0 <- bound_precision(e_pred_A0)
# bounding for potential positivity issues
e_pred_A1 <- bound_propensity(e_pred_A1)
e_pred_A0 <- bound_propensity(e_pred_A0)
# output
out <- list(
e_est = data.table::data.table(cbind(
e_pred_A1 = e_pred_A1,
e_pred_A0 = e_pred_A0
)),
e_fit = e_fit_stack
)
return(out)
}
###############################################################################
#' Fit outcome regression
#'
#' @param data A \code{data.table} containing the observed data, with columns
#' in the order specified by the NPSEM (Y, Z, A, W), with column names set
#' appropriately based on the original input data. Such a structure is merely
#' a convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param valid_data A holdout data set, with columns exactly matching those
#' appearing in the preceding argument \code{data}, to be used for estimation
#' via cross-fitting. Optional, defaulting to \code{NULL}.
#' @param learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in fitting the outcome
#' regression, i.e., m(A, Z, W).
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by \code{\link{medshift}}.
#' @param z_names A \code{character} vector of the names of the columns that
#' correspond to mediators (Z). The input for this argument is automatically
#' generated by \code{\link{medshift}}.
#'
#' @importFrom data.table as.data.table copy
#' @importFrom sl3 sl3_Task
fit_m_mech <- function(data, valid_data = NULL,
learners, z_names, w_names) {
# construct task for propensity score fit
m_task <- sl3::sl3_Task$new(
data = data,
covariates = c("A", z_names, w_names),
outcome = "Y",
id = "ids"
)
# fit and predict
m_fit_stack <- learners$train(m_task)
# use full data for counterfactual prediction if no validation data provided
if (is.null(valid_data)) {
# copy full data
data_A1 <- data.table::copy(data)
data_A0 <- data.table::copy(data)
} else {
# copy only validation data
data_A1 <- data.table::copy(valid_data)
data_A0 <- data.table::copy(valid_data)
# NOTE: to fit nuisance regression phi, we need estimates on training set
# to construct the relevant pseudo-outcome, i.e., m(Z,A=1,W) - m(Z,A=0,W)
data_train_A1 <- data.table::copy(data)
data_train_A1[, A := 1]
m_task_train_A1 <- sl3::sl3_Task$new(
data = data_train_A1,
covariates = c("A", z_names, w_names),
outcome = "Y",
id = "ids"
)
m_pred_train_A1 <- m_fit_stack$predict(m_task_train_A1)
# repeat for A = 0 case
data_train_A0 <- data.table::copy(data)
data_train_A0[, A := 0]
m_task_train_A0 <- sl3::sl3_Task$new(
data = data_train_A0,
covariates = c("A", z_names, w_names),
outcome = "Y",
id = "ids"
)
m_pred_train_A0 <- m_fit_stack$predict(m_task_train_A0)
}
# copy data and set intervention A = 1
data_A1[, A := 1]
m_task_A1 <- sl3::sl3_Task$new(
data = data_A1,
covariates = c("A", z_names, w_names),
outcome = "Y",
id = "ids"
)
m_pred_A1 <- m_fit_stack$predict(m_task_A1)
# copy data and set intervention A = 0
data_A0[, A := 0]
m_task_A0 <- sl3::sl3_Task$new(
data = data_A0,
covariates = c("A", z_names, w_names),
outcome = "Y",
id = "ids"
)
m_pred_A0 <- m_fit_stack$predict(m_task_A0)
# output
out <- list(
m_est = data.table::data.table(cbind(
m_pred_A1 = m_pred_A1,
m_pred_A0 = m_pred_A0
)),
m_est_training = data.table::data.table(cbind(
m_pred_A1 =
if (!is.null(valid_data)) {
m_pred_train_A1
} else {
rep(NA, nrow(data))
},
m_pred_A0 =
if (!is.null(valid_data)) {
m_pred_train_A0
} else {
rep(NA, nrow(data))
}
)),
m_fit_sl = m_fit_stack
)
return(out)
}
###############################################################################
#' Fit intervention-specific exponential tilt nuisance parameter
#'
#' @param train_data A \code{data.table} containing the observed data, with
#' columns in the order specified by the NPSEM (Y, Z, A, W), with column names
#' set appropriately based on the input data. Such a structure is merely a
#' convenience utility to passing data around to the various core estimation
#' routines and is automatically generated by \code{\link{medshift}}.
#' @param valid_data A holdout data set, with columns exactly matching those
#' appearing in the preceding argument \code{train_data}, to be used for
#' estimation via cross-fitting. Not optional for this nuisance parameter.
#' @param learners A \code{\link[sl3]{Stack}} (or other learner class that
#' inherits from \code{\link[sl3]{Lrnr_base}}), containing a single or set of
#' instantiated learners from \pkg{sl3}, to be used in a regression of a
#' pseudo-outcome on the baseline covariates, i.e.,
#' phi(W) = E[m(A = 1, Z, W) - m(A = 0, Z, W) | W).
#' @param m_output Object containing results from fitting the outcome
#' regression, as produced by \code{\link{fit_m_mech}}.
#' @param w_names A \code{character} vector of the names of the columns that
#' correspond to baseline covariates (W). The input for this argument is
#' automatically generated by \code{\link{medshift}}.
#'
#' @importFrom data.table data.table as.data.table
#' @importFrom sl3 sl3_Task
fit_phi_mech <- function(train_data, valid_data, learners, m_output,
w_names) {
# regression on pseudo-outcome for this nuisance parameter
# NOTE: first, learn the regression model using the training data
m_pred_train_A1 <- m_output$m_est_training$m_pred_A1
m_pred_train_A0 <- m_output$m_est_training$m_pred_A0
m_pred_train_diff <- m_pred_train_A1 - m_pred_train_A0
# construct data structure for use with task objects
phi_train_data <- data.table::data.table(
m_diff = m_pred_train_diff,
train_data[, ..w_names],
ids = train_data[["ids"]]
)
phi_train_task <- sl3::sl3_Task$new(
data = phi_train_data,
covariates = w_names,
outcome = "m_diff",
outcome_type = "continuous",
id = "ids"
)
# fit stack of learners to learn the regression model for phi
phi_fit <- learners$train(phi_train_task)
# NOW, predict on the validation data
# NOTE: first, as before, must construct the pseudo-outcome
m_pred_valid_A1 <- m_output$m_est$m_pred_A1
m_pred_valid_A0 <- m_output$m_est$m_pred_A0
m_pred_valid_diff <- m_pred_valid_A1 - m_pred_valid_A0
# construct data structure for use with task objects
phi_valid_data <- data.table::data.table(
m_diff = m_pred_valid_diff,
valid_data[, ..w_names],
ids = valid_data[["ids"]]
)
phi_valid_task <- sl3::sl3_Task$new(
data = phi_valid_data,
covariates = w_names,
outcome = "m_diff",
outcome_type = "continuous",
id = "ids"
)
# predict and return for validation set only
phi_est <- phi_fit$predict(phi_valid_task)
return(phi_est)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.