#' Sensitivity Analysis for Causal Decomposition with Individualized Interventions
#'
#' \code{ind.sens} is used to assess the sensitivity of estimates from the
#' individualized causal decomposition to potential omitted confounders between
#' the risk factor M and the outcome Y, using a benchmarking approach based on
#' the relative strength of an unmeasured confounder compared to a specified
#' covariate. The function employs a stochastic EM algorithm to simulate the
#' distribution of the unobserved confounder and generate bias-adjusted estimates.
#'
#' @usage
#' ind.sens(k.y = 1, k.m = 1, Iternum = 10, outcome, group, group.ref,
#'   intermediates, moderators, benchmark, risk.factor, covariates, data, B,
#'   cluster = NULL, nsim = 15, mc.cores = 1L)
#'
#' @details This function returns the adjusted point estimates and confidence intervals
#' after accounting for a simulated unmeasured confounder \eqn{U}. We employ an approach
#' that compares the coefficient of the unmeasured confounder \eqn{U} with that of an
#' observed covariate \eqn{X_j}, after controlling for the remaining observed covariates
#' (\eqn{R}, \eqn{X_{-j}}, and \eqn{C}).
#'
#' Specifically, \eqn{k_y} indicates
#' the extent to which the outcome \eqn{Y} is associated with a one unit increase in
#' \eqn{U} relative to how much it is associated with a one unit change in \eqn{X_j},
#' after controlling for \eqn{R}, \eqn{X_{-j}}, and \eqn{C}. Likewise, \eqn{k_m} indicates the extent
#' to which the odds of \eqn{M = 1} are explained by unmeasured confounder \eqn{U} relative to
#' how much they are explained by \eqn{X_j}, after controlling for \eqn{R}, \eqn{X_{-j}}, and \eqn{C}.
#' These parameters (\eqn{k_m} and \eqn{k_y}) should be specified by researchers based on the
#' assumed strength of \eqn{U} relative to that of the given observed covariate \eqn{X_j}.
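#'
#' In model terms (one way to read the definitions above), if \eqn{\beta_U} and
#' \eqn{\beta_{X_j}} denote the coefficients of \eqn{U} and \eqn{X_j} in the
#' outcome model, and \eqn{\gamma_U} and \eqn{\gamma_{X_j}} the corresponding
#' log-odds coefficients in the model for \eqn{M}, the sensitivity parameters
#' can be expressed as
#' \deqn{k_y = \beta_U / \beta_{X_j}, \qquad k_m = \gamma_U / \gamma_{X_j}.}
#' For example, \code{k.y = 2} posits an unmeasured confounder whose
#' conditional association with the outcome is twice that of the benchmark
#' covariate.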
#'
#' @param k.y scales the association of the unmeasured confounder with the outcome
#' (numeric). A value of 1 means a one-unit change in the unmeasured confounder
#' shifts the mean of the outcome by the same amount as a one-unit change in the
#' benchmark covariate—conditional on the social-group indicator, intermediate
#' confounder(s), and baseline covariate(s). Default is 1.
#' @param k.m scales the association of the unmeasured confounder with the log-odds of
#' risk.factor = 1 (numeric). A value of 1 means a one-unit change in the unmeasured
#' confounder changes the log-odds of the risk factor by the same amount as the
#' benchmark covariate, conditional on the social-group indicator,
#' intermediate confounder(s), and baseline covariate(s). Default is 1.
#' @param Iternum Number of iterations in the stochastic EM algorithm for generating
#' the unmeasured confounder. Default is 10.
#' @param outcome a character string indicating the name of the outcome.
#' @param group a character string indicating the name of the social group indicator
#' such as race or gender.
#' @param group.ref reference group of the social group indicator.
#' @param intermediates a vector containing the name(s) of the intermediate confounder(s).
#' @param moderators a vector containing the name(s) of the variable(s) that have
#' heterogeneous effects on the outcome depending on the risk factor.
#' @param benchmark a vector containing the name of the benchmark covariate used to
#' scale k.y and k.m.
#' @param risk.factor a character string indicating the name of the risk factor.
#' @param covariates a vector containing the name of baseline covariate variable(s).
#' @param data The data set for analysis (data.frame).
#' @param B Number of bootstrapped samples in the causal decomposition analysis.
#' @param cluster a vector of cluster indicators for the bootstrap. If provided,
#' the cluster bootstrap is used. Default is \code{NULL}.
#' @param nsim Number of random draws of the unmeasured confounder per observation.
#' Default is 15. Increase this number for smoother sensitivity curves.
#' @param mc.cores Number of cores to use. Must be exactly 1 on Windows. Default is 1.
#'
#' @return An object of class \code{ind.sens} containing the adjusted point estimates for:
#' \enumerate{
#' \item the initial disparity and the proportion recommended for treatment,
#' \item the disparity remaining and percent reduction based on individualized conditional effects,
#' \item the disparity remaining, disparity reduction, and percent reduction based on individualized
#' intervention effects.
#' }
#' It also returns the adjusted nonparametric bootstrap confidence intervals for each estimate.
#'
#' @author
#' Soojin Park, University of California, Riverside, \email{soojinp@@ucr.edu};
#' Suyeon Kang, University of Central Florida, \email{suyeon.kang@@ucf.edu};
#' Karen Xu, University of California, Riverside, \email{karenxu@@ucr.edu}.
#'
#' @references
#' Park, S., Kang, S., & Lee, C. (2025). Simulation-Based Sensitivity Analysis in
#' Optimal Treatment Regimes and Causal Decomposition with Individualized Interventions.
#' arXiv preprint arXiv:2506.19010.
#'
#' @seealso \code{\link{ind.decomp}}
#'
#' @importFrom parallel detectCores mclapply
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
#' @examples
#' data(idata)
#'
#' \donttest{
#' results_adj <- ind.sens(k.y = 1, k.m = 1, Iternum = 5, outcome = "Y",
#'                         group = "R", group.ref = "0",
#'                         intermediates = c("X1", "X2", "X3"),
#'                         moderators = c("X1", "X2"), benchmark = "X1",
#'                         risk.factor = "M", covariates = "C", data = idata,
#'                         B = 50, cluster = NULL, nsim = 10, mc.cores = 1)
#' results_adj}
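#'
#' \donttest{
#' # Illustrative sketch: vary the assumed strength of U over a small grid to
#' # trace sensitivity curves for the adjusted estimates (the grid values here
#' # are arbitrary choices, not defaults)
#' ks <- c(0.5, 1, 2)
#' sens_list <- lapply(ks, function(k)
#'   ind.sens(k.y = k, k.m = k, Iternum = 5, outcome = "Y", group = "R",
#'            group.ref = "0", intermediates = c("X1", "X2", "X3"),
#'            moderators = c("X1", "X2"), benchmark = "X1", risk.factor = "M",
#'            covariates = "C", data = idata, B = 50, nsim = 10, mc.cores = 1))
#' sapply(sens_list, function(res) res$estimated_delta_IIE)}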
ind.sens <- function(k.y = 1, k.m = 1, Iternum = 10, outcome, group, group.ref,
                     intermediates, moderators, benchmark, risk.factor,
                     covariates, data, B, cluster = NULL, nsim = 15,
                     mc.cores = 1L) {
  if (.Platform$OS.type == "windows" && as.integer(mc.cores) > 1L) {
    warning("'mc.cores' > 1 is not supported on Windows. mc.cores = 1 forced")
    mc.cores <- 1L
  } else if (as.integer(mc.cores) > detectCores()) {
    warning(paste("The maximum number of cores is ", detectCores(),
                  ". mc.cores = ", detectCores(), " forced", sep = ""))
    mc.cores <- detectCores()
  } else if (as.integer(mc.cores) < 1L) {
    stop("'mc.cores' must be >= 1")
  }
  data[[group]] <- as.factor(data[[group]]) # NOTE: Ensure group is a factor
  n_levels <- nlevels(data[[group]]) # NOTE: Check if the variable has exactly 2 levels
  if (n_levels != 2) {
    stop("The social group indicator R must be a factor with two levels")
  }
  # NOTE: Check if the specified reference level exists
  if (!(group.ref %in% levels(data[[group]]))) {
    stop(sprintf("The specified reference level '%s' is not a level of '%s'", group.ref, group))
  }
  data[[risk.factor]] <- as.factor(data[[risk.factor]]) # NOTE: Ensure risk.factor is a factor
  n_levels <- nlevels(data[[risk.factor]]) # NOTE: Check if the variable has exactly 2 levels
  if (n_levels != 2) {
    stop("The risk factor M must be a binary factor")
  }
  # NOTE: Treat the outcome as continuous only if it is numeric with more than
  # 10 unique values (a factor is never numeric, so no separate check is needed)
  if (!is.numeric(data[[outcome]]) || length(unique(data[[outcome]])) <= 10) {
    warning(sprintf("Variable '%s' must be a continuous numeric variable", outcome))
  }
  mods_formula <- as.formula(paste0(" ~ ", paste(moderators, collapse = " + ")))
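  # e.g., moderators = c("X1", "X2") yields the one-sided formula ~ X1 + X2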
  # 1. Generate U
  genU.res <- genU(
    k.y = k.y, k.m = k.m, Iternum = Iternum,
    benchmark = benchmark,
    data = data, covariates = covariates, group = group,
    intermediates = intermediates, risk.factor = risk.factor,
    outcome = outcome, nsim = nsim, num_cores = mc.cores,
    mods_formula = mods_formula
  )
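  # genU.res is expected (based on its use below) to carry U.final.nsim, an
  # nsim x n matrix of simulated draws of U, plus b.m and b.y, the calibrated
  # coefficients of U in the risk-factor and outcome models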
  # 2. Define a helper function to run the adjusted causal decomposition on
  # each simulated draw of U (each run bootstraps B times internally)
  simOne <- function(ss) {
    DATA.newU <- data
    # Insert the generated U for this iteration
    DATA.newU$U <- genU.res$U.final.nsim[ss, ]
    b.m <- genU.res$b.m
    b.y <- genU.res$b.y
    res_reg <- reg1_no_int(
      b.y = b.y, b.m = b.m, B = B,
      data = DATA.newU, covariates = covariates, group = group,
      group.ref = group.ref, intermediates = intermediates,
      risk.factor = risk.factor, outcome = outcome,
      mods_formula = mods_formula, cluster = cluster
    )
    return(res_reg)
  }
  # 3. Run simOne() 'nsim' times in parallel
  # This captures the uncertainty of generating U
  if (.Platform$OS.type == "windows" || mc.cores == 1L) {
    # Sequential with base progress bar
    pb <- utils::txtProgressBar(min = 0, max = nsim, style = 3)
    results_ind <- vector("list", nsim)
    for (ss in seq_len(nsim)) {
      utils::setTxtProgressBar(pb, ss)
      res <- try(simOne(ss), silent = TRUE)
      results_ind[[ss]] <- if (inherits(res, "try-error")) NULL else res
    }
    close(pb)
  } else {
    # Parallel on Unix/macOS
    if (requireNamespace("pbmcapply", quietly = TRUE)) {
      results_ind <- pbmcapply::pbmclapply(
        X = seq_len(nsim),
        FUN = function(ss) {
          res <- try(simOne(ss), silent = TRUE)
          if (inherits(res, "try-error")) NULL else res
        },
        mc.cores = mc.cores
      )
    } else {
      message("Tip: install.packages('pbmcapply') to see a progress bar during parallel runs.")
      results_ind <- parallel::mclapply(
        X = seq_len(nsim),
        FUN = function(ss) {
          message("Running nsim replication ", ss, " of ", nsim)
          res <- try(simOne(ss), silent = TRUE)
          if (inherits(res, "try-error")) NULL else res
        },
        mc.cores = mc.cores
      )
    }
  }
  # 4. Aggregate results into results_hom1
  # We calculate mean estimates and standard errors
  # using the standard formula with the extra (1 + 1/nsim) factor for variance
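  # Concretely (a Rubin-style combining rule): for each estimand,
  #   Var_total = W + (1 + 1/nsim) * B,
  # where W is the mean of the squared bootstrap SEs across the nsim draws of U
  # and B is the between-draw variance of the point estimates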
  # Initial disparity
  results_ini <- lm(as.formula(paste0(outcome, " ~ ",
                                      paste(c(group, covariates), collapse = " + "))),
                    data = data)
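  # The group indicator is the first predictor, so its coefficient and SE sit
  # in row 2 of the coefficient table, just after the intercept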
  estimated_tau <- coef(results_ini)[2]
  SE_tau <- summary(results_ini)$coefficients[2, 2] # NOTE: include initial disparity in results return
  # Helper: pull one named component across the nsim draws (NA if a draw failed)
  get_comp <- function(name) {
    sapply(results_ind, function(x) if (!is.null(x[[name]])) x[[name]] else NA)
  }
  # Pooled SE combining within-draw and between-draw variability (see formula
  # above); B is the variance of the point estimates, not of the SEs
  pool_se <- function(se_name, est_name) {
    sqrt(mean(get_comp(se_name)^2, na.rm = TRUE) +
           (1 + 1 / nsim) * var(get_comp(est_name), na.rm = TRUE))
  }
  results_hom1 <- list(
    p.opt = mean(get_comp("p.opt"), na.rm = TRUE),
    estimated_tau = estimated_tau, SE_tau = SE_tau, # NOTE: include initial disparity in results return
    estimated_zeta_ICDE = mean(get_comp("estimated_zeta_ICDE"), na.rm = TRUE),
    estimated_delta_IIE = mean(get_comp("reg_delta_IIE"), na.rm = TRUE),
    estimated_zeta_IIE = mean(get_comp("reg_zeta_IIE"), na.rm = TRUE),
    SE_zeta_ICDE = pool_se("SE_zeta_ICDE", "estimated_zeta_ICDE"),
    SE_delta_IIE = pool_se("SE_regdelta_IIE", "reg_delta_IIE"),
    SE_zeta_IIE = pool_se("SE_regzeta_IIE", "reg_zeta_IIE")
  )
  class(results_hom1) <- "ind.sens"
  # 5. Return the final list of results
  return(results_hom1)
}