R/ind.sens.R

#' Sensitivity Analysis for Causal Decomposition with Individualized Interventions
#'
#' \code{ind.sens} is used to assess the sensitivity of estimates from the
#' individualized causal decomposition to potential omitted confounders between
#' the risk factor M and the outcome Y, using a benchmarking approach based on
#' the relative strength of an unmeasured confounder compared to a specified
#' covariate. The function employs a stochastic EM algorithm to simulate the
#' distribution of the unobserved confounder and generate bias-adjusted estimates.
#'
#' @usage
#' ind.sens(k.y = 1, k.m = 1, Iternum = 10, outcome, group, group.ref, intermediates,
#'   moderators, benchmark, risk.factor, covariates, data, B, cluster = NULL,
#'   nsim = 15, mc.cores = 1)
#'
#' @details This function returns the adjusted point estimates and confidence intervals
#'   after accounting for a simulated unmeasured confounder \eqn{U}. We employ an approach
#'   that compares the coefficient of the unmeasured confounder \eqn{U} with that of an
#'   observed covariate \eqn{X_j}, after controlling for the remaining observed covariates
#'   (\eqn{R}, \eqn{X_{-j}}, and \eqn{C}).
#'
#'   Specifically, \eqn{k_y} indicates
#'   the extent to which the outcome \eqn{Y} is associated with a one-unit increase in
#'   \eqn{U} relative to how much it is associated with a one-unit increase in \eqn{X_j},
#'   after controlling for \eqn{R}, \eqn{X_{-j}}, and \eqn{C}. Likewise, \eqn{k_m} indicates the extent
#'   to which the odds of \eqn{M = 1} are explained by the unmeasured confounder \eqn{U} relative to
#'   how much they are explained by \eqn{X_j}, after controlling for \eqn{R}, \eqn{X_{-j}}, and \eqn{C}.
#'   These parameters (\eqn{k_m} and \eqn{k_y}) should be specified by researchers based on the
#'   assumed strength of \eqn{U} relative to that of the given observed covariate \eqn{X_j}.
#'
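#'   For example, setting \eqn{k_y = 2} and \eqn{k_m = 2} posits an unmeasured
#'   confounder whose associations with the outcome and with the odds of the risk
#'   factor are assumed to be twice those of the benchmark covariate \eqn{X_j}.
#'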
#' @param k.y scales the association of the unmeasured confounder with the outcome
#'   (numeric). A value of 1 means a one-unit change in the unmeasured confounder
#'   shifts the mean of the outcome by the same amount as a one-unit change in the
#'   benchmark covariate—conditional on the social-group indicator, intermediate
#'   confounder(s), and baseline covariate(s). Default is 1.
#' @param k.m scales the association of the unmeasured confounder with the log-odds of
#'   risk.factor = 1 (numeric). A value of 1 means a one-unit change in the unmeasured
#'   confounder changes the log-odds of the risk factor by the same amount as the
#'   benchmark covariate, conditional on the social-group indicator,
#'   intermediate confounder(s), and baseline covariate(s). Default is 1.
#' @param Iternum Number of iterations in the stochastic EM algorithm for generating
#'   the unmeasured confounder. Default is 10.
#' @param outcome a character string indicating the name of the outcome.
#' @param group a character string indicating the name of the social group indicator
#'   such as race or gender.
#' @param group.ref reference group of the social group indicator.
#' @param intermediates a vector containing the names of the intermediate confounder(s).
#' @param moderators a vector containing the names of variables whose effects on
#'   the outcome differ by the level of the risk factor.
#' @param benchmark a character string indicating the name of the benchmark covariate
#'   relative to which k.y and k.m are specified.
#' @param risk.factor a character string indicating the name of the risk factor.
#' @param covariates a vector containing the name of baseline covariate variable(s).
#' @param data The data set for analysis (data.frame).
#' @param B Number of bootstrapped samples in the causal decomposition analysis.
#' @param cluster a vector of cluster indicators for the bootstrap. If provided,
#'   the cluster bootstrap is used. Default is 'NULL'.
#' @param nsim Number of random draws of the unmeasured confounder per observation.
#'   Default is 15. Increase this number for smoother sensitivity curves.
#' @param mc.cores Number of cores to use. Must be exactly 1 on Windows. Default is 1.
#'
#' @return An object of class \code{ind.sens} containing the adjusted point estimates for:
#' \enumerate{
#'   \item the initial disparity and the proportion recommended for treatment;
#'   \item the disparity remaining and percent reduction based on individualized conditional effects;
#'   \item the disparity remaining, disparity reduction, and percent reduction based on
#'   individualized intervention effects.
#' }
#'   It also returns the adjusted nonparametric bootstrap confidence intervals for each estimate.
#'
#' @author
#'   Soojin Park, University of California, Riverside, \email{soojinp@@ucr.edu};
#'   Suyeon Kang, University of Central Florida, \email{suyeon.kang@@ucf.edu};
#'   Karen Xu, University of California, Riverside, \email{karenxu@@ucr.edu}.
#'
#' @references
#'   Park, S., Kang, S., & Lee, C. (2025). Simulation-Based Sensitivity Analysis in
#'   Optimal Treatment Regimes and Causal Decomposition with Individualized Interventions.
#'   arXiv preprint arXiv:2506.19010.
#'
#' @seealso \code{\link{ind.decomp}}
#'
#' @importFrom parallel detectCores mclapply
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
#' @examples
#' data(idata)
#'
#' \donttest{results_adj <- ind.sens(k.y = 1, k.m = 1, Iternum = 5, outcome = "Y",
#'                        group = "R", group.ref = "0",
#'                        intermediates = c("X1", "X2", "X3"), moderators = c("X1", "X2"),
#'                        benchmark = "X1", risk.factor = "M", covariates = "C", data = idata,
#'                        B = 50, cluster = NULL, nsim = 10, mc.cores = 1)
#' results_adj}
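#'
#' # A hedged sketch (same idata setup as above): vary k.y and k.m over a grid
#' # to trace how the adjusted estimates move with the assumed confounder strength.
#' \donttest{k.grid <- c(0.5, 1, 2)
#' sens_grid <- lapply(k.grid, function(k)
#'   ind.sens(k.y = k, k.m = k, Iternum = 5, outcome = "Y", group = "R",
#'            group.ref = "0", intermediates = c("X1", "X2", "X3"),
#'            moderators = c("X1", "X2"), benchmark = "X1", risk.factor = "M",
#'            covariates = "C", data = idata, B = 50, nsim = 10, mc.cores = 1))}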
ind.sens <- function(k.y = 1, k.m = 1, Iternum = 10, outcome, group, group.ref, intermediates, moderators,
                     benchmark, risk.factor, covariates, data, B, cluster = NULL, nsim = 15, mc.cores = 1L){
  
  if(.Platform$OS.type == "windows" && as.integer(mc.cores) > 1L){
    warning("'mc.cores' > 1 is not supported on Windows. mc.cores = 1 forced")
    mc.cores <- 1L
  } else if (as.integer(mc.cores) > detectCores()){
    warning(paste("The maximum number of cores is ", detectCores(),
                  ". mc.cores = ", detectCores(), " forced", sep = ""))
    mc.cores <- detectCores()
  } else if (as.integer(mc.cores) < 1L){
    stop("'mc.cores' must be >= 1")
  }
  
  data[[group]] <- as.factor(data[[group]]) # NOTE: Ensure group is a factor
  n_levels <- nlevels(data[[group]])       # NOTE: Check if the variable has exactly 2 levels
  if (n_levels != 2) {
    stop("The social group indicator R must be a factor with two levels")
  }
  # NOTE: Check that the specified reference level exists, then apply it so that
  # downstream contrasts (including the initial-disparity model) use group.ref
  if (!(group.ref %in% levels(data[[group]]))) {
    stop(sprintf("The specified reference level '%s' is not a level of '%s'", group.ref, group))
  }
  data[[group]] <- relevel(data[[group]], ref = group.ref)

  data[[risk.factor]] <- as.factor(data[[risk.factor]]) # NOTE: Ensure risk.factor is a factor
  n_levels <- nlevels(data[[risk.factor]])       # NOTE: Check if the variable has exactly 2 levels
  if (n_levels != 2) {
    stop("The mediator has to be a binary factor")
  }

  # NOTE: Treat the outcome as continuous only if it is numeric with more than 10 unique values
  if (!is.numeric(data[[outcome]]) || length(unique(data[[outcome]])) <= 10) {
    warning(sprintf("Variable '%s' must be a continuous numeric variable", outcome))
  }
  
  mods_formula <- as.formula(paste0(" ~ ", paste(moderators, collapse = " + ")))
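  # e.g., moderators = c("X1", "X2") builds the one-sided formula ~ X1 + X2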

  # 1. Generate U
  genU.res <- genU(
    k.y = k.y, k.m = k.m, Iternum = Iternum,
    benchmark = benchmark,
    data = data, covariates = covariates, group = group, intermediates = intermediates,
    risk.factor = risk.factor, outcome = outcome, nsim = nsim, num_cores = mc.cores,
    mods_formula = mods_formula
  )
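  # genU.res is expected to carry U.final.nsim (an nsim-by-n matrix of simulated
  # draws of U) along with the coefficients b.m and b.y used for adjustment below.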

  # 2. Define a helper function to run the adjusted causal decomposition on each bootstrap sample
  simOne <- function(ss) {
    DATA.newU <- data
    # Insert the generated draw of U for this simulation
    DATA.newU$U <- genU.res$U.final.nsim[ss, ]
    b.m <- genU.res$b.m
    b.y <- genU.res$b.y

    res_reg <- reg1_no_int(
      b.y = b.y, b.m = b.m, B = B,
      data = DATA.newU, covariates = covariates, group = group, group.ref = group.ref,
      intermediates = intermediates, risk.factor = risk.factor, outcome = outcome,
      mods_formula = mods_formula, cluster = cluster
    )

    return(res_reg)
  }
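  # Each simOne() result is expected to be a list carrying p.opt, estimated_zeta_ICDE,
  # reg_delta_IIE, reg_zeta_IIE, and the matching SE_* entries aggregated in step 4.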

  # 3. Run simOne() 'nsim' times in parallel
  #    This captures the uncertainty of generating U
  if (.Platform$OS.type == "windows" || mc.cores == 1L) {
    # sequential with base progress bar
    pb <- utils::txtProgressBar(min = 0, max = nsim, style = 3)
    results_ind <- vector("list", nsim)
    for (ss in seq_len(nsim)) {
      utils::setTxtProgressBar(pb, ss)
      res <- try(simOne(ss), silent = TRUE)
      results_ind[[ss]] <- if (inherits(res, "try-error")) NULL else res
    }
    close(pb)
  } else {
    # parallel on Unix/mac
    if (requireNamespace("pbmcapply", quietly = TRUE)) {
      results_ind <- pbmcapply::pbmclapply(
        X = seq_len(nsim),
        FUN = function(ss) {
          res <- try(simOne(ss), silent = TRUE)
          if (inherits(res, "try-error")) NULL else res
        },
        mc.cores = mc.cores
      )
    } else {
      message("Tip: install.packages('pbmcapply') to see a progress bar during parallel runs.")
      results_ind <- parallel::mclapply(
        X = seq_len(nsim),
        FUN = function(ss) {
          message("Running nsim replication ", ss, " of ", nsim)
          res <- try(simOne(ss), silent = TRUE)
          if (inherits(res, "try-error")) NULL else res
        },
        mc.cores = mc.cores
      )
    }
  }

  # 4. Aggregate results into results_hom1
  #    We calculate mean estimates and standard errors
  #    using the standard formula with the extra (1 + 1/nsim) factor for variance
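  #    Combined SE = sqrt( mean(SE_s^2) + (1 + 1/nsim) * Var(estimate_s) ),
  #    where s indexes the nsim simulated datasets (Rubin-style combination)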

  # Initial disparity
  results_ini <- lm(as.formula(paste0(outcome, " ~ ", paste(c(group,covariates), collapse = " + "))), data=data)
  estimated_tau <- coef(results_ini)[2]
  SE_tau <- summary(results_ini)$coefficients[2, 2]  # NOTE: include initial disparity in results return

  results_hom1 <- list(
    p.opt = mean(
      sapply(results_ind, function(x) if (!is.null(x[["p.opt"]])) x[["p.opt"]] else NA),
      na.rm = TRUE
    ),

    estimated_tau = estimated_tau, SE_tau = SE_tau, # NOTE: include initial disparity in results return

    estimated_zeta_ICDE = mean(
      sapply(results_ind, function(x) if (!is.null(x[["estimated_zeta_ICDE"]])) x[["estimated_zeta_ICDE"]] else NA),
      na.rm = TRUE
    ),
    estimated_delta_IIE = mean(
      sapply(results_ind, function(x) if (!is.null(x[["reg_delta_IIE"]])) x[["reg_delta_IIE"]] else NA),
      na.rm = TRUE
    ),
    estimated_zeta_IIE = mean(
      sapply(results_ind, function(x) if (!is.null(x[["reg_zeta_IIE"]])) x[["reg_zeta_IIE"]] else NA),
      na.rm = TRUE
    ),

    # Between-simulation variance is taken over the point estimates (not the SEs),
    # per the combination rule noted above
    SE_zeta_ICDE = sqrt(
      mean(
        sapply(results_ind, function(x) if (!is.null(x[["SE_zeta_ICDE"]])) x[["SE_zeta_ICDE"]]^2 else NA),
        na.rm = TRUE
      ) +
        (1 + 1 / nsim) * var(
          sapply(results_ind, function(x) if (!is.null(x[["estimated_zeta_ICDE"]])) x[["estimated_zeta_ICDE"]] else NA),
          na.rm = TRUE
        )
    ),
    SE_delta_IIE = sqrt(
      mean(
        sapply(results_ind, function(x) if (!is.null(x[["SE_regdelta_IIE"]])) x[["SE_regdelta_IIE"]]^2 else NA),
        na.rm = TRUE
      ) +
        (1 + 1 / nsim) * var(
          sapply(results_ind, function(x) if (!is.null(x[["reg_delta_IIE"]])) x[["reg_delta_IIE"]] else NA),
          na.rm = TRUE
        )
    ),
    SE_zeta_IIE = sqrt(
      mean(
        sapply(results_ind, function(x) if (!is.null(x[["SE_regzeta_IIE"]])) x[["SE_regzeta_IIE"]]^2 else NA),
        na.rm = TRUE
      ) +
        (1 + 1 / nsim) * var(
          sapply(results_ind, function(x) if (!is.null(x[["reg_zeta_IIE"]])) x[["reg_zeta_IIE"]] else NA),
          na.rm = TRUE
        )
    )
  )

  class(results_hom1) <- "ind.sens"

  # 5. Return the final list of results
  return(results_hom1)
}
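
# A minimal standalone sketch (not part of the package API) of the variance
# combination used in step 4 above. 'ests' and 'ses' are hypothetical vectors of
# per-simulation point estimates and standard errors.
combine_rubin <- function(ests, ses) {
  nsim <- length(ests)
  within  <- mean(ses^2, na.rm = TRUE)    # within-simulation variance
  between <- var(ests, na.rm = TRUE)      # between-simulation variance of estimates
  sqrt(within + (1 + 1 / nsim) * between) # combined standard error
}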
