#' Causal Decomposition with Individualized Interventions
#'
#' \code{ind.decomp} estimates how much of the initial disparity would persist under two scenarios:
#' (i) Everyone follows the optimal treatment regime (OTR), yielding the Individualized
#' Controlled Direct Effect (ICDE); (ii) The proportion of individuals following the OTR is
#' equalized across groups, yielding the Individualized Interventional Effect (IIE).
#' This function implements the method proposed by Park, Kang, and Lee (2025).
#'
#' @usage
#' ind.decomp(outcome, group, group.ref, risk.factor, intermediates, moderators,
#' covariates, data, B, cluster = NULL)
#'
#' @details This function first estimates the initial disparity, defined
#' as the average difference in an outcome between groups within the same levels
#' of outcome-allowable covariates. Formally,
#' \deqn{\tau_{c} \equiv E[Y \mid R = 1, c] - E[Y \mid R = 0, c], \quad \text{for } c \in \mathcal{C}}
#' where \eqn{R = 1} is the comparison group and \eqn{R = 0} is the reference group.
#' The term \eqn{c \in \mathcal{C}} represents baseline covariates.
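#'
#' As a point of reference, the initial disparity corresponds to the group coefficient
#' in a linear regression of the outcome on the group indicator and the baseline
#' covariates, which is how the function estimates it internally. A minimal sketch
#' using the bundled \code{idata}:
#' \preformatted{
#' data(idata)
#' fit_tau <- lm(Y ~ R + C, data = idata)  # outcome on group and baseline covariate
#' coef(fit_tau)[2]                         # estimated initial disparity
#' }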
#'
#' For individualized conditional effects, disparity remaining is defined as
#' \deqn{
#' \zeta_{c}^{\mathrm{ICDE}}(d^{opt}) \equiv E[Y(d^{opt}) \mid R = 1, c] - E[Y(d^{opt}) \mid R = 0, c], \quad \text{for } c \in \mathcal{C}
#' }
#' where \eqn{d^{opt}} is the optimal value of the risk factor \eqn{M} under the OTR. This
#' definition of disparity remaining captures the difference in the outcome between the
#' comparison and reference groups after setting each individual's risk factor \eqn{M} to
#' the optimal value recommended by the OTR.
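#'
#' Internally, \eqn{\zeta_{c}^{\mathrm{ICDE}}} is estimated as the group coefficient of a
#' weighted outcome regression restricted to individuals whose observed risk factor agrees
#' with their OTR recommendation. A schematic of that internal step (\code{Ind} is the
#' agreement indicator and \code{w} the inverse-probability-of-risk-factor weight, both
#' constructed inside the function):
#' \preformatted{
#' fit <- lm(Y ~ C + R, data = DATA1, weights = Ind * w)
#' coef(fit)[3]  # group coefficient = disparity remaining under the OTR
#' }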
#'
#' For individualized interventional effects, disparity reduction and disparity
#' remaining due to equalizing compliance rates across groups can be expressed as
#' follows:
#' \deqn{
#' \delta_{c}^{\mathrm{IIE}}(1) \equiv E[Y \mid R = 1, c] - E[Y(K) \mid R = 1, c]
#' }
#' \deqn{
#' \zeta_{c}^{\mathrm{IIE}}(0) \equiv E[Y(K) \mid R = 1, c] - E[Y \mid R = 0, c]
#' }
#' where \eqn{Y(K)} represents a potential outcome under the value of \eqn{M} that is
#' determined by a random draw from the compliance distribution of the reference
#' group among individuals with the same levels of target-factor-allowable
#' covariates. Disparity reduction (\eqn{\delta_{c}^{\mathrm{IIE}}(1)}) represents
#' the amount by which the disparity for the comparison group is reduced
#' after intervening to set its compliance rate equal to that of the reference
#' group within the same target-factor-allowable covariate levels. Disparity
#' remaining (\eqn{\zeta_{c}^{\mathrm{IIE}}(0)}) quantifies the outcome difference
#' that persists between a comparison and reference group even after the intervention.
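#'
#' Internally, the IIE components are obtained from a weighted outcome regression with a
#' group-by-compliance interaction, scaled by the group difference in compliance rates.
#' A schematic of those internal steps (\code{Ind}, \code{w}, and \code{DATA1} are
#' constructed inside the function; \code{W_Y1} and \code{W_Y0} are the intercepts of
#' group-specific regressions of the outcome on the baseline covariates):
#' \preformatted{
#' fit.lm  <- lm(Y ~ C + R + Ind + R:Ind, data = DATA1, weights = w)
#' fit.Ind <- glm(Ind ~ R, data = DATA1, family = binomial)
#' alpha1  <- plogis(sum(coef(fit.Ind)[1:2])) - plogis(coef(fit.Ind)[1])  # compliance gap
#' delta_IIE <- alpha1 * sum(coef(fit.lm)[grepl("Ind", names(coef(fit.lm)))])
#' zeta_IIE  <- (W_Y1 - W_Y0) - delta_IIE
#' }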
#'
#' @param outcome a character string indicating the name of the outcome.
#' @param group a character string indicating the name of the social group indicator
#' such as race or gender.
#' @param group.ref a character string indicating the reference level of the social group
#' indicator.
#' @param risk.factor a character string indicating the name of the risk factor.
#' @param intermediates a vector containing the names of the intermediate confounders.
#' @param moderators a vector containing the names of the variables that moderate the
#' effect of the risk factor on the outcome.
#' @param covariates a vector containing the names of the baseline covariate(s).
#' @param data The data set for analysis (data.frame).
#' @param B number of bootstrap replications for the causal decomposition analysis.
#' @param cluster a character string indicating the name of the cluster variable in
#' \code{data}. If provided, the cluster bootstrap is used. Default is \code{NULL}.
#'
#' @return an object of class \code{ind.decomp} containing the point estimates for:
#' \enumerate{
#' \item the initial disparity and the percentage of individuals for whom treatment is
#' recommended under the OTR,
#' \item the disparity remaining and percent reduction based on individualized conditional
#' effects,
#' \item the disparity remaining, disparity reduction, and percent reduction based on
#' individualized interventional effects.
#' }
#' It also returns the nonparametric bootstrap standard errors used to construct
#' confidence intervals for each estimate.
#'
#' @author
#' Soojin Park, University of California, Riverside, \email{soojinp@@ucr.edu};
#' Suyeon Kang, University of Central Florida, \email{suyeon.kang@@ucf.edu};
#' Karen Xu, University of California, Riverside, \email{karenxu@@ucr.edu}.
#'
#' @references
#' Park, S., Kang, S., & Lee, C. (2025). Simulation-Based Sensitivity Analysis in
#' Optimal Treatment Regimes and Causal Decomposition with Individualized Interventions.
#' arXiv preprint arXiv:2506.19010.
#'
#' @seealso \code{\link{ind.sens}}
#'
#' @importFrom modelObj buildModelObj
#' @importFrom DynTxRegime optimalClass optTx
#' @importFrom rpart rpart rpart.control
#' @importFrom dplyr mutate %>%
#' @importFrom stats as.formula binomial coef glm glm.control lm model.matrix plogis sd
#' @export
#' @examples
#' data(idata)
#'
#' # NOTE: We recommend using at least B = 500 bootstrap replications.
#' results_unadj <- ind.decomp(outcome = "Y", group = "R", group.ref = "0", risk.factor = "M",
#' intermediates = c("X1", "X2", "X3"), moderators = c("X1", "X2"),
#' covariates = "C", data = idata, B = 50)
#' results_unadj
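#'
#' # A cluster bootstrap can be requested by naming a cluster-identifier column in the
#' # data; the column "cid" below is hypothetical and not part of 'idata'.
#' \dontrun{
#' results_cl <- ind.decomp(outcome = "Y", group = "R", group.ref = "0", risk.factor = "M",
#'                          intermediates = c("X1", "X2", "X3"), moderators = c("X1", "X2"),
#'                          covariates = "C", data = idata, B = 50, cluster = "cid")
#' }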
ind.decomp <- function(outcome, group, group.ref, risk.factor, intermediates, moderators,
covariates, data, B, cluster = NULL){
# NOTE: Validate the group variable and the specified reference level
data[[group]] <- as.factor(data[[group]]) # NOTE: Ensure group is a factor
n_levels <- nlevels(data[[group]]) # NOTE: Check if the variable has exactly 2 levels
if (n_levels != 2) {
stop(sprintf("The social group indicator '%s' must be a factor with two levels", group))
}
# NOTE: Check if the specified ref_level exists
if (!(group.ref %in% levels(data[[group]]))) {
stop(sprintf("The specified reference level '%s' is not a level of '%s'", group.ref, group))
}
data[[risk.factor]] <- as.factor(data[[risk.factor]]) # NOTE: Ensure risk.factor is a factor
n_levels <- nlevels(data[[risk.factor]]) # NOTE: Check if the variable has exactly 2 levels
if (n_levels != 2) {
stop(sprintf("The risk factor '%s' must be a binary factor", risk.factor))
}
if (!is.numeric(data[[outcome]]) || is.factor(data[[outcome]]) || length(unique(data[[outcome]])) <= 10) {
# NOTE: The outcome is treated as continuous if it is numeric with more than 10 unique values.
warning(sprintf("Variable '%s' should be a continuous numeric variable", outcome))
}
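# est1() computes all point estimates for a given data set; it is called once on the
# original data and once for each bootstrap resample below.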
est1 <- function(data){
data_R0 <- subset(data, data[[group]] == group.ref)
data_R1 <- subset(data, data[[group]] != group.ref)
# Propensity and classification models used to estimate group-specific OTRs
moPropen1 <- buildModelObj(model = as.formula(paste("~", paste(c(covariates, intermediates), collapse = " + "))),
solver.method = "glm",
solver.args = list("family" = "binomial", control = glm.control(maxit = 50)),
# (the glm.control settings above could be exposed as a function argument in the future)
predict.method = 'predict.glm',
predict.args = list(type = "response"))
moClass1 <- buildModelObj(
model = as.formula(paste0(" ~ ", paste(c(moderators), collapse = " + "))),
solver.method = rpart,
solver.args = list(method = "class",
control = rpart.control(minsplit = 200, cp = 0.01)),
predict.args = list(type = "class")
)
fitFS1 <- optimalClass(moPropen = moPropen1,
moClass = moClass1,
data = data.frame(data_R1), response = data_R1[[outcome]],
txName = risk.factor, verbose = FALSE)
fitFS0 <- optimalClass(moPropen = moPropen1,
moClass = moClass1,
data = data.frame(data_R0), response = data_R0[[outcome]],
txName = risk.factor, verbose = FALSE)
# Assign each individual the optimal treatment recommended by their own group's OTR.
# (Subgroup-specific assignment avoids the misalignment that ifelse() recycling would cause.)
data$opt.M <- rep(NA_character_, nrow(data))
data$opt.M[data[[group]] == group.ref] <- as.character(optTx(fitFS0)$optimalTx)
data$opt.M[data[[group]] != group.ref] <- as.character(optTx(fitFS1)$optimalTx)
ta <- table(data$opt.M)
p.opt <- ta[2]/(ta[2] + ta[1]) * 100 # percent recommended for treatment
# rpart.plot(classif(object=fitFS0))
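# Ind indicates whether the observed risk factor agrees with the OTR recommendation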
DATA1 <- data
DATA1 <- DATA1 %>%
mutate(Ind = (DATA1[[risk.factor]] == DATA1$opt.M))
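# Inverse-probability-of-risk-factor weights given group, baseline covariates, and intermediates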
fit.m <- glm(as.formula(paste0(risk.factor, " ~ ", paste(c(group,covariates,intermediates), collapse = " + "))),
family = binomial(logit), data = DATA1)
coef.m <- c(fit.m$coef)
pre.m <- 1 / (1 + exp(- (cbind(model.matrix(fit.m)) %*% coef.m)))
p.med <- ifelse(DATA1[[risk.factor]] == 0, 1 - pre.m, pre.m)
DATA1$w <- 1 / p.med
# zeta_ICDE: disparity remaining under the OTR (weighted regression among OTR-compliant individuals)
fit <- lm(paste0(outcome, " ~ ", paste(c(covariates,group), collapse = " + ")), data=DATA1, weights = Ind * w)
estimated_zeta_ICDE <- coef(fit)[length(covariates)+2]
# IIE components: disparity reduction and disparity remaining under equalized compliance
fit.lm1 <- lm(paste0(outcome, " ~ ", paste(c(covariates), collapse = " + ")), data=data_R1)
W_Y1 <- coef(fit.lm1)[1]
fit.lm0 <- lm(paste0(outcome, " ~ ", paste(c(covariates), collapse = " + ")), data=data_R0)
W_Y0 <- coef(fit.lm0)[1]
fit.lm <- lm(paste0(outcome, " ~ ", paste(c(covariates,group,"Ind", paste0(group, ":Ind")), collapse = " + ")), data = DATA1, weights = w)
fit.Ind <- glm(paste0("Ind", " ~ ", paste(c(group), collapse = " + ")), data = DATA1, family = binomial(logit))
alpha1 <- plogis(sum(coef(fit.Ind)[1:2])) - plogis(coef(fit.Ind)[1]) # group difference in OTR-compliance rates
coef_names <- names(coef(fit.lm))
ind_coef <- coef_names[grepl(paste0("^", "Ind"), coef_names)]
interaction_coef <- coef_names[grepl(paste0(":", "Ind"), coef_names)]
reg_delta_IIE <- alpha1 * sum(coef(fit.lm)[c(ind_coef, interaction_coef)])
reg_zeta_IIE <- (W_Y1 - W_Y0) - reg_delta_IIE
return(list(fitFS0 = fitFS0, fitFS1 = fitFS1, p.opt = p.opt,
estimated_zeta_ICDE = estimated_zeta_ICDE,
reg_zeta_IIE = reg_zeta_IIE,
reg_delta_IIE = reg_delta_IIE))
}
est.ori <- est1(data)
reg_delta_IIE <- est.ori$reg_delta_IIE
reg_zeta_IIE <- est.ori$reg_zeta_IIE
estimated_zeta_ICDE <- est.ori$estimated_zeta_ICDE
p.opt <- est.ori$p.opt
fitFS1 <- est.ori$fitFS1
fitFS0 <- est.ori$fitFS0
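# Nonparametric (or cluster) bootstrap for the standard errors of the ICDE and IIE estimates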
regzeta_IIE.boot <- regdelta_IIE.boot <- zeta_ICDE.boot <- rep(NA, B)
for(i in 1:B){
if (is.null(cluster)) {
data.boot <- data[sample(nrow(data), size = nrow(data), replace = TRUE), ]
} else {
clusters <- unique(data[, cluster])
units <- sample(clusters, size = length(clusters), replace = TRUE)
df.bs <- sapply(units, function(x) which(data[, cluster] == x))
sb <- unlist(df.bs)
data.boot <- data[sb, ]
}
est.boot <- est1(data.boot)
zeta_ICDE.boot[i] <- est.boot$estimated_zeta_ICDE
regzeta_IIE.boot[i] <- est.boot$reg_zeta_IIE
regdelta_IIE.boot[i] <- est.boot$reg_delta_IIE
}
SE_zeta_ICDE <- sd(zeta_ICDE.boot)
SE_regdelta_IIE <- sd(regdelta_IIE.boot)
SE_regzeta_IIE <- sd(regzeta_IIE.boot)
# Initial disparity
results_ini <- lm(as.formula(paste0(outcome, " ~ ", paste(c(group,covariates), collapse = " + "))), data = data)
estimated_tau <- coef(results_ini)[2] # group coefficient
SE_tau <- summary(results_ini)$coefficients[2, 2] # group coefficient
results <- list(# result table
p.opt = p.opt, fitFS1 = fitFS1, fitFS0 = fitFS0,
estimated_tau = estimated_tau, SE_tau = SE_tau, # initial disparity
estimated_zeta_ICDE = as.numeric(estimated_zeta_ICDE), SE_zeta_ICDE = SE_zeta_ICDE, # disparity remaining_ICDE
reg_delta_IIE = as.numeric(reg_delta_IIE), SE_regdelta_IIE = SE_regdelta_IIE, # disparity reduction_IIE
reg_zeta_IIE = as.numeric(reg_zeta_IIE), SE_regzeta_IIE = SE_regzeta_IIE # disparity remaining_IIE
)
class(results) <- "ind.decomp"
return(results)
}