#' Causal Mediation Analysis with the Multistate Approach
#'
#' \code{cmest_multistate} is used to implement \emph{the multistate approach} by Valeri et al. (2023)
#' for causal mediation analysis.
#'
#' @param data a data frame (or object coercible by \link{as.data.frame} to a data frame)
#' containing the variables in the model.
#' @param outcome variable name of the outcome.
#' @param yevent variable name of the event for the outcome.
#' @param mediator variable name of the mediator.
#' @param mevent variable name of the event for the mediator
#' @param exposure variable name of the exposure.
#' @param EMint a logical value. \code{TRUE} indicates there is
#' exposure-mediator interaction in \code{yreg}.
#' @param basec a vector of variable names of the confounders.
#' @param astar the control value of the exposure.
#' @param a the treatment value of the exposure.
#' @param basecval (required when \code{estimation} is \code{paramfunc} and \code{EMint} is \code{TRUE})
#' a list of values at which each confounder is conditioned on, following the order in \code{basec}.
#' If \code{NULL}, the mean of each confounder is used.
#' @param ymreg type of multistate survival model to be used. Currently supporting coxph only.
#' @param multistate_seed The seed to be used when generating bootstrap datasets for multistate modeling.
#' @param s The time point(s) beyond which survival probability is interested in multistate modeling.
#' @param bh_method Method for estimating baseline hazards in multistate modeling. Currently supporting "breslow" only.
#' @param mevent Event indicator for the mediator in multistate modeling.
#' @param nboot (used when \code{inference} is \code{bootstrap}) the number of bootstraps applied.
#' Default is 200.
#'
#' @details
#' \strong{Assumptions of the multistate method}
#' \itemize{
#' \item{\emph{Consistency of potential outcomes:} }{For each i and each t, the survival in a world where we intervene, i.e., setting the time to treatment to a specific value t (via a fixed or stochastic intervention) is the same as the survival in the real world where we observe a time to treatment equal to t.}
#' \item{\emph{There is no unmeasured mediator-outcome confounding:} }{Given \code{exposure} and
#' \code{basec}, \code{mediator} is independent of \code{outcome}.}
#' \item{\emph{Non-informative censoring of event times:} }{The observed censoring time is conditionally independent of all potential event times.}
#' \item{\emph{Positivity:} }{Each exposure-covariate combination has a non-zero probability of occurring.}
#' }
#'
#' @references
#' Valeri L, Proust-Lima C, Fan W, Chen JT, Jacqmin-Gadda H. A multistate approach for the study of interventions on an intermediate time-to-event in health disparities research. Statistical Methods in Medical Research. 2023;32(8):1445-1460.
#'
#' @examples
#' \dontrun{
#' library(CMAverse)
#' multistate_out = cmest_multistate(data = sc_data,
#' s = s_vec,
#' multistate_seed = 1,
#' exposure = 'A', mediator = 'M', outcome = 'S',
#' yevent = "ind_S", mevent = "ind_M",
#' basec = c("C1", "C2"),
#' basecval = c("C1" = "1", "C2" = as.character(mean(sc_data$C2))),
#' astar="0", a="1",
#' nboot=1, EMint=F,
#' bh_method = "breslow")
#' multistate_out
#' }
#'
#' @return
#' The output is a list that consists of 4 elements:
#' \itemize{
#' \item{the model summary of the joint multistate Cox proportional hazards model fitted on the
#' original dataset}
#' \item{the point estimates of RD and SD for each of the
#' user-specified time points of interest on the original dataset}
#' \item{the summary of the bootstrapped RD, SD, and TE estimates for each of the
#' user-specified time point of interest, including the 2.5, 50, and
#' 97.5th percentiles}
#' \item{the estimated RD, SD, TD for each of the
#' user-specified time point of interest for each bootstrap dataset}}
#'
#' @importFrom mstate transMat msprep expand.covs msfit
#' @importFrom stats as.formula getCall median
#' @importFrom survival coxph strata
#' @importFrom dplyr arrange filter pull group_by summarize %>% row_number
#' @importFrom utils txtProgressBar
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom doSNOW registerDoSNOW
#' @importFrom foreach foreach %dopar%
#'
#' @export
cmest_multistate <- function(data = NULL, outcome = NULL, yevent = NULL,
mediator = NULL, mevent = NULL,
exposure = NULL, EMint = NULL,
basec = NULL, basecval = NULL,
ymreg = "coxph",
astar = NULL, a = NULL,
nboot = 200,
bh_method = "breslow",
s = NULL,
multistate_seed = 123,
n_workers = NULL) {
# function call
cl <- match.call()
# output list
out <- list(call = cl)
###################################################################################################
#################################Argument Restrictions And Warnings################################
###################################################################################################
# data
if (is.null(data)) stop("Unspecified data")
data <- as.data.frame(data)
if (!all(c(outcome, yevent, mediator, mevent, exposure, basec) %in% colnames(data))) stop("Variables specified in outcome, yevent, mediator, mevent, exposure and basec not found in data")
#if (sum(is.na(data[, c(outcome, yevent, mediator, mevent, exposure, basec)])) > 0 && !multimp) stop("NAs in outcome, yevent, mediator, mevent, exposure or basec data; delete rows with NAs in these variables from the data or set multimp = TRUE")
if (sum(is.na(data[, c(outcome, yevent, mediator, mevent, exposure, basec)])) > 0) stop("NAs in outcome, yevent, mediator, mevent, exposure or basec data; delete rows with NAs in these variables from the data.")
out$data <- data
n <- nrow(data)
# nboot
if (!is.numeric(nboot)) stop("nboot should be numeric")
if (nboot <= 0) stop("nboot must be greater than 0")
#if (!boot.ci.type %in% c("per", "bca")) stop("Select boot.ci.type from 'per', 'bca'")
out$methods$nboot <- nboot
#out$methods$boot.ci.type <- boot.ci.type
# outcome
if (length(outcome) == 0) stop("Unspecified outcome")
if (length(outcome) > 1) stop("length(outcome) > 1")
if (length(yevent) == 0) stop("Unspecified outcome event")
out$variables$outcome <- outcome
out$variables$yevent <- yevent
# exposure
if (length(exposure) == 0) stop("Unspecified exposure")
if (length(exposure) > 1) stop("length(exposure) > 1")
out$variables$exposure <- exposure
# mediator
if (length(mediator) == 0) stop("Unspecified mediator")
if (length(mevent) == 0) stop("Unspecified mediator event")
out$variables$mediator <- mediator
out$variables$mevent <- mevent
# EMint
if (!is.logical(EMint)) stop("EMint must be TRUE or FALSE")
out$variables$EMint <- EMint
# s
if(is.null(s)) stop("Unspecified s")
if(!is.numeric(s)) stop("Specified s must be numeric")
# basec
if (!is.null(basec)) out$variables$basec <- basec
# regs
out$reg.input <- list(ymreg = ymreg)
# a, astar
if (is.null(a) | is.null(astar)) stop("Unspecified a or astar")
# basecval
if(length(basec) != 0){
if(!all(names(basecval) %in% basec) == T){
stop("basec and basecval variable names mismatch.")
}else if(length(basec) != length(names(basecval))){
stop("There must be the same number of variables in basec and basecval. Please double check and rerun.")
}
for (i in length(basec)){
curr_basec = basec[i] # current basec column name
if (!is.numeric(data[,curr_basec]) && !is.factor(data[,curr_basec])){
stop("Baseline covariates must be of type numeric or factor.")
}
}
}
# multimp
#out$multimp <- list(multimp = multimp)
#if (!is.logical(multimp)) stop("multimp must be TRUE or FALSE")
# args_mice
#if (multimp) {
# if (!is.null(args_mice) && !is.list(args_mice)) stop("args_mice must be a list")
# if (is.null(args_mice)) args_mice <- list()
# args_mice$print <- FALSE
# if (!is.null(args_mice$data)) warning("args_mice$data is overwritten by data")
# args_mice$data <- data
# out$multimp$args_mice <- args_mice
#}
# run multistate method
set.seed(multistate_seed)
data = data.frame(data)
s_grid = s
## set up transition matrix
trans = transMat(x=list(c(2, 3), c(3), c()), names=c(exposure, mediator, outcome))
## transition-dependent covariates
covs_df = c(exposure, mediator, basec)
# create necessary objects
## create a list of mstate data that corresponds to the bootstrap samples
boot_ind_df = make_boot_ind(data=data, nboot=nboot)
boot_ind_list = boot_ind_df$boot_ind
mstate_bootlist <- lapply(boot_ind_list, function(ind){
boot_dat = data[ind,]
mstate_boot_dat <- make_mstate_dat(dat=boot_dat, mediator, outcome, mevent, yevent, trans, covs_df)
return(mstate_boot_dat)
})
## create the formula object for joint mstate modeling
mstate_form = mstate_formula(data=data, exposure=exposure, mediator=mediator, basec=basec, EMint=EMint)
mstate_form <- as.formula(mstate_form)
## create fixed newdata dfs for msfit()
mstate_data_orig = make_mstate_dat(dat=data, mediator=mediator, outcome=outcome, mevent=mevent, yevent=yevent, trans=trans, covs_df=covs_df)
fixed_newd = fixed_newd(mstate_data=mstate_data_orig, trans=trans, a=a, astar=astar, exposure=exposure, mediator=mediator, basec=basec, basecval=basecval)
# fit the joint multistate model on the original data and save the summary table
mstate_fit_orig = coxph(mstate_form, data = mstate_data_orig, method = bh_method)
mstate_fit_orig_summ = summary(mstate_fit_orig)
# compute the point estimate of RD and SD and TD on original data
pt_est = s_point_est(i=0, mstate_bootlist, mstate_data_orig,
s_grid, newd_A0M0=fixed_newd[[1]], newd_A1M0=fixed_newd[[2]], mstate_form,
a, astar, exposure, mediator,
trans, bh_method)
# PARALLEL computing
## nboot grid
cat("\n")
cat("Started bootstrapping...")
i_grid = seq(1, nboot, 1)
## run in parallel
if (!is.null(n_workers)){
cl <- parallel::makeCluster(n_workers) # user-specified number of cores to use
doSNOW::registerDoSNOW(cl)
}else{
no_cores <- parallel::detectCores()
cl <- parallel::makeCluster(no_cores-1)
#registerDoParallel(cl)
doSNOW::registerDoSNOW(cl)
}
pb <- txtProgressBar(max = nboot, style = 3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
system.time({
i_grid = seq(1, nboot, 1)
results <- foreach::foreach(index = i_grid,
.options.snow = opts,
.verbose=F,
.combine = rbind,
#.export = c("mstate_formula", "make_mstate_dat", "s_point_est", "dynamic_newd", "fixed_newd", "make_boot_ind"),
.export = c("mstate_formula", "make_mstate_dat", "s_point_est", "dynamic_newd", "make_boot_ind"),
.packages = c("mstate", "tidyverse", "parallel")) %dopar% {
s_point_est(i=index, mstate_bootlist, mstate_data_orig,
s_grid, newd_A0M0=fixed_newd[[1]], newd_A1M0=fixed_newd[[2]], mstate_form,
a, astar, exposure, mediator,
trans, bh_method)
}
})
close(pb)
stopCluster(cl)
# clean up the data frame
results_summ = results %>%
group_by(s) %>%
summarize(RD_median = median(RD),
RD_lower = quantile(RD, 0.025),
RD_higher = quantile(RD, 0.975),
SD_median = median(SD),
SD_lower = quantile(SD, 0.025),
SD_higher = quantile(SD, 0.975),
TE_median = median(TE),
TE_lower = quantile(TE, 0.025),
TE_higher = quantile(TE, 0.975))
return(list(model_summary = mstate_fit_orig_summ,
pt_est = pt_est,
bootstrap_summary = results_summ,
raw_output = results))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.