R/dcee_helper.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

#' Stage-1 (nuisance) + Stage-2 (core) driver for DCEE
#'
#' Orchestrates the two-stage distal causal excursion effect (DCEE) estimation:
#' (i) fits nuisance outcome regressions \eqn{\mu_a(H_t)} with a chosen learner
#' (parametric or ML), optionally with cross-fitting; (ii) calls the core
#' estimating-equations routine to obtain \eqn{\beta}, its sandwich variance,
#' and confidence intervals.
#'
#' @param dta A data.frame in long format; one row per decision point.
#' @param id_var Character scalar; column name for subject identifier.
#' @param moderator_formula RHS-only formula for excursion-effect moderators
#'   (e.g., `~ 1`, `~ Z`, `~ Z1 + Z2`). If `~ 1`, the marginal effect is estimated.
#' @param trt_var Character scalar; column name for treatment \code{A_t} (must be 0/1).
#' @param outcome_var Character scalar; column name for (distal) continuous outcome \code{Y}.
#' @param prob_A_var Character scalar; column name for randomization probability
#'   \eqn{P(A_t=1 \mid H_t)}; values must lie strictly in \code{(0, 1)}.
#' @param avail_var Character scalar; column name for availability indicator (0/1).
#' @param control_reg_method Character scalar; nuisance learner to use for Stage 1.
#'   One of \code{"gam"}, \code{"lm"}, \code{"rf"}, \code{"ranger"},
#'   \code{"sl"}, \code{"sl.user-specified-library"}, \code{"set_to_zero"}.
#'   When \code{"gam"}, \code{control_formula} may include smooth terms like \code{s(x)}.
#'   When \code{"sl.user-specified-library"}, supply \code{sl.library} via \code{...}.
#' @param control_formula RHS-only formula specifying covariates used to learn
#'   \eqn{\mu_a(H_t)}. For SuperLearner methods, variables are extracted from this
#'   formula to form the design matrix \code{X}. Use \code{~ 1} only with
#'   \code{control_reg_method = "set_to_zero"}.
#' @param cross_fit Logical; if \code{TRUE}, perform K-fold cross-fitting by subject id.
#' @param cf_fold Integer; number of folds for cross-fitting (default often 10).
#' @param weighting_function_var Optional character scalar; column name giving
#'   decision-point weights \eqn{\omega_t}. If \code{NULL}, defaults to per-timepoint
#'   weight \code{1/T} (with constant \code{T} per subject).
#' @param ... Additional arguments forwarded to the selected learner
#'   (e.g., \code{num.trees}, \code{mtry} for forests; \code{sl.library} for
#'   SuperLearner; \code{family}, \code{knots} for \pkg{mgcv}).
#'
#' @details
#' Stage 1 fits two outcome regressions on the subsets \code{A=0} and \code{A=1}
#' and attaches predicted values \code{mu_hat_0}, \code{mu_hat_1} to \code{dta}.
#' Stage 2 uses these nuisance fits to solve estimating equations for
#' \eqn{\beta} (distal causal excursion effects) and computes a sandwich variance
#' with small-sample \eqn{t}-based confidence intervals.
#'
#' @return A list with at least the following components (used downstream by
#'   \code{summary.dcee}):
#' \itemize{
#'   \item \code{beta_hat} — named numeric vector of \eqn{\beta} estimates
#'   \item \code{beta_varcov} — variance–covariance matrix of \eqn{\beta}
#'   \item \code{beta_se} — standard errors
#'   \item \code{conf_int}, \code{conf_int_tquantile} — Wald and \eqn{t}-based CIs
#'   \item \code{regfit_a0}, \code{regfit_a1} — fitted Stage 1 models (may be \code{NULL} when cross-fitting)
#' }
#'
#' @seealso \code{\link{dcee}} for the user-facing wrapper,
#'   \code{\link{dcee_helper_stage1_fit_nuisance}} and \code{\link{dcee_helper_stage2_estimate_dcee}} for internals.
#'
#' @noRd

dcee_helper_2stage_estimation <- function(
    dta,
    id_var,
    moderator_formula,
    trt_var,
    outcome_var,
    prob_A_var,
    avail_var,
    control_reg_method,
    control_formula,
    cross_fit,
    cf_fold,
    weighting_function_var,
    ...) {
    dta <- as.data.frame(dta)

    # Stage 1: fit nuisance outcome regressions and attach mu_hat_0 / mu_hat_1

    outcome_reg_return <- dcee_helper_stage1_fit_nuisance(
        dta = dta,
        id_var = id_var,
        trt_var = trt_var,
        outcome_var = outcome_var,
        control_reg_method = control_reg_method,
        control_formula = control_formula,
        cross_fit = cross_fit,
        cf_fold = cf_fold,
        ...
    )

    dta <- outcome_reg_return$dta

    # Extract moderator variable names from RHS-only formula (~ 1 -> NULL)
    moderator_var <- all.vars(moderator_formula)
    if (length(moderator_var) == 0L) moderator_var <- NULL

    # Stage 2: estimating equations for beta
    fit <- dcee_helper_stage2_estimate_dcee(
        dta = dta,
        id_var = id_var,
        moderator_var = moderator_var,
        trt_var = trt_var,
        outcome_var = outcome_var,
        mu_hat_1_var = "mu_hat_1",
        mu_hat_0_var = "mu_hat_0",
        prob_A_var = prob_A_var,
        avail_var = avail_var,
        weighting_function_var = weighting_function_var
    )

    # Return with references to first-stage fits (if any)
    c(fit, list(
        regfit_a0 = outcome_reg_return$regfit_a0,
        regfit_a1 = outcome_reg_return$regfit_a1
    ))
}




#' Stage 1: fit nuisance outcome regressions for DCEE
#'
#' Fits the nuisance outcome regressions \eqn{\mu_a(H_t) = E[Y_t \mid H_t, A_t=a]}
#' for \eqn{a \in \{0,1\}} using a chosen learner (parametric or ML), optionally
#' with cross-fitting by subject id. The fitted functions are then used to
#' generate per–decision point predictions \code{mu_hat_0} and \code{mu_hat_1}
#' that are appended to \code{dta} for use in Stage 2.
#'
#' @param dta A data.frame in long format; one row per decision point.
#' @param id_var Character scalar; column name for subject identifier.
#' @param trt_var Character scalar; column name for treatment \code{A_t} (must be 0/1).
#' @param outcome_var Character scalar; column name for the (distal) continuous outcome \code{Y}.
#' @param control_reg_method Character scalar; learner used to model \eqn{\mu_a(H_t)}.
#'   One of \code{"gam"}, \code{"lm"}, \code{"rf"}, \code{"ranger"},
#'   \code{"sl"}, \code{"sl.user-specified-library"}, \code{"set_to_zero"}.
#'   When \code{"gam"}, \code{control_formula} may include smooth terms such as \code{s(x)}.
#'   When \code{"sl.user-specified-library"}, provide \code{sl.library} via \code{...}.
#'   When \code{"set_to_zero"}, both \code{mu_hat_0} and \code{mu_hat_1} are set to 0 and no models are fitted.
#' @param control_formula RHS-only formula specifying covariates used to learn
#'   \eqn{\mu_a(H_t)}. For SuperLearner methods, variables in this formula are
#'   used to build the design matrix \code{X}. Use \code{~ 1} only with
#'   \code{control_reg_method = "set_to_zero"}.
#' @param cross_fit Logical; if \code{TRUE}, perform K-fold cross-fitting by subject id:
#'   at each fold, fit the nuisance models on the training subjects and predict
#'   \code{mu_hat_0}, \code{mu_hat_1} on the held-out subjects.
#' @param cf_fold Integer; number of folds for cross-fitting (default 10).
#' @param ... Additional arguments forwarded to the selected learner
#'   (e.g., \code{num.trees}, \code{mtry} for forests; \code{sl.library} for
#'   SuperLearner; \code{family}, \code{knots} for \pkg{mgcv}).
#'
#' @details
#' The function fits separate models on the subsets \code{A=0} and \code{A=1}.
#' With cross-fitting enabled, subject ids are partitioned into \code{cf_fold}
#' folds; predictions are obtained on held-out subjects to reduce overfitting bias.
#' For SuperLearner-based methods, the package \pkg{SuperLearner} must be available.
#'
#' @return A list with components:
#' \itemize{
#'   \item \code{dta}: the input data with two added numeric columns
#'     \code{mu_hat_0} and \code{mu_hat_1} (predicted \eqn{\mu_0(H_t)} and \eqn{\mu_1(H_t)}).
#'   \item \code{regfit_a0}, \code{regfit_a1}: fitted learner objects for \code{A=0} and \code{A=1}
#'     (may be \code{NULL} when \code{cross_fit = TRUE} if per-fold fits are not retained,
#'     or when \code{control_reg_method = "set_to_zero"}).
#' }
#'
#' @seealso \code{\link{dcee_helper_stage2_estimate_dcee}} for Stage 2, and
#'   \code{\link{dcee_helper_2stage_estimation}} for the two-stage driver.
#'
#' @noRd

dcee_helper_stage1_fit_nuisance <- function(
    dta,
    id_var,
    trt_var,
    outcome_var,
    control_reg_method,
    control_formula,
    cross_fit,
    cf_fold,
    ... # forwarded to learners (e.g., num.trees, mtry, sl.library)
    ) {
    # Build outcome formula once; allow s(x) for GAM
    rhs <- as.character(control_formula)[2L]
    control_formula_with_outcome <- stats::as.formula(paste(outcome_var, "~", rhs))

    # Variables-only set for learners that use design matrices (e.g., SuperLearner)
    control_var <- all.vars(control_formula)

    # Ensure s() not used with non-GAM formula learners
    if (control_reg_method %in% c("lm", "rf", "ranger") && grepl("s\\s*\\(", rhs)) {
        stop("control_formula contains s(...), which is only supported when control_reg_method = 'gam'.")
    }

    # Handle SuperLearner library selection from ... when requested
    dots <- list(...)
    if (identical(control_reg_method, "sl")) {
        sl.library <- c("SL.mean", "SL.glm", "SL.earth")
    } else if (identical(control_reg_method, "sl.user-specified-library")) {
        sl.library <- dots$sl.library
        if (is.null(sl.library)) {
            stop("Provide `sl.library` via ... when control_reg_method = 'sl.user-specified-library'.")
        }
    } else {
        sl.library <- NULL
    }

    # set-to-zero shortcut
    if (identical(control_reg_method, "set_to_zero")) {
        dta$mu_hat_0 <- 0
        dta$mu_hat_1 <- 0
        regfit_a0 <- regfit_a1 <- NULL
        return(list(dta = dta, regfit_a0 = regfit_a0, regfit_a1 = regfit_a1))
    }

    # Split by treatment for training
    dta_a0 <- dta[dta[[trt_var]] == 0, , drop = FALSE]
    dta_a1 <- dta[dta[[trt_var]] == 1, , drop = FALSE]

    # Cross-fitting by unique id
    if (isTRUE(cross_fit)) {
        ids <- unique(dta[[id_var]])
        K <- min(cf_fold, length(ids))
        fold_id <- sample(rep_len(seq_len(K), length.out = length(ids)))

        dta_out <- dta[0, , drop = FALSE] # empty like dta

        for (k in seq_len(K)) {
            id_holdout <- ids[fold_id == k]
            id_train <- ids[fold_id != k]

            dta_holdout <- dta[dta[[id_var]] %in% id_holdout, , drop = FALSE]
            dta_train <- dta[dta[[id_var]] %in% id_train, , drop = FALSE]

            dta_train_a0 <- dta_train[dta_train[[trt_var]] == 0, , drop = FALSE]
            dta_train_a1 <- dta_train[dta_train[[trt_var]] == 1, , drop = FALSE]

            # Fit models on train splits
            if (control_reg_method == "gam") {
                regfit_a0 <- mgcv::gam(control_formula_with_outcome, data = dta_train_a0, ...)
                regfit_a1 <- mgcv::gam(control_formula_with_outcome, data = dta_train_a1, ...)
            } else if (control_reg_method == "lm") {
                regfit_a0 <- stats::lm(control_formula_with_outcome, data = dta_train_a0, ...)
                regfit_a1 <- stats::lm(control_formula_with_outcome, data = dta_train_a1, ...)
            } else if (control_reg_method == "rf") {
                regfit_a0 <- randomForest::randomForest(control_formula_with_outcome, data = dta_train_a0, ...)
                regfit_a1 <- randomForest::randomForest(control_formula_with_outcome, data = dta_train_a1, ...)
            } else if (control_reg_method == "ranger") {
                regfit_a0 <- ranger::ranger(formula = control_formula_with_outcome, data = dta_train_a0, ...)
                regfit_a1 <- ranger::ranger(formula = control_formula_with_outcome, data = dta_train_a1, ...)
            } else if (control_reg_method %in% c("sl", "sl.user-specified-library")) {
                if (!requireNamespace("SuperLearner", quietly = TRUE)) {
                    stop("SuperLearner package is required for control_reg_method = 'sl'/'sl.user-specified-library'.")
                }
                Y0 <- dta_train_a0[[outcome_var]]
                X0 <- dta_train_a0[, control_var, drop = FALSE]
                Y1 <- dta_train_a1[[outcome_var]]
                X1 <- dta_train_a1[, control_var, drop = FALSE]
                regfit_a0 <- SuperLearner::SuperLearner(
                    Y = Y0, X = X0, family = stats::gaussian(),
                    verbose = FALSE, SL.library = sl.library, ...
                )
                regfit_a1 <- SuperLearner::SuperLearner(
                    Y = Y1, X = X1, family = stats::gaussian(),
                    verbose = FALSE, SL.library = sl.library, ...
                )
            }

            # Predict for holdout ids
            if (control_reg_method %in% c("sl", "sl.user-specified-library")) {
                newdf <- dta_holdout[, control_var, drop = FALSE]
                pred0 <- stats::predict(regfit_a0, newdata = newdf)$pred
                pred1 <- stats::predict(regfit_a1, newdata = newdf)$pred
            } else if (control_reg_method == "ranger") {
                pred0 <- predict(regfit_a0, data = dta_holdout)$predictions
                pred1 <- predict(regfit_a1, data = dta_holdout)$predictions
            } else if (control_reg_method %in% c("gam", "lm", "rf")) {
                pred0 <- stats::predict(regfit_a0, newdata = dta_holdout, type = "response")
                pred1 <- stats::predict(regfit_a1, newdata = dta_holdout, type = "response")
            }

            dta_holdout$mu_hat_0 <- as.numeric(pred0)
            dta_holdout$mu_hat_1 <- as.numeric(pred1)
            dta_out <- rbind(dta_out, dta_holdout)
        }

        # reorder to original row order just in case
        dta_out <- dta_out[match(seq_len(nrow(dta)), rownames(dta_out)), , drop = FALSE]
        # regfit_a0 <- regfit_a1 <- NULL # comment out if want to return the last-fold fit
        dta <- dta_out
    } else {
        # Fit on full data (no cross-fitting)
        if (control_reg_method == "gam") {
            regfit_a0 <- mgcv::gam(control_formula_with_outcome, data = dta_a0, ...)
            regfit_a1 <- mgcv::gam(control_formula_with_outcome, data = dta_a1, ...)
        } else if (control_reg_method == "lm") {
            regfit_a0 <- stats::lm(control_formula_with_outcome, data = dta_a0, ...)
            regfit_a1 <- stats::lm(control_formula_with_outcome, data = dta_a1, ...)
        } else if (control_reg_method == "rf") {
            regfit_a0 <- randomForest::randomForest(control_formula_with_outcome, data = dta_a0, ...)
            regfit_a1 <- randomForest::randomForest(control_formula_with_outcome, data = dta_a1, ...)
        } else if (control_reg_method == "ranger") {
            regfit_a0 <- ranger::ranger(formula = control_formula_with_outcome, data = dta_a0, ...)
            regfit_a1 <- ranger::ranger(formula = control_formula_with_outcome, data = dta_a1, ...)
        } else if (control_reg_method %in% c("sl", "sl.user-specified-library")) {
            if (!requireNamespace("SuperLearner", quietly = TRUE)) {
                stop("SuperLearner package is required for control_reg_method = 'sl'/'sl.user-specified-library'.")
            }
            Y0 <- dta_a0[[outcome_var]]
            X0 <- dta_a0[, control_var, drop = FALSE]
            Y1 <- dta_a1[[outcome_var]]
            X1 <- dta_a1[, control_var, drop = FALSE]
            regfit_a0 <- SuperLearner::SuperLearner(
                Y = Y0, X = X0, family = stats::gaussian(),
                verbose = FALSE, SL.library = sl.library, ...
            )
            regfit_a1 <- SuperLearner::SuperLearner(
                Y = Y1, X = X1, family = stats::gaussian(),
                verbose = FALSE, SL.library = sl.library, ...
            )
        }

        # Predict on all rows
        if (control_reg_method %in% c("sl", "sl.user-specified-library")) {
            newdf <- dta[, control_var, drop = FALSE]
            pred0 <- stats::predict(regfit_a0, newdata = newdf)$pred
            pred1 <- stats::predict(regfit_a1, newdata = newdf)$pred
        } else if (control_reg_method == "ranger") {
            pred0 <- predict(regfit_a0, data = dta)$predictions
            pred1 <- predict(regfit_a1, data = dta)$predictions
        } else if (control_reg_method %in% c("gam", "lm", "rf")) {
            pred0 <- stats::predict(regfit_a0, newdata = dta, type = "response")
            pred1 <- stats::predict(regfit_a1, newdata = dta, type = "response")
        }

        dta$mu_hat_0 <- as.numeric(pred0)
        dta$mu_hat_1 <- as.numeric(pred1)
    }

    list(dta = dta, regfit_a0 = regfit_a0, regfit_a1 = regfit_a1)
}


#' Stage 2: estimate distal causal excursion effects (DCEE)
#'
#' Solves the estimating equations for the distal causal excursion effect
#' parameters \eqn{\beta} using the Stage-1 nuisance predictions
#' \eqn{\mu_0(H_t)} and \eqn{\mu_1(H_t)}. Inference is based on a
#' cluster-robust (by subject id) sandwich variance.
#'
#' @param dta A data.frame in long format; one row per decision point.
#'   Must contain at least the columns named by the arguments below.
#' @param id_var Character scalar; column name for subject identifier.
#' @param moderator_var Character vector of column names to be used as
#'   moderators in the excursion effect model. The Stage-2 design matrix is
#'   \code{cbind(Intercept = 1, dta[, moderator_var])}. Use \code{NULL} or
#'   \code{character(0)} for a marginal (intercept-only) effect.
#' @param trt_var Character scalar; column name for treatment \code{A_t}
#'   (must be coded \code{0/1}).
#' @param outcome_var Character scalar; column name for the (distal) outcome \code{Y}
#'   (constant within subject in typical DCEE setups).
#' @param mu_hat_1_var,mu_hat_0_var Character scalars; column names for the
#'   Stage-1 predictions \code{mu_hat_1 = \eqn{\mu_1(H_t)}}, \code{mu_hat_0 = \eqn{\mu_0(H_t)}}.
#' @param prob_A_var Character scalar; column name for the randomization probability
#'   \eqn{p_t = P(A_t=1 \mid H_t)}. Values must lie strictly in \code{(0,1)}.
#' @param avail_var Character scalar; column name for availability indicator (0/1).
#' @param weighting_function_var Optional character scalar; column name containing
#'   decision-point weights \eqn{\omega_t}. If \code{NULL}, a default per-timepoint
#'   weight of \code{1/T} is used, where \code{T} is the (assumed constant) number
#'   of decision points per subject.
#'
#' @details
#' Let \eqn{S_t = (1, M_t)^\top} denote the moderator vector (including the
#' intercept), and define \eqn{p_t(1)=p_t}, \eqn{p_t(0)=1-p_t}. The estimating
#' equation is built from the per-timepoint contribution
#' \deqn{
#'   U_t(\beta) \;=\; \omega_t \, S_t \Big[ \,(-1)^{1-A_t}\, \frac{\mathrm{avail}_t}{P(A_t \mid H_t)}\,
#'      \big\{ Y - p_t(0)\mu_1(H_t) - p_t(1)\mu_0(H_t) \big\}
#'      \;-\; S_t^\top \beta \Big],
#' }
#' and \eqn{\hat\beta} solves \eqn{\sum_t U_t(\beta) = 0}. The sandwich variance
#' is computed by clustering at the subject level (summing contributions within
#' id, then taking a cross-product), yielding a standard “bread–meat–bread”
#' estimator. Small-sample \eqn{t}-based intervals can be formed with
#' \code{df = #subjects - #betas} (usually computed by the caller).
#'
#' @return A list with components (consumed by \code{summary.dcee} and the wrapper):
#' \itemize{
#'   \item \code{beta_hat} — named numeric vector of \eqn{\beta} estimates (length \code{1 + length(moderator_var)})
#'   \item \code{beta_varcov} — variance–covariance matrix of \eqn{\beta}
#'   \item \code{beta_se} — standard errors (square roots of diagonal of \code{beta_varcov})
#'   \item \code{conf_int} — large-sample Wald CIs (normal critical value)
#'   \item \code{conf_int_tquantile} — \eqn{t}-based CIs using degrees of freedom supplied by the caller
#' }
#'
#' @seealso \code{\link{dcee_helper_stage1_fit_nuisance}} for Stage 1 nuisance fitting,
#'   and \code{\link{dcee_helper_2stage_estimation}} for the two-stage driver.
#'
#' @noRd


dcee_helper_stage2_estimate_dcee <- function(dta,
                                             id_var,
                                             moderator_var,
                                             trt_var,
                                             outcome_var,
                                             mu_hat_1_var,
                                             mu_hat_0_var,
                                             prob_A_var,
                                             avail_var,
                                             weighting_function_var) {
    total_person_decisionpoint <- nrow(dta)
    sample_size <- length(unique(dta[[id_var]]))
    total_T <- total_person_decisionpoint / sample_size # assuming everyone has the same number of dp

    if (is.null(weighting_function_var)) {
        omega <- rep(1 / total_T, total_person_decisionpoint)
    } else {
        omega <- dta[[weighting_function_var]]
    }

    pt1 <- dta[, prob_A_var] # this is p_t(1|H_t)
    A <- dta[, trt_var]
    Sdm <- as.matrix(cbind(rep(1, nrow(dta)), dta[, moderator_var]))
    Y <- dta[, outcome_var]
    mu_hat_1 <- dta[, mu_hat_1_var]
    mu_hat_0 <- dta[, mu_hat_0_var]
    avail <- dta[, avail_var]

    pt0 <- 1 - pt1 # this is p_t(0|H_t)
    pA <- ifelse(A, pt1, pt0) # this is p_t(A_t|H_t)

    dim_beta <- ncol(Sdm)

    person_first_index <- c(find_change_location(dta[, id_var]), total_person_decisionpoint + 1)
    sample_size <- length(person_first_index) - 1

    #### 1. Calculate beta_hat using its analytic form ####
    # from Goodnotes "2025.01.19 - Asymptotic Variance (updated)"

    ## compute the front inverse term

    sum_SStrans <- matrix(0, nrow = dim_beta, ncol = dim_beta)
    for (it in 1:total_person_decisionpoint) {
        S_it <- matrix(Sdm[it, ], ncol = 1)
        sum_SStrans <- sum_SStrans + omega[it] * S_it %*% t(S_it)
    }
    avg_SStrans <- sum_SStrans / sample_size

    ## compute the second term (involving a residual)
    residual <- omega * avail * (-1)^(1 - A) / pA * (Y - pt0 * mu_hat_1 - pt1 * mu_hat_0)
    sum_residualS <- matrix(0, nrow = dim_beta, ncol = 1)
    for (it in 1:total_person_decisionpoint) {
        S_it <- matrix(Sdm[it, ], ncol = 1)
        r_it <- as.numeric(residual[it])
        sum_residualS <- sum_residualS + r_it * S_it
    }
    avg_residualS <- sum_residualS / sample_size

    ## compute beta_hat

    beta_hat <- solve(avg_SStrans) %*% avg_residualS # a single column matrix

    #### 2. Calculate standard error of beta_hat ####

    ## The derivative (bread) term is avg_SStrans

    bread <- avg_SStrans

    ## Calculate the cross-product (meat) term

    meat <- matrix(0, nrow = dim_beta, ncol = dim_beta)
    for (i in 1:sample_size) {
        rowid <- person_first_index[i]:(person_first_index[i + 1] - 1)
        sum_t_ee_it <- numeric(dim_beta)
        for (it in rowid) {
            S_it <- matrix(Sdm[it, ], ncol = 1)
            r_it <- as.numeric(residual[it])
            omega_it <- omega[it]

            ee_it <- r_it * S_it - omega_it * S_it %*% t(S_it) %*% beta_hat
            sum_t_ee_it <- sum_t_ee_it + ee_it
        }
        meat <- meat + sum_t_ee_it %*% t(sum_t_ee_it)
    }
    meat <- meat / sample_size

    ## Calculate the variance of beta_hat using the sandwich estimator
    bread_inv <- solve(bread)
    beta_varcov <- bread_inv %*% meat %*% t(bread_inv) / sample_size
    beta_se <- sqrt(diag(beta_varcov))

    Snames <- c("Intercept", moderator_var)
    beta_hat <- as.vector(beta_hat)
    names(beta_hat) <- names(beta_se) <- Snames
    colnames(beta_varcov) <- rownames(beta_varcov) <- Snames

    #### 3. calculate confidence interval

    conf_int <- cbind(beta_hat - 1.96 * beta_se, beta_hat + 1.96 * beta_se)
    c <- qt(1 - 0.05 / 2, df = sample_size - dim_beta)
    conf_int_tquantile <- cbind(
        beta_hat - c * beta_se,
        beta_hat + c * beta_se
    )
    colnames(conf_int) <- colnames(conf_int_tquantile) <- c("2.5 %", "97.5 %")

    return(list(
        beta_hat = beta_hat,
        beta_se = beta_se,
        conf_int = conf_int,
        conf_int_tquantile = conf_int_tquantile,
        beta_varcov = beta_varcov
    ))
}
Any scripts or data that you put into this service are public.
MRTAnalysis documentation built on Sept. 9, 2025, 5:41 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MRTAnalysis
Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

R/dcee_helper.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

Defines functions dcee_helper_stage2_estimate_dcee dcee_helper_stage1_fit_nuisance dcee_helper_2stage_estimation

Try the MRTAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MRTAnalysis Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

R/dcee_helper.R In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

Defines functions dcee_helper_stage2_estimate_dcee dcee_helper_stage1_fit_nuisance dcee_helper_2stage_estimation

Try the MRTAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MRTAnalysis
Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials

R/dcee_helper.R
In MRTAnalysis: Assessing Proximal, Distal, and Mediated Causal Excursion Effects for Micro-Randomized Trials