Nothing
utils::globalVariables(c("..v_names"))
#' Compute the Shift Parameter Estimate and the Efficient Influence Function
#'
#' @details Estimate the value of the causal parameter alongside statistical
#' inference for the parameter estimate based on the efficient influence
#' function of the target parameter, which takes the following form:
#' %D(P)(o) = H(a,w)(y - \bar{Q}(a,w)) + \bar{Q}(d(a,w)) - \psi(P)
#'
#' @param Y A \code{numeric} vector of the observed outcomes.
#' @param Qn An object providing the value of the outcome evaluated after
#' imposing a shift in the treatment. This object is passed in after being
#' constructed by a call to the internal function \code{est_Q}.
#' @param Hn An object providing values of the auxiliary ("clever") covariate,
#' constructed from the treatment mechanism and required for targeted minimum
#' loss-based estimation. This object object should be passed in after being
#' constructed by a call to the internal function \code{est_Hn}.
#' @param estimator The type of estimator to be fit, either \code{"tmle"} for
#' targeted maximum likelihood estimation or \code{"onestep"} for a one-step
#' estimator.
#' @param C_samp Indicator for missingness due to exclusion from second-phase
#' sample. Used for compatibility with the IPCW-TML estimation routine.
#' @param ipc_weights A \code{numeric} vector that gives inverse probability of
#' censoring weights for each observation. These are generated by invoking the
#' routines for estimating the censoring mechanism.
#' @param fluc_mod_out An object giving values of the logistic tilting model
#' for targeted minimum loss estimation. This type of object should be the
#' output of the internal routines to perform this step of the TML estimation
#' procedure, as given by \code{\link{fit_fluctuation}}.
#'
#' @importFrom stats var
#'
#' @return A \code{list} containing the parameter estimate, estimated variance
#' based on the efficient influence function (EIF), the estimate of the EIF
#' incorporating inverse probability of censoring weights, and the estimate of
#' the EIF without the application of such weights.
eif <- function(Y,
Qn,
Hn,
estimator = c("tmle", "onestep"),
fluc_mod_out = NULL,
C_samp = rep(1, length(Y)),
ipc_weights = rep(1, length(Y))) {
# set TMLE as default estimator type
estimator <- match.arg(estimator)
# set Qn to use based on estimator type
if (estimator == "tmle") {
Qn_shift <- fluc_mod_out$Qn_shift_star
Qn_noshift <- fluc_mod_out$Qn_noshift_star
} else if (estimator == "onestep") {
Qn_shift <- Qn$upshift
Qn_noshift <- Qn$noshift
}
# normalize inverse probability of censoring weights
ipc_weights_norm <- ipc_weights / sum(ipc_weights)
# compute substitution estimator of the stochastic shift parameter
param_obs_est <- rep(0, length(C_samp))
param_obs_est[C_samp == 1] <- ipc_weights_norm * Qn_shift
psi <- sum(param_obs_est)
# compute the efficient influence function (EIF)
eif <- rep(0, length(C_samp))
eif[C_samp == 1] <-
ipc_weights * (Hn$noshift * (Y - Qn_noshift) + (Qn_shift - psi))
# this will be the outcome of the extra regression
eif_unweighted <- rep(0, length(C_samp))
eif_unweighted[C_samp == 1] <-
(Hn$noshift * (Y - Qn_noshift) + (Qn_shift - psi))
# add mean of EIF to parameter estimate if fitting one-step
# NOTE: the estimate of psi is updated _after_ evaluating the EIF
if (estimator == "onestep") {
psi <- psi + mean(eif)
}
# compute the variance based on the EIF and scale by number of observations
var_eif <- stats::var(eif) / length(Y)
# return the variance and the EIF value at each observation
out <- list(
psi = psi, var = var_eif, eif = eif,
eif_unweighted = eif_unweighted
)
return(out)
}
###############################################################################
#' Iterative IPCW Update Procedure of Augmented Efficient Influence Function
#'
#' @details An adaptation of the IPCW-TMLE for iteratively constructing an
#' efficient inverse probability of censoring weighted TML or one-step
#' estimator. The efficient influence function of the parameter and updating
#' the IPC weights in an iterative process, until a convergence criteria is
#' satisfied.
#'
#' @param data_internal A \code{data.table} containing of the observations
#' selected into the second-phase sample.
#' @param C_samp A \code{numeric} indicator for missingness due to exclusion
#' the from second-stage sample.
#' @param V A \code{data.table} giving the values across all observations of
#' all variables that play a role in the censoring mechanism.
#' @param ipc_mech A \code{numeric} vector of the censoring mechanism estimates
#' all of the observations, only for the two-phase sampling mechanism. Note
#' well that these values do NOT account for censoring from loss to follow-up.
#' @param ipc_weights A \code{numeric} vector of inverse probability of
#' censoring weights, including such weights for censoring due to loss to
#' follow-up. Without loss to follow-up, these are equivalent to \code{C_samp
#' / ipc_mech} in an initial run of this procedure.
#' @param Qn_estim A \code{data.table} corresponding to the outcome regression.
#' This is produced by invoking the internal function \code{est_Q}.
#' @param Hn_estim A \code{data.table} corresponding to values produced in the
#' computation of the auxiliary ("clever") covariate. This is produced easily
#' by invoking the internal function \code{est_Hn}.
#' @param estimator The type of estimator to be fit, either \code{"tmle"} for
#' targeted maximum likelihood estimation or \code{"onestep"} for a one-step
#' estimator.
#' @param fluctuation A \code{character} giving the type of regression to be
#' used in traversing the fluctuation submodel. The choices are "weighted" and
#' "standard".
#' @param flucmod_tol A \code{numeric} indicating the largest value to be
#' tolerated in the fluctuation model for the targeted minimum loss estimator.
#' @param eif_reg_type Whether a flexible nonparametric function ought to be
#' used in the dimension-reduced nuisance regression of the targeting step for
#' the censored data case. By default, the method used is a nonparametric
#' regression based on the Highly Adaptive Lasso (from \pkg{hal9001}). Set
#' this to \code{"glm"} to instead use a simple linear regression model. In
#' this step, the efficient influence function (EIF) is regressed against
#' covariates contributing to the censoring mechanism (i.e., EIF ~ V | C = 1).
#'
#' @importFrom stats var glm qlogis fitted predict as.formula coef
#' @importFrom data.table as.data.table set copy
#' @importFrom assertthat assert_that
#' @importFrom hal9001 fit_hal
#'
#' @return A \code{list} containing the estimated outcome mechanism, the fitted
#' fluctuation model for TML updates, the updated inverse probability of
#' censoring weights (IPCW), the updated estimate of the efficient influence
#' function, and the estimated IPCW component of the EIF.
ipcw_eif_update <- function(data_internal,
C_samp,
V,
ipc_mech,
ipc_weights,
Qn_estim,
Hn_estim,
estimator = c("tmle", "onestep"),
fluctuation = NULL,
flucmod_tol = 50,
eif_reg_type = c("hal", "glm")) {
# get names of columns in sampling mechanism
v_names <- names(V)
# perform submodel fluctuation if computing TMLE
if (estimator == "tmle" & !is.null(fluctuation)) {
# fit logistic regression for submodel fluctuation with updated weights
fitted_fluc_mod <- fit_fluctuation(
Y = data_internal$Y,
Qn_scaled = Qn_estim,
Hn = Hn_estim,
ipc_weights = ipc_weights,
method = fluctuation
)
} else if (estimator == "onestep" & is.null(fluctuation)) {
fitted_fluc_mod <- NULL
}
# compute EIF using updated weights and updated fluctuation (if TMLE)
# NOTE: for one-step, this adds the first half of the EIF as the correction
# _SO_ the second half (from the reduced regression) is still needed...
eif_eval <- eif(
Y = data_internal$Y,
Qn = Qn_estim,
Hn = Hn_estim,
estimator = estimator,
fluc_mod_out = fitted_fluc_mod,
C_samp = C_samp,
ipc_weights = ipc_weights
)
# NOTE: upon the first run of this procedure, the above two function calls
# have computed only the inefficient IPCW-TMLE, i.e., by fitting the
# initial fluctuation model and updating the EIF accordingly
# estimate the EIF nuisance regression using HAL
if (eif_reg_type == "hal") {
# fit HAL regression
eif_mod <- hal9001::fit_hal(
X = as.matrix(data_internal[, ..v_names]),
Y = as.numeric(eif_eval$eif_unweighted[C_samp == 1]),
max_degree = length(v_names),
family = "gaussian",
fit_control = list(
cv_select = TRUE,
type.measure = "mse"
),
return_lasso = FALSE,
yolo = FALSE
)
# conditional expectation E[D*|V]
eif_pred <- stats::predict(
eif_mod,
new_data = as.matrix(V)
)
} else if (eif_reg_type == "glm") {
eif_mod <- stats::glm(
stats::as.formula(paste("eif ~", paste(v_names, collapse = " + "))),
data = data_internal[, eif := eif_eval$eif_unweighted[C_samp == 1]]
)
# conditional expectation E[D*|V]
eif_pred <- unname(stats::predict(
eif_mod,
newdata = V
))
}
# IPCW-TMLE: fluctuation regression to update the IPC weights
if (estimator == "tmle") {
ipcw_fluc_reg_data <-
data.table::as.data.table(
list(
C_samp = C_samp,
logit_ipcw = stats::qlogis(bound_precision(ipc_mech)),
eif_reg_cov = (eif_pred / bound_precision(ipc_mech))
)
)
# fit fluctuation model for targeting updates
# NOTE: set `start` to zero to ensure updates are not too large
suppressWarnings(
ipcw_fluc <- stats::glm(
stats::as.formula("C_samp ~ -1 + offset(logit_ipcw) + eif_reg_cov"),
data = ipcw_fluc_reg_data,
family = "binomial",
start = 0
)
)
coefs_fluc <- stats::coef(ipcw_fluc)
# safety checks for ensuring sanity of the fluctuation model fit
if (!ipcw_fluc$converged || abs(max(coefs_fluc)) > flucmod_tol) {
# if convergence fails or tolerance is violated, fit without `start`
suppressWarnings(
ipcw_fluc <- stats::glm(
stats::as.formula("C_samp ~ -1 + offset(logit_ipcw) + eif_reg_cov"),
data = ipcw_fluc_reg_data,
family = "binomial"
)
)
# if the fluctuation model hasn't converged or is unstable, simply set
# the coefficients to disable updating, i.e., coef(eif_reg_cov) := 0
if (!ipcw_fluc$converged || abs(max(coefs_fluc)) > flucmod_tol) {
ipcw_fluc$coefficients <- 0
}
}
# fitted values following submodel fluctuation
ipc_pred <- unname(stats::fitted(ipcw_fluc))
} else {
# just use the initial estimates of censoring probability for one-step
ipc_pred <- ipc_mech
}
# this is the second half of the IPCW-EIF (solved by pi_n fluctuation):
# 0 = ((C_samp - pi_n) / pi_n) E[f(eif ~ V | C_samp = 1)]
ipcw_eif_component <- ((C_samp - ipc_pred) / ipc_pred) * eif_pred
# so, now we need weights to feed back into the previous steps
ipc_weights <- C_samp / ipc_pred
# as above, compute TMLE and EIF with NEW WEIGHTS and SUBMODEL FLUCTUATION
# NOTE: since this is meant to update the EIF components based on the TMLE
# update steps, it accomplishes _literally_ nothing for the one-step
if (estimator == "tmle") {
# NOTE: update fluctuation with new weights prior to re-computing EIF
fitted_fluc_mod <- fit_fluctuation(
Y = data_internal$Y,
Qn_scaled = Qn_estim,
Hn = Hn_estim,
ipc_weights = ipc_weights[C_samp == 1],
method = fluctuation
)
# now, update EIF after re-fitting fluctuation with updated weights
eif_eval <- eif(
Y = data_internal$Y,
Qn = Qn_estim,
Hn = Hn_estim,
estimator = estimator,
fluc_mod_out = fitted_fluc_mod,
C_samp = C_samp,
ipc_weights = ipc_weights[C_samp == 1]
)
}
# need to return output such that we can loop over this function
out <- list(
Qn_estim = Qn_estim,
fluc_mod_out = fitted_fluc_mod,
pi_mech_star = ipc_pred,
ipc_weights = ipc_weights,
eif_eval = eif_eval,
ipcw_eif_component = ipcw_eif_component
)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.