#-----------------------------------------------------------------------------
# Methods for fitting and predicting the IPTW (clever covariate) for covariates (A, W, E): P_{g^*}(A | W, E)/P_{g0}(A | W, E)
# 3 ancillary functions: fitGenericDensity, predict.hbars, fit.hbars
#-----------------------------------------------------------------------------
#------------------------------------
#' Define and fit the multivariate conditional density under the user-specified arbitrary intervention function.
#'
#' Defines and fits regression models for the conditional density \code{P(A=a|W=w)} where \code{a} is generated under the user-
#' specified arbitrary (can be static, dynamic or stochastic) intervention function \code{f_gstar}. Note that \code{A} can be
#' multivariate \code{(A[1], ..., A[j])} and each of the compoenents A[i] can be either binary, categorical or continuous.
#' See detailed description in \code{\link{RegressionClass}}.
#' @param data \code{data.frame} with named columns, containing \code{Wnodes}, \code{Anode} and \code{Ynode}.
#' @param Anodes Column names or indices in \code{data} of outcome variables; exposures can be either binary, categorical or continuous.
#' @param Wnodes Column names or indices in \code{data} of baseline covariates. Factors are not currently allowed.
#' @param gform Character vector of regression formula for estimating the conditional density of P(A | W)
#' @param f_gstar Either a function or a vector or a matrix/ data frame of counterfactual exposures. See details in function argument
#' \code{f_gstar1} in \code{\link{tmleCommunity}}.
#' @param h.gstar_GenericModel ...
#' @param lbound lower bounds on estimated cumulative probabilities for \code{P(A=a|W=w)}, default to 0.01
#' @param n_MCsims ...
#' @param obs.wts ...
#' @param rndseed ...
#' @param verbose ...
#' @return A named list with 3 items containing the estimation results for:
#' \itemize{
#' \item \code{h_gstar} - A vector of likelihood prediction for \code{P(A=a|W=w)} where a is generated under the
#' user-specified intervention.
#' \item \code{OData.gstar} - A \code{DatKeepClassclass} object,where outcomes are generated under intervention \code{f_gstar}.
#' \item \code{genericmodels.gstar} - A \code{GenericModel} class object that defines and models \code{P(A=a|W=w)}.
#' }
#' @seealso \code{\link{tmleCom_Options}}, \code{\link{DatKeepClass}}, \code{\link{RegressionClass}}, \code{\link{GenericModel}},
#' \code{\link{ContinModel}}, \code{\link{CategorModel}}, \code{\link{tmleCommunity}}
#' @example tests/examples/3_fitGenericDensity_examples.R
#' @export
fitGenericDensity <- function(data, Anodes, Wnodes, gform = NULL, f_gstar, h.gstar_GenericModel = NULL,
lbound = 0.01, n_MCsims = 1, obs.wts = NULL,
rndseed = NULL, verbose = TRUE) {
#----------------------------------------------------------------------------------
# INITIALIZE PARAMETERS
#----------------------------------------------------------------------------------
if (!is.null(rndseed)) set.seed(rndseed) # make stochastic intervention trackable
oldverboseopt <- getOption("tmleCommunity.verbose")
options(tmleCommunity.verbose = verbose)
gvars$verbose <- verbose
message("Running fitGenericDensity with the following settings from tmleCom_Options(): "); str(gvars$opts)
if (is.null(obs.wts)) obs.wts <- rep(1, nrow(data))
## Check if any unexpected inputs
nodes <- list(Anodes = Anodes, WEnodes = Wnodes)
for (i in unlist(nodes)) { CheckVarNameExists(data = data, varname = i) }
if (!CheckInputs(data, nodes, NULL, gform, NULL, "logistic", NULL, obs.wts, NULL, f_gstar, NULL)) stop()
#----------------------------------------------------------------------------------
# DEFINING (OPTIONAL) REGRESSION FORMS
#----------------------------------------------------------------------------------
h.gstar.sVars <- define_regform(as.formula(gform), Anodes.lst = nodes$Anodes, Wnodes.lst = nodes$WEnodes)
if (verbose) {
print("Input regression gform (P(A|W) under g.star): " %+% gform)
print("Derived regression gform (P(A|W) under g.star): "); str(h.gstar.sVars)
}
## Create data based on gform, in case of interaction or higher-order term.
if (!is.null(gform)) {
allcovRHS <- strsplit(deparse(as.formula(gform)[[3]]), " \\+ ")[[1]]
merged.form <- reformulate(allcovRHS, response = NULL) # Reformulate a formula including all legitimate character of the RHS
if (any(!allcovRHS %in% unique(c(unlist(nodes), names(data))))) {
ExtraDat <- as.data.frame(model.matrix(merged.form, data = data))
data <- cbind(data, ExtraDat[, setdiff(names(ExtraDat), c(names(data), "(Intercept)")), drop = FALSE])
Extradat <- NULL
}
} else {
merged.form <- NULL
}
nodes <- append(nodes, list(Crossnodes = setdiff(names(data), Reduce(c, nodes))))
## Create an R6 object that stores and manages the input data, later passed on to estimation algorithm(s)
OData.ObsP0 <- DatKeepClass$new(Odata = data, nodes = nodes, norm.c.sVars = FALSE)
OData.ObsP0$addObsWeights(obs.wts = obs.wts)
#----------------------------------------------------------------------------------
# Defining and estimating treatment mechanism P(A|W)
#----------------------------------------------------------------------------------
Ws.gstar_nms <- h.gstar.sVars$predvars
As_nms_gstar <- h.gstar.sVars$outvars
subsets_expr <- lapply(As_nms_gstar, function(var) {var}) # subsetting by !gvars$misval on A
As_classes <- OData.ObsP0$type.sVar[As_nms_gstar]
if (verbose) {
message("================================================================")
message("fitting h_gstar with covariates: ", "P(" %+% paste(As_nms_gstar, collapse = ",")
%+% " | " %+% paste(Ws.gstar_nms, collapse = ",") %+% ")")
message("================================================================")
}
OData.gstar <- DatKeepClass$new(Odata = data, nodes = nodes, norm.c.sVars = FALSE)
OData.gstar$addObsWeights(obs.wts = obs.wts)
OData.gstar$make.dat.sVar(p = n_MCsims, f.g_fun = f_gstar, regform = merged.form)
if (gvars$verbose) {
print("Generated new exposure covariates by sampling A from f_gstar (OData.gstar): ");
print(class(OData.gstar$dat.sVar)); print(dim(OData.gstar$dat.sVar)); print(head(OData.gstar$dat.sVar));
}
regclass.gstar <- RegressionClass$new(outvar.class = As_classes,
outvar = As_nms_gstar,
predvars = Ws.gstar_nms,
subset_vars = subsets_expr,
estimator = getopt("gestimator"))
# Define Intervals Under g_star to Be The Same as under g0:
genericmodels.gstar <- GenericModel$new(reg = regclass.gstar, DatKeepClass.g0 = OData.ObsP0)
if (!is.null(h.gstar_GenericModel)) {
# 1) deep copy the object with model fits to genericmodels.gstar
genericmodels.gstar <- h.gstar_GenericModel$clone(deep = TRUE)
} else {
genericmodels.gstar$fit(data = OData.gstar)
}
h_gstar <- genericmodels.gstar$predictAeqa(newdata = OData.ObsP0)
h_gstar[is.nan(h_gstar)] <- 0 # 0/0 detection
h_gstar <- bound(h_gstar, c(lbound, 1))
return(list(h_gstar = h_gstar, OData.gstar = OData.gstar, genericmodels.gstar = genericmodels.gstar))
}
# @title Predict h weights under g_0 and g_star using existing model.h.fit model fit
# @name pred.hbars
# @export
# fit models for m_gAi
predict.hbars <- function(newdatnet = NULL, model.h.fit) {
lbound <- model.h.fit$lbound
# use original fitted data for prediction
if (is.null(newdatnet)) {
stop("newdatnet argument must be not null; this feature is not implemented")
}
# PASS ENTIRE newdatnet which will get subset, rather than constructing cY_mtx...
h_gN <- model.h.fit$genericmodels.g0$predictAeqa(newdata = newdatnet)
h_gstar <- model.h.fit$genericmodels.gstar$predictAeqa(newdata = newdatnet)
h_gstar_h_gN <- h_gstar / h_gN
h_gstar_h_gN[is.nan(h_gstar_h_gN)] <- 0 # 0/0 detection
h_gstar_h_gN <- bound(h_gstar_h_gN, c(0,1/lbound))
return(h_gstar_h_gN)
}
# @title Defining and fitting the clever covariate h under g_0 and g_star, i.e. models P(A[j] | W, E, A[j-1])
# @name fit.hbars
# @export
# fit models for m_gAi
fit.hbars <- function(OData.ObsP0, est_params_list) {
data <- est_params_list$data
nodes <- est_params_list$nodes
obs.wts <- est_params_list$obs.wts
lbound <- est_params_list$lbound
merged.form <- est_params_list$merged.form
estnames <- est_params_list$estnames
n_MCsims <- est_params_list$n_MCsims
h.g0.sVars <- est_params_list$h.g0.sVars
h.gstar.sVars <- est_params_list$h.gstar.sVars
h.g0_GenericModel <- est_params_list$h.g0_GenericModel
h.gstar_GenericModel <- est_params_list$h.gstar_GenericModel
f.g0 <- est_params_list$f.g0
f.gstar <- est_params_list$f.gstar
savetime <- est_params_list$savetime.fit.hbars
if (!is.null(h.g0_GenericModel)) {
message("NOTE: Predictions for P(A|W,E) under g0 will be based on the fitted model in h.g0_GenericModel, " %+%
"and all modeling settings will be ignored")
# verify h.g0_GenericModel is consistent with GenericModel
assert_that(inherits(h.g0_GenericModel, "GenericModel"))
}
if (!is.null(h.gstar_GenericModel)) {
message("NOTE: Predictions for P(A^*|W,E) under f_gstar will be based on the fitted model in h.gstar_GenericModel, " %+%
"and all modeling settings will be ignored")
# verify h.gstar_GenericModel is consistent with GenericModel
assert_that(inherits(h.gstar_GenericModel, "GenericModel"))
}
if (!is.null(f.gstar) | !savetime | ("TMLE_A" %in% estnames)) {
#---------------------------------------------------------------------------------
# Getting OBSERVED W & A
#---------------------------------------------------------------------------------
# Baseline variable names / expressions
Ws.g0_nms <- h.g0.sVars$predvars
Ws.gstar_nms <- h.gstar.sVars$predvars
# Exposure variable names / expressions:
As_nms_g0 <- h.g0.sVars$outvars
As_nms_gstar <- h.gstar.sVars$outvars
if (!all(As_nms_g0 == As_nms_gstar))
stop("the outcome variable names defined by regressions hform.g0 & hform.gstar are not identical;"
%+% " current implementation requires these to be the same.")
subsets_expr <- lapply(As_nms_g0, function(var) {var}) # subsetting by !gvars$misval on A
# As' class parameters:
As_classes <- OData.ObsP0$type.sVar[As_nms_g0]
#-------- Stage 1 --------- Calculate h_gN
if (gvars$verbose) {
message("================================================================")
message("fitting h_g0 with covariates: ", "P(" %+% paste(As_nms_g0, collapse = ",")
%+% " | " %+% paste(Ws.g0_nms, collapse = ",") %+% ")")
message("================================================================")
}
p_h0 <- ifelse(is.null(f.g0), 1, n_MCsims)
# If !is.null(f.g_fun) then OData.g0$dat.sVar IS NOT THE OBSERVED data (sVar), but rather sVar data sampled under known g0.
if (!is.null(f.g0)) {
if (gvars$verbose) message("generating OData.g0 under known g0")
OData.g0 <- DatKeepClass$new(Odata = data, nodes = nodes, norm.c.sVars = FALSE)
OData.g0$addObsWeights(obs.wts = obs.wts)
OData.g0$make.dat.sVar(p = p_h0, f.g_fun = f.g0, regform = merged.form)
print("head(OData.g0$dat.sVar): "); print(head(OData.g0$dat.sVar))
} else {
OData.g0 <- OData.ObsP0
}
regclass.g0 <- RegressionClass$new(outvar.class = As_classes,
outvar = As_nms_g0,
predvars = Ws.g0_nms,
subset_vars = subsets_expr,
estimator = getopt("gestimator"))
genericmodels.g0 <- GenericModel$new(reg = regclass.g0, DatKeepClass.g0 = OData.g0)
if (!is.null(h.g0_GenericModel)) {
# 1) deep copy model fits in h_g0_SummariesModel to genericmodels.g0
genericmodels.g0 <- h.g0_GenericModel$clone(deep = TRUE)
} else {
genericmodels.g0$fit(data = OData.g0)
}
h_gN <- genericmodels.g0$predictAeqa(newdata = OData.ObsP0)
#-------- Stage 2 --------- Calculate h_gstar
if (gvars$verbose) {
message("================================================================")
message("fitting h_gstar with covariates: ", "P(" %+% paste(As_nms_gstar, collapse = ",")
%+% " | " %+% paste(Ws.gstar_nms, collapse = ",") %+% ")")
message("================================================================")
}
OData.gstar <- DatKeepClass$new(Odata = data, nodes = nodes, norm.c.sVars = FALSE)
OData.gstar$addObsWeights(obs.wts = obs.wts)
OData.gstar$make.dat.sVar(p = n_MCsims, f.g_fun = f.gstar, regform = merged.form)
if (gvars$verbose) {
print("Generated new exposure covariates by sampling A from f_gstar (OData.gstar): "); print(class(OData.gstar$dat.sVar))
print(dim(OData.gstar$dat.sVar)); print(head(OData.gstar$dat.sVar));
}
regclass.gstar <- RegressionClass$new(outvar.class = As_classes,
outvar = As_nms_gstar,
predvars = Ws.gstar_nms,
subset_vars = subsets_expr,
estimator = getopt("gestimator"))
# Define Intervals Under g_star to Be The Same as under g0:
genericmodels.gstar <- GenericModel$new(reg = regclass.gstar, DatKeepClass.g0 = OData.g0)
# Define Intervals Under g_star Based on exposure covariates Generated under g_star:
# genericmodels.gstar <- GenericModel$new(reg = regclass.gstar, DatKeepClass.g0 = OData.gstar)
if (!is.null(h.gstar_GenericModel)) {
# 1) deep copy the object with model fits to genericmodels.gstar
genericmodels.gstar <- h.gstar_GenericModel$clone(deep = TRUE)
} else {
genericmodels.gstar$fit(data = OData.gstar)
}
h_gstar <- genericmodels.gstar$predictAeqa(newdata = OData.ObsP0)
#-------- Stage 3 --------- Calculate final h_bar (h_tilde) as ratio of h_gstar / h_gN and bound it
h_gstar_h_gN <- h_gstar / h_gN
h_gstar_h_gN[is.nan(h_gstar_h_gN)] <- 0 # 0/0 detection
h_gstar_h_gN <- bound(h_gstar_h_gN, c(0, 1/lbound))
} else { # is.null(f.gstar) & savetime = TRUE & !("TMLE_A" %in% estnames)
if (gvars$verbose) {
message("================================================================")
message("Since f.gstar = NULL & using tmle.intercept in targeting step, in order to save time, \n"
%+% "we skip g0 & gstar fitting procedure and directly set h_gstar_h_gN = 1 for each observation")
message("================================================================")
}
OData.gstar <- DatKeepClass$new(Odata = data, nodes = nodes, norm.c.sVars = FALSE)
OData.gstar$make.dat.sVar(p = n_MCsims, f.g_fun = f.gstar, regform = merged.form)
if (gvars$verbose) {
print("Generated new exposure covariates by sampling A from f_gstar (OData.gstar): "); print(class(OData.gstar$dat.sVar))
print(dim(OData.gstar$dat.sVar)); print(head(OData.gstar$dat.sVar));
}
if (!is.null(h.g0_GenericModel)) {
genericmodels.g0 <- h.g0_GenericModel$clone(deep = TRUE)
} else {
genericmodels.g0 <- NULL
}
if (!is.null(h.gstar_GenericModel)) {
genericmodels.gstar <- h.gstar_GenericModel$clone(deep = TRUE)
} else {
genericmodels.gstar <- NULL
}
h_gN <- h_gstar <- NULL
h_gstar_h_gN <- rep_len(1, OData.gstar$nobs)
}
model.h.fit <- list(genericmodels.g0 = genericmodels.g0, genericmodels.gstar = genericmodels.gstar, lbound = lbound)
return(list(h_gstar_h_gN = h_gstar_h_gN, model.h.fit = model.h.fit, OData.gstar = OData.gstar, h_gN = h_gN, h_gstar = h_gstar))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.