#################################################################################
########################### NETWORK TMLE R PACKAGE ##############################
# authors: Oleg Sofrygin <sofrygin@berkeley.edu> and Mark van der Laan <laan@berkeley.edu>
#################################################################################
# @title tmlenet-package
# @docType package
# @author Oleg Sofrygin, Mark J. van der Laan
#' @useDynLib tmlenet
#' @import R6
#' @importFrom Rcpp sourceCpp
#' @importFrom grDevices nclass.FD nclass.Sturges nclass.scott
#' @importFrom graphics axis barplot hist par text
#' @importFrom methods is
#' @importFrom stats approx quasibinomial binomial coef glm.control glm.fit plogis predict qlogis qnorm quantile rnorm terms var predict glm.control
#' @importFrom utils data head str
#-----------------------------------------------------------------------------
# Class Membership Tests
#-----------------------------------------------------------------------------
is.DatNet.sWsA <- function(DatNet.sWsA) "DatNet.sWsA"%in%class(DatNet.sWsA)
is.DatNet <- function(DatNet) "DatNet"%in%class(DatNet)
#-----------------------------------------------------------------------------
# ALL NETWORK VARIABLE NAMES MUST BE CONSTRUCTED BY CALLING THIS FUNCTION.
# In the future might return the network variable (column vector) itself.
# Helper function that for given variable name (varnm) and friend index (fidx)
# returns the characeter name of that network variable varnm[fidx],
# for fidx = 0 (var itself), ..., kmax. fidx can be a vector, in which case a
# character vector of network names is returned. If varnm is also a vector, a
# character vector for all possible combinations of (varnm x fidx) is returned.
# OUTPUT format: Varnm_net.j:
#-----------------------------------------------------------------------------
netvar <- function(varnm, fidx) {
cstr <- function(varnm, fidx) {
slen <- length(fidx)
rstr <- vector(mode = "character", length = slen)
netidxstr <- ! (fidx %in% 0L)
rstr[netidxstr] <- stringr::str_c('_netF', fidx[netidxstr]) # vs. 1
# rstr[netidxstr] <- str_c('.net.', fidx[netidxstr]) # vs. 2
return(stringr::str_c(varnm, rstr))
}
if (length(varnm) > 1) {
return(unlist(lapply(varnm, cstr, fidx)))
} else {
return(cstr(varnm, fidx))
}
}
# Examples:
# netvar("A", (0:5))
# netvar("A", c(0:5, 0, 3))
# netvar(c("A", "W"), c(0:5, 0, 3))
# netvar(c("A", "W"), c(0:5, 0, 3))
#-----------------------------------------------------------------------------
# General utilities / Global Vars
#-----------------------------------------------------------------------------
`%+%` <- function(a, b) paste0(a, b)
checkpkgs <- function(pkgs) {
for (pkg in pkgs) {
if (!requireNamespace(pkg, quietly = TRUE)) {
stop(pkg %+% " package needed for this function to work. Please install it.", call. = FALSE)
}
}
}
# Bound g(A|W) probability within supplied bounds
bound <- function(x, bounds){
x[x<min(bounds)] <- min(bounds)
x[x>max(bounds)] <- max(bounds)
return(x)
}
#if warning is in ignoreWarningList, ignore it; otherwise post it as usual
SuppressGivenWarnings <- function(expr, warningsToIgnore) {
h <- function (w) {
if (w$message %in% warningsToIgnore) invokeRestart( "muffleWarning" )
}
withCallingHandlers(expr, warning = h )
}
GetWarningsToSuppress <- function(update.step=FALSE) {
warnings.to.suppress <- c("glm.fit: fitted probabilities numerically 0 or 1 occurred",
"prediction from a rank-deficient fit may be misleading")
if (update.step) {
warnings.to.suppress <- c(warnings.to.suppress, "glm.fit: algorithm did not converge")
}
return(warnings.to.suppress)
}
# returns NULL if no factors exist, otherwise return the name of the factor variable(s)
CheckExistFactors <- function(data) {
testvec <- unlist(lapply(data, is.factor))
if (any(testvec)) {
return(names(data)[which(testvec)])
} else {
return(NULL)
}
}
# throw exception if 1) varname doesn't exist; 2) more than one varname is matched
CheckVarNameExists <- function(data, varname) {
idvar <- names(data) %in% varname
if (sum(idvar) < 1) stop("variable name " %+% varname %+% " not found in data input")
if (sum(idvar) > 1) stop("more than one column in the input data has been matched to name "
%+% varname %+% ". Consider renaming some of the columns: " %+%
paste0(names(data)[idvar], collapse=","))
return(invisible(NULL))
}
# THIS NEEDS MORE EVALUATION, DOESN'T SEEM TO WORK AS INTENDED DURING MC EVALUTION
# (GET HUGE ABSOLUTE VALUE WEIGHTS, THAT HAVE tiny % CONTRIBUTION)
scale_bound <- function(weights, max_npwt, n) {
# scale weights
weight_prop <- (weights/sum(weights)) # % contribution to the total weight, scaled by n
weight_prop_byn <- (weights/sum(weights))*n # % contribution to the total weight, scaled by n
# print("weight summary before trunc"); print(summary(weights))
# print("weight_prop before trunc, %"); print(summary(weight_prop))
# print("weight_prop before trunc, scaled by n"); print(summary(weight_prop_byn))
while (any(weight_prop_byn > (max_npwt+5))) {
weights[which(weight_prop_byn >= max_npwt)] <- (max_npwt / n) * sum(weights)
weight_prop_byn <- (weights/sum(weights)) * n # % contribution to the total weight, sclaed by n
# print("weight summary after trunc"); print(summary(weights))
# print("weight_prop after trunc, %"); print(summary(weights/sum(weights)))
# print("weight_prop after trunc, scaled by n"); print(summary(weight_prop_byn))
}
return(weights)
}
# Return the left hand side variable of formula f as a character
LhsVars <- function(f) {
f <- as.formula(f)
return(as.character(f[[2]]))
}
# Return the right hand side variables of formula f as a character vector
RhsVars <- function(f) {
f <- as.formula(f)
return(all.vars(f[[3]]))
}
#************************************************
# TMLEs
#************************************************
tmle.update <- function(estnames, Y, off, h_wts, determ.Q, predictQ = TRUE) {
QY.star <- NA
ctrl <- glm.control(trace = FALSE, maxit = 1000)
if ("TMLE_A" %in% estnames) {
#************************************************
# TMLE A: estimate the TMLE update via univariate ML (epsilon is coefficient for h^*/h) - ONLY FOR NON-DETERMINISTIC SUBSET
#************************************************
SuppressGivenWarnings(
m.Q.star <- speedglm::speedglm.wfit(X = matrix(h_wts, ncol=1), y = Y, family = quasibinomial(), trace = FALSE, maxit = 1000, offset = off),
GetWarningsToSuppress(TRUE))
m.Q.star.coef <- m.Q.star$coef
# m.Q.star.fit <- list(coef = m.Q.star$coef, linkfun = "logit_linkinv", fitfunname = "speedglm")
# class(m.Q.star.fit) <- c(class(m.Q.star.fit), c("speedglmS3"))
# SuppressGivenWarnings(m.Q.star <- glm(Y ~ -1 + h_wts + offset(off), data = data.frame(Y = Y, off = off, h_wts = h_wts),
# subset = !determ.Q, family = "quasibinomial", control = ctrl), GetWarningsToSuppress(TRUE))
# m.Q.star.coef <- coef(m.Q.star)
if (predictQ) {
QY.star <- Y
if (!is.na(m.Q.star.coef)) QY.star <- plogis(off + m.Q.star.coef * h_wts)
}
} else if ("TMLE_B" %in% estnames) {
#************************************************
# TMLE B: estimate the TMLE update via weighted univariate ML (espsilon is intercept)
#************************************************
SuppressGivenWarnings(
m.Q.star <- speedglm::speedglm.wfit(X = matrix(1L, ncol=1, nrow=length(Y)), y = Y, weights = h_wts, offset = off,
family = quasibinomial(), trace = FALSE, maxit = 1000),
GetWarningsToSuppress(TRUE))
# family = binomial()
m.Q.star.coef <- m.Q.star$coef
# m.Q.star.fit <- list(coef = m.Q.star$coef, linkfun = "logit_linkinv", fitfunname = "speedglm")
# class(m.Q.star.fit) <- c(class(m.Q.star.fit), c("speedglmS3"))
# SuppressGivenWarnings(m.Q.star <- glm(Y ~ offset(off), data = data.frame(Y = Y, off = off), weights = h_wts,
# subset = !determ.Q, family = "quasibinomial", control = ctrl), GetWarningsToSuppress(TRUE))
# m.Q.star.coef <- coef(m.Q.star)
if (predictQ) {
QY.star <- Y
if (!is.na(m.Q.star.coef)) QY.star <- plogis(off + m.Q.star.coef)
}
}
#************************************************
# (DISABLED) g_IPTW estimator (based on full likelihood factorization, prod(g^*)/prod(g_N):
#************************************************
# 02/16/13: IPTW estimator (Y_i * prod_{j \\in Fi} [g*(A_j|c^A)/g0_N(A_j|c^A)])
# g_wts <- iptw_est(k = est_params_list$Kmax, data = data, node_l = nodes, m.gN = est_params_list$m.g0N,
# f.gstar = est_params_list$f.gstar, f.g_args = est_params_list$f.g_args, family = "binomial",
# NetInd_k = est_params_list$NetInd_k, lbound = est_params_list$lbound, max_npwt = est_params_list$max_npwt,
# f.g0 = est_params_list$f.g0, f.g0_args = est_params_list$f.g0_args)
# Y_IPTW_g <- Y
# Y_IPTW_g[!determ.Q] <- Y[!determ.Q] * g_wts[!determ.Q]
#************************************************
# (DISABLED) g_IPTW-based clever covariate TMLE (based on FULL likelihood factorization), covariate based fluctuation
#************************************************
# SuppressGivenWarnings(m.Q.star_giptw <- glm(Y ~ -1 + g_wts + offset(off),
# data = data.frame(Y = Y, off = off, g_wts = g_wts),
# subset = !determ.Q, family = "quasibinomial", control = ctrl),
# GetWarningsToSuppress(TRUE))
return(list(m.Q.star.coef = m.Q.star.coef, QY.star = QY.star))
}
#---------------------------------------------------------------------------------
# Estimate h_bar under g_0 and g* given observed data and vector of c^Y's data is an DatNet.sWsA object
#---------------------------------------------------------------------------------
get_all_ests <- function(estnames, DatNet.ObsP0, est_params_list) {
#---------------------------------------------------------------------------------
# unified estimator naming used throughout the package:
# c("TMLE", "h_IPTW", "MLE")
#---------------------------------------------------------------------------------
# DatNet.ObsP0$det.Y # TRUE/FALSE for deterministic Y's
# DatNet.ObsP0$noNA.Ynodevals # actual observed Y's
# m.Q.init$getoutvarnm # reg outvar name (Ynode)
# DatNet.ObsP0$YnodeVals # visible Y's with NA for det.Y
# m.Q.init$getoutvarval # Yvals used in prediction (with det.Y obs set to NA)
# m.Q.init$getsubset # valid subset (!det.Y)
# m.Q.init$reg # regression class (Qreg)
# making sure the current dataset consists of the OBSERVED Anodes and sA (summaries)
if (!DatNet.ObsP0$Odata$curr_data_A_g0) {
if (is.null(DatNet.ObsP0$Odata$A_g0_DT))
stop("Can't recover the initial observed A (exposures), as those were over-written and not backed-up")
DatNet.ObsP0$Odata$swapAnodes()
if (!DatNet.ObsP0$Odata$restored_sA_Vars) DatNet.ObsP0$datnetA$make.sVar(sVar.object = est_params_list$sA)
}
nodes <- DatNet.ObsP0$nodes
Y <- DatNet.ObsP0$noNA.Ynodevals # actual observed Y`s
determ.Q <- DatNet.ObsP0$det.Y
m.Q.init <- est_params_list$m.Q.init
QY.init <- DatNet.ObsP0$noNA.Ynodevals # getting all node vals, inc. deterministic
QY.init[!DatNet.ObsP0$det.Y] <- m.Q.init$predict(newdata = DatNet.ObsP0)$getprobA1[!DatNet.ObsP0$det.Y] # getting predictions P(Y=1) for non-DET Y
off <- qlogis(QY.init) # offset log(x/[1-x])
#************************************************
# h^*/h_N clever covariate:
#************************************************
fit.hbars_t <- system.time(fit.hbars.res <- fit.hbars(DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_params_list)) # fit the clever covariate
DatNet.gstar <- fit.hbars.res$DatNet.gstar
m.h.fit <- fit.hbars.res$m.h.fit
h_wts <- fit.hbars.res$h_gstar_h_gN
#************************************************
# IPTW_h estimator:
#************************************************
h_IPTW <- Y
h_IPTW[!determ.Q] <- Y[!determ.Q] * h_wts[!determ.Q]
h_IPTW <- mean(h_IPTW)
# message("h_IPTW: "); message(h_IPTW)
# if (h_IPTW > 0.5) browser()
#************************************************
# Get a TMLE update:
#************************************************
# epsilon (intercept or coefficient for h): tmle.obj$m.Q.star
# boot_idx <- seq.int(DatNet.ObsP0$nobs)
tmle.obj <- tmle.update(estnames = estnames, Y = Y, off = off, h_wts = h_wts, determ.Q = determ.Q)
# print("tmle.obj$m.Q.star.coef"); print(tmle.obj$m.Q.star.coef)
#************************************************
# Run Monte-Carlo (MC) evaluation for all plug-in estimators (TMLE & Gcomp), under stochastic intervention g^*:
#************************************************
MC_fit_params <- append(est_params_list, list(m.Q.star = tmle.obj$m.Q.star.coef))
# make sure the current dataset constits of INTERVENED Anodes and sA (under g.star):
if (DatNet.ObsP0$datnetW$Odata$curr_data_A_g0) {
DatNet.gstar$datnetA$Odata$swapAnodes()
if (!DatNet.gstar$datnetA$Odata$restored_sA_Vars)
DatNet.gstar$datnetA$make.sVar(sVar.object = est_params_list$new.sA) # just recreate the summaries sA based on current Anode values in the data
}
# generate new A's under f.gstar, then re-create new summaries sA:
# DatNet.gstar$make.dat.sWsA(p = 1, f.g_fun = est_params_list$f.gstar, sA.object = sA, DatNet.ObsP0 = DatNet.ObsP0)
# DatNet.gstar$datnetA$Odata$OdataDT
# DatNet.gstar$datnetA$Odata$A_g0_DT
# DatNet.gstar$datnetA$Odata$sA_g0_DT
# both should be the same, as both are pointing to the same data.table:
# head(DatNet.ObsP0$dat.sVar)
# head(DatNet.gstar$dat.sVar)
MC.Eval.psi <- get.MCS_ests(DatNet.ObsP0 = DatNet.ObsP0,
DatNet.gstar = DatNet.gstar,
MC_fit_params = MC_fit_params,
m.h.fit = m.h.fit)
# -----------------------------------------------------------------
# Mean estimates
# -----------------------------------------------------------------
MCS_res <- MC.Eval.psi$psi_est_mean
ests <- c(TMLE = MCS_res[["TMLE"]],
h_IPTW = h_IPTW, # IPTW estimator based on h - clever covariate:
MLE = MCS_res[["MLE"]])
ests_mat <- matrix(0L, nrow = length(ests), ncol = 1)
ests_mat[, 1] <- ests
rownames(ests_mat) <- names(ests); colnames(ests_mat) <- "estimate"
wts_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 1)
colnames(wts_mat) <- c("h_wts")
wts_mat[, "h_wts"] <- h_wts
# Components of asymptotic variance for tmle_net (E_g^*[\bar{Q}_0(A^s,W^s|W^s)]-\psi_0):
# SUBSTRACT overall estimate of psi_0 from fW_i i-specific components
fWi_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 1)
colnames(fWi_mat) <- c("fWi_Qinit")
fWi_mat[,"fWi_Qinit"] <- MCS_res[agrep("fWi_init_", names(MCS_res), max.distance=list(insertions=0, substitutions=0))]
QY_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 2)
colnames(QY_mat) <- c("QY.init", "QY.star")
QY_mat[,] <- cbind(QY.init, tmle.obj$QY.star)
if (gvars$verbose) {
print("time spent fitting new fit.hbars.res:"); print(fit.hbars_t)
if ("TMLE_A" %in% estnames) {
parsubmodel_fits <- rbind(tmle.obj$m.Q.star.coef)
rownames(parsubmodel_fits) <- c("epsilon (clever covariate coefficient)")
} else if ("TMLE_B" %in% estnames) {
parsubmodel_fits <- rbind(tmle.obj$m.Q.star.coef)
rownames(parsubmodel_fits) <- c("alpha (intercept)")
}
print("new parsubmodel_fits: "); print(parsubmodel_fits)
print(
c(
fWi_init = mean(fWi_mat[,"fWi_Qinit"] - ests["TMLE"])
));
print("new MC.ests mat: "); print(ests_mat)
}
# -----------------------------------------------------------------
# instance of an R6 object mcEvalPsi:
# -----------------------------------------------------------------
psi.evaluator <- MC.Eval.psi$psi.evaluator
return(list( ests_mat = ests_mat,
wts_mat = wts_mat,
fWi_mat = fWi_mat,
QY_mat = QY_mat,
# var_tmleB_boot = var_tmleB_boot,
psi.evaluator = psi.evaluator, # for par. bootstrap
m.Q.init = m.Q.init, # for par. bootstrap
m.h.fit = m.h.fit, # for par. bootstrap
DatNet.gstar = DatNet.gstar, # for par. bootstrap
sW = est_params_list$sW, # for par. bootstrap
sA = est_params_list$sA, # for par. bootstrap
f.gstar = est_params_list$f.gstar, # for par. bootstrap
new.sA = est_params_list$new.sA, # for par. bootstrap
MC_fit_params = MC_fit_params, # for Monte-Carlo eval of the iid W EIC
h_g0_SummariesModel = m.h.fit$summeas.g0,
h_gstar_SummariesModel = m.h.fit$summeas.gstar
))
}
#---------------------------------------------------------------------------------
# (NEW INTERFACE FOR SPECIFYING regressions for hform.g0, hform.gstar & Qform)
#---------------------------------------------------------------------------------
get_vars_fromlist <- function(varname, sVar.map) {
if (varname %in% names(sVar.map)) {
as.vector(sVar.map[[varname]])
} else {
varname
}
}
# Parse the formulas for summary measure names and create a map to actual covariate names in sA & sW
process_regform <- function(regform, sW.map = NULL, sA.map = NULL) {
# Getting predictors (sW names):
regformterms <- terms(regform)
sW.names <- attributes(regformterms)$term.labels
sW.names.alt <- colnames(attributes(regformterms)$factors)
assert_that(all(sW.names == sW.names.alt))
# Getting outcomes (sA names):
out.var <- rownames(attributes(regformterms)$factors)[1] # character string
out.vars.form <- as.formula(". ~ " %+% out.var)
out.vars.terms <- terms(out.vars.form)
sA.names <- attributes(out.vars.terms)$term.labels
outvars <- unlist(lapply(sA.names, get_vars_fromlist, sA.map))
predvars <- unlist(lapply(sW.names, get_vars_fromlist, sW.map))
return(list(outvars = outvars, predvars = predvars))
}
# When several reg forms are specified (multivariate Anodes), process outvars into one vector and process predvars in a named list of vectors
process_regforms <- function(regforms, sW.map = NULL, sA.map = NULL) {
if (length(regforms)==0L) {
default.reg <- list(outvars = list(as.vector(unlist(sA.map))), predvars = list(as.vector(unlist(sW.map))))
names(default.reg[["outvars"]]) <- names(default.reg[["predvars"]]) <- paste0(default.reg$outvars[[1]], collapse="+")
return(default.reg)
} else {
outvars <- vector(mode="list", length=length(regforms))
predvars <- vector(mode="list", length=length(regforms))
for (idx in seq_along(regforms)) {
res <- process_regform(as.formula(regforms[[idx]]), sW.map = sW.map, sA.map = sA.map)
outvars[[idx]] <- res$outvars
predvars[[idx]] <- res$predvars
names(outvars)[idx] <- names(predvars)[idx] <- paste0(outvars[[idx]], collapse="+")
}
return(list(outvars = outvars, predvars = predvars))
}
}
#' Evaluate Summary Measures sA and sW
#'
#' Take input data, create a network matrix (when input network matrix not provided) and evaluate the summary measures
#' previously defined with functions \code{\link{def_sW}} and \code{\link{def_sA}}.
#' This function is called internally by \code{tmlenet} for the evaluation of the summary measures.
#' The R6 class object named \code{DatNet.ObsP0} that is returned by this function can be supplied as an input to the
#' \code{tmlenet} function.
#' When \code{DatNet.ObsP0} is used as an input to \code{tmlenet}, the rest of the input arguments already provided to
#' this function can be omitted from the \code{tmlenet} function call.
#' @param data Same as \code{\link{tmlenet}} input argument.
#' @param Kmax Same as \code{\link{tmlenet}} input argument.
#' @param sW Same as \code{\link{tmlenet}} input argument.
#' @param sA Same as \code{\link{tmlenet}} input argument.
#' @param IDnode (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param NETIDnode (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param sep Optional friend ID character separator for friends listed in \code{NETIDnode} column of \code{data}, default is \code{' '};
#' same as \code{\link{tmlenet}} input argument \code{optPars$sep}.
#' @param NETIDmat (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param verbose Set to \code{TRUE} to print messages on status and information to the console.
#' Turn this on by default using \code{options(tmlenet.verbose=TRUE)}.
#' @return A named list that contains:
#' \itemize{
#' \item \code{sW.matrix} - Matrix of evaluated summary measures for \code{sW}.
#' \item \code{sA.matrix} - Matrix of evaluated summary measures for \code{sA}.
#' \item \code{NETIDmat} - Network ID matrix that can be used for \code{NETIDmat} input argument to \code{tmlenet}.
#' \item \code{DatNet.ObsP0} - R6 object of class \code{\link{DatNet.sWsA}} that stores all the summary measures and the network information.
#' This object be passed to \code{\link{tmlenet}} as an argument, in which case the arguments already provided to \code{eval.summaries} no
#' longer need to be specified to \code{tmlenet}.
#' }
#' @seealso \code{\link{tmlenet}} for estimation of network effects and \code{\link{def_sW}} for defining the summary measures.
#' @example tests/examples/3_eval.summaries_examples.R
#' @export
eval.summaries <- function(data, Kmax, sW, sA, IDnode = NULL, NETIDnode = NULL, sep = ' ', NETIDmat = NULL,
verbose = getOption("tmlenet.verbose")) {
iid_data_flag <- FALSE # set to true if no network is provided (will run iid TMLE)
nFnode = "nF"
#----------------------------------------------------------------------------------
# SOME INPUT CHECKS
#----------------------------------------------------------------------------------
assert_that(is.data.frame(data))
assert_that(is.integerish(Kmax))
Kmax <- as.integer(Kmax)
assert_that(is.DefineSummariesClass(sW))
assert_that(is.DefineSummariesClass(sA))
nobs <- nrow(data)
# Check no factors exist in the input data:
check1 <- CheckExistFactors(data)
if (!is.null(check1)) stop("found factor column(s) in the input data, consider removing or recoding such column(s) as strings: "
%+% paste0(check1, collapse=','))
if (is.null(NETIDnode) && is.null(NETIDmat)) {
message("No network (friends) specified by NETIDnode or NETIDmat args, assuming the input data is i.i.d.")
nFnode <- NULL
iid_data_flag <- TRUE
if (missing(Kmax)) Kmax <- 1 # need to allow Kmax = 0
}
#---------------------------------------------------------------------------------
# Saving the observed data as a data.table in its own object class OdataDT
#---------------------------------------------------------------------------------
OdataDT_R6 <- OdataDT$new(Odata = data, nFnode = nFnode, iid_data_flag = iid_data_flag)
#----------------------------------------------------------------------------------
# Create an object with model estimates, data & network information that is passed on to estimation algorithm(s)
#----------------------------------------------------------------------------------
netind_cl <- NetIndClass$new(nobs = nobs, Kmax = Kmax)
if (!is.null(NETIDnode)) {
assert_that(is.character(NETIDnode))
# Net_str <- as.character(data[, NETIDnode])
Net_str <- as.character(OdataDT_R6$OdataDT[[NETIDnode]])
OdataDT_R6$OdataDT[, NETIDnode:=NULL]
if (!is.null(IDnode)) {
assert_that(is.character(IDnode))
# IDs_str <- as.character(data[, IDnode])
IDs_str <- as.character(OdataDT_R6$OdataDT[[IDnode]])
OdataDT_R6$OdataDT[, IDnode:=NULL]
} else {
IDs_str <- NULL
}
netind_cl$makeNetInd.fromIDs(Net_str = Net_str, IDs_str = IDs_str, sep = sep)
} else if (!is.null(NETIDmat)) {
assert_that(is.matrix(NETIDmat))
netind_cl$NetInd <- NETIDmat
netind_cl$make.nF()
}
if (verbose) {
message("evaluated the network ID matrix: "); print(head(netind_cl$NetInd))
message("evaluated and added to summary measures the number of friends for each observation (nF): "); print(head(netind_cl$nF))
}
#----------------------------------------------------------------------------------
# Test parsing and evaluating the summary measures (in class DefineSummariesClass):
#----------------------------------------------------------------------------------
# Testing the evaluation of summary measures:
# # sW.matrix <- sW$eval.nodeforms(data.df = data, netind_cl = netind_cl)
# sW.matrix <- sW$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
# # sA.matrix <- sA$eval.nodeforms(data.df = data, netind_cl = netind_cl)
# sA.matrix <- sA$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
# if (verbose) {
# print("sample matrix of sW summary measurs: : "); print(head(sW.matrix))
# print("sample matrix of sA summary measurs: "); print(head(sA.matrix))
# print("map of sW names to column names: "); print(sW$sVar.names.map)
# print("map of sA names to column names: "); print(sA$sVar.names.map)
# }
#---------------------------------------------------------------------------------
# BUILDING OBSERVED sW & sA: (obsdat.sW - a dataset (matrix) of n observed summary measures sW)
# ALL EVALUATIONS HAPPEN BY REFERENCE AND ONLY OdataDT_R6$OdataDT is modified, no copies are made
#---------------------------------------------------------------------------------
datnetW <- DatNet$new(netind_cl = netind_cl, nFnode = nFnode)
datnetW$make.sVar(Odata = OdataDT_R6, sVar.object = sW)
datnetW$fixmiss_sVar() # permanently replace NA values in sW with 0
datnetA <- DatNet$new(netind_cl = netind_cl)
datnetA$make.sVar(Odata = OdataDT_R6, sVar.object = sA)
DatNet.ObsP0 <- DatNet.sWsA$new(Odata = OdataDT_R6, datnetW = datnetW, datnetA = datnetA)$make.dat.sWsA()
OdataDT_R6$sW <- sW
OdataDT_R6$sA <- sA
return(list(NETIDmat = netind_cl$NetInd, DatNet.ObsP0 = DatNet.ObsP0))
}
#---------------------------------------------------------------------------------
# MAIN TMLENET FUNCTION
#---------------------------------------------------------------------------------
# layers of tmlenet input spec's:
# 1) spec sW, sA, Qform => assumes hform.g0 = "sA ~ sW", hform.gstar = "sA ~ sW"
# 2) spec sW, sA, Qform and hform.g0 => assumes hform.gstar = hform.g0
# 3) 2) + spec separate hform.gstar (outcome have to be the same sA for both hform.g0 & hform.gstar)
#------------------------------------
#' Estimate Average Network Effects For Arbitrary (Stochastic) Interventions
#'
#' Estimate the average network effect among dependent units with known network structure (in presence of
#' interference and/or spillover) using \strong{TMLE} (targeted maximum likelihood estimation), \strong{IPTW}
#' (Horvitz-Thompson or the inverse-probability-of-treatment) and \strong{GCOMP} (parametric G-computation formula).
#' @param DatNet.ObsP0 Instance of class \code{\link{DatNet.sWsA}} returned under the same name by the \code{\link{eval.summaries}} function.
#' Stores the evaluated summary measures (\code{sW},\code{sA}) for the observed data, along with the network information.
#' When this argument is specified, the
#' following arguments no longer need to be provided: \code{data}, \code{Kmax}, \code{sW}, \code{sA},
#' \code{IDnode}, \code{NETIDnode}, \code{optPars$sep}, \code{NETIDmat}.
#' @param data Observed data, a \code{data.frame} with named columns, containing the baseline covariates,
#' exposures (\code{Anodes}), the outcomes (\code{Ynode}) and possibly the network column (\code{NETIDnode}), where
#' network is specified by a vector of strings of friend IDs, each string using \code{optPars$sep} character to separate different friend IDs
#' (default is \code{optPars$sep=' '}).
# @param estimators (NOT IMPLEMENTED) Character vector with estimator names.
#' @param Kmax Integer constant specifying the maximal number of friends for any observation in the input \code{data} data.frame.
#' @param sW Summary measures constructed from baseline covariates alone. This must be an object of class
#' \code{DefineSummariesClass} that is returned by calling the function \code{\link{def_sW}}.
#' @param sA Summary measures constructed from exposures \code{Anodes} and baseline covariates. This must be an object of class
#' \code{DefineSummariesClass} that is returned by calling the function \code{\link{def_sW}}.
# @param Anodes Exposure (treatment) variable name (column name in \code{data}); exposures can be either binary, categorical or continuous.
# This variable can be instead specified with argument \code{sA} by adding a call \code{+def_sA(Anodes="ExposureVarName")} to \code{sA}.
# @param AnodeDET Optional column name for indicators of deterministic values of exposures in \code{Anodes},
# should be coded as (\code{TRUE}/\code{FALSE}) or (\code{1}/\code{0});
# observations with \code{AnodeDET}=\code{TRUE}/\code{1} are assumed to have deterministically assigned exposures
#' @param Ynode Outcome variable name (column name in \code{data}), assumed normalized between 0 and 1. This can instead be specified
#' on the left-side of the regression formula in argument \code{Qform}.
#' @param NETIDmat Network specification via matrix of friend IDs (\code{ncol=Kmax}, \code{nrow=nrow(data)}),
#' where each row \code{i} is a vector of \code{i}'s friends IDs or \code{i}'s friends row
#' numbers in \code{data} if \code{IDnode=NULL}. See Details.
#' @param IDnode Subject identifier variable in the input data, if not supplied the network string in
#' \code{NETIDnode} is assumed to be indexing the row numbers in the input \code{data}
#' @param NETIDnode Network specification by a column name in input \code{data} consisting of strings that identify the unit's friends
#' by their IDs or their row numbers (two friends are separated by space, e.g., \code{"1 2"}; unit with no friends should have
#' an empty \code{""} string). See Details.
#' @param intervene1.sA Intervention 1
#' @param f_gstar1 Either a function or a vector of counterfactual exposures. If a function, must return
#' a vector of counterfactual exposures evaluated based on the summary measures matrix (\code{sW,sA}) passed as a named
#' argument \code{"data"}, therefore, the function in \code{f_gstar1} must have a named argument \code{"data"} in its signature.
#' The interventions defined by \code{f_gstar1} can be static, dynamic or stochastic. If \code{f_gstar1} is specified as a
#' vector, it must be of length \code{nrow(data)} or 1 (constant treatment assigned to all observations).
#' See Details below and Examples in "EQUIVALENT WAYS OF SPECIFYING INTERVENTION \code{f_gstar1}" for demonstration.
#' @param intervene2.sA Intervention 2
#' @param f_gstar2 Either a function or a vector of counterfactual exposure assignments.
#' Used for estimating contrasts (average treatment effect) for two interventions, if omitted, only the average
#' counterfactual outcome under intervention \code{f_gstar1} is estimated. The requirements for \code{f_gstar2}
#' are identical to those for \code{f_gstar1}.
# @param nFnode (Optional) Name of the variable for the number of friends each unit has, this name can then be used
# inside the summary measures and regression formulas \code{sW}, \code{sA}, \code{Qform}, \code{hform.g0},
# \code{hform.gstar} and \code{gform}. See Details.
#' @param Qform Regression formula for outcome in \code{Ynode}, when omitted (\code{Ynode}=\code{NULL}), the outcome variable is
#' regressed on all variables defined in \code{sW} and \code{sA}. See Details.
#' @param hform.g0 Regression formula for estimating the conditional density of P(\code{sA} | \code{sW}) under \code{g0}
#' (the observed exposure mechanism), when omitted, this regression is defined by \code{sA~sW} where \code{sA}
#' are all summary measures defined by argument \code{sA} and \code{sW} are all baseline summary measures defined by argument \code{sW}.
#' @param hform.gstar Regression formula for estimating the conditional density P(\code{sA} | \code{sW}) under interventions
#' \code{f_gstar1} or \code{f_gstar2}.
#' When omitted, the same regression formula as in \code{hform.g0} will be used for \code{hform.gstar}. See Details.
# @param gform (Optional) Regression formula for treatment(s) A: P(A_i | W). See Details.
# @param args_f_g1star (Optional) Additional arguments to be passed to \code{f_gstar1} intervention function
# @param args_f_g2star (Optional) Additional arguments to be passed to \code{f_gstar2} intervention function
#' @param verbose Set to \code{TRUE} to print messages on status and information to the console.
#' Turn this on by default using \code{options(tmlenet.verbose=TRUE)}.
#' @param optPars A named list of additional optional parameters to be passed to \code{tmlenet}, such as
#' \code{bootstrap.var}, \code{n.bootstrap}, \code{alpha}, \code{lbound}, \code{family}, \code{runTMLE}, \code{YnodeDET}, \code{sep},
#' \code{f_g0}, \code{h_g0_SummariesModel} and
#' \code{h_gstar_SummariesModel}. See Details below for the description of each parameter.
# (REMOVED) \code{n_MCsims}
#((NOT IMPLEMENTED)) @param Q.SL.library SuperLearner libraries for outcome, Q
#((NOT IMPLEMENTED)) @param g.SL.library SuperLearner libraries for treatment mechanism, g
#((NOT IMPLEMENTED)) @param h_f.g0_args Additional arguments to be passed to f_g0
#((NOT IMPLEMENTED)) @param h_user_fcn User supplied function to calculate the clever covariate, h
#((NOT IMPLEMENTED)) @param h_logit_sep_k Flag for fitting a separate logistic regression for each strata of nFnode,
# used during estimation of the clever covariate, h
#'
#' @section Details:
#'
# (NOT IMPLEMENTED) When \code{sW} is missing, by default \code{sW} is constructed as follows.
# For each \code{"W"} in \code{Wnodes}, add a vector \code{data[,"W"]} as well as all friends covariate values of
# \code{"W"} to \code{sW} by running \code{netW = def_sW(W[[0:Kmax]], noname = TRUE)}.
# For each \code{"W"} in \code{Wnode}, a vector of \code{"W"} values of the first friend of observations
# \code{i} = 1,...,\code{nrow(data)} will be created and named \code{"W_netF1"}
# (i.e., variable "W_netF1" is constructed as def_sW(W_netF1 = W[[1]])).
# Similarly, the vector of "W" values of the jth friend for observations \code{i} = 1, ..., \code{nrow(data)}
# will be created and named "W_netFj", for j from 1 to \code{Kmax}, with all \code{W_netFj} then being added to
# \code{sW}.
#
# (NOT IMPLEMENTED) Similarly, when \code{sA} is missing, it is constructed by running
# \code{def_sW(netA = A[[0:Kmax]], noname = TRUE)} (assuming \code{"A"} is the value of \code{Anodes} in \code{data}),
# which combines the column \code{data[,"A"]} with all the friends treatment assignments of variable "A".
#
#'
#' Note that in case when both arguments \code{NETIDnode} and \code{NETIDmat} are left unspecified the input data are
#' assumed independent, i.e., no network dependency between the observations.
#' All inference will be performed based on the i.i.d. efficient influence curve for the target
#' parameter \eqn{(E_{g^*_1}[Y])}.
#'
#' Also note that the ordering of the friends IDs in \code{NETIDnode} or \code{NETIDmat} is unimportant.
#'
#' A special non-negative-integer-valued variable \code{nF} is automatically calculated each time
#' \code{tmlenet} or \code{eval.summaries} functions are called. \code{nF} contains the total number of friends for
#' each observation and it is always added as an additional column to the matrix of the baseline-covariates-based
#' summary measures \code{sWmat}. The variable \code{nF} can be used in the same ways as any of the column names in
#' the input data frame \code{data}. In particular, the name \code{nF} can be used inside the summary measure
#' expressions (calls to functions \code{def_sW} and \code{def_sA}) and inside any of the regression formulas
#' (\code{Qform}, \code{hform.g0}, \code{hform.gstar}).
#'
# However, if the column \code{data[,nFnode]}
# already exists in the input data it will be compared to the automatically calculated values, with an error produced
# if the two variables do not exactly match.
#' The regression formalas in \code{Qform}, \code{hform.g0} and \code{hform.gstar} can include any summary measures names defined in
#' \code{sW} and \code{sA}, referenced by their individual variable names or by their aggregate summary measure names.
#' For example, \code{hform.g0 = "netA ~ netW"} is equivalent to
#' \code{hform.g0 = "A + A_netF1 + A_netF2 ~ W + W_netF1 + W_netF2"} for \code{sW,sA} summary measures defined by
#' \code{def_sW(netW=W[[0:2]])} and \code{def_sA(netA=A[[0:2]])}.
#'
#' @section Additional parameters:
#'
#' Some of the parameters that control the estimation in \code{tmlenet} can be set by calling the function
#' \code{\link{tmlenet_options}}.
#'
#' Additional parameters can be also specified as a named list \code{optPars} argument of the
#' \code{tmlenet} function. The items that can be specified in \code{optPars} are:
#' \itemize{
#'
#' \item \code{bootstrap.var} - When set to \code{TRUE}, parametric bootstrap will be performed in order
#' to obtain an alternative estimate the TMLE variance (note that this will significantly increase the running time);
#'
#' \item \code{n.bootstrap} - Number of bootstrap samples, default is 100; Set to higher values, such as, 500+ to obtain a more precise
#' estimate of the tmle variance;
#'
#' \item \code{boot.nodes} - Which nodes should be re-sampled from their corresonding parametric model fits? Note that Y (outcome) is always
#' resampled from the model fit specified by \code{Qform}. Each of the nodes in \code{boot.nodes} must be conditionally independent, given
#' some set of covariates of summary measures (for example, it might be the exposure variable).
#' Any node name specified in \code{boot.nodes} must be fitted with a sepearate model,
#' specified either as part of \code{hform} (name in boot.nodes matches a variable on the left-side of the formula in \code{hform})
#' or as a seperate regression formula in \code{boot.form} argument. These
#' model fits are then used for re-sampling each of the nodes specified in \code{boot.nodes}.
#'
#' \item \code{boot.form} - Specify the regression formulas specifically for parametric bootstrap, which were previously specified on
#' the left-side of the regression formulas in \code{hform}.
#'
#' \item \code{alpha} - alpha-level for CI calculation (0.05 for 95% CIs);
#'
#' \item \code{lbound} - One value for symmetrical bounds on P(sW | sW).
#'
#' \item \code{family} - Family specification for regression models, defaults to binomial (CURRENTLY ONLY BINOMIAL
#' FAMILY IS IMPLEMENTED).
#
#(REMOVED)\item \code{n_MCsims} - Number of Monte-Carlo simulations performed, each of sample size \code{nrow(data)},
# for generating new exposures under \code{f_gstar1} or \code{f_gstar2} (if specified) or \code{f_g0} (if specified).
# These newly generated exposures are utilized when fitting the conditional densities P(\code{sA}|\code{sW})
# and when evaluating the substitution estimators \strong{GCOMP} and \strong{TMLE}
# under stochastic interventions \code{f_gstar1} or \code{f_gstar2}.
#'
#' \item \code{runTMLE} - Choose which of the two TMLEs to run, "tmle.intercept" or "tmle.covariate". The default is "tmle.intercept".
#'
#' \item \code{YnodeDET} - Optional column name for indicators of deterministic values of outcome \code{Ynode},
#' coded as (\code{TRUE}/\code{FALSE}) or
#' (\code{1}/\code{0}); observations with \code{YnodeDET}=\code{TRUE}/\code{1} are assumed to have constant value for their \code{Ynode}.
#'
#' \item \code{sep} - A character separating friend indices for the same observation in \code{NETIDnode}.
#'
#' \item \code{f_g0} - A function for generating true treatment mechanism under observed \code{Anodes}, if known (for example in a
#' randomized trial). This is used for estimating P(\code{sA}|\code{sW}) under \code{g0} by sampling large vector of \code{Anodes}
#' (of length \code{nrow(data)}) from \code{f_g0} function;
# (of length \code{nrow(data)*n_MCsims})
#'
#' \item \code{h_g0_SummariesModel} - Previously fitted model for P(\code{sA}|\code{sW}) under observed exposure mechanism \code{g0},
#' returned by the previous runs of the \code{tmlenet} function.
#' This has to be an object of \code{SummariesModel} \pkg{R6} class. When this argument is specified, all predictions
#' P(\code{sA}=\code{sa}|\code{sW}=\code{sw}) under \code{g0} will be based on the model fits provided by this argument.
#'
#' \item \code{h_gstar_SummariesModel} - Previously fitted model for P(\code{sA}|\code{sW}) under (stochastic) intervention
#' specified by \code{f_gstar1} or \code{f_gstar2}. Also an object of \code{SummariesModel} \pkg{R6} class.
#' When this argument is specified, the predictions P(\code{sA}=\code{sa}|\code{sW}=\code{sw})
#' under \code{f_gstar1} or \code{f_gstar2} will be based on the model fits provided by this argument.
#' }
#'
#' @section Specifying the counterfactual intervention function (\code{f_gstar1} and \code{optPars$f_gstar2}):
#'
#' The functions \code{f_gstar1} and \code{f_gstar2} can only depend on variables specified by the combined matrix
#' of summary measures (\code{sW},\code{sA}), which is passed using the argument \code{data}. The functions should
#' return a vector of length \code{nrow(data)} of counterfactual treatments for observations in the input data.
#'
#' @section Specifying the Network of Friends:
#'
#' The network of friends (connections) for observations in the input \code{data} can be specified in two
#' alternative ways, using either \code{NETIDnode} or \code{NETIDmat} input arguments.
#'
#' \code{NETIDnode} - The first (slower) method uses a vector of strings in \code{data[, NETIDnode]}, where each
#' string \code{i} must contain the space separated IDs or row numbers of all units in \code{data} thought to be
#' connected to observation i (friends of unit i);
#'
#' \code{NETIDmat} - An alternative (and faster) method is to pass a matrix with \code{Kmax} columns and nrow(data)
#' rows, where each row \code{NETIDmat[i,]} is a vector of observation \code{i}'s friends' IDs or \code{i}'s friends'
#' row numbers in \code{data} if \code{IDnode=NULL}. If observation \code{i} has fewer than \code{Kmax} friends, the
#' remainder of \code{NETIDmat[i,]} must be filled with \code{NA}s. Note that the ordering of friend indices is
#' irrelevant.
#'
#' @section IPTW estimator:
#' **********************************************************************
#'
#' \itemize{
#' \item As described in the following section, the first step is to construct an estimator \eqn{P_{g_N}(sA | sW)}
#' for the common (in \code{i}) conditional density \eqn{P_{g_0}(sA | sW)} for common (in \code{i}) unit-level summary
#' measures (\code{sA},\code{sW}).
#'
#' \item The same fitting algorithm is applied to construct an estimator \eqn{P_{g^*_N}(sA^* | sW^*)} of the common (in \code{i})
#' conditional density \eqn{P_{g^*}(sA^* | sW^*)} for common (in \code{i}) unit-level summary measures (\code{sA^*},\code{sW^*})
#' implied by the user-supplied stochastic intervention \code{f_gstar1} or \code{f_gstar2} and the observed distribution of \code{W}.
#'
#' \item These two density estimators form the basis of the IPTW estimator,
#' which is evaluated at the observed N data points \eqn{O_i=(sW_i, sA_i, Y_i), i=1,...,N} and is given by
#' \deqn{\psi^{IPTW}_n = \sum_{i=1,...,N}{Y_i \frac{P_{g^*_N}(sA^*=sA_i | sW=sW_i)}{P_{g_N}(sA=sA_i | sW=sW_i)}}.}
#' }
#'
#' @section GCOMP estimator:
#' **********************************************************************
#'
#' @section TMLE estimator:
#' **********************************************************************
#'
#' @section Modeling \code{P(sA|sW)} for summary measures \code{(sA,sW)}:
#' **********************************************************************
#'
#' Non-parametric
#' estimation of the common \strong{unit-level} multivariate joint conditional probability model \code{P_g0(sA|sW)},
#' for unit-level summary measures \code{(sA,sW)} generated from the observed exposures and baseline covariates
#' \eqn{(A,W)=(A_i,W_i : i=1,...,N)} (their joint density given by \eqn{g_0(A|W)Q(W)}), is performed by first
#' constructing the dataset of N summary measures, \eqn{(sA_i,sW_i : i=1,...,N)}, and then fitting the usual i.i.d. MLE
#' for the common density \code{P_g0(sA|sW)} based on the pooled N sample of these summary measures.
#'
#' Note that \code{sA} can be multivariate and any of its components \code{sA[j]} can be either binary, categorical
#' or continuous.
#' The joint probability model for \code{P(sA|sA)} = \code{P(sA[1],...,sA[k]|sA)} can be factorized as
#' \code{P(sA[1]|sA)} * \code{P(sA[2]|sA, sA[1])} * ... * \code{P(sA[k]|sA, sA[1],...,sA[k-1])},
#' where each of these conditional probability models is fit separately, depending on the type of the outcome variable
#' \code{sA[j]}.
#'
#' If \code{sA[j]} is binary, the conditional probability \code{P(sA[j]|sW,sA[1],...,sA[j-1])} is evaluated via logistic
#' regression model.
#' When \code{sA[j]} is continuous (or categorical), its range will be fist partitioned into \code{K} bins and the
#' corresponding \code{K}
#' bin indicators (\code{B_1,...,B_K}), where each bin indicator \code{B_j} is then used as an outcome in a
#' separate logistic regression model with predictors given by \code{sW, sA[1],...,sA[k-1]}.
#' Thus, the joint probability \code{P(sA|sW)} is defined by such a tree of binary logistic regressions.
#'
#' For simplicity, we now suppose \code{sA} is continuous and univariate and we describe here an algorithm for fitting
#' \eqn{P_{g_0}(sA | sW)} (the algorithm
#' for fitting \eqn{P_{g^*}(sA^* | sW^*)} is equivalent, except that exposure \code{A} is replaced with exposure \code{A^*}
#' generated under \code{f_gstar1} or \code{f_gstar2} and
#' the predictors \code{sW} from the regression formula \code{hform.g0} are replaced with predictors \code{sW^*}
#' specified by the regression formula \code{hform.gstar}).
#'
#' \enumerate{
#' \item Generate a dataset of N observed continuous summary measures (\code{sA_i}:i=1,...,N) from observed
#' ((\code{A_i},\code{W_i}):i=1,...,N).
#'
#' \item Divide the range of \code{sA} values into intervals S=(i_1,...,i_M,i_{M+1}) so that any observed data point
#' \code{sa_i} belongs to one interval in S, namely,
#' for each possible value sa of \code{sA} there is k\\in{1,...,M}, such that, i_k < \code{sa} <= i_{k+1}.
#' Let the mapping B(sa)\\in{1,...,M} denote a unique interval in S for sa, such that, i_{B(sa)} < sa <= i_{B(sa)+1}.
#' Let bw_{B(sa)}:=i_{B(sa)+1}-i_{B(sa)} be the length of the interval (bandwidth) (i_{B(sa)},i_{B(sa)+1}).
#' Also define the binary indicators b_1,...,b_M, where b_j:=I(B(sa)=j), for all j <= B(sa) and b_j:=NA for all j>B(sa).
#' That is we set b_j to missing ones the indicator I(B(sa)=j) jumps from 0 to 1.
#' Now let \code{sA} denote the random variable for the observed summary measure for one unit
#' and denote by (B_1,...,B_M) the corresponding random indicators for \code{sA} defined as B_j := I(B(\code{sA}) = j)
#' for all j <= B(\code{sA}) and B_j:=NA for all j>B(\code{sA}).
#'
#' \item For each j=1,...,M, fit the logistic regression model for the conditional probability P(B_j = 1 | B_{j-1}=0, sW), i.e.,
#' at each j this is defined as the conditional probability of B_j jumping from 0 to 1 at bin j, given that B_{j-1}=0 and
#' each of these logistic regression models is fit only among the observations that are still at risk of having B_j=1 with B_{j-1}=0.
#'
#' \item Normalize the above conditional probability of B_j jumping from 0 to 1 by its corresponding interval length (bandwidth) bw_j to
#' obtain the discrete conditional hazards h_j(sW):=P(B_j = 1 | (B_{j-1}=0, sW) / bw_j, for each j.
#' For the summary measure \code{sA}, the above conditional hazard h_j(sW) is equal to P(\code{sA} \\in (i_j,i_{j+1}) | \code{sA}>=i_j, sW),
#' i.e., this is the probability that \code{sA} falls in the interval (i_j,i_{j+1}), conditional on sW and conditional on the fact that
#' \code{sA} does not belong to any intervals before j.
#'
#' \item Finally, for any given data-point \code{(sa,sw)}, evaluate the discretized conditional density for P(\code{sA}=sa|sW=sw) by first
#' evaluating the interval number k=B(sa)\\in{1,...,M} for \code{sa} and then computing \\prod{j=1,...,k-1}{1-h_j(sW))*h_k(sW)}
#' which is equivalent to the joint conditional probability that \code{sa} belongs to the interval (i_k,i_{k+1}) and does not belong
#' to any of the intervals 1 to k-1, conditional on sW.
#' }
#'
#' The evaluation above utilizes a discretization of the fact that any continuous density f of random variable X can be written as f_X(x)=S_X(x)*h_X(x),
#' for a continuous density f of X where S_X(x):=P(X>x) is the survival function for X, h_X=P(X>x|X>=x) is the hazard function for X; as well as the fact that
#' the discretized survival function S_X(x) can be written as a of the hazards for s<x: S_X(x)=\\prod{s<x}h_X(x).
#'
#' @section Three methods for defining bin (interval) cuttoffs for a continuous one-dimenstional summary measure \code{sA[j]}:
#' **********************************************************************
#'
#' There are 3 alternative methods to defining the bin cutoffs S=(i_1,...,i_M,i_{M+1}) for a continuous summary measure
#' \code{sA}. The choice of which method is used along with other discretization parameters (e.g., total number of
#' bins) is controlled via the tmlenet_options() function. See \code{?tmlenet_options} argument \code{bin.method} for
#' additional details.
#'
#' Approach 1 (\code{equal.len}): equal length, default.
#'
#' *********************
#'
#' The bins are defined by splitting the range of observed \code{sA} (sa_1,...,sa_n) into equal length intervals.
#' This is the dafault discretization method, set by passing an argument \code{bin.method="equal.len"} to
#' \code{tmlenet_options} function prior to calling \code{tmlenet()}. The intervals will be defined by splitting the
#' range of (sa_1,...,sa_N) into \code{nbins} number of equal length intervals, where \code{nbins} is another argument
#' of \code{tmlenet_options()} function. When \code{nbins=NA} (the default setting) the actual value of \code{nbins}
#' is computed at run time by taking the integer value (floor) of \code{n/maxNperBin},
#' for \code{n} - the total observed sample size and \code{maxNperBin=1000} - another argument of
#' \code{tmlenet_options()} with the default value 1,000.
#'
#' Approach 2 (\code{equal.mass}): data-adaptive equal mass intervals.
#'
#' *********************
#'
#' The intervals are defined by splitting the range of \code{sA} into non-equal length data-adaptive intervals that
#' ensures that each interval contains around
#' \code{maxNperBin} observations from (sa_j:j=1,...,N).
#' This interval definition approach can be selected by passing an argument \code{bin.method="equal.mass"} to
#' \code{tmlenet_options()} prior to calling \code{tmlenet()}.
#' The method ensures that an approximately equal number of observations will belong to each interval, where that number
#' of observations for each interval
#' is controlled by setting \code{maxNperBin}. The default setting is \code{maxNperBin=1000} observations per interval.
#'
#' Approach 3 (\code{dhist}): combination of 1 & 2.
#'
#' *********************
#'
#' The data-adaptive approach dhist is a mix of Approaches 1 & 2. See Denby and Mallows "Variations on the Histogram"
#' (2009)). This interval definition method is selected by passing an argument \code{bin.method="dhist"} to
#' \code{tmlenet_options()} prior to calling \code{tmlenet()}.
#'
#' @return A named list with 3 items containing the estimation results for:
#' \itemize{
#' \item \code{EY_gstar1} - estimates of the mean counterfactual outcome under (stochastic) intervention function \code{f_gstar1} \eqn{(E_{g^*_1}[Y])}.
#' \item \code{EY_gstar2} - estimates of the mean counterfactual outcome under (stochastic) intervention function \code{f_gstar2} \eqn{(E_{g^*_2}[Y])},
#' \code{NULL} if \code{f_gstar2} not specified.
#' \item \code{ATE} - additive treatment effect (\eqn{E_{g^*_1}[Y]} - \eqn{E_{g^*_2}[Y]}) under interventions \code{f_gstar1}
#' vs. in \code{f_gstar2}, \code{NULL} if \code{f_gstar2} not specified.
#' }
#'
#' Each list item above is itself a list containing the items:
#' \itemize{
#' \item \code{estimates} - various estimates of the target parameter (network population counterfactual mean under
#' (stochastic) intervention).
#' \item \code{vars} - the asymptotic variance estimates, for \strong{IPTW} and \strong{TMLE}.
#' \item \code{CIs} - CI estimates at \code{alpha} level, for \strong{IPTW} and \strong{TMLE}.
#' \item \code{other.vars} - Placeholder for future versions.
#' \item \code{h_g0_SummariesModel} - The model fits for P(\code{sA}|\code{sW}) under observed exposure mechanism
#' \code{g0}. This is an object of \code{SummariesModel} \pkg{R6} class.
#' \item \code{h_gstar_SummariesModel} - The model fits for P(\code{sA}|\code{sW}) under intervention \code{f_gstar1}
#' or \code{f_gstar2}. This is an object of \code{SummariesModel} \pkg{R6} class.
#' }
#'
#' Currently implemented estimators are:
#' \itemize{
#' \item \code{tmle} - Either weighted regression intercept-based TMLE (\code{tmle.intercept} - the default)
#' with weights defined by the IPTW weights \code{h_gstar/h_gN} or
#' covariate-based unweighted TMLE (\code{tmle.covariate}) that uses the IPTW weights as a covariate
#' \code{h_gstar/h_gN}.
#' \item \code{h_iptw} - Efficient IPTW based on weights h_gstar/h_gN.
#' \item \code{gcomp} - Parametric G-computation formula substitution estimator.
#' }
#' @seealso \code{\link{tmlenet-package}} for the general overview of the package,
#' \code{\link{def_sW}} for defining the summary measures, \code{\link{eval.summaries}} for
#' evaluation and validation of the summary measures,
#' and \code{\link{df_netKmax2}}/\code{\link{df_netKmax6}}/\code{\link{NetInd_mat_Kmax6}}
#' for examples of network datasets.
#' @example tests/examples/1_tmlenet_example.R
#' @export
tmlenet <- function(DatNet.ObsP0, data, Kmax, sW, sA,
intervene1.sA, f_gstar1 = NULL, intervene2.sA = NULL, f_gstar2 = NULL,
# Anodes,
Ynode,
Qform = NULL, hform.g0 = NULL, hform.gstar = NULL,
# estimators = c("tmle", "iptw", "gcomp"),
# AnodeDET = NULL,
# YnodeDET = NULL,
NETIDmat = NULL,
IDnode = NULL, NETIDnode = NULL,
verbose = getOption("tmlenet.verbose"),
optPars = list(
bootstrap.var = FALSE,
n.bootstrap = 100,
boot.nodes = NULL,
boot.form = NULL,
alpha = 0.05,
lbound = 0.005,
family = "binomial", # NOT YET IMPLEMENTED
# n_MCsims = 1,
# n_MCsims = ifelse(!missing(data),ceiling(sqrt(nrow(data))),10),
runTMLE = c("tmle.intercept", "tmle.covariate"),
YnodeDET = NULL,
# f_gstar2 = NULL,
sep = ' ',
f_g0 = NULL,
h_g0_SummariesModel = NULL,
h_gstar_SummariesModel = NULL)
) {
oldverboseopt <- getOption("tmlenet.verbose")
options(tmlenet.verbose = verbose)
gvars$verbose <- verbose
n_MCsims <- 1
#----------------------------------------------------------------------------------
# ADDITIONAL ARGUMENTS (removed from input args of tmlenet())
#----------------------------------------------------------------------------------
AnodeDET <- NULL # removed from the input, since this is not implemented
iid_data_flag <- FALSE # set to true if no network is provided (usual iid TMLE)
Q.SL.library <- c("SL.glm", "SL.step", "SL.glm.interaction") # NOT USED
g.SL.library <- c("SL.glm", "SL.step", "SL.glm.interaction") # NOT USED
max_npwt <- 50 # NOT USED YET
h_logit_sep_k <- FALSE # NOT USED YET
bootstrap.var <- ifelse(is.null(optPars$bootstrap.var), FALSE, optPars$bootstrap.var)
n.bootstrap <- ifelse(is.null(optPars$n.bootstrap), 100, optPars$n.bootstrap)
boot.nodes <- if(is.null(optPars$boot.nodes)) {NULL} else {optPars$boot.nodes}
boot.form <- if(is.null(optPars$boot.form)) {NULL} else {optPars$boot.form}
alpha <- ifelse(is.null(optPars$alpha), 0.05, optPars$alpha)
lbound <- ifelse(is.null(optPars$lbound), 0.005, optPars$lbound)
family <- ifelse(is.null(optPars$family), "binomial", optPars$family)
sep <- ifelse(is.null(optPars$sep), ' ', optPars$sep)
assert_that(is.character(sep) && length(sep)==1L)
n_MCsims <- ifelse(is.null(optPars$n_MCsims), 1, optPars$n_MCsims)
f_g0 <- if(is.null(optPars$f_g0)) {NULL} else {optPars$f_g0}
if (!is.null(f_g0)) assert_that(is.function(f_g0))
YnodeDET <- if(is.null(optPars$YnodeDET)) {NULL} else {optPars$YnodeDET}
# f_gstar2 <- if(is.null(optPars$f_gstar2)) {NULL} else {optPars$f_gstar2}
# if (!is.null(f_gstar2)) assert_that(is.function(f_gstar2) || is.vector(f_gstar2))
# if TRUE, only evaluate the intercept-based TMLE (TMLE_B), if FALSE, evaluate only the covariate-based TMLE (TMLE_A)
runTMLE <- optPars$runTMLE[1]
if (is.null(runTMLE) || (runTMLE[1] %in% "tmle.intercept")) {
onlyTMLE_B <- TRUE
} else if (runTMLE %in% "tmle.covariate") {
onlyTMLE_B <- FALSE
} else {
stop("optPars[['runTMLE']] argument must be either 'tmle.intercept' or 'tmle.covariate'")
}
if (verbose) {
message("Running tmlenet with the following settings from tmlenet_options(): ");
str(gvars$opts)
message("Running tmlenet with the following settings from optPars arg of tmlenet(): ");
str(optPars)
}
#----------------------------------------------------------------------------------
# DETERMINING INTERNAL / EXTERNAL ESTIMATOR NAMES THAT WILL BE EVALUATED
#----------------------------------------------------------------------------------
# onlyTMLE_B <- TRUE
assert_that(assertthat::is.flag(onlyTMLE_B))
estnames.internal <- c("TMLE_A", "TMLE_B", "h_IPTW", "MLE")
# estnames.internal <- c("TMLE_A", "TMLE_B", "TMLE_g_IPTW", "h_IPTW", "g_IPTW", "MLE")
names(estnames.internal) <- estnames.internal
estnames.out <- c("tmle", "h_iptw", "gcomp")
if (onlyTMLE_B) {
estnames.internal <- estnames.internal[-which(estnames.internal%in%"TMLE_A")]
} else {
estnames.internal <- estnames.internal[-which(estnames.internal%in%"TMLE_B")]
}
names(estnames.out) <- estnames.internal
estnames.internal <- as.list(estnames.internal)
#----------------------------------------------------------------------------------
# MONTE-CARLO SIMULATION PARAMETERS
#----------------------------------------------------------------------------------
nQ.MCsims <- as.integer(n_MCsims) # number of times to sample MC sim for Q (each of size n)
ng.MCsims <- as.integer(n_MCsims) # number of times to sample MC sim for h (each of size n)
assert_that(is.count(nQ.MCsims))
assert_that(is.count(ng.MCsims))
max.err_est <- 0.1 # maximum percent error for MCS estimators
#----------------------------------------------------------------------------------
# PARAMETERS FOR ESTIMATING h under g0 & gstar
#----------------------------------------------------------------------------------
f.g0 <- f_g0
f.g0_args <- NULL
h_user_fcn <- NULL
h_user <- !(is.null(h_user_fcn))
#----------------------------------------------------------------------------------
# Perform some input checks
# Create an object with data & network information that is passed on to estimation algorithm(s)
# Build observed sW & sA: (obsdat.sW - a dataset (matrix) of n observed summary measures sW)
#----------------------------------------------------------------------------------
if (missing(Ynode)) Ynode <- NULL
if (is.null(Ynode) && is.null(Qform)) stop("Either Qform or Ynode must be specified")
if (!is.null(Qform) && !is.null(Ynode)) {
message("Since both Ynode and Qform are specified, the left-hand side of Qform will be ignored, with outcome being set to Ynode: " %+%
Ynode)
}
if (!is.null(Qform) && is.null(Ynode)) {
Ynode <- LhsVars(Qform)[1]
message("Setting the Ynode to: " %+% Ynode)
}
if (missing(DatNet.ObsP0)) {
# time_evalsumm <- system.time(
eval.summ.res <- eval.summaries(data = data, Kmax = Kmax, sW = sW, sA = sA,
IDnode = IDnode, NETIDnode = NETIDnode,
sep = sep, NETIDmat = NETIDmat, verbose = FALSE)
# )
# print("time eval.summaries(...): "); print(time_evalsumm)
DatNet.ObsP0 <- eval.summ.res$DatNet.ObsP0
data <- DatNet.ObsP0$datnetW$Odata
sW <- data$sW
sA <- data$sA
Kmax <- DatNet.ObsP0$Kmax
# sW <- DatNet.ObsP0$datnetW$sVar.object
# sA <- DatNet.ObsP0$datnetA$sVar.object
}
intervene1.sA <- update.intervention.sA(new.sA = intervene1.sA, sA = sA)
data$intervene1.sA <- intervene1.sA
if (!is.null(intervene2.sA)) {
intervene2.sA <- update.intervention.sA(new.sA = intervene2.sA, sA = sA)
data$intervene2.sA <- intervene2.sA
# making sure exactly the same Anodes are used for both interventions:
if (any(intervene1.sA$Anodes != intervene2.sA$Anodes))
stop("intervention node names have to be identical for intervene1.sA and intervene2.sA")
}
# new version of nodes:
node_l <- list(nFnode = DatNet.ObsP0$datnetW$nFnode,
Anodes = intervene1.sA$Anodes, # Note that Anodes might be different in intervene1.sA & intervene2.sA
AnodeDET = AnodeDET,
Ynode = Ynode, YnodeDET = YnodeDET)
data$nodes <- node_l
data$backupAnodes(Anodes = node_l$Anodes, sA = sA)
nobs <- DatNet.ObsP0$nobs
DatNet.ObsP0$nodes <- node_l
#----------------------------------------------------------------------------------
# Defining (and checking) Deterministic Y and A node flags:
#----------------------------------------------------------------------------------
if (is.null(AnodeDET)) {
determ.g <- rep_len(FALSE, nobs)
} else {
CheckVarNameExists(data$OdataDT, AnodeDET)
# determ.g <- (data[, AnodeDET] == 1)
setkeyv(data$OdataDT, cols=AnodeDET)
data$OdataDT[list(1),AnodeDET, with=FALSE]
}
if (is.null(YnodeDET)) {
determ.Q <- rep_len(FALSE, nobs)
} else {
CheckVarNameExists(data$OdataDT, YnodeDET)
# determ.Q <- (data[, YnodeDET] == 1)
setkeyv(data$OdataDT, cols=YnodeDET)
data$OdataDT[list(1),YnodeDET, with=FALSE]
}
for (Anode in node_l$Anodes) CheckVarNameExists(data$OdataDT, Anode)
# CheckVarNameExists(data, node_l$Anodes)
for (Ynode in node_l$Ynode) CheckVarNameExists(data$OdataDT, Ynode)
# CheckVarNameExists(data, node_l$Ynode)
#----------------------------------------------------------------------------------
# NOTE: YnodeVals = obsYvals, det.Y = determ.Q need to be added to DatNet.ObsP0 after returned from eval.summaries()
#----------------------------------------------------------------------------------
obsYvals <- data$OdataDT[[node_l$Ynode]]
DatNet.ObsP0$addYnode(YnodeVals = obsYvals, det.Y = determ.Q)
#----------------------------------------------------------------------------------
# (OPTIONAL) ADDING DETERMINISTIC/DEGENERATE Anode FLAG COLUMNS TO sA:
# cancelled adding DET nodes to sVar since all sVar are automatically get added to A ~ predictors + DETnodes...
#----------------------------------------------------------------------------------
# obsdat.sW <- O.datnetW$add_deterministic(Odata = data, userDETcol = "determ.g")$dat.sVar
# Testing NA for visible det.Y and true observed Y as protected:
# determ.Q <- c(FALSE, FALSE, FALSE, rep(TRUE, length(determ.Q)-3))
# length(determ.Q) == length(obsYvals)
# DatNet.ObsP0 <- DatNet.sWsA$new(datnetW = datnetW, datnetA = datnetA, YnodeVals = obsYvals, det.Y = determ.Q)$make.dat.sWsA()
# print("DatNet.ObsP0: "); print(DatNet.ObsP0)
# print(head(cbind(DatNet.ObsP0$noNA.Ynodevals, DatNet.ObsP0$YnodeVals, data[,node_l$Ynode]), 100))
#----------------------------------------------------------------------------------
# Optional regressions specs:
#----------------------------------------------------------------------------------
# Q.sVars <- process_regform(as.formula(Qform), sW.map = c(sW$sVar.names.map, sA$sVar.names.map), sA.map = node_l$Ynode)
Q.sVars <- process_regforms(Qform, sW.map = c(sW$sVar.names.map, sA$sVar.names.map), sA.map = node_l$Ynode)
# h.g0.sVars <- process_regform(as.formula(hform.g0[[1]]), sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
h.g0.sVars <- process_regforms(hform.g0, sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
if (!is.null(hform.gstar)) {
# h.gstar.sVars <- process_regform(as.formula(hform.gstar), sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
h.gstar.sVars <- process_regforms(hform.gstar, sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
} else {
h.gstar.sVars <- h.g0.sVars
}
if (verbose) {
print("Input regression(s) Qform (E(Y|sA,sW)): "); res <- lapply(Qform, function(Qform) print(as.formula(Qform),showEnv=FALSE))
print("Derived regression(s) from Qform:"); str(Q.sVars)
print("Input regression(s) hform.g0 (P(sA|sW) under g0): "); res <- lapply(hform.g0, function(hform.g0) print(as.formula(hform.g0),showEnv=FALSE))
print("Derived regression(s) from hform.g0: "); str(h.g0.sVars)
print("Input regression(s) hform.gstar (P(sA|sW) under g.star): "); res <- lapply(hform.gstar, function(hform.gstar) print(as.formula(hform.gstar),showEnv=FALSE))
print("Derived regression(s) from hform.gstar: "); str(h.gstar.sVars)
}
#-----------------------------------------------------------
# Defining and fitting regression for Y ~ sW + sA:
#-----------------------------------------------------------
check.Qpreds.exist <- unlist(lapply(Q.sVars$predvars, function(PredName) PredName %in% DatNet.ObsP0$names.sVar))
if (!all(check.Qpreds.exist)) stop("the following predictors in Qform regression could not be located among the summary measures: " %+%
paste0(Q.sVars$predvars[!check.Qpreds.exist], collapse = ","))
if (verbose) {
for (idx in seq_along(node_l$Ynode)) {
message("================================================================")
message("fitting E(Y|sA,sW):= ", "P(" %+% node_l$Ynode[idx] %+% "=1 | " %+% paste(Q.sVars$predvars[[idx]], collapse = ",") %+% ")")
message("================================================================")
}
}
Qreg <- RegressionClass$new(outvar = node_l$Ynode,
predvars = Q.sVars$predvars[[1]],
subset = !determ.Q, ReplMisVal0 = TRUE)
m.Q.init <- BinOutModel$new(glm = FALSE, reg = Qreg)$fit(data = DatNet.ObsP0)
#----------------------------------------------------------------------------------
# Create an object with model estimates, data & network information that is passed on to estimation procedure
#----------------------------------------------------------------------------------
# 1) define parameters for MC estimation of the substitution estimators
# 2) define parameters for estimation of the efficient weights h(A^s|W^s)
est_obj <- list(
estnames = estnames.internal,
lbound = lbound[1],
max.err_eps = max.err_est, # error tolerance for the mean/var M.C. estimate
m.Q.init = m.Q.init,
f.g0 = f.g0,
sW = sW,
sA = sA,
Q.sVars = Q.sVars,
h.g0.sVars = h.g0.sVars,
h.gstar.sVars = h.gstar.sVars,
nQ.MCsims = nQ.MCsims,
ng.MCsims = ng.MCsims,
h_g0_SummariesModel = optPars$h_g0_SummariesModel,
h_gstar_SummariesModel = optPars$h_gstar_SummariesModel,
# Cap the prop weights scaled at max_npwt (for =50 -> translates to max 10% of total weight for n=500 and 5% for n=1000):
max_npwt = max_npwt # NOT IMPLEMENTED
# h_logit_sep_k = h_logit_sep_k, # NOT IMPLEMENTED
# h_user = h_user, # NOT IMPLEMENTED
# h_user_fcn = h_user_fcn, # NOT IMPLEMENTED
)
est_obj_g1 <- append(est_obj,
list(
f.gstar = f_gstar1,
new.sA = intervene1.sA
)
)
if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
est_obj_g2 <- append(est_obj,
list(
f.gstar = f_gstar2,
new.sA = intervene2.sA
)
)
}
#----------------------------------------------------------------------------------
# Running MC evaluation for substitution TMLE ests
#----------------------------------------------------------------------------------
tmle_g1_out <- get_all_ests(estnames = estnames.internal, DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_obj_g1)
if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
tmle_g2_out <- get_all_ests(estnames = estnames.internal, DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_obj_g2)
} else {
tmle_g2_out <- NULL
}
iidEIC.eval <- TRUE
if (iidEIC.eval) {
# MC.tmle.eval.t <- system.time(
MC.tmle.eval <- MCeval_fWi(n.MC = n.bootstrap, DatNet.ObsP0 = DatNet.ObsP0, tmle_g1_out = tmle_g1_out, tmle_g2_out = tmle_g2_out)
# )
# tmle_g1_out$ests_mat["TMLE",] <- mean(MC.tmle.eval$EY_gstar1)
# if (!is.null(tmle_g2_out)) tmle_g2_out$ests_mat["TMLE",] <- mean(MC.tmle.eval$EY_gstar2)
} else {
MC.tmle.eval <- list(EY_gstar1 = NA, EY_gstar2 = NA, ATE = NA)
}
# print("MC.tmle.eval.t"); print(MC.tmle.eval.t)
if (bootstrap.var) {
# ------------------------------------------------------------------------------------------
# IID BOOSTRAP FOR THE TMLE (DOES'T WORK FOR DEP DATA):
# ------------------------------------------------------------------------------------------
# var_tmleB_boot <- iid_bootstrap_tmle(n.boot, estnames, DatNet.ObsP0, tmle_g_out, QY_mat, wts_mat)
# ------------------------------------------------------------------------------------------
# PARAMETRIC BOOSTRAP TMLE variance estimate:
# ------------------------------------------------------------------------------------------
var_tmleB_boot <- par_bootstrap_tmle(n.boot = n.bootstrap, boot.nodes = boot.nodes, boot.form = boot.form,
estnames = estnames.internal, DatNet.ObsP0 = DatNet.ObsP0,
tmle_g1_out = tmle_g1_out, tmle_g2_out = tmle_g2_out)
} else {
var_tmleB_boot <- list(EY_gstar1 = NA, EY_gstar2 = NA, ATE = NA)
}
#----------------------------------------------------------------------------------
# Create output list (estimates, as. variances, CIs)
#----------------------------------------------------------------------------------
EY_gstar1 <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out = tmle_g1_out,
MC.tmle.eval = MC.tmle.eval$EY_gstar1,
var_tmleB_boot = var_tmleB_boot$EY_gstar1)
EY_gstar2 <- NULL; ATE <- NULL
if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
EY_gstar2 <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out=tmle_g2_out,
MC.tmle.eval = MC.tmle.eval$EY_gstar2,
var_tmleB_boot = var_tmleB_boot$EY_gstar2)
ATE <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out = tmle_g1_out, tmle_g2_out = tmle_g2_out,
MC.tmle.eval = MC.tmle.eval$ATE,
var_tmleB_boot = var_tmleB_boot$ATE)
}
tmlenet.res <- list(EY_gstar1 = EY_gstar1, EY_gstar2 = EY_gstar2, ATE = ATE)
class(tmlenet.res) <- c(class(tmlenet.res), "tmlenet")
options(tmlenet.verbose = oldverboseopt)
gvars$verbose <- oldverboseopt
return(tmlenet.res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.