R/tmlenet.R

#################################################################################
########################### NETWORK TMLE R PACKAGE ##############################
# authors: Oleg Sofrygin <sofrygin@berkeley.edu> and Mark van der Laan <laan@berkeley.edu>
#################################################################################

# @title tmlenet-package
# @docType package
# @author Oleg Sofrygin, Mark J. van der Laan

#' @useDynLib tmlenet
#' @import R6
#' @importFrom Rcpp sourceCpp
#' @importFrom grDevices nclass.FD nclass.Sturges nclass.scott
#' @importFrom graphics axis barplot hist par text
#' @importFrom methods is
#' @importFrom stats approx quasibinomial binomial coef glm.control glm.fit plogis predict qlogis qnorm quantile rnorm terms var predict glm.control
#' @importFrom utils data head str

#-----------------------------------------------------------------------------
# Class Membership Tests
#-----------------------------------------------------------------------------
is.DatNet.sWsA <- function(DatNet.sWsA) "DatNet.sWsA"%in%class(DatNet.sWsA)
is.DatNet <- function(DatNet) "DatNet"%in%class(DatNet)

#-----------------------------------------------------------------------------
# ALL NETWORK VARIABLE NAMES MUST BE CONSTRUCTED BY CALLING THIS FUNCTION.
# In the future might return the network variable (column vector) itself.
# Helper function that for given variable name (varnm) and friend index (fidx)
# returns the characeter name of that network variable varnm[fidx],
# for fidx = 0 (var itself), ..., kmax. fidx can be a vector, in which case a
# character vector of network names is returned. If varnm is also a vector, a
# character vector for all possible combinations of (varnm x fidx) is returned.
# OUTPUT format: Varnm_net.j:
#-----------------------------------------------------------------------------
netvar <- function(varnm, fidx) {
  cstr <- function(varnm, fidx) {
    slen <- length(fidx)
    rstr <- vector(mode = "character", length = slen)
    netidxstr <- ! (fidx %in% 0L)
    rstr[netidxstr] <- stringr::str_c('_netF', fidx[netidxstr])  # vs. 1
    # rstr[netidxstr] <- str_c('.net.', fidx[netidxstr])  # vs. 2
    return(stringr::str_c(varnm, rstr))
  }
  if (length(varnm) > 1) {
    return(unlist(lapply(varnm, cstr, fidx)))
  } else {
    return(cstr(varnm, fidx))
  }
}
# Examples:
# netvar("A", (0:5))
# netvar("A", c(0:5, 0, 3))
# netvar(c("A", "W"), c(0:5, 0, 3))
# netvar(c("A", "W"), c(0:5, 0, 3))

#-----------------------------------------------------------------------------
# General utilities / Global Vars
#-----------------------------------------------------------------------------
`%+%` <- function(a, b) paste0(a, b)

checkpkgs <- function(pkgs) {
  for (pkg in pkgs) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(pkg %+% " package needed for this function to work. Please install it.", call. = FALSE)
    }
  }
}

# Bound g(A|W) probability within supplied bounds
bound <- function(x, bounds){
  x[x<min(bounds)] <- min(bounds)
  x[x>max(bounds)] <- max(bounds)
  return(x)
}

#if warning is in ignoreWarningList, ignore it; otherwise post it as usual
SuppressGivenWarnings <- function(expr, warningsToIgnore) {
  h <- function (w) {
    if (w$message %in% warningsToIgnore) invokeRestart( "muffleWarning" )
  }
  withCallingHandlers(expr, warning = h )
}

GetWarningsToSuppress <- function(update.step=FALSE) {
  warnings.to.suppress <- c("glm.fit: fitted probabilities numerically 0 or 1 occurred",
                            "prediction from a rank-deficient fit may be misleading")
  if (update.step) {
    warnings.to.suppress <- c(warnings.to.suppress, "glm.fit: algorithm did not converge")
  }
  return(warnings.to.suppress)
}

# returns NULL if no factors exist, otherwise return the name of the factor variable(s)
CheckExistFactors <- function(data) {
  testvec <- unlist(lapply(data, is.factor))
  if (any(testvec)) {
    return(names(data)[which(testvec)])
  } else {
    return(NULL)
  }
}

# throw exception if 1) varname doesn't exist; 2) more than one varname is matched
CheckVarNameExists <- function(data, varname) {
  idvar <- names(data) %in% varname
  if (sum(idvar) < 1) stop("variable name " %+% varname %+% " not found in data input")
  if (sum(idvar) > 1) stop("more than one column in the input data has been matched to name "
                            %+% varname %+% ". Consider renaming some of the columns: " %+%
                            paste0(names(data)[idvar], collapse=","))
  return(invisible(NULL))
}

# THIS NEEDS MORE EVALUATION, DOESN'T SEEM TO WORK AS INTENDED DURING MC EVALUTION
# (GET HUGE ABSOLUTE VALUE WEIGHTS, THAT HAVE tiny % CONTRIBUTION)
scale_bound <- function(weights, max_npwt, n) {
  # scale weights
  weight_prop <- (weights/sum(weights))   # % contribution to the total weight, scaled by n
  weight_prop_byn <- (weights/sum(weights))*n   # % contribution to the total weight, scaled by n
  # print("weight summary before trunc"); print(summary(weights))
  # print("weight_prop before trunc, %"); print(summary(weight_prop))
  # print("weight_prop before trunc, scaled by n"); print(summary(weight_prop_byn))

  while (any(weight_prop_byn > (max_npwt+5))) {
    weights[which(weight_prop_byn >= max_npwt)] <- (max_npwt / n) * sum(weights)
    weight_prop_byn <- (weights/sum(weights)) * n   # % contribution to the total weight, sclaed by n
    # print("weight summary after trunc"); print(summary(weights))
    # print("weight_prop after trunc, %"); print(summary(weights/sum(weights)))
    # print("weight_prop after trunc, scaled by n"); print(summary(weight_prop_byn))
  }
  return(weights)
}
# Return the left hand side variable of formula f as a character
LhsVars <- function(f) {
  f <- as.formula(f)
  return(as.character(f[[2]]))
}
# Return the right hand side variables of formula f as a character vector
RhsVars <- function(f) {
  f <- as.formula(f)
  return(all.vars(f[[3]]))
}

#************************************************
# TMLEs
#************************************************
tmle.update <- function(estnames, Y, off, h_wts, determ.Q, predictQ = TRUE) {
  QY.star <- NA
  ctrl <- glm.control(trace = FALSE, maxit = 1000)
  if ("TMLE_A" %in% estnames) {
    #************************************************
    # TMLE A: estimate the TMLE update via univariate ML (epsilon is coefficient for h^*/h) - ONLY FOR NON-DETERMINISTIC SUBSET
    #************************************************
    SuppressGivenWarnings(
      m.Q.star <- speedglm::speedglm.wfit(X = matrix(h_wts, ncol=1), y = Y, family = quasibinomial(), trace = FALSE, maxit = 1000, offset = off),
      GetWarningsToSuppress(TRUE))
    m.Q.star.coef <- m.Q.star$coef
    # m.Q.star.fit <- list(coef = m.Q.star$coef, linkfun = "logit_linkinv", fitfunname = "speedglm")
    # class(m.Q.star.fit) <- c(class(m.Q.star.fit), c("speedglmS3"))

    # SuppressGivenWarnings(m.Q.star <- glm(Y ~ -1 + h_wts + offset(off), data = data.frame(Y = Y, off = off, h_wts = h_wts),
    #                                           subset = !determ.Q, family = "quasibinomial", control = ctrl), GetWarningsToSuppress(TRUE))
    # m.Q.star.coef <- coef(m.Q.star)

    if (predictQ) {
      QY.star <- Y
      if (!is.na(m.Q.star.coef)) QY.star <- plogis(off + m.Q.star.coef * h_wts)
    }

  } else if ("TMLE_B" %in% estnames) {
    #************************************************
    # TMLE B: estimate the TMLE update via weighted univariate ML (espsilon is intercept)
    #************************************************
    SuppressGivenWarnings(
      m.Q.star <- speedglm::speedglm.wfit(X = matrix(1L, ncol=1, nrow=length(Y)), y = Y, weights = h_wts, offset = off,
                                        family = quasibinomial(), trace = FALSE, maxit = 1000),
      GetWarningsToSuppress(TRUE))
    # family = binomial()
    m.Q.star.coef <- m.Q.star$coef
    # m.Q.star.fit <- list(coef = m.Q.star$coef, linkfun = "logit_linkinv", fitfunname = "speedglm")
    # class(m.Q.star.fit) <- c(class(m.Q.star.fit), c("speedglmS3"))
    # SuppressGivenWarnings(m.Q.star <- glm(Y ~ offset(off), data = data.frame(Y = Y, off = off), weights = h_wts,
                                              # subset = !determ.Q, family = "quasibinomial", control = ctrl), GetWarningsToSuppress(TRUE))
    # m.Q.star.coef <- coef(m.Q.star)

    if (predictQ) {
      QY.star <- Y
      if (!is.na(m.Q.star.coef)) QY.star <- plogis(off + m.Q.star.coef)
    }
  }
  #************************************************
  # (DISABLED) g_IPTW estimator (based on full likelihood factorization, prod(g^*)/prod(g_N):
  #************************************************
  # 02/16/13: IPTW estimator (Y_i * prod_{j \\in Fi} [g*(A_j|c^A)/g0_N(A_j|c^A)])
  # g_wts <- iptw_est(k = est_params_list$Kmax, data = data, node_l = nodes, m.gN = est_params_list$m.g0N,
  #                      f.gstar = est_params_list$f.gstar, f.g_args = est_params_list$f.g_args, family = "binomial",
  #                      NetInd_k = est_params_list$NetInd_k, lbound = est_params_list$lbound, max_npwt = est_params_list$max_npwt,
  #                      f.g0 = est_params_list$f.g0, f.g0_args = est_params_list$f.g0_args)
  # Y_IPTW_g <- Y
  # Y_IPTW_g[!determ.Q] <- Y[!determ.Q] * g_wts[!determ.Q]
  #************************************************
  # (DISABLED) g_IPTW-based clever covariate TMLE (based on FULL likelihood factorization), covariate based fluctuation
  #************************************************
  # SuppressGivenWarnings(m.Q.star_giptw <- glm(Y ~ -1 + g_wts + offset(off),
  #                                           data = data.frame(Y = Y, off = off, g_wts = g_wts),
  #                                           subset = !determ.Q, family = "quasibinomial", control = ctrl),
  #                                           GetWarningsToSuppress(TRUE))
  return(list(m.Q.star.coef = m.Q.star.coef, QY.star = QY.star))
}

#---------------------------------------------------------------------------------
# Estimate h_bar under g_0 and g* given observed data and vector of c^Y's data is an DatNet.sWsA object
#---------------------------------------------------------------------------------
get_all_ests <- function(estnames, DatNet.ObsP0, est_params_list) {
  #---------------------------------------------------------------------------------
  # unified estimator naming used throughout the package:
  # c("TMLE", "h_IPTW", "MLE")
  #---------------------------------------------------------------------------------
  # DatNet.ObsP0$det.Y             # TRUE/FALSE for deterministic Y's
  # DatNet.ObsP0$noNA.Ynodevals    # actual observed Y's
  # m.Q.init$getoutvarnm           # reg outvar name (Ynode)
  # DatNet.ObsP0$YnodeVals         # visible Y's with NA for det.Y
  # m.Q.init$getoutvarval          # Yvals used in prediction (with det.Y obs set to NA)
  # m.Q.init$getsubset             # valid subset (!det.Y)
  # m.Q.init$reg                   # regression class (Qreg)

  # making sure the current dataset consists of the OBSERVED Anodes and sA (summaries)
  if (!DatNet.ObsP0$Odata$curr_data_A_g0) {
    if (is.null(DatNet.ObsP0$Odata$A_g0_DT))
      stop("Can't recover the initial observed A (exposures), as those were over-written and not backed-up")
    DatNet.ObsP0$Odata$swapAnodes()
    if (!DatNet.ObsP0$Odata$restored_sA_Vars) DatNet.ObsP0$datnetA$make.sVar(sVar.object = est_params_list$sA)
  }

  nodes <- DatNet.ObsP0$nodes
  Y <- DatNet.ObsP0$noNA.Ynodevals # actual observed Y`s
  determ.Q <- DatNet.ObsP0$det.Y
  m.Q.init <- est_params_list$m.Q.init

  QY.init <- DatNet.ObsP0$noNA.Ynodevals # getting all node vals, inc. deterministic
  QY.init[!DatNet.ObsP0$det.Y] <- m.Q.init$predict(newdata = DatNet.ObsP0)$getprobA1[!DatNet.ObsP0$det.Y] # getting predictions P(Y=1) for non-DET Y
  off <- qlogis(QY.init)  # offset log(x/[1-x])

  #************************************************
  # h^*/h_N clever covariate:
  #************************************************
  fit.hbars_t <- system.time(fit.hbars.res <- fit.hbars(DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_params_list)) # fit the clever covariate
  DatNet.gstar <- fit.hbars.res$DatNet.gstar
  m.h.fit <- fit.hbars.res$m.h.fit
  h_wts <- fit.hbars.res$h_gstar_h_gN

  #************************************************
  # IPTW_h estimator:
  #************************************************
  h_IPTW <- Y
  h_IPTW[!determ.Q] <- Y[!determ.Q] * h_wts[!determ.Q]
  h_IPTW <- mean(h_IPTW)

  # message("h_IPTW: "); message(h_IPTW)
  # if (h_IPTW > 0.5) browser()

  #************************************************
  # Get a TMLE update:
  #************************************************
  # epsilon (intercept or coefficient for h): tmle.obj$m.Q.star
  # boot_idx <- seq.int(DatNet.ObsP0$nobs)
  tmle.obj <- tmle.update(estnames = estnames, Y = Y, off = off, h_wts = h_wts, determ.Q = determ.Q)
  # print("tmle.obj$m.Q.star.coef"); print(tmle.obj$m.Q.star.coef)

  #************************************************
  # Run Monte-Carlo (MC) evaluation for all plug-in estimators (TMLE & Gcomp), under stochastic intervention g^*:
	#************************************************
  MC_fit_params <- append(est_params_list, list(m.Q.star = tmle.obj$m.Q.star.coef))

  # make sure the current dataset constits of INTERVENED Anodes and sA (under g.star):
  if (DatNet.ObsP0$datnetW$Odata$curr_data_A_g0) {
    DatNet.gstar$datnetA$Odata$swapAnodes()
    if (!DatNet.gstar$datnetA$Odata$restored_sA_Vars)
      DatNet.gstar$datnetA$make.sVar(sVar.object = est_params_list$new.sA) # just recreate the summaries sA based on current Anode values in the data
  }

  # generate new A's under f.gstar, then re-create new summaries sA:
  # DatNet.gstar$make.dat.sWsA(p = 1, f.g_fun = est_params_list$f.gstar, sA.object = sA, DatNet.ObsP0 = DatNet.ObsP0)
  # DatNet.gstar$datnetA$Odata$OdataDT
  # DatNet.gstar$datnetA$Odata$A_g0_DT
  # DatNet.gstar$datnetA$Odata$sA_g0_DT
  # both should be the same, as both are pointing to the same data.table:
  # head(DatNet.ObsP0$dat.sVar)
  # head(DatNet.gstar$dat.sVar)

  MC.Eval.psi <- get.MCS_ests(DatNet.ObsP0 = DatNet.ObsP0,
                              DatNet.gstar = DatNet.gstar,
                              MC_fit_params = MC_fit_params,
                              m.h.fit = m.h.fit)

  # -----------------------------------------------------------------
  # Mean estimates
  # -----------------------------------------------------------------
  MCS_res <- MC.Eval.psi$psi_est_mean
  ests <- c(TMLE = MCS_res[["TMLE"]],
            h_IPTW = h_IPTW, # IPTW estimator based on h - clever covariate:
            MLE = MCS_res[["MLE"]])

  ests_mat <- matrix(0L, nrow = length(ests), ncol = 1)
  ests_mat[, 1] <- ests
  rownames(ests_mat) <- names(ests); colnames(ests_mat) <- "estimate"

  wts_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 1)
  colnames(wts_mat) <- c("h_wts")
  wts_mat[, "h_wts"] <- h_wts

  # Components of asymptotic variance for tmle_net (E_g^*[\bar{Q}_0(A^s,W^s|W^s)]-\psi_0):
  # SUBSTRACT overall estimate of psi_0 from fW_i i-specific components
  fWi_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 1)
  colnames(fWi_mat) <- c("fWi_Qinit")
  fWi_mat[,"fWi_Qinit"] <- MCS_res[agrep("fWi_init_", names(MCS_res), max.distance=list(insertions=0, substitutions=0))]

  QY_mat <- matrix(0L, nrow = DatNet.ObsP0$nobs, ncol = 2)
  colnames(QY_mat) <- c("QY.init", "QY.star")
  QY_mat[,] <- cbind(QY.init, tmle.obj$QY.star)


  if (gvars$verbose)  {
    print("time spent fitting new fit.hbars.res:"); print(fit.hbars_t)
    if ("TMLE_A" %in% estnames) {
      parsubmodel_fits <- rbind(tmle.obj$m.Q.star.coef)
      rownames(parsubmodel_fits) <- c("epsilon (clever covariate coefficient)")
    } else if ("TMLE_B" %in% estnames) {
      parsubmodel_fits <- rbind(tmle.obj$m.Q.star.coef)
      rownames(parsubmodel_fits) <- c("alpha (intercept)")
    }
    print("new parsubmodel_fits: "); print(parsubmodel_fits)
    print(
          c(
          fWi_init = mean(fWi_mat[,"fWi_Qinit"] - ests["TMLE"])
          ));
    print("new MC.ests mat: "); print(ests_mat)
  }


  # -----------------------------------------------------------------
  # instance of an R6 object mcEvalPsi:
  # -----------------------------------------------------------------
  psi.evaluator <- MC.Eval.psi$psi.evaluator

  return(list( ests_mat = ests_mat,
               wts_mat = wts_mat,
               fWi_mat = fWi_mat,
               QY_mat = QY_mat,
               # var_tmleB_boot = var_tmleB_boot,
               psi.evaluator = psi.evaluator, # for par. bootstrap
               m.Q.init = m.Q.init,           # for par. bootstrap
               m.h.fit = m.h.fit,             # for par. bootstrap
               DatNet.gstar = DatNet.gstar,   # for par. bootstrap
               sW = est_params_list$sW,       # for par. bootstrap
               sA = est_params_list$sA,       # for par. bootstrap
               f.gstar = est_params_list$f.gstar, # for par. bootstrap
               new.sA = est_params_list$new.sA, # for par. bootstrap
               MC_fit_params = MC_fit_params,   # for Monte-Carlo eval of the iid W EIC
               h_g0_SummariesModel = m.h.fit$summeas.g0,
               h_gstar_SummariesModel = m.h.fit$summeas.gstar
              ))
}


#---------------------------------------------------------------------------------
# (NEW INTERFACE FOR SPECIFYING regressions for hform.g0, hform.gstar & Qform)
#---------------------------------------------------------------------------------
get_vars_fromlist <- function(varname, sVar.map) {
  if (varname %in% names(sVar.map)) {
    as.vector(sVar.map[[varname]])
  } else {
    varname
  }
}

# Parse the formulas for summary measure names and create a map to actual covariate names in sA & sW
process_regform <- function(regform, sW.map = NULL, sA.map = NULL) {
  # Getting predictors (sW names):
  regformterms <- terms(regform)
  sW.names <- attributes(regformterms)$term.labels
  sW.names.alt <- colnames(attributes(regformterms)$factors)
  assert_that(all(sW.names == sW.names.alt))

  # Getting outcomes (sA names):
  out.var <- rownames(attributes(regformterms)$factors)[1] # character string
  out.vars.form <- as.formula(". ~ " %+% out.var)
  out.vars.terms <- terms(out.vars.form)
  sA.names <- attributes(out.vars.terms)$term.labels

  outvars <- unlist(lapply(sA.names, get_vars_fromlist, sA.map))
  predvars <- unlist(lapply(sW.names, get_vars_fromlist, sW.map))
  return(list(outvars = outvars, predvars = predvars))
}

# When several reg forms are specified (multivariate Anodes), process outvars into one vector and process predvars in a named list of vectors
process_regforms <- function(regforms, sW.map = NULL, sA.map = NULL) {
  if (length(regforms)==0L) {
    default.reg <- list(outvars =  list(as.vector(unlist(sA.map))), predvars = list(as.vector(unlist(sW.map))))
    names(default.reg[["outvars"]]) <- names(default.reg[["predvars"]]) <- paste0(default.reg$outvars[[1]], collapse="+")
    return(default.reg)
  } else {
    outvars <- vector(mode="list", length=length(regforms))
    predvars <- vector(mode="list", length=length(regforms))
    for (idx in seq_along(regforms)) {
      res <- process_regform(as.formula(regforms[[idx]]), sW.map = sW.map, sA.map = sA.map)
      outvars[[idx]] <- res$outvars
      predvars[[idx]] <- res$predvars
      names(outvars)[idx] <- names(predvars)[idx] <- paste0(outvars[[idx]], collapse="+")
    }
    return(list(outvars = outvars, predvars = predvars))
  }
}

#' Evaluate Summary Measures sA and sW
#'
#' Take input data, create a network matrix (when input network matrix not provided) and evaluate the summary measures
#'  previously defined with functions \code{\link{def_sW}} and \code{\link{def_sA}}.
#'  This function is called internally by \code{tmlenet} for the evaluation of the summary measures.
#'  The R6 class object named \code{DatNet.ObsP0} that is returned by this function can be supplied as an input to the
#'  \code{tmlenet} function.
#'  When \code{DatNet.ObsP0} is used as an input to \code{tmlenet}, the rest of the input arguments already provided to
#'  this function can be omitted from the \code{tmlenet} function call.
#' @param data Same as \code{\link{tmlenet}} input argument.
#' @param Kmax Same as \code{\link{tmlenet}} input argument.
#' @param sW Same as \code{\link{tmlenet}} input argument.
#' @param sA Same as \code{\link{tmlenet}} input argument.
#' @param IDnode (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param NETIDnode (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param sep Optional friend ID character separator for friends listed in \code{NETIDnode} column of \code{data}, default is \code{' '};
#'  same as \code{\link{tmlenet}} input argument \code{optPars$sep}.
#' @param NETIDmat (Optional) Same as \code{\link{tmlenet}} input argument.
#' @param verbose Set to \code{TRUE} to print messages on status and information to the console.
#'  Turn this on by default using \code{options(tmlenet.verbose=TRUE)}.
#' @return A named list that contains:
#'  \itemize{
#'  \item \code{sW.matrix} - Matrix of evaluated summary measures for \code{sW}.
#'  \item \code{sA.matrix} - Matrix of evaluated summary measures for \code{sA}.
#'  \item \code{NETIDmat} - Network ID matrix that can be used for \code{NETIDmat} input argument to \code{tmlenet}.
#'  \item \code{DatNet.ObsP0} - R6 object of class \code{\link{DatNet.sWsA}} that stores all the summary measures and the network information.
#'    This object be passed to \code{\link{tmlenet}} as an argument, in which case the arguments already provided to \code{eval.summaries} no
#'    longer need to be specified to \code{tmlenet}.
#'  }
#' @seealso \code{\link{tmlenet}} for estimation of network effects and \code{\link{def_sW}} for defining the summary measures.
#' @example tests/examples/3_eval.summaries_examples.R
#' @export
eval.summaries <- function(data, Kmax, sW, sA, IDnode = NULL, NETIDnode = NULL, sep = ' ', NETIDmat = NULL,
                            verbose = getOption("tmlenet.verbose")) {
  iid_data_flag <- FALSE  # set to true if no network is provided (will run iid TMLE)
  nFnode = "nF"
  #----------------------------------------------------------------------------------
  # SOME INPUT CHECKS
  #----------------------------------------------------------------------------------
  assert_that(is.data.frame(data))
  assert_that(is.integerish(Kmax))
  Kmax <- as.integer(Kmax)
  assert_that(is.DefineSummariesClass(sW))
  assert_that(is.DefineSummariesClass(sA))
  nobs <- nrow(data)

  # Check no factors exist in the input data:
  check1 <- CheckExistFactors(data)
  if (!is.null(check1)) stop("found factor column(s) in the input data, consider removing or recoding such column(s) as strings: "
                            %+% paste0(check1, collapse=','))

  if (is.null(NETIDnode) && is.null(NETIDmat)) {
    message("No network (friends) specified by NETIDnode or NETIDmat args, assuming the input data is i.i.d.")
    nFnode <- NULL
    iid_data_flag <- TRUE
    if (missing(Kmax)) Kmax <- 1 # need to allow Kmax = 0
  }

  #---------------------------------------------------------------------------------
  # Saving the observed data as a data.table in its own object class OdataDT
  #---------------------------------------------------------------------------------
  OdataDT_R6 <- OdataDT$new(Odata = data, nFnode = nFnode, iid_data_flag = iid_data_flag)

  #----------------------------------------------------------------------------------
  # Create an object with model estimates, data & network information that is passed on to estimation algorithm(s)
  #----------------------------------------------------------------------------------
  netind_cl <- NetIndClass$new(nobs = nobs, Kmax = Kmax)
  if (!is.null(NETIDnode)) {
    assert_that(is.character(NETIDnode))
    # Net_str <- as.character(data[, NETIDnode])
    Net_str <- as.character(OdataDT_R6$OdataDT[[NETIDnode]])
    OdataDT_R6$OdataDT[, NETIDnode:=NULL]
    if (!is.null(IDnode)) {
      assert_that(is.character(IDnode))
      # IDs_str <- as.character(data[, IDnode])
      IDs_str <- as.character(OdataDT_R6$OdataDT[[IDnode]])
      OdataDT_R6$OdataDT[, IDnode:=NULL]
    } else {
      IDs_str <- NULL
    }
    netind_cl$makeNetInd.fromIDs(Net_str = Net_str, IDs_str = IDs_str, sep = sep)
  } else if (!is.null(NETIDmat)) {
    assert_that(is.matrix(NETIDmat))
    netind_cl$NetInd <- NETIDmat
    netind_cl$make.nF()
  }

  if (verbose) {
    message("evaluated the network ID matrix: "); print(head(netind_cl$NetInd))
    message("evaluated and added to summary measures the number of friends for each observation (nF): "); print(head(netind_cl$nF))
  }

  #----------------------------------------------------------------------------------
  # Test parsing and evaluating the summary measures (in class DefineSummariesClass):
  #----------------------------------------------------------------------------------
  # Testing the evaluation of summary measures:
  # # sW.matrix <- sW$eval.nodeforms(data.df = data, netind_cl = netind_cl)
  # sW.matrix <- sW$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
  # # sA.matrix <- sA$eval.nodeforms(data.df = data, netind_cl = netind_cl)
  # sA.matrix <- sA$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
  # if (verbose) {
  #   print("sample matrix of sW summary measurs: : "); print(head(sW.matrix))
  #   print("sample matrix of sA summary measurs: "); print(head(sA.matrix))
  #   print("map of sW names to column names: "); print(sW$sVar.names.map)
  #   print("map of sA names to column names: "); print(sA$sVar.names.map)
  # }

  #---------------------------------------------------------------------------------
  # BUILDING OBSERVED sW & sA: (obsdat.sW - a dataset (matrix) of n observed summary measures sW)
  # ALL EVALUATIONS HAPPEN BY REFERENCE AND ONLY OdataDT_R6$OdataDT is modified, no copies are made
  #---------------------------------------------------------------------------------
  datnetW <- DatNet$new(netind_cl = netind_cl, nFnode = nFnode)
  datnetW$make.sVar(Odata = OdataDT_R6, sVar.object = sW)
  datnetW$fixmiss_sVar() # permanently replace NA values in sW with 0

  datnetA <- DatNet$new(netind_cl = netind_cl)
  datnetA$make.sVar(Odata = OdataDT_R6, sVar.object = sA)

  DatNet.ObsP0 <- DatNet.sWsA$new(Odata = OdataDT_R6, datnetW = datnetW, datnetA = datnetA)$make.dat.sWsA()

  OdataDT_R6$sW <- sW
  OdataDT_R6$sA <- sA

  return(list(NETIDmat = netind_cl$NetInd, DatNet.ObsP0 = DatNet.ObsP0))
}

#---------------------------------------------------------------------------------
# MAIN TMLENET FUNCTION
#---------------------------------------------------------------------------------
# layers of tmlenet input spec's:
# 1) spec sW, sA, Qform => assumes hform.g0 = "sA ~ sW", hform.gstar = "sA ~ sW"
# 2) spec sW, sA, Qform and hform.g0 => assumes hform.gstar = hform.g0
# 3) 2) + spec separate hform.gstar (outcome have to be the same sA for both hform.g0 & hform.gstar)

#------------------------------------
#' Estimate Average Network Effects For Arbitrary (Stochastic) Interventions
#'
#' Estimate the average network effect among dependent units with known network structure (in presence of
#'  interference and/or spillover) using \strong{TMLE} (targeted maximum likelihood estimation), \strong{IPTW}
#'  (Horvitz-Thompson or the inverse-probability-of-treatment) and \strong{GCOMP} (parametric G-computation formula).
#' @param DatNet.ObsP0 Instance of class \code{\link{DatNet.sWsA}} returned under the same name by the \code{\link{eval.summaries}} function.
#'  Stores the evaluated summary measures (\code{sW},\code{sA}) for the observed data, along with the network information.
#'  When this argument is specified, the
#'  following arguments no longer need to be provided: \code{data}, \code{Kmax}, \code{sW}, \code{sA},
#'  \code{IDnode}, \code{NETIDnode}, \code{optPars$sep}, \code{NETIDmat}.
#' @param data Observed data, a \code{data.frame} with named columns, containing the baseline covariates,
#'  exposures (\code{Anodes}), the outcomes (\code{Ynode}) and possibly the network column (\code{NETIDnode}), where
#'  network is specified by a vector of strings of friend IDs, each string using \code{optPars$sep} character to separate different friend IDs
#'  (default is \code{optPars$sep=' '}).
# @param estimators (NOT IMPLEMENTED) Character vector with estimator names.
#' @param Kmax Integer constant specifying the maximal number of friends for any observation in the input \code{data} data.frame.
#' @param sW Summary measures constructed from baseline covariates alone. This must be an object of class
#'  \code{DefineSummariesClass} that is returned by calling the function \code{\link{def_sW}}.
#' @param sA Summary measures constructed from exposures \code{Anodes} and baseline covariates. This must be an object of class
#'  \code{DefineSummariesClass} that is returned by calling the function \code{\link{def_sW}}.
# @param Anodes Exposure (treatment) variable name (column name in \code{data}); exposures can be either binary, categorical or continuous.
#  This variable can be instead specified with argument \code{sA} by adding a call \code{+def_sA(Anodes="ExposureVarName")} to \code{sA}.
# @param AnodeDET Optional column name for indicators of deterministic values of exposures in \code{Anodes},
#  should be coded as (\code{TRUE}/\code{FALSE}) or (\code{1}/\code{0});
#  observations with \code{AnodeDET}=\code{TRUE}/\code{1} are assumed to have deterministically assigned exposures
#' @param Ynode  Outcome variable name (column name in \code{data}), assumed normalized between 0 and 1. This can instead be specified
#'  on the left-side of the regression formula in argument \code{Qform}.
#' @param NETIDmat Network specification via matrix of friend IDs (\code{ncol=Kmax}, \code{nrow=nrow(data)}),
#'  where each row \code{i} is a vector of \code{i}'s friends IDs or \code{i}'s friends row
#'  numbers in \code{data} if \code{IDnode=NULL}. See Details.
#' @param IDnode Subject identifier variable in the input data, if not supplied the network string in
#'  \code{NETIDnode} is assumed to be indexing the row numbers in the input \code{data}
#' @param NETIDnode Network specification by a column name in input \code{data} consisting of strings that identify the unit's friends
#'  by their IDs or their row numbers (two friends are separated by space, e.g., \code{"1 2"}; unit with no friends should have
#'  an empty \code{""} string). See Details.
#' @param intervene1.sA Intervention 1
#' @param f_gstar1 Either a function or a vector of counterfactual exposures. If a function, must return
#'  a vector of counterfactual exposures evaluated based on the summary measures matrix (\code{sW,sA}) passed as a named
#'  argument \code{"data"}, therefore, the function in \code{f_gstar1} must have a named argument \code{"data"} in its signature.
#'  The interventions defined by \code{f_gstar1} can be static, dynamic or stochastic. If \code{f_gstar1} is specified as a
#'  vector, it must be of length \code{nrow(data)} or 1 (constant treatment assigned to all observations).
#'  See Details below and Examples in "EQUIVALENT WAYS OF SPECIFYING INTERVENTION \code{f_gstar1}" for demonstration.
#' @param intervene2.sA Intervention 2
#' @param f_gstar2 Either a function or a vector of counterfactual exposure assignments.
#'  Used for estimating contrasts (average treatment effect) for two interventions, if omitted, only the average
#'  counterfactual outcome under intervention \code{f_gstar1} is estimated. The requirements for \code{f_gstar2}
#'  are identical to those for \code{f_gstar1}.
# @param nFnode (Optional) Name of the variable for the number of friends each unit has, this name can then be used
#  inside the summary measures and regression formulas \code{sW}, \code{sA}, \code{Qform}, \code{hform.g0},
#  \code{hform.gstar} and \code{gform}. See Details.
#' @param Qform Regression formula for outcome in \code{Ynode}, when omitted (\code{Ynode}=\code{NULL}), the outcome variable is
#'  regressed on all variables defined in \code{sW} and \code{sA}. See Details.
#' @param hform.g0 Regression formula for estimating the conditional density of P(\code{sA} | \code{sW}) under \code{g0}
#'  (the observed exposure mechanism), when omitted, this regression is defined by \code{sA~sW} where \code{sA}
#'  are all summary measures defined by argument \code{sA} and \code{sW} are all baseline summary measures defined by argument \code{sW}.
#' @param hform.gstar Regression formula for estimating the conditional density P(\code{sA} | \code{sW}) under interventions
#'  \code{f_gstar1} or \code{f_gstar2}.
#'  When omitted, the same regression formula as in \code{hform.g0} will be used for \code{hform.gstar}. See Details.
# @param gform  (Optional) Regression formula for treatment(s) A: P(A_i | W). See Details.
# @param args_f_g1star (Optional) Additional arguments to be passed to \code{f_gstar1} intervention function
# @param args_f_g2star (Optional) Additional arguments to be passed to \code{f_gstar2} intervention function
#' @param verbose Set to \code{TRUE} to print messages on status and information to the console.
#'  Turn this on by default using \code{options(tmlenet.verbose=TRUE)}.
#' @param optPars A named list of additional optional parameters to be passed to \code{tmlenet}, such as
#'  \code{bootstrap.var}, \code{n.bootstrap}, \code{alpha}, \code{lbound}, \code{family}, \code{runTMLE}, \code{YnodeDET}, \code{sep},
#'  \code{f_g0}, \code{h_g0_SummariesModel} and
#'  \code{h_gstar_SummariesModel}. See Details below for the description of each parameter.
# (REMOVED) \code{n_MCsims}
#((NOT IMPLEMENTED)) @param Q.SL.library SuperLearner libraries for outcome, Q
#((NOT IMPLEMENTED)) @param g.SL.library SuperLearner libraries for treatment mechanism, g
#((NOT IMPLEMENTED)) @param h_f.g0_args Additional arguments to be passed to f_g0
#((NOT IMPLEMENTED)) @param h_user_fcn User supplied function to calculate the clever covariate, h
#((NOT IMPLEMENTED)) @param h_logit_sep_k Flag for fitting a separate logistic regression for each strata of nFnode,
# used during estimation of the clever covariate, h
#'
#' @section Details:
#'
# (NOT IMPLEMENTED) When \code{sW} is missing, by default \code{sW} is constructed as follows.
#  For each \code{"W"} in \code{Wnodes}, add a vector \code{data[,"W"]} as well as all friends covariate values of
#  \code{"W"} to \code{sW} by running \code{netW = def_sW(W[[0:Kmax]], noname = TRUE)}.
#  For each \code{"W"} in \code{Wnode}, a vector of \code{"W"} values of the first friend of observations
#  \code{i} = 1,...,\code{nrow(data)} will be created and named \code{"W_netF1"}
#  (i.e., variable "W_netF1" is constructed as def_sW(W_netF1 = W[[1]])).
#  Similarly, the vector of "W" values of the jth friend for observations \code{i} = 1, ..., \code{nrow(data)}
#  will be created and named "W_netFj", for j from 1 to \code{Kmax}, with all \code{W_netFj} then being added to
#  \code{sW}.
#
# (NOT IMPLEMENTED) Similarly, when \code{sA} is missing, it is constructed by running
#  \code{def_sW(netA = A[[0:Kmax]], noname = TRUE)} (assuming \code{"A"} is the value of \code{Anodes} in \code{data}),
#  which combines the column \code{data[,"A"]} with all the friends treatment assignments of variable "A".
#
#'
#' Note that in case when both arguments \code{NETIDnode} and \code{NETIDmat} are left unspecified the input data are
#'  assumed independent, i.e., no network dependency between the observations.
#'  All inference will be performed based on the i.i.d. efficient influence curve for the target
#'  parameter \eqn{(E_{g^*_1}[Y])}.
#'
#' Also note that the ordering of the friends IDs in \code{NETIDnode} or \code{NETIDmat} is unimportant.
#'
#' A special non-negative-integer-valued variable \code{nF} is automatically calculated each time
#'  \code{tmlenet} or \code{eval.summaries} functions are called. \code{nF} contains the total number of friends for
#'  each observation and it is always added as an additional column to the matrix of the baseline-covariates-based
#'  summary measures \code{sWmat}. The variable \code{nF} can be used in the same ways as any of the column names in
#'  the input data frame \code{data}. In particular, the name \code{nF} can be used inside the summary measure
#'  expressions (calls to functions \code{def_sW} and \code{def_sA}) and inside any of the regression formulas
#'  (\code{Qform}, \code{hform.g0}, \code{hform.gstar}).
#'
# However, if the column \code{data[,nFnode]}
# already exists in the input data it will be compared to the automatically calculated values, with an error produced
# if the two variables do not exactly match.
#' The regression formalas in \code{Qform}, \code{hform.g0} and \code{hform.gstar} can include any summary measures names defined in
#'  \code{sW} and \code{sA}, referenced by their individual variable names or by their aggregate summary measure names.
#'  For example, \code{hform.g0 = "netA ~ netW"} is equivalent to
#'  \code{hform.g0 = "A + A_netF1 + A_netF2 ~ W + W_netF1 + W_netF2"} for \code{sW,sA} summary measures defined by
#'  \code{def_sW(netW=W[[0:2]])} and \code{def_sA(netA=A[[0:2]])}.
#'
#' @section Additional parameters:
#'
#' Some of the parameters that control the estimation in \code{tmlenet} can be set by calling the function
#'  \code{\link{tmlenet_options}}.
#'
#' Additional parameters can be also specified as a named list \code{optPars} argument of the
#' \code{tmlenet} function. The items that can be specified in \code{optPars} are:
#' \itemize{
#'
#' \item \code{bootstrap.var} - When set to \code{TRUE}, parametric bootstrap will be performed in order
#' to obtain an alternative estimate the TMLE variance (note that this will significantly increase the running time);
#'
#' \item \code{n.bootstrap} - Number of bootstrap samples, default is 100; Set to higher values, such as, 500+ to obtain a more precise
#' estimate of the tmle variance;
#'
#' \item \code{boot.nodes} - Which nodes should be re-sampled from their corresonding parametric model fits? Note that Y (outcome) is always
#' resampled from the model fit specified by \code{Qform}. Each of the nodes in \code{boot.nodes} must be conditionally independent, given
#' some set of covariates of summary measures (for example, it might be the exposure variable).
#' Any node name specified in \code{boot.nodes} must be fitted with a sepearate model,
#' specified either as part of \code{hform} (name in boot.nodes matches a variable on the left-side of the formula in \code{hform})
#' or as a seperate regression formula in \code{boot.form} argument. These
#' model fits are then used for re-sampling each of the nodes specified in \code{boot.nodes}.
#'
#' \item \code{boot.form} - Specify the regression formulas specifically for parametric bootstrap, which were previously specified on
#' the left-side of the regression formulas in \code{hform}.
#'
#' \item \code{alpha} - alpha-level for CI calculation (0.05 for 95% CIs);
#'
#' \item \code{lbound} - One value for symmetrical bounds on P(sW | sW).
#'
#' \item \code{family} - Family specification for regression models, defaults to binomial (CURRENTLY ONLY BINOMIAL
#'  FAMILY IS IMPLEMENTED).
#
#(REMOVED)\item \code{n_MCsims} - Number of Monte-Carlo simulations performed, each of sample size \code{nrow(data)},
#    for generating new exposures under \code{f_gstar1} or \code{f_gstar2} (if specified) or \code{f_g0} (if specified).
#    These newly generated exposures are utilized when fitting the conditional densities P(\code{sA}|\code{sW})
#    and when evaluating the substitution estimators \strong{GCOMP} and \strong{TMLE}
#    under stochastic interventions \code{f_gstar1} or \code{f_gstar2}.
#'
#' \item \code{runTMLE} - Choose which of the two TMLEs to run, "tmle.intercept" or "tmle.covariate". The default is "tmle.intercept".
#'
#' \item \code{YnodeDET} - Optional column name for indicators of deterministic values of outcome \code{Ynode},
#'    coded as (\code{TRUE}/\code{FALSE}) or
#'    (\code{1}/\code{0}); observations with \code{YnodeDET}=\code{TRUE}/\code{1} are assumed to have constant value for their \code{Ynode}.
#'
#' \item \code{sep} - A character separating friend indices for the same observation in \code{NETIDnode}.
#'
#' \item \code{f_g0} - A function for generating true treatment mechanism under observed \code{Anodes}, if known (for example in a
#'    randomized trial). This is used for estimating P(\code{sA}|\code{sW}) under \code{g0} by sampling large vector of \code{Anodes}
#'    (of length \code{nrow(data)}) from \code{f_g0} function;
# (of length \code{nrow(data)*n_MCsims})
#'
#' \item \code{h_g0_SummariesModel} - Previously fitted model for P(\code{sA}|\code{sW}) under observed exposure mechanism \code{g0},
#'    returned by the previous runs of the \code{tmlenet} function.
#'    This has to be an object of \code{SummariesModel} \pkg{R6} class. When this argument is specified, all predictions
#'    P(\code{sA}=\code{sa}|\code{sW}=\code{sw}) under \code{g0} will be based on the model fits provided by this argument.
#'
#' \item \code{h_gstar_SummariesModel} - Previously fitted model for P(\code{sA}|\code{sW}) under (stochastic) intervention
#'    specified by \code{f_gstar1} or \code{f_gstar2}. Also an object of \code{SummariesModel} \pkg{R6} class.
#'    When this argument is specified, the predictions P(\code{sA}=\code{sa}|\code{sW}=\code{sw})
#'    under \code{f_gstar1} or \code{f_gstar2} will be based on the model fits provided by this argument.
#' }
#'
#' @section Specifying the counterfactual intervention function (\code{f_gstar1} and \code{optPars$f_gstar2}):
#'
#' The functions \code{f_gstar1} and \code{f_gstar2} can only depend on variables specified by the combined matrix
#'  of summary measures (\code{sW},\code{sA}), which is passed using the argument \code{data}. The functions should
#'  return a vector of length \code{nrow(data)} of counterfactual treatments for observations in the input data.
#'
#' @section Specifying the Network of Friends:
#'
#' The network of friends (connections) for observations in the input \code{data} can be specified in two
#'  alternative ways, using either \code{NETIDnode} or \code{NETIDmat} input arguments.
#'
#' \code{NETIDnode} - The first (slower) method uses a vector of strings in \code{data[, NETIDnode]}, where each
#'  string \code{i} must contain the space separated IDs or row numbers of all units in \code{data} thought to be
#'  connected to observation i (friends of unit i);
#'
#' \code{NETIDmat} - An alternative (and faster) method is to pass a matrix with \code{Kmax} columns and nrow(data)
#'  rows, where each row \code{NETIDmat[i,]} is a vector of observation \code{i}'s friends' IDs or \code{i}'s friends'
#'  row numbers in \code{data} if \code{IDnode=NULL}. If observation \code{i} has fewer than \code{Kmax} friends, the
#'  remainder of \code{NETIDmat[i,]} must be filled with \code{NA}s. Note that the ordering of friend indices is
#'  irrelevant.
#'
#' @section IPTW estimator:
#' **********************************************************************
#'
#' \itemize{
#' \item As described in the following section, the first step is to construct an estimator \eqn{P_{g_N}(sA | sW)}
#'    for the common (in \code{i}) conditional density \eqn{P_{g_0}(sA | sW)} for common (in \code{i}) unit-level summary
#'    measures (\code{sA},\code{sW}).
#'
#' \item The same fitting algorithm is applied to construct an estimator \eqn{P_{g^*_N}(sA^* | sW^*)} of the common (in \code{i})
#'    conditional density \eqn{P_{g^*}(sA^* | sW^*)} for common (in \code{i}) unit-level summary measures (\code{sA^*},\code{sW^*})
#'    implied by the user-supplied stochastic intervention \code{f_gstar1} or \code{f_gstar2} and the observed distribution of \code{W}.
#'
#' \item These two density estimators form the basis of the IPTW estimator,
#'    which is evaluated at the observed N data points \eqn{O_i=(sW_i, sA_i, Y_i), i=1,...,N} and is given by
#'    \deqn{\psi^{IPTW}_n = \sum_{i=1,...,N}{Y_i \frac{P_{g^*_N}(sA^*=sA_i | sW=sW_i)}{P_{g_N}(sA=sA_i | sW=sW_i)}}.}
#' }
#'
#' @section GCOMP estimator:
#' **********************************************************************
#'
#' @section TMLE estimator:
#' **********************************************************************
#'
#' @section Modeling \code{P(sA|sW)} for summary measures \code{(sA,sW)}:
#' **********************************************************************
#'
#' Non-parametric
#'  estimation of the common \strong{unit-level} multivariate joint conditional probability model \code{P_g0(sA|sW)},
#'  for unit-level summary measures \code{(sA,sW)} generated from the observed exposures and baseline covariates
#'  \eqn{(A,W)=(A_i,W_i : i=1,...,N)} (their joint density given by \eqn{g_0(A|W)Q(W)}), is performed by first
#'  constructing the dataset of N summary measures, \eqn{(sA_i,sW_i : i=1,...,N)}, and then fitting the usual i.i.d. MLE
#'  for the common density \code{P_g0(sA|sW)} based on the pooled N sample of these summary measures.
#'
#'  Note that \code{sA} can be multivariate and any of its components \code{sA[j]} can be either binary, categorical
#'  or continuous.
#'  The joint probability model for \code{P(sA|sA)} = \code{P(sA[1],...,sA[k]|sA)} can be factorized as
#'  \code{P(sA[1]|sA)} * \code{P(sA[2]|sA, sA[1])} * ... * \code{P(sA[k]|sA, sA[1],...,sA[k-1])},
#'  where each of these conditional probability models is fit separately, depending on the type of the outcome variable
#'  \code{sA[j]}.
#'
#'  If \code{sA[j]} is binary, the conditional probability \code{P(sA[j]|sW,sA[1],...,sA[j-1])} is evaluated via logistic
#'  regression model.
#'  When \code{sA[j]} is continuous (or categorical), its range will be fist partitioned into \code{K} bins and the
#'  corresponding \code{K}
#'  bin indicators (\code{B_1,...,B_K}), where each bin indicator \code{B_j} is then used as an outcome in a
#'  separate logistic regression model with predictors given by \code{sW, sA[1],...,sA[k-1]}.
#'  Thus, the joint probability \code{P(sA|sW)} is defined by such a tree of binary logistic regressions.
#'
#' For simplicity, we now suppose \code{sA} is continuous and univariate and we describe here an algorithm for fitting
#'  \eqn{P_{g_0}(sA | sW)} (the algorithm
#'  for fitting \eqn{P_{g^*}(sA^* | sW^*)} is equivalent, except that exposure \code{A} is replaced with exposure \code{A^*}
#'  generated under \code{f_gstar1} or \code{f_gstar2} and
#'  the predictors \code{sW} from the regression formula \code{hform.g0} are replaced with predictors \code{sW^*}
#'  specified by the regression formula \code{hform.gstar}).
#'
#' \enumerate{
#' \item Generate a dataset of N observed continuous summary measures (\code{sA_i}:i=1,...,N) from observed
#'  ((\code{A_i},\code{W_i}):i=1,...,N).
#'
#' \item Divide the range of \code{sA} values into intervals S=(i_1,...,i_M,i_{M+1}) so that any observed data point
#'    \code{sa_i} belongs to one interval in S, namely,
#'    for each possible value sa of \code{sA} there is k\\in{1,...,M}, such that, i_k < \code{sa} <= i_{k+1}.
#'    Let the mapping B(sa)\\in{1,...,M} denote a unique interval in S for sa, such that, i_{B(sa)} < sa <= i_{B(sa)+1}.
#'    Let bw_{B(sa)}:=i_{B(sa)+1}-i_{B(sa)} be the length of the interval (bandwidth) (i_{B(sa)},i_{B(sa)+1}).
#'    Also define the binary indicators b_1,...,b_M, where b_j:=I(B(sa)=j), for all j <= B(sa) and b_j:=NA for all j>B(sa).
#'    That is we set b_j to missing ones the indicator I(B(sa)=j) jumps from 0 to 1.
#'    Now let \code{sA} denote the random variable for the observed summary measure for one unit
#'    and denote by (B_1,...,B_M) the corresponding random indicators for \code{sA} defined as B_j := I(B(\code{sA}) = j)
#'    for all j <= B(\code{sA}) and B_j:=NA for all j>B(\code{sA}).
#'
#' \item For each j=1,...,M, fit the logistic regression model for the conditional probability P(B_j = 1 | B_{j-1}=0, sW), i.e.,
#'    at each j this is defined as the conditional probability of B_j jumping from 0 to 1 at bin j, given that B_{j-1}=0 and
#'    each of these logistic regression models is fit only among the observations that are still at risk of having B_j=1 with B_{j-1}=0.
#'
#' \item Normalize the above conditional probability of B_j jumping from 0 to 1 by its corresponding interval length (bandwidth) bw_j to
#'    obtain the discrete conditional hazards h_j(sW):=P(B_j = 1 | (B_{j-1}=0, sW) / bw_j, for each j.
#'    For the summary measure \code{sA}, the above conditional hazard h_j(sW) is equal to P(\code{sA} \\in (i_j,i_{j+1}) | \code{sA}>=i_j, sW),
#'    i.e., this is the probability that \code{sA} falls in the interval (i_j,i_{j+1}), conditional on sW and conditional on the fact that
#'    \code{sA} does not belong to any intervals before j.
#'
#' \item  Finally, for any given data-point \code{(sa,sw)}, evaluate the discretized conditional density for P(\code{sA}=sa|sW=sw) by first
#'    evaluating the interval number k=B(sa)\\in{1,...,M} for \code{sa} and then computing \\prod{j=1,...,k-1}{1-h_j(sW))*h_k(sW)}
#'    which is equivalent to the joint conditional probability that \code{sa} belongs to the interval (i_k,i_{k+1}) and does not belong
#'    to any of the intervals 1 to k-1, conditional on sW.
#'  }
#'
#' The evaluation above utilizes a discretization of the fact that any continuous density f of random variable X can be written as f_X(x)=S_X(x)*h_X(x),
#'  for a continuous density f of X where S_X(x):=P(X>x) is the survival function for X, h_X=P(X>x|X>=x) is the hazard function for X; as well as the fact that
#'  the discretized survival function S_X(x) can be written as a of the hazards for s<x: S_X(x)=\\prod{s<x}h_X(x).
#'
#' @section Three methods for defining bin (interval) cuttoffs for a continuous one-dimenstional summary measure \code{sA[j]}:
#' **********************************************************************
#'
#' There are 3 alternative methods to defining the bin cutoffs S=(i_1,...,i_M,i_{M+1}) for a continuous summary measure
#'  \code{sA}. The choice of which method is used along with other discretization parameters (e.g., total number of
#'  bins) is controlled via the tmlenet_options() function. See \code{?tmlenet_options} argument \code{bin.method} for
#'  additional details.
#'
#' Approach 1 (\code{equal.len}): equal length, default.
#'
#' *********************
#'
#' The bins are defined by splitting the range of observed \code{sA} (sa_1,...,sa_n) into equal length intervals.
#'  This is the dafault discretization method, set by passing an argument \code{bin.method="equal.len"} to
#'  \code{tmlenet_options} function prior to calling \code{tmlenet()}. The intervals will be defined by splitting the
#'  range of (sa_1,...,sa_N) into \code{nbins} number of equal length intervals, where \code{nbins} is another argument
#'  of \code{tmlenet_options()} function. When \code{nbins=NA} (the default setting) the actual value of \code{nbins}
#'  is computed at run time by taking the integer value (floor) of \code{n/maxNperBin},
#'  for \code{n} - the total observed sample size and \code{maxNperBin=1000} - another argument of
#'  \code{tmlenet_options()} with the default value 1,000.
#'
#' Approach 2 (\code{equal.mass}): data-adaptive equal mass intervals.
#'
#' *********************
#'
#' The intervals are defined by splitting the range of \code{sA} into non-equal length data-adaptive intervals that
#'  ensures that each interval contains around
#'  \code{maxNperBin} observations from (sa_j:j=1,...,N).
#'  This interval definition approach can be selected by passing an argument \code{bin.method="equal.mass"} to
#'  \code{tmlenet_options()} prior to calling \code{tmlenet()}.
#'  The method ensures that an approximately equal number of observations will belong to each interval, where that number
#'  of observations for each interval
#'  is controlled by setting \code{maxNperBin}. The default setting is \code{maxNperBin=1000} observations per interval.
#'
#' Approach 3 (\code{dhist}): combination of 1 & 2.
#'
#' *********************
#'
#' The data-adaptive approach dhist is a mix of Approaches 1 & 2. See Denby and Mallows "Variations on the Histogram"
#'  (2009)). This interval definition method is selected by passing an argument \code{bin.method="dhist"} to
#'  \code{tmlenet_options()}  prior to calling \code{tmlenet()}.
#'
#' @return A named list with 3 items containing the estimation results for:
#'  \itemize{
#'  \item \code{EY_gstar1} - estimates of the mean counterfactual outcome under (stochastic) intervention function \code{f_gstar1} \eqn{(E_{g^*_1}[Y])}.
#'  \item \code{EY_gstar2} - estimates of the mean counterfactual outcome under (stochastic) intervention function \code{f_gstar2} \eqn{(E_{g^*_2}[Y])},
#'    \code{NULL} if \code{f_gstar2} not specified.
#'  \item \code{ATE} - additive treatment effect (\eqn{E_{g^*_1}[Y]} - \eqn{E_{g^*_2}[Y]}) under interventions \code{f_gstar1}
#'    vs. in \code{f_gstar2}, \code{NULL} if \code{f_gstar2} not specified.
#' }
#'
#' Each list item above is itself a list containing the items:
#'  \itemize{
#'  \item \code{estimates} - various estimates of the target parameter (network population counterfactual mean under
#'    (stochastic) intervention).
#'  \item \code{vars} - the asymptotic variance estimates, for \strong{IPTW} and \strong{TMLE}.
#'  \item \code{CIs} - CI estimates at \code{alpha} level, for \strong{IPTW} and \strong{TMLE}.
#'  \item \code{other.vars} - Placeholder for future versions.
#'  \item \code{h_g0_SummariesModel} - The model fits for P(\code{sA}|\code{sW}) under observed exposure mechanism
#'    \code{g0}. This is an object of \code{SummariesModel} \pkg{R6} class.
#'  \item \code{h_gstar_SummariesModel} - The model fits for P(\code{sA}|\code{sW}) under intervention \code{f_gstar1}
#'    or \code{f_gstar2}. This is an object of \code{SummariesModel} \pkg{R6} class.
#' }
#'
#' Currently implemented estimators are:
#'  \itemize{
#'  \item \code{tmle} - Either weighted regression intercept-based TMLE (\code{tmle.intercept} - the default)
#'    with weights defined by the IPTW weights \code{h_gstar/h_gN} or
#'    covariate-based unweighted TMLE (\code{tmle.covariate}) that uses the IPTW weights as a covariate
#'    \code{h_gstar/h_gN}.
#'  \item \code{h_iptw} - Efficient IPTW based on weights h_gstar/h_gN.
#'  \item \code{gcomp} - Parametric G-computation formula substitution estimator.
#' }
#' @seealso \code{\link{tmlenet-package}} for the general overview of the package,
#'  \code{\link{def_sW}} for defining the summary measures, \code{\link{eval.summaries}} for
#'  evaluation and validation of the summary measures,
#'  and \code{\link{df_netKmax2}}/\code{\link{df_netKmax6}}/\code{\link{NetInd_mat_Kmax6}}
#'  for examples of network datasets.
#' @example tests/examples/1_tmlenet_example.R
#' @export
tmlenet <- function(DatNet.ObsP0, data, Kmax, sW, sA,
                    intervene1.sA, f_gstar1 = NULL, intervene2.sA = NULL, f_gstar2 = NULL,
                    # Anodes,
                    Ynode,
                    Qform = NULL, hform.g0 = NULL, hform.gstar = NULL,
                    # estimators = c("tmle", "iptw", "gcomp"),
                    # AnodeDET = NULL,
                    # YnodeDET = NULL,
                    NETIDmat = NULL,
                    IDnode = NULL, NETIDnode = NULL,
                    verbose = getOption("tmlenet.verbose"),
                    optPars = list(
                      bootstrap.var = FALSE,
                      n.bootstrap = 100,
                      boot.nodes = NULL,
                      boot.form = NULL,
                      alpha = 0.05,
                      lbound = 0.005,
                      family = "binomial", # NOT YET IMPLEMENTED
                      # n_MCsims = 1,
                      # n_MCsims = ifelse(!missing(data),ceiling(sqrt(nrow(data))),10),
                      runTMLE = c("tmle.intercept", "tmle.covariate"),
                      YnodeDET = NULL,
                      # f_gstar2 = NULL,
                      sep = ' ',
                      f_g0 = NULL,
                      h_g0_SummariesModel = NULL,
                      h_gstar_SummariesModel = NULL)
                    ) {

  oldverboseopt <- getOption("tmlenet.verbose")
  options(tmlenet.verbose = verbose)
  gvars$verbose <- verbose
  n_MCsims <- 1
  #----------------------------------------------------------------------------------
  # ADDITIONAL ARGUMENTS (removed from input args of tmlenet())
  #----------------------------------------------------------------------------------
  AnodeDET <- NULL # removed from the input, since this is not implemented
  iid_data_flag <- FALSE  # set to true if no network is provided (usual iid TMLE)
  Q.SL.library <- c("SL.glm", "SL.step", "SL.glm.interaction") # NOT USED
  g.SL.library <- c("SL.glm", "SL.step", "SL.glm.interaction") # NOT USED
  max_npwt <- 50 # NOT USED YET
  h_logit_sep_k <- FALSE # NOT USED YET
  bootstrap.var <- ifelse(is.null(optPars$bootstrap.var), FALSE, optPars$bootstrap.var)
  n.bootstrap <- ifelse(is.null(optPars$n.bootstrap), 100, optPars$n.bootstrap)
  boot.nodes <- if(is.null(optPars$boot.nodes)) {NULL} else {optPars$boot.nodes}
  boot.form <- if(is.null(optPars$boot.form)) {NULL} else {optPars$boot.form}
  alpha <- ifelse(is.null(optPars$alpha), 0.05, optPars$alpha)
  lbound <- ifelse(is.null(optPars$lbound), 0.005, optPars$lbound)
  family <- ifelse(is.null(optPars$family), "binomial", optPars$family)
  sep <- ifelse(is.null(optPars$sep), ' ', optPars$sep)
  assert_that(is.character(sep) && length(sep)==1L)
  n_MCsims <- ifelse(is.null(optPars$n_MCsims), 1, optPars$n_MCsims)
  f_g0 <- if(is.null(optPars$f_g0)) {NULL} else {optPars$f_g0}
  if (!is.null(f_g0)) assert_that(is.function(f_g0))
  YnodeDET <- if(is.null(optPars$YnodeDET)) {NULL} else {optPars$YnodeDET}
  # f_gstar2 <- if(is.null(optPars$f_gstar2)) {NULL} else {optPars$f_gstar2}
  # if (!is.null(f_gstar2)) assert_that(is.function(f_gstar2) || is.vector(f_gstar2))
  # if TRUE, only evaluate the intercept-based TMLE (TMLE_B), if FALSE, evaluate only the covariate-based TMLE (TMLE_A)
  runTMLE <- optPars$runTMLE[1]
  if (is.null(runTMLE) || (runTMLE[1] %in% "tmle.intercept")) {
    onlyTMLE_B <- TRUE
  } else if (runTMLE %in% "tmle.covariate") {
    onlyTMLE_B <- FALSE
  } else {
    stop("optPars[['runTMLE']] argument must be either 'tmle.intercept' or 'tmle.covariate'")
  }

  if (verbose) {
    message("Running tmlenet with the following settings from tmlenet_options(): ");
    str(gvars$opts)
    message("Running tmlenet with the following settings from optPars arg of tmlenet(): ");
    str(optPars)
  }

  #----------------------------------------------------------------------------------
  # DETERMINING INTERNAL / EXTERNAL ESTIMATOR NAMES THAT WILL BE EVALUATED
  #----------------------------------------------------------------------------------
  # onlyTMLE_B <- TRUE
  assert_that(assertthat::is.flag(onlyTMLE_B))
  estnames.internal <- c("TMLE_A", "TMLE_B", "h_IPTW", "MLE")
  # estnames.internal <- c("TMLE_A", "TMLE_B", "TMLE_g_IPTW", "h_IPTW", "g_IPTW", "MLE")
  names(estnames.internal) <- estnames.internal
  estnames.out <- c("tmle", "h_iptw", "gcomp")
  if (onlyTMLE_B) {
    estnames.internal <- estnames.internal[-which(estnames.internal%in%"TMLE_A")]
  } else {
    estnames.internal <- estnames.internal[-which(estnames.internal%in%"TMLE_B")]
  }
  names(estnames.out) <- estnames.internal
  estnames.internal <- as.list(estnames.internal)

  #----------------------------------------------------------------------------------
  # MONTE-CARLO SIMULATION PARAMETERS
  #----------------------------------------------------------------------------------
  nQ.MCsims <- as.integer(n_MCsims)  # number of times to sample MC sim for Q (each of size n)
  ng.MCsims <- as.integer(n_MCsims)  # number of times to sample MC sim for h (each of size n)
  assert_that(is.count(nQ.MCsims))
  assert_that(is.count(ng.MCsims))
  max.err_est <- 0.1    # maximum percent error for MCS estimators

  #----------------------------------------------------------------------------------
  # PARAMETERS FOR ESTIMATING h under g0 & gstar
  #----------------------------------------------------------------------------------
  f.g0 <- f_g0
  f.g0_args <- NULL
  h_user_fcn <- NULL
  h_user <- !(is.null(h_user_fcn))

  #----------------------------------------------------------------------------------
  # Perform some input checks
  # Create an object with data & network information that is passed on to estimation algorithm(s)
  # Build observed sW & sA: (obsdat.sW - a dataset (matrix) of n observed summary measures sW)
  #----------------------------------------------------------------------------------
  if (missing(Ynode)) Ynode <- NULL
  if (is.null(Ynode) && is.null(Qform)) stop("Either Qform or Ynode must be specified")
  if (!is.null(Qform) && !is.null(Ynode)) {
    message("Since both Ynode and Qform are specified, the left-hand side of Qform will be ignored, with outcome being set to Ynode: " %+%
      Ynode)
  }
  if (!is.null(Qform) && is.null(Ynode)) {
    Ynode <- LhsVars(Qform)[1]
    message("Setting the Ynode to: " %+% Ynode)
  }

  if (missing(DatNet.ObsP0)) {
    # time_evalsumm <- system.time(
      eval.summ.res <- eval.summaries(data = data, Kmax = Kmax, sW = sW, sA = sA,
                                      IDnode = IDnode, NETIDnode = NETIDnode,
                                      sep = sep, NETIDmat = NETIDmat, verbose = FALSE)
      # )
    # print("time eval.summaries(...): "); print(time_evalsumm)
    DatNet.ObsP0 <- eval.summ.res$DatNet.ObsP0
    data <- DatNet.ObsP0$datnetW$Odata
    sW <- data$sW
    sA <- data$sA
    Kmax <- DatNet.ObsP0$Kmax
    # sW <- DatNet.ObsP0$datnetW$sVar.object
    # sA <- DatNet.ObsP0$datnetA$sVar.object
  }

  intervene1.sA <- update.intervention.sA(new.sA = intervene1.sA, sA = sA)
  data$intervene1.sA <- intervene1.sA
  if (!is.null(intervene2.sA)) {
    intervene2.sA <- update.intervention.sA(new.sA = intervene2.sA, sA = sA)
    data$intervene2.sA <- intervene2.sA
    # making sure exactly the same Anodes are used for both interventions:
    if (any(intervene1.sA$Anodes != intervene2.sA$Anodes))
      stop("intervention node names have to be identical for intervene1.sA and intervene2.sA")
  }

  # new version of nodes:
  node_l <- list(nFnode = DatNet.ObsP0$datnetW$nFnode,
                  Anodes = intervene1.sA$Anodes,  # Note that Anodes might be different in intervene1.sA & intervene2.sA
                  AnodeDET = AnodeDET,
                  Ynode = Ynode, YnodeDET = YnodeDET)

  data$nodes <- node_l
  data$backupAnodes(Anodes = node_l$Anodes, sA = sA)

  nobs <- DatNet.ObsP0$nobs
  DatNet.ObsP0$nodes <- node_l

  #----------------------------------------------------------------------------------
  # Defining (and checking) Deterministic Y and A node flags:
  #----------------------------------------------------------------------------------
  if (is.null(AnodeDET)) {
    determ.g <- rep_len(FALSE, nobs)
  } else {
    CheckVarNameExists(data$OdataDT, AnodeDET)
    # determ.g <- (data[, AnodeDET] == 1)
    setkeyv(data$OdataDT, cols=AnodeDET)
    data$OdataDT[list(1),AnodeDET, with=FALSE]
  }
  if (is.null(YnodeDET)) {
    determ.Q <- rep_len(FALSE, nobs)
  } else {
    CheckVarNameExists(data$OdataDT, YnodeDET)
    # determ.Q <- (data[, YnodeDET] == 1)
    setkeyv(data$OdataDT, cols=YnodeDET)
    data$OdataDT[list(1),YnodeDET, with=FALSE]
  }

  for (Anode in node_l$Anodes) CheckVarNameExists(data$OdataDT, Anode)
  # CheckVarNameExists(data, node_l$Anodes)
  for (Ynode in node_l$Ynode) CheckVarNameExists(data$OdataDT, Ynode)
  # CheckVarNameExists(data, node_l$Ynode)

  #----------------------------------------------------------------------------------
  # NOTE: YnodeVals = obsYvals, det.Y = determ.Q need to be added to DatNet.ObsP0 after returned from eval.summaries()
  #----------------------------------------------------------------------------------
  obsYvals <- data$OdataDT[[node_l$Ynode]]
  DatNet.ObsP0$addYnode(YnodeVals = obsYvals, det.Y = determ.Q)

  #----------------------------------------------------------------------------------
  # (OPTIONAL) ADDING DETERMINISTIC/DEGENERATE Anode FLAG COLUMNS TO sA:
  # cancelled adding DET nodes to sVar since all sVar are automatically get added to A ~ predictors + DETnodes...
  #----------------------------------------------------------------------------------
  # obsdat.sW <- O.datnetW$add_deterministic(Odata = data, userDETcol = "determ.g")$dat.sVar
  # Testing NA for visible det.Y and true observed Y as protected:
  # determ.Q <- c(FALSE, FALSE, FALSE, rep(TRUE, length(determ.Q)-3))
  # length(determ.Q) == length(obsYvals)
  # DatNet.ObsP0 <- DatNet.sWsA$new(datnetW = datnetW, datnetA = datnetA, YnodeVals = obsYvals, det.Y = determ.Q)$make.dat.sWsA()
  # print("DatNet.ObsP0: "); print(DatNet.ObsP0)
  # print(head(cbind(DatNet.ObsP0$noNA.Ynodevals, DatNet.ObsP0$YnodeVals, data[,node_l$Ynode]), 100))

  #----------------------------------------------------------------------------------
  # Optional regressions specs:
  #----------------------------------------------------------------------------------
  # Q.sVars <- process_regform(as.formula(Qform), sW.map = c(sW$sVar.names.map, sA$sVar.names.map), sA.map = node_l$Ynode)
  Q.sVars <- process_regforms(Qform, sW.map = c(sW$sVar.names.map, sA$sVar.names.map), sA.map = node_l$Ynode)
  # h.g0.sVars <- process_regform(as.formula(hform.g0[[1]]), sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
  h.g0.sVars <- process_regforms(hform.g0, sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)

  if (!is.null(hform.gstar)) {
    # h.gstar.sVars <- process_regform(as.formula(hform.gstar), sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
    h.gstar.sVars <- process_regforms(hform.gstar, sW.map = sW$sVar.names.map, sA.map = sA$sVar.names.map)
  } else {
    h.gstar.sVars <- h.g0.sVars
  }

  if (verbose) {
    print("Input regression(s) Qform (E(Y|sA,sW)): "); res <- lapply(Qform, function(Qform) print(as.formula(Qform),showEnv=FALSE))
    print("Derived regression(s) from Qform:"); str(Q.sVars)
    print("Input regression(s) hform.g0 (P(sA|sW) under g0): "); res <- lapply(hform.g0, function(hform.g0) print(as.formula(hform.g0),showEnv=FALSE))
    print("Derived regression(s) from hform.g0: "); str(h.g0.sVars)
    print("Input regression(s) hform.gstar (P(sA|sW) under g.star): "); res <- lapply(hform.gstar, function(hform.gstar) print(as.formula(hform.gstar),showEnv=FALSE))
    print("Derived regression(s) from hform.gstar: "); str(h.gstar.sVars)
  }

  #-----------------------------------------------------------
  # Defining and fitting regression for Y ~ sW + sA:
  #-----------------------------------------------------------
  check.Qpreds.exist <- unlist(lapply(Q.sVars$predvars, function(PredName) PredName %in% DatNet.ObsP0$names.sVar))
  if (!all(check.Qpreds.exist)) stop("the following predictors in Qform regression could not be located among the summary measures: " %+%
                                    paste0(Q.sVars$predvars[!check.Qpreds.exist], collapse = ","))

  if (verbose) {
    for (idx in seq_along(node_l$Ynode)) {
      message("================================================================")
      message("fitting E(Y|sA,sW):= ", "P(" %+% node_l$Ynode[idx] %+% "=1 | " %+% paste(Q.sVars$predvars[[idx]], collapse = ",") %+% ")")
      message("================================================================")
    }
  }

  Qreg <- RegressionClass$new(outvar = node_l$Ynode,
                              predvars = Q.sVars$predvars[[1]],
                              subset = !determ.Q, ReplMisVal0 = TRUE)

  m.Q.init <- BinOutModel$new(glm = FALSE, reg = Qreg)$fit(data = DatNet.ObsP0)

  #----------------------------------------------------------------------------------
  # Create an object with model estimates, data & network information that is passed on to estimation procedure
  #----------------------------------------------------------------------------------
  # 1) define parameters for MC estimation of the substitution estimators
  # 2) define parameters for estimation of the efficient weights h(A^s|W^s)
  est_obj <- list(
                  estnames = estnames.internal,
                  lbound = lbound[1],
                  max.err_eps = max.err_est,  # error tolerance for the mean/var M.C. estimate
                  m.Q.init = m.Q.init,
                  f.g0 = f.g0,
                  sW = sW,
                  sA = sA,
                  Q.sVars = Q.sVars,
                  h.g0.sVars = h.g0.sVars,
                  h.gstar.sVars = h.gstar.sVars,
                  nQ.MCsims = nQ.MCsims,
                  ng.MCsims = ng.MCsims,
                  h_g0_SummariesModel = optPars$h_g0_SummariesModel,
                  h_gstar_SummariesModel = optPars$h_gstar_SummariesModel,
                  # Cap the prop weights scaled at max_npwt (for =50 -> translates to max 10% of total weight for n=500 and 5% for n=1000):
                  max_npwt = max_npwt # NOT IMPLEMENTED
                  # h_logit_sep_k = h_logit_sep_k, # NOT IMPLEMENTED
                  # h_user = h_user, # NOT IMPLEMENTED
                  # h_user_fcn = h_user_fcn, # NOT IMPLEMENTED
                  )

  est_obj_g1 <- append(est_obj,
                      list(
                        f.gstar = f_gstar1,
                        new.sA = intervene1.sA
                        )
                      )

  if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
    est_obj_g2 <- append(est_obj,
                      list(
                        f.gstar = f_gstar2,
                        new.sA = intervene2.sA
                        )
                      )
  }

  #----------------------------------------------------------------------------------
  # Running MC evaluation for substitution TMLE ests
  #----------------------------------------------------------------------------------
  tmle_g1_out <- get_all_ests(estnames = estnames.internal, DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_obj_g1)

  if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
    tmle_g2_out <- get_all_ests(estnames = estnames.internal, DatNet.ObsP0 = DatNet.ObsP0, est_params_list = est_obj_g2)
  } else {
    tmle_g2_out <- NULL
  }

  iidEIC.eval <- TRUE
  if (iidEIC.eval) {
    # MC.tmle.eval.t <- system.time(
      MC.tmle.eval <- MCeval_fWi(n.MC = n.bootstrap, DatNet.ObsP0 = DatNet.ObsP0, tmle_g1_out = tmle_g1_out, tmle_g2_out = tmle_g2_out)
    # )
    # tmle_g1_out$ests_mat["TMLE",] <- mean(MC.tmle.eval$EY_gstar1)
    # if (!is.null(tmle_g2_out)) tmle_g2_out$ests_mat["TMLE",] <- mean(MC.tmle.eval$EY_gstar2)
  } else {
    MC.tmle.eval <- list(EY_gstar1 = NA, EY_gstar2 = NA, ATE = NA)
  }
  # print("MC.tmle.eval.t"); print(MC.tmle.eval.t)

  if (bootstrap.var) {
    # ------------------------------------------------------------------------------------------
    # IID BOOSTRAP FOR THE TMLE (DOES'T WORK FOR DEP DATA):
    # ------------------------------------------------------------------------------------------
    # var_tmleB_boot <- iid_bootstrap_tmle(n.boot, estnames, DatNet.ObsP0, tmle_g_out, QY_mat, wts_mat)

    # ------------------------------------------------------------------------------------------
    # PARAMETRIC BOOSTRAP TMLE variance estimate:
    # ------------------------------------------------------------------------------------------
    var_tmleB_boot <- par_bootstrap_tmle(n.boot = n.bootstrap, boot.nodes = boot.nodes, boot.form = boot.form,
                                         estnames = estnames.internal, DatNet.ObsP0 =  DatNet.ObsP0,
                                         tmle_g1_out = tmle_g1_out, tmle_g2_out = tmle_g2_out)
  } else {
    var_tmleB_boot <- list(EY_gstar1 = NA, EY_gstar2 = NA, ATE = NA)
  }

  #----------------------------------------------------------------------------------
  # Create output list (estimates, as. variances, CIs)
  #----------------------------------------------------------------------------------
  EY_gstar1 <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
                            DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out = tmle_g1_out,
                            MC.tmle.eval = MC.tmle.eval$EY_gstar1,
                            var_tmleB_boot = var_tmleB_boot$EY_gstar1)

  EY_gstar2 <- NULL; ATE <- NULL

  if (!is.null(intervene2.sA) || !is.null(f_gstar2)) {
    EY_gstar2 <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
                              DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out=tmle_g2_out,
                              MC.tmle.eval = MC.tmle.eval$EY_gstar2,
                              var_tmleB_boot = var_tmleB_boot$EY_gstar2)

    ATE <- make_EYg_obj(estnames = estnames.internal, estoutnames = estnames.out, alpha = alpha,
                        DatNet.ObsP0 = DatNet.ObsP0, tmle_g_out = tmle_g1_out, tmle_g2_out = tmle_g2_out,
                        MC.tmle.eval = MC.tmle.eval$ATE,
                        var_tmleB_boot = var_tmleB_boot$ATE)
	}

	tmlenet.res <- list(EY_gstar1 = EY_gstar1, EY_gstar2 = EY_gstar2, ATE = ATE)
	class(tmlenet.res) <- c(class(tmlenet.res), "tmlenet")

  options(tmlenet.verbose = oldverboseopt)
  gvars$verbose <- oldverboseopt
	return(tmlenet.res)
}
osofr/tmlenet documentation built on May 24, 2019, 4:58 p.m.