R/DatNetClass.R

#-----------------------------------------------------------------------------
# DatNet Classes
#-----------------------------------------------------------------------------
# Creates (make.sVar) and stores the summary measure matrix sW or sA in DatNet class;
# Creates (make.dat.sWsA) and stores the combined summary measure matrix (sW,sA) in DatNet.sWsA class;
# Contains Methods for:
  # *) detecting sVar types (detect.col.types);
  # *) normalizing continous sVar (normalize_sVar)
  # *) defining interval cuttoffs for continuous sVar (define.intervals)
  # *) turning continuous sVar into categorical (discretize.sVar)
  # *) creating binary indicator matrix for continous/categorical sVar (binirize.sVar, binirize.cat.sVar)
  # *) creating design matrix (Xmat) based on predvars and row subsets (evalsubst)
  # *) sampling exposures from input intervention f_gstar and generating summary measures (sW,sA) from this new exposure (make.dat.sWsA, f.gen.A.star)
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# DatNet.sWsA CLASS STRUCTURE:
#-----------------------------------------------------------------------------
# * DatNet.sWsA class$make.dat.sWsA() creates one large matrix that stores all summary measures (sW,sA);
# * The observed data is always stored in $datnetW, $datnetA fields in every DatNet.sWsA object (under g_0 or g_star);
# * DatNet.sWsA$new(datnetW,datnetW) only saves obs data objects datnetW and datnetW as pointers (no copying);
# * DatNet.sWsA$new() has to be called only twice: once for g_0 (g_N) and once for g_star;
# * $make.dat.sWsA() must be called to create $dat.sWsA ($mat.sVar) - a df / mat of COMBINED sWsA;
# * All binning / interval methods MUST BE called on DatNet.sWsA (NOT $datnetA, $datnetW) (inherited from DatNet);
# * Both copies of DatNet.sWsA are storing datnetA/datnetW by reference - same copy;
# * MOST IMPORTANTLY this gets rid of the Monte-Carlo simulation loop for evaling psi_n under g_star.
    # -> just use already sampled DatNet.gstar dataset and evaluate psi_n only once.
# * NOTE: Changing datnetA/datnetW in one copy of DatNet.sWsA will result them being changed in the other copy of DatNet.sWsA as well.

#-----------------------------------------------------------------------------
# - DatNet.sWsA$binirize: Current implementation will often create fewer new cats than unique(sVar) for categorical sVar.
  # One way to avoid this is to set bin_by_mass = FALSE for sVar categoricals;
  # Another approach is to collapse the intervals to only unique(intrvls) with a warning;
  # Example: X is cat with 7 unique vals (sum_1mAW2_nets), asked for 7 bins, only 5 non-degenerate bins were created;
  # This is fine for wide format, but will not work when pooling across bins. Pooling requires only non-degenerate bins.
  # Will have to find a way to re-define such bins, so that only 4 final bins are created instead of 7 (with 4th bin being degenerate)
  # [1] "freq count for original variable: "
  #   0   1   2   3   4   5   6
  # 197 292 234 180  72  18   7
  # [1] "freq count for transformed ord.sVar: "
  # ord.sVar
  #   2   4   6   7
  # 197 292 234 277
#-----------------------------------------------------------------------------

is.integerish <- function (x) is.integer(x) || (is.numeric(x) && all(x == as.integer(x)))

#' @importFrom simcausal NetIndClass
#' @importFrom stats as.formula glm na.exclude rbinom
NULL

# Get the actual A^* sampled from user-supplied intervention f_gstar (fcn_name):
f.gen.A.star <- function(data, f.g_fun, Anodes) {
  .f_g_wrapper <- function(data, f.g_fun, ...) {
      args0 <- list(data = data)
      args <- c(args0, ...)
    do.call(f.g_fun, args)
  }
  # test f.g_fun is a function, if not it must be either:
  # 1) a named list of functions (numAnodes) or
  # 2) a vector (numAnodes x 1) or
  # 3) a matrix (n x numAnodes) of counterfactual treatment assignments
  if (is.function(f.g_fun)) {
    if (!("data" %in% names(formals(f.g_fun)))) stop("functions f_gstar1 / f_gstar2 must have a named argument 'data'")
    newA <- .f_g_wrapper(data = data, f.g_fun = f.g_fun)
    assert_that(is.matrix(newA) || is.vector(newA))
    if (is.matrix(newA)) {
      if (length(colnames(newA))==0 | !all(colnames(newA) %in% Anodes)) stop("the Anode matrix specified in f_gstar1/f_gstar2 must have column names matching Anodes")
      assert_that(nrow(newA)==nrow(data) && ncol(newA)==length(Anodes))
    } else {
      if (length(Anodes) > 1) stop("the evaluation result of f_gstar1 / f_gstar2 must be a matrix for multivariate Anodes")
      if (length(newA) != nrow(data)) stop("the evaluation result of f_gstar1 / f_gstar2 must be a vector of length n for univariate Anodes")
    }

  } else if (is.list(f.g_fun)) {
    assert_that(length(f.g_fun)==length(Anodes))
    assert_that(length(names(f.g_fun))>0)
    assert_that(all(names(f.g_fun) %in% Anodes))
    newA <- matrix(nrow=nrow(data), ncol=length(Anodes))
    colnames(newA) <- Anodes
    for (Anode in Anodes) {
      newA[, Anode] <- .f_g_wrapper(data = data, f.g_fun = f.g_fun[[Anode]])
    }

  } else if (is.matrix(f.g_fun)){
    newA <- f.g_fun
    assert_that(nrow(newA)==nrow(data) && ncol(newA)==length(Anodes))
    if (length(colnames(newA))==0 | !all(colnames(newA) %in% Anodes)) stop("the Anode matrix specified in f_gstar1/f_gstar2 must have column names matching Anodes")

  } else {
    newA <- as.vector(f.g_fun)
    if (length(Anodes)==1) {
      if (length(newA)!=nrow(data) && length(newA)!=1L) stop("f_gstar1/f_gstar2 must be either a function or a vector of length nrow(data) or 1")
      # if (length(newA)==1L) {
      #   # newA <- rep_len(newA, nrow(data))
      # }
    # allow vector (numAnodes x 1) for constant treatment assignment

    } else {
      if (length(newA) != length(Anodes)) stop("for multivariate Anodes f_gstar1/f_gstar2 must be either a vector of the same dim or a matrix with length(Anodes) cols and nrow(data) rows")
    }
    newA <- matrix(newA, nrow=nrow(data), ncol=length(Anodes), byrow=TRUE)
    colnames(newA) <- Anodes
  }
  return(newA)
}

## ---------------------------------------------------------------------
# DETECTING VECTOR TYPES
# sVartypes <- list(bin = "binary", cat = "categor", cont = "contin")
## ---------------------------------------------------------------------
detect.col.types <- function(sVar_mat){
  detect_vec_type <- function(vec) {
    vec_nomiss <- vec[!gvars$misfun(vec)]
    nvals <- length(unique(vec_nomiss))
    if (nvals <= 2L) {
      sVartypes$bin
    } else if ((nvals <= maxncats) && (is.integerish(vec_nomiss))) {
      sVartypes$cat
    } else {
      sVartypes$cont
    }
  }

  assert_that(is.integerish(getopt("maxncats")) && getopt("maxncats") > 1)
  maxncats <- getopt("maxncats")
  sVartypes <- gvars$sVartypes

  # for matrix:
  if (is.matrix(sVar_mat)) {
    return(as.list(apply(sVar_mat, 2, detect_vec_type)))
  # for data.table:
  } else if (is.data.table(sVar_mat)) {
    return(as.list(sVar_mat[, lapply(.SD, detect_vec_type)]))
  } else {
    stop("unrecognized sVar_mat class: " %+% class(sVar_mat))
  }
}

## ---------------------------------------------------------------------
# Normalizing / Defining bin intervals / Converting contin. to ordinal / Converting ordinal to bin indicators
## ---------------------------------------------------------------------
normalize <- function(x) {
  if (abs(max(x) - min(x)) > gvars$tolerr) { # Normalize to 0-1 only when x is not constant
    return((x - min(x)) / (max(x) - min(x)))
  } else {  # What is the thing to do when x constant? Set to abs(x), abs(x)/x or 0???
    return(x)
  }
}
normalize_sVar <- function(sVar_vec) {
  nonmiss_idx <- !gvars$misfun(sVar_vec)
  if (sum(nonmiss_idx) > 0) {
    sVar_vec[nonmiss_idx] <- normalize(sVar_vec[nonmiss_idx])
  }
  sVar_vec
}
normalize_matsVar <- function(sVar_mat) apply(sVar_mat, 2, normalize_sVar)

# Define bin cutt-offs for continuous x:
define.intervals <- function(x, nbins, bin_bymass, bin_bydhist, max_nperbin) {
  x <- x[!gvars$misfun(x)]  # remove missing vals
  nvals <- length(unique(x))
  if (is.na(nbins)) nbins <- as.integer(length(x) / max_nperbin)
  # if nbins is too high, for ordinal, set nbins to n unique obs and cancel quantile based interval defns
  if (nvals < nbins) {
    nbins <- nvals
    bin_bymass <- FALSE
  }
  if (abs(max(x) - min(x)) > gvars$tolerr) {  # when x is not constant
    if ((bin_bymass) & !is.null(max_nperbin)) {
      if ((length(x) / max_nperbin) > nbins) nbins <- as.integer(length(x) / max_nperbin)
    }
    intvec <- seq.int(from = min(x), to = max(x) + 1, length.out = (nbins + 1)) # interval type 1: bin x by equal length intervals of 0-1
    # intvec <- c(min(x), sort(runif(n = nbins, min = min(x), max = max(x))), max(x) + 1)
  } else {  # when x is constant, force the smallest possible interval to be at least [0,1]
    intvec <- seq.int(from = min(0L, min(x)), to = max(1L, max(x)), length.out = (nbins + 1))
  }
  if (bin_bymass) {
    intvec <- quantile(x = x, probs = normalize(intvec)) # interval type 2: bin x by mass (quantiles of 0-1 intvec as probs)
    intvec[1] <- intvec[1] - 0.01
    intvec[length(intvec)] <- intvec[length(intvec)] + 0.01
  } else if (bin_bydhist) {
    intvec <- dhist(x, plot = FALSE, nbins = nbins)$xbr
    intvec[1] <- intvec[1] - 0.01
    intvec[length(intvec)] <- intvec[length(intvec)] + 0.01
  }
  # adding -Inf & +Inf as leftmost & rightmost cutoff points to make sure all future data points end up in one of the intervals:
  # intvec <- c(-Inf, min(intvec)-0.01, intvec)
  # intvec <- c(min(intvec) - 0.1, intvec)
  intvec <- c(min(intvec)-1000L, intvec, max(intvec)+1000L)
  # intvec <- c(min(intvec) - 9999999L, min(intvec) - 0.1, intvec, max(intvec) + 0.1, max(intvec) + 9999999L)
  return(intvec) # return(list(intbylen = intvec, intbymass = intvecq))
}
# Turn any x into ordinal (1, 2, 3, ..., nbins) for a given interval cutoffs (length(intervals)=nbins+1)
make.ordinal <- function(x, intervals) findInterval(x = x, vec = intervals, rightmost.closed = TRUE)

# # Make dummy indicators for ordinal x (sA[j])
# # Approach: creates B_j that jumps to 1 only once and stays 1 (degenerate) excludes reference category (last)
# make.bins_mtx_1 <- function(x.ordinal, nbins, bin.nms) {
#   n <- length(x.ordinal)
#   cats <- 1 : nbins
#   dummies_mat <- matrix(1L, nrow = n, ncol = length(cats))
#   for(level in cats[-length(cats)]) {
#     subset_Bj0 <- x.ordinal > level
#     dummies_mat[subset_Bj0, level] <- 0L
#     subset_Bjmiss <- x.ordinal < level
#     dummies_mat[subset_Bjmiss, level] <- gvars$misval
#   }
#   dummies_mat[, cats[length(cats)]] <- gvars$misval
#   colnames(dummies_mat) <- bin.nms
#   dummies_mat
# }

# Make dummy indicators for ordinal x (sA[j])
# Approach: creates B_j that jumps to 1 only once and stays 1 (degenerate) excludes reference category (last)
make.bins_mtx_1 <- function(x.ordinal, nbins, bin.nms, levels = 1:nbins) {
  n <- length(x.ordinal)
  new.cats <- 1:nbins
  dummies_mat <- matrix(1L, nrow = n, ncol = length(new.cats))
  for(cat in new.cats[-length(new.cats)]) {
    subset_Bj0 <- x.ordinal > levels[cat]
    dummies_mat[subset_Bj0, cat] <- 0L
    subset_Bjmiss <- x.ordinal < levels[cat]
    dummies_mat[subset_Bjmiss, cat] <- gvars$misval
  }
  dummies_mat[, new.cats[length(new.cats)]] <- gvars$misval
  colnames(dummies_mat) <- bin.nms
  dummies_mat
}


#' R6 class for storing and managing the user-input data.
#'
#' The data is stored as a data.table under the public field \code{OdataDT}.
#'
#' @docType class
#' @format An \code{\link{R6Class}} generator object
#' @keywords R6 class
#' @details
#' \itemize{
#' \item{\code{nOdata}} - Maximum number of friends for any observation.
#' \item{\code{nOdata}} - number of observations in the observed data frame.
#' \item{\code{nFnode}} - Name of the vector that stores the number of friends for each observation (always set to 'nF').
#' \code{iid_data_flag} - Is the input data iid (i.e., no network)?
#' \item{\code{Wnodes}} - Names of the baseline covariates (not used).
#' \item{\code{Anodes}} - Names of the intervention nodes.
#' \item{\code{Lnodes}} - Names of the time-varying covariates (not used).
#' \item{\code{Ynodes}} - Names of the outcome nodes (not used).
#' \item{\code{A_g0_DT}} - Data.table of observed exposures
#' }
#' @section Methods:
#' \describe{
#'   \item{\code{new(Odata, nFnode, iid_data_flag, nodes, ...)}}{...}
#'   \item{\code{replaceVar(varName, newVarVal)}}{...}
#'   \item{\code{replaceOneAnode(AnodeName, newAnodeVal)}}{...}
#'   \item{\code{replaceManyAnodes(Anodes, newAnodesMat)}}{...}
#' }
#' @section Active Bindings:
#' \describe{
#'    \item{\code{ncols.sVar}}{...}
#'    \item{\code{emptydat.sVar}}{...}
#'    \item{\code{nodes}}{...}
#'    \item{\code{dat.sVar}}{...}
#'    \item{\code{emptydat.sVar}}{...}
#'    \item{\code{nodes}}{...}
#' }
#'
#' @importFrom assertthat assert_that
#' @export
OdataDT <- R6Class(classname = "OdataDT",
  portable = TRUE,
  class = TRUE,
  public = list(
    OdataDT = NULL,            # data.table used for creating the summary measures
    nOdata = NA_integer_,      # n of samples in the OBSERVED (original) data
    nFnode = "nF",
    iid_data_flag = FALSE,
    sW = NULL,
    sA = NULL,
    intervene1.sA = NULL,
    intervene2.sA = NULL,
    # Wnodes = NULL,
    # Anodes = NULL,
    # Lnodes = NULL,
    # Ynodes = NULL,
    A_g0_DT = NULL,             # Backed-up versions of the Anodes vars that come from the observed data
    sA_g0_DT = NULL,            # Backed-up versions of the summaries in sA (but not Anodes) that come from the observed data
    # A.g0.mat = NULL,
    # A.gstar.mat = NULL,

    save_sA_Vars = NULL,        # summary measure variables that were pre-saved (backed-up) and were not part of new.sA (Anodes)
    restored_sA_Vars = FALSE,   # were the summary measures (not Anodes) restored as well? If not, they need to be reconstructed
    curr_data_A_g0 = TRUE,      # is the current data in OdataDT generated under observed (g0)? If FALSE, current data is under g.star (intervention)

    initialize = function(Odata, nFnode, iid_data_flag, ...) {
      assert_that(is.data.frame(Odata))
      self$curr_data_A_g0 <- TRUE

      self$OdataDT <- data.table(Odata)
      # alternative is to set it without copying Odata
      # setDT(Odata); self$OdataDT <- Odata

      self$nOdata <- nrow(self$OdataDT)
      if (!missing(nFnode)) self$nFnode <- nFnode
      self$iid_data_flag <- iid_data_flag
      # if (!missing(nodes))
      # self$nodes <- nodes
      # self$Anodes <- Anodes
      # self$Wnodes <- Wnodes
      # self$Lnodes <- Lnodes
      # self$Ynodes <- Ynodes
      invisible(self)
    },

    replaceVar = function(varName, newVarVal) {
      assert_that(is.integer(newVarVal) | is.numeric(newVarVal))
      assert_that(length(newVarVal)==self$nOdata | length(newVarVal)==1)
      assert_that(varName %in% colnames(self$OdataDT))
      self$OdataDT[, (varName) := newVarVal]
      invisible(self)
    },

    replaceOneAnode = function(AnodeName, newAnodeVal) {
      self$replaceVar(AnodeName, newAnodeVal)
      invisible(self)
    },

    replaceManyAnodes = function(Anodes, newAnodesMat) {
      assert_that(is.matrix(newAnodesMat))
      assert_that(ncol(newAnodesMat) == length(Anodes))
      for (col in Anodes) {
        idx <- which(Anodes %in% col)
        assert_that(col %in% colnames(newAnodesMat))
        assert_that(col %in% colnames(self$OdataDT))
        self$OdataDT[, (col) := newAnodesMat[, idx]]
      }
      invisible(self)
    },

    # Save Anodes and summaries sA from main data.table into separate fields
    backupAnodes = function(Anodes, sA) {

      if (missing(Anodes)) Anodes <- self$nodes$Anodes
      self$A_g0_DT <- self$OdataDT[, Anodes, with = FALSE]

      # Back-up the summary measures as well (to not have to reconstruct them):
      if (!missing(sA)) {
        sA.Vars <- unlist(sA$sVar.names.map)
        save_sA_Vars <- sA.Vars[!sA.Vars%in%Anodes]
        if (length(save_sA_Vars)>0) {
          self$save_sA_Vars <- save_sA_Vars
          self$sA_g0_DT <- self$OdataDT[, save_sA_Vars, with = FALSE]
        }
      }
      invisible(self)
    },

    # Put saved Anodes and summaries sA back into main data.table
    restoreAnodes = function(Anodes) {
      if (missing(Anodes)) Anodes <- self$nodes$Anodes
      if (is.null(self$A_g0_DT)) stop("Anodes in OdataDT cannot be restored, self$A_g0_DT is null!")

      self$OdataDT[, (Anodes) := self$A_g0_DT]

      if (!is.null(self$sA_g0_DT) && !is.null(self$save_sA_Vars)) {
        self$OdataDT[, (self$save_sA_Vars) := self$sA_g0_DT]
      }
      self$restored_sA_Vars <- TRUE
      # else {
      #   self$restored_sA_Vars <- FALSE
      # }
      invisible(self)
    },

    # Swap re-saved Anodes and summaries sA with those in main data.table
    swapAnodes = function(Anodes) {
      if (missing(Anodes)) Anodes <- self$nodes$Anodes

      # 1) Save the current values of Anodes and sA in the data:
      temp.Anodes <- self$OdataDT[, Anodes, with = FALSE]
      if (!is.null(self$sA_g0_DT) && !is.null(self$save_sA_Vars)) {
        temp.sA <- self$OdataDT[, self$save_sA_Vars, with = FALSE]
      } else {
        temp.sA <- NULL
      }

      # 2) Restore previously saved Anodes / sA into the data:
      self$restoreAnodes()

      # 3) Over-write the back-up values with new ones:
      self$A_g0_DT <- temp.Anodes
      self$sA_g0_DT <- temp.sA

      # 4) Reverse the indicator of current data Anodes:
      self$curr_data_A_g0 <- !self$curr_data_A_g0

      invisible(self)
    }

    # (OPTIONAL) ENABLE ADDING DETERMINISTIC/DEGENERATE Anode FLAG COLUMNS TO DatNet
    # will be automatically added to A ~ predictors
    # add_deterministic = function(Odata, userDETcol) {
    #   determ.g_user <- as.vector(Odata[,userDETcol]) # get deterministic As for the entire network of each unit (set by user)
    #   # determ.gvals_user <- Odata[,AnodeDET] # add values to be assigned to deterministic nodes (already have: netA)
    #   determ_cols_user <- .f.allCovars(k = self$Kmax, NetInd_k = self$netind_cl$NetInd_k, Var = determ.g_user,
    #                                     VarNm = "determ.g_true", misval = gvars$misval)
    #   # determ_cols <- (determ_cols_user | determ_cols_Friend)
    #   determ_cols <- determ_cols_user
    #   # THIS IS A VERY BAD WAY TO DO THAT, REMOVING
    #   # self$mat.sVar <- cbind(determ_cols, self$mat.sVar)
    #   determ_cols_type <- as.list(rep.int(gvars$sVartypes$bin, ncol(determ_cols)))
    #   names(determ_cols_type) <- colnames(determ_cols)
    #   self$type.sVar <- c(determ_cols_type, self$type.sVar)
    #   invisible(self)
    # },
    # # Replaces all misval values in mat.netVar / mat.sVar with gvars$misXreplace
    # fixmiss_netVar = function() {
    #   self$mat.netVar[gvars$misfun(self$mat.netVar)] <- gvars$misXreplace
    #   invisible(self)
    # },

  ),
  active = list(
    names.sVar = function() { colnames(self$OdataDT) },
    ncols.sVar = function() { length(self$names.sVar) },
    # emptydat.netVar = function() {self$mat.netVar <- NULL },    # wipe out mat.netVar
    emptydat.sVar = function() { self$OdataDT <- NULL },         # wipe out mat.sVar
    nodes = function(nodes) {
      if (missing(nodes)) {
        return(private$.nodes)
      } else {
        assert_that(is.list(nodes))
        private$.nodes <- nodes
      }
    }
  ),
  private = list(
    .nodes = list()           # names of the nodes in the data (Anode, Ynode, nFnode, etc..)
  )
)

#' R6 class for storing and managing already evaluated summary measures \code{sW} or \code{sA} (but not both at the same time).
#'
#' Class for evaluating and storing arbitrary summary measures sVar.
#'  The summary measures are evaluated based on the user-specified sVar expressions in sVar.object (sW or sA),
#'  in the environment of the input data.frame (Odata).
#'  The evaluated summary measures from sVar.object are stored as a matrix (self$mat.sVar).
#'  Contains methods for replacing missing values with default in gvars$misXreplace.
#'  Also contains method for detecting / setting sVar variable type (binary, categor, contin).
#'
#' @docType class
#' @format An \code{\link{R6Class}} generator object
#' @keywords R6 class
#' @details
#' \itemize{
#' \item{\code{Kmax}} - Maximum number of friends for any observation.
#' \item{\code{nFnode}} - Name of the vector that stores the number of friends for each observation (always set to 'nF').
#' \item{\code{netind_cl}} - Pointer to a network instance of class \code{simcausal::NetIndClass}.
#' \item{\code{Odata}} - Pointer to the user input data R6 object of class OdataDT
#' \item{\code{mat.sVar}} - The evaluated matrix of summary measures for \code{sW} or \code{sA}.
#' \item{\code{sVar.object}} - Instance of the \code{\link{DefineSummariesClass}} class which contains the summary measure expressions for \code{sW} or \code{sA}.
#' \item{\code{type.sVar}} - named list of length \code{ncol(mat.sVar)} with \code{sVar} variable types: "binary"/"categor"/"contin".
#' \item{\code{norm.c.sVars}} - \code{flag = TRUE} if continous covariates need to be normalized.
#' \item{\code{nOdata}} - number of observations in the observed data frame.
#' }
#' @section Methods:
#' \describe{
#'   \item{\code{new(netind_cl, nodes, nFnode, ...)}}{...}
#'   \item{\code{make.sVar(Odata, sVar.object = NULL, type.sVar = NULL, norm.c.sVars = FALSE)}}{...}
#'   \item{\code{def.types.sVar(type.sVar = NULL)}}{...}
#'   \item{\code{norm_c_sVars()}}{...}
#'   \item{\code{fixmiss_sVar()}}{...}
#'   \item{\code{norm.sVar(name.sVar)}}{...}
#'   \item{\code{set.sVar(name.sVar, new.sVar)}}{...}
#'   \item{\code{get.sVar(name.sVar)}}{...}
#'   \item{\code{set.sVar.type(name.sVar, new.type)}}{...}
#'   \item{\code{get.sVar.type(name.sVar)}}{...}
#' }
#' @section Active Bindings:
#' \describe{
#'    \item{\code{names.sVar}}{...}
#'    \item{\code{names.c.sVar}}{...}
#'    \item{\code{ncols.sVar}}{...}
#'    \item{\code{dat.sVar}}{...}
#'    \item{\code{emptydat.sVar}}{...}
#'    \item{\code{nodes}}{...}
#' }
#' @importFrom assertthat assert_that is.count is.flag
#' @export
DatNet <- R6Class(classname = "DatNet",
  portable = TRUE,
  class = TRUE,
  public = list(
    Kmax = integer(),          # max n of Friends in the network
    nFnode = "nF",
    netind_cl = NULL,          # class NetIndClass object holding $NetInd_k network matrix
    Odata = NULL,              # object of class OdataDT with observed data saved under OdataDT field as a data.table, used for creating the summary measures in mat.sVar
    mat.sVar = NULL,           # Matrix storing all evaluated sVars, with named columns
    sVar.object = NULL,        # DefineSummariesClass object that contains / evaluates sVar expressions
    type.sVar = NULL,          # named list with sVar types: list(names.sVar[i] = "binary"/"categor"/"contin"), can be overridden
    norm.c.sVars = FALSE,      # flag = TRUE if want to normalize continous covariates
    nOdata = NA_integer_,      # n of samples in the OBSERVED (original) data

    initialize = function(netind_cl, nodes, nFnode, ...) {
      self$netind_cl <- netind_cl
      self$Kmax <- netind_cl$Kmax
      if (!missing(nFnode)) self$nFnode <- nFnode
      if (!missing(nodes)) self$nodes <- nodes
      invisible(self)
    },

    # **********************
    # Define and evalute summary measure (sVar) data; Save it (and over-write prev. values) as self$dat.sVar
    # **********************
    make.sVar = function(Odata, sVar.object = NULL, type.sVar = NULL, norm.c.sVars = FALSE) {
      # assert_that(is.data.frame(Odata))
      # self$nOdata <- nrow(Odata)
      if (missing(Odata)) {
        assert_that(!is.null(self$Odata))
      } else {
        self$nOdata <- Odata$nOdata
        self$Odata <- Odata
      }

      if (is.null(sVar.object)) {
        stop("Not Implemented. To Be replaced with netVar construction when sVar.object is null...")
      }

      self$sVar.object <- sVar.object
      self$dat.sVar <- sVar.object$eval.nodeforms(data.df = self$Odata$OdataDT, netind_cl = self$netind_cl)

      # MAKE def.types.sVar an active binding? calling self$def.types.sVar <- type.sVar assigns, calling self$def.types.sVar defines.
      self$def.types.sVar(type.sVar) # Define the type of each sVar[i]: bin, cat or cont
      # normalize continuous and non-missing sVars, overwrite their columns in mat.sVar with normalized [0,1] vals
      if (norm.c.sVars) {
        self$norm.c.sVars <- norm.c.sVars
        self$norm_c_sVars()
      }
      invisible(self)
    },

    # *** MAKE A PRIVATE METHOD ***
    # Define the type (class) of each summary measure: bin, cat or cont
    # type.sVar acts as a flag: only detect types when !is.null(type.sVar)
    # otherwise can pass type.sVar = list(sVar = NA, ...) or a value type.sVar = NA/gvars$sVartypes$bin/etc
    def.types.sVar = function(type.sVar = NULL) {
      # Detect the type of each sVar[i]: gvars$sVartypes$bin,  gvars$sVartypes$cat, gvars$sVartypes$cont
      if (is.null(type.sVar)) {
        self$type.sVar <- detect.col.types(self$dat.sVar)
      } else {
        n.sVar <- length(self$names.sVar)
        len <- length(type.sVar)
        assert_that((len == n.sVar) || (len == 1L))
        if (len == n.sVar) {
          assert_that(is.list(type.sVar))
          assert_that(all(names(type.sVar) %in% self$names.sVar))
        } else {
          assert_that(is.string(type.sVar))
          type.sVar <- as.list(rep(type.sVar, n.sVar))
          names(type.sVar) <- self$names.sVar
        }
        self$type.sVar <- type.sVar
      }
      invisible(self)
    },

    # *** MAKE A PRIVATE METHOD ***
    # Normalize continuous sVars # This could be memory-costly
    norm_c_sVars = function() {
      names.c.sVar <- self$names.c.sVar
      if (length(names.c.sVar) == 0L) return(invisible(self))

      if (self$norm.c.sVars && (length(names.c.sVar) > 0)) {
        for (name.c.sVar in names.c.sVar) {
          self$mat.sVar[, name.c.sVar] <- normalize_sVar(self$mat.sVar[, name.c.sVar])
        }
      }
      invisible(self)
    },

    fixmiss_sVar_mat = function() {
      self$dat.sVar[gvars$misfun(self$dat.sVar)] <- gvars$misXreplace
      invisible(self)
    },

    fixmiss_sVar_DT = function() {
      # see http://stackoverflow.com/questions/7235657/fastest-way-to-replace-nas-in-a-large-data-table
      dat.sVar <- self$dat.sVar
      # OdataDT <- self$Odata$OdataDT
      for (j in names(dat.sVar))
        set(dat.sVar, which(gvars$misfun(dat.sVar[[j]])), j , gvars$misXreplace)
      invisible(self)
    },

    fixmiss_sVar = function() {
      if (is.matrix(self$dat.sVar)) {
        self$fixmiss_sVar_mat()
      } else if (is.data.table(self$dat.sVar)) {
        self$fixmiss_sVar_DT()
      } else {
        stop("self$dat.sVar is of unrecognized class")
      }
    },

    # --------------------------------------------------
    # Methods for directly handling one continous/categorical sVar in self$mat.sVar;
    # No checking of incorrect input is performed, use at your own risk!
    # --------------------------------------------------
    norm.sVar = function(name.sVar) { normalize_sVar(self$dat.sVar[, name.sVar]) },  # return normalized 0-1 sVar
    # set.sVar = function(name.sVar, new.sVar) { self$mat.sVar[, name.sVar] <- new.sVar },
    # get.sVar = function(name.sVar) { self$dat.sVar[, name.sVar] },
    set.sVar = function(name.sVar, new.sVar) { self$mat.sVar[, .(name.sVar) := eval(new.sVar)]},
    get.sVar = function(name.sVar) {
      x <- self$dat.sVar[, name.sVar, with=FALSE]
      if (is.list(x) || is.data.table(x) || is.data.frame(x)) x <- x[[1]]
      return(x)
    },
    set.sVar.type = function(name.sVar, new.type) { self$type.sVar[[name.sVar]] <- new.type },
    get.sVar.type = function(name.sVar) { if (missing(name.sVar)) { self$type.sVar } else { self$type.sVar[[name.sVar]] } }
  ),

  active = list(
    # names.netVar = function() { colnames(self$dat.netVar) },
    names.sVar = function() { colnames(self$dat.sVar) },
    names.c.sVar = function() { names(self$type.sVar[self$type.sVar %in% gvars$sVartypes$cont]) }, # names of cont sVars
    # ncols.netVar = function() { length(self$names.netVar) },
    ncols.sVar = function() { length(self$names.sVar) },
    # dat.netVar = function() { self$mat.netVar },
    dat.sVar = function(dat.sVar) {
      if (missing(dat.sVar)) {
        return(self$mat.sVar)
      } else {
        assert_that(is.matrix(dat.sVar) | is.data.table(dat.sVar))
        self$mat.sVar <- dat.sVar
      }
    },
    # emptydat.netVar = function() {self$mat.netVar <- NULL },    # wipe out mat.netVar
    emptydat.sVar = function() { self$mat.sVar <- NULL },         # wipe out mat.sVar
    nodes = function(nodes) {
      if (missing(nodes)) {
        return(private$.nodes)
      } else {
        assert_that(is.list(nodes))
        private$.nodes <- nodes
      }
    }
  ),
  private = list(
    .nodes = list()           # names of the nodes in the data (Anode, Ynode, nFnode, etc..)
  )
)

## ---------------------------------------------------------------------
#' R6 class for storing and managing the combined summary measures \code{sW} & \code{sA} from DatNet classes.
#'
#' This class inherits from \code{DatNet} and extends its methods to handle a single matrix dataset of
#'  all summary measures \code{(sA,sW)}
#'  The class \code{DatNet.sWsA} is the only way to access data in the entire package.
#'  Contains methods for combining, subsetting, discretizing & binirizing summary measures \code{(sW,sA)}.
#'  For continous sVar this class provides methods for detecting / setting bin intervals,
#'  normalization, disretization and construction of bin indicators.
#'  The pointers to this class get passed on to \code{SummariesModel} functions: \code{$fit()},
#'  \code{$predict()} and \code{$predictAeqa()}.
#'
#' @docType class
#' @format An \code{\link{R6Class}} generator object
#' @keywords R6 class
#' @details
#' \itemize{
#' \item{\code{datnetW}} - .
#' \item{\code{datnetA}} - .
#' \item{\code{active.bin.sVar}} - Currently discretized continous \code{sVar} column in data matrix \code{mat.sVar}.
#' \item{\code{mat.bin.sVar}} - Matrix of the binary indicators for discretized continuous covariate \code{active.bin.sVar}.
#' \item{\code{ord.sVar}} - Ordinal (categorical) transformation of a continous covariate \code{sVar}.
#' \item{\code{YnodeVals}} - .
#' \item{\code{det.Y}} - .
#' \item{\code{p}} - .
#' }
#' @section Methods:
#' \describe{
#'   \item{\code{new(datnetW, datnetA, YnodeVals, det.Y, ...)}}{...}
#'   \item{\code{addYnode(YnodeVals, det.Y)}}{...}
#'   \item{\code{evalsubst(subsetexpr, subsetvars)}}{...}
#'   \item{\code{get.dat.sWsA(rowsubset = TRUE, covars)}}{...}
#'   \item{\code{get.outvar(rowsubset = TRUE, var)}}{...}
#'   \item{\code{copy.sVar.types()}}{...}
#'   \item{\code{bin.nms.sVar(name.sVar, nbins)}}{...}
#'   \item{\code{pooled.bin.nm.sVar(name.sVar)}}{...}
#'   \item{\code{detect.sVar.intrvls(name.sVar, nbins, bin_bymass, bin_bydhist, max_nperbin)}}{...}
#'   \item{\code{detect.cat.sVar.levels(name.sVar)}}{...}
#'   \item{\code{discretize.sVar(name.sVar, intervals)}}{...}
#'   \item{\code{binirize.sVar(name.sVar, intervals, nbins, bin.nms)}}{...}
#'   \item{\code{binirize.cat.sVar(name.sVar, levels)}}{...}
#'   \item{\code{get.sVar.bw(name.sVar, intervals)}}{...}
#'   \item{\code{get.sVar.bwdiff(name.sVar, intervals)}}{...}
#'   \item{\code{make.dat.sWsA(p = 1, f.g_fun = NULL, sA.object = NULL)}}{...}
#' }
#' @section Active Bindings:
#' \describe{
#'    \item{\code{dat.sWsA}}{...}
#'    \item{\code{dat.bin.sVar}}{...}
#'    \item{\code{emptydat.bin.sVar}}{...}
#'    \item{\code{names.sWsA}}{...}
#'    \item{\code{nobs}}{...}
#'    \item{\code{noNA.Ynodevals}}{...}
#'    \item{\code{nodes}}{...}
#' }
#' @importFrom assertthat assert_that is.count is.flag
#' @export
DatNet.sWsA <- R6Class(classname = "DatNet.sWsA",
  inherit = DatNet,
  portable = TRUE,
  class = TRUE,
  public = list(
    Odata = NULL,
    datnetW = NULL,            # *** RENAME TO O.datnetW for clarity ***
    datnetA = NULL,            # *** RENAME TO O.datnetA for clarity ***
    active.bin.sVar = NULL,    # name of active binarized cont sVar, changes as fit/predict is called (bin indicators are temp. stored in mat.bin.sVar)
    # dat.bin.sVar = NULL,     # (MOVED TO AN ACT BIND) points to self$mat.bin.sVar
    mat.bin.sVar = NULL,       # temp storage mat for bin indicators on currently binarized continous sVar (from self$active.bin.sVar)
    ord.sVar = NULL,           # Ordinal (cat) transform for continous sVar
    YnodeVals = NULL,          # Values of the binary outcome (Ynode) in observed data where det.Y = TRUE obs are set to NA
    det.Y = NULL,              # Logical vector, where YnodeVals[det.Y==TRUE] are deterministic (0 or 1)
    p = 1,
    # **********
    # dat.sVar - (inherited act bind): now points to combine mat.sVar of above cbind(dat.sW, dat.sA)
    # this keeps ALL methods and active bindings of DatNet valid in DatNet.sWsA for this combined data mat
    # **********
    initialize = function(Odata, datnetW, datnetA, YnodeVals, det.Y, ...) {
      assert_that("OdataDT" %in% class(Odata))
      assert_that("DatNet" %in% class(datnetW))
      assert_that("DatNet" %in% class(datnetA))

      self$Odata <- Odata
      self$datnetW <- datnetW
      self$datnetA <- datnetA

      self$netind_cl <- datnetW$netind_cl
      self$Kmax <- self$netind_cl$Kmax
      # re-assign nodes object if it already exists in datnetW
      if (length(datnetW$nodes) > 0) self$nodes <- datnetW$nodes
      if (!missing(YnodeVals)) self$addYnode(YnodeVals = YnodeVals, det.Y = det.Y)
      invisible(self)
    },

    addYnode = function(YnodeVals, det.Y) {
        if (missing(det.Y)) det.Y <- rep.int(FALSE, length(YnodeVals))
        self$noNA.Ynodevals <- YnodeVals  # Adding actual observed Y as protected (without NAs)
        self$YnodeVals <- YnodeVals
        self$YnodeVals[det.Y] <- NA       # Adding public YnodeVals & setting det.Y values to NA
        self$det.Y <- det.Y
    },

    # Eval the expression (in the environment of the data.frame "data" + global constants "gvars"):
    # Could also do evaluation in a special env with a custom subsetting fun '[' that will dynamically find the correct dataset that contains
    # sVar.name (dat.sVar or dat.bin.sVar) and will return sVar vector
    evalsubst = function(subsetexpr, subsetvars) {
      if (missing(subsetexpr)) {
        assert_that(!missing(subsetvars))
        res <- rep.int(TRUE, self$nobs)
        for (subsetvar in subsetvars) {
          # *) find the var of interest (in self$dat.sWsA or self$dat.bin.sVar), give error if not found
          sVar.vec <- self$get.outvar(var = subsetvar)
          assert_that(!is.null(sVar.vec))
          # *) reconstruct correct expression that tests for missing values
          res <- res & (!gvars$misfun(sVar.vec))
        }
        return(res)
      # ******************************************************
      # NOTE: Below is currently not being used, all subsetting now is done with subsetvars above, for speed & memory efficiency
      # ******************************************************
      } else {
        if (is.logical(subsetexpr)) {
          return(subsetexpr)
        } else {
          # ******************************************************
          # THIS WAS A BOTTLENECK: for 500K w/ 1000 bins: 4-5sec
          # REPLACING WITH env that is made of data.frames instead of matrices
          # ******************************************************
          # eval.env <- c(data.frame(self$dat.sWsA), data.frame(self$dat.bin.sVar), as.list(gvars))
          # res <- try(eval(subsetexpr, envir = eval.env, enclos = baseenv())) # to evaluate vars not found in data in baseenv()
          stop("disabled for memory/speed efficiency")
          return(res)
        }
      }
    },

    # return a covar matrix which will be used as a design matrix for BinOutModelClass
    get.dat.sWsA = function(rowsubset = TRUE, covars) {
      if (!missing(covars)) {

        if (length(unique(colnames(self$dat.sWsA))) < length(colnames(self$dat.sWsA))) {
          warning("repeating column names in the final data set; please check for duplicate summary measure / node names")
        }

        # columns to select from main design matrix (in the same order as listed in covars):
        sel.sWsA <- intersect(covars, colnames(self$dat.sWsA))

        # old version (returned names by column order in self$dat.sWsA):
        # sel.sWsA <- unique(colnames(self$dat.sWsA)[(colnames(self$dat.sWsA) %in% covars)])

        if (is.matrix(self$dat.sWsA)) {
          dfsel <- self$dat.sWsA[rowsubset, sel.sWsA, drop = FALSE] # data stored as matrix
        } else if (is.data.table(self$dat.sWsA)) {
          dfsel <- self$dat.sWsA[rowsubset, sel.sWsA, drop = FALSE, with = FALSE] # data stored as data.table
        } else {
          stop("self$dat.sWsA is of unrecognized class: " %+% class(self$dat.sWsA))
        }

        # columns to select from binned continuous/cat var matrix (if it was previously constructed):
        if (!is.null(self$dat.bin.sVar)) {
          # sel.binsA <- colnames(self$dat.bin.sVar)[(colnames(self$dat.bin.sVar) %in% covars)]
          sel.binsA <- intersect(covars, colnames(self$dat.bin.sVar))
        } else {
          sel.binsA <- NULL
        }
        if (length(sel.binsA)>0) {
          dfsel <- cbind(dfsel, self$dat.bin.sVar[rowsubset, sel.binsA, drop = FALSE])
        }

        found_vars <- covars %in% colnames(dfsel)
        if (!all(found_vars)) stop("some covariates can't be found (perhaps not declared as summary measures (def_sW(...) or def_sW(...))): "%+%
                                    paste(covars[!found_vars], collapse=","))
        return(dfsel)
      } else {
        return(self$dat.sWsA[rowsubset, , drop = FALSE])
      }
    },

    get.outvar = function(rowsubset = TRUE, var) {
      if (length(self$nodes) < 1) stop("DatNet.sWsA$nodes list is empty!")

      if (var %in% self$names.sWsA) {
        # self$dat.sWsA[rowsubset, var]
        out <- self$dat.sWsA[rowsubset, var, with = FALSE]
      } else if (var %in% colnames(self$dat.bin.sVar)) {
        out <- self$dat.bin.sVar[rowsubset, var]
      } else if ((var %in% self$nodes$Ynode) && !is.null(self$YnodeVals)) {
        out <- self$YnodeVals[rowsubset]
      } else {
        stop("requested variable " %+% var %+% " does not exist in DatNet.sWsA!")
      }

      if ((is.list(out) || is.data.table(out)) && (length(out)>1)) {
        stop("selecting regression outcome covariate resulted in more than one column: " %+% var)
      } else if (is.list(out) || is.data.table(out)) {
        return(out[[1]])
      } else {
        return(out)
      }

    },

    copy.sVar.types = function() {
      self$type.sVar <- c(self$datnetW$type.sVar, self$datnetA$type.sVar)
    },

    # ------------------------------------------------------------------------------------------------------------
    # MOVED THESE METHODS TO DatNet.sWsA class
    # ------------------------------------------------------------------------------------------------------------
    # Need to find a way to over-ride nbins for categorical vars (allowing it to be set to more than gvars$maxncats)!
    # Return names of bin indicators for sVar:
    bin.nms.sVar = function(name.sVar, nbins) { name.sVar%+%"_"%+%"B."%+%(1L:nbins) },
    pooled.bin.nm.sVar = function(name.sVar) { name.sVar %+% "_allB.j" },
    detect.sVar.intrvls = function(name.sVar, nbins, bin_bymass, bin_bydhist, max_nperbin) {
      tol.int <- 0.001
      int <- define.intervals(x = self$get.sVar(name.sVar), nbins = nbins, bin_bymass = bin_bymass, bin_bydhist = bin_bydhist, max_nperbin = max_nperbin)
      diffvec <- diff(int)
      if (sum(abs(diffvec) < tol.int) > 0) {
      # if (length(unique(int)) < length(int)) {
        if (gvars$verbose) {
          # message("No. of categories for " %+% name.sVar %+% " was collapsed from " %+%
          #         (length(int)-1) %+% " to " %+% (length(unique(int))-1) %+% " due to too few obs.")
          message("No. of categories for " %+% name.sVar %+% " was collapsed from " %+%
                  (length(int)-1) %+% " to " %+% (length(int[diffvec >= tol.int])-1) %+% " due to too few obs.")
          print("old intervals: "); print(as.numeric(int))
        }
        # Just taking unique interval values is insufficient
        # Instead need to drop all intervals that are "too close" to each other based on some tol value
        # remove all intervals (a,b) where |b-a| < tol.int, but always keep the very first interval (int[1])
        int <- c(int[1], int[2:length(int)][abs(diffvec) >= tol.int])
        # int <- unique(int)
        if (gvars$verbose) {
          print("new intervals: "); print(as.numeric(int))
        }
      }
      return(int)
    },
    detect.cat.sVar.levels = function(name.sVar) {
      levels <- sort(unique(self$get.sVar(name.sVar)))
      return(levels)
    },

    # create a vector of ordinal (categorical) vars out of cont. sVar vector:
    discretize.sVar = function(name.sVar, intervals) {
      self$ord.sVar <- make.ordinal(x = self$get.sVar(name.sVar), intervals = intervals)
      invisible(self$ord.sVar)
    },

    # return matrix of bin indicators for continuous sVar:
    # change name to:
    # binirize.cont.sVar = function(name.sVar, intervals, nbins, bin.nms) {
    binirize.sVar = function(name.sVar, intervals, nbins, bin.nms) {
      self$active.bin.sVar <- name.sVar
      self$dat.bin.sVar <- make.bins_mtx_1(x.ordinal = self$discretize.sVar(name.sVar, intervals), nbins = nbins, bin.nms = bin.nms)
      invisible(self$dat.bin.sVar)
    },

    # return matrix of bin indicators for ordinal sVar:
    binirize.cat.sVar = function(name.sVar, levels) {
      nbins <- length(levels)
      bin.nms <- self$bin.nms.sVar(name.sVar, nbins)
      self$active.bin.sVar <- name.sVar
      self$dat.bin.sVar <- make.bins_mtx_1(x.ordinal = self$get.sVar(name.sVar), nbins = nbins, bin.nms = bin.nms, levels = levels)
      invisible(self$dat.bin.sVar)
    },

    # return the bin widths vector for the discretized continuous sVar (self$ord.sVar):
    get.sVar.bw = function(name.sVar, intervals) {
      if (!(self$active.bin.sVar %in% name.sVar)) stop("current discretized sVar name doesn't match name.sVar in get.sVar.bin.widths()")
      if (is.null(self$ord.sVar)) stop("sVar hasn't been discretized yet")
      intrvls.width <- diff(intervals)
      intrvls.width[intrvls.width <= gvars$tolerr] <- 1
      ord.sVar_bw <- intrvls.width[self$ord.sVar]
      return(ord.sVar_bw)
    },

   # return the bin widths vector for the discretized continuous sVar (self$ord.sVar):
    get.sVar.bwdiff = function(name.sVar, intervals) {
      if (!(self$active.bin.sVar %in% name.sVar)) stop("current discretized sVar name doesn't match name.sVar in get.sVar.bin.widths()")
      if (is.null(self$ord.sVar)) stop("sVar hasn't been discretized yet")
      ord.sVar_leftint <- intervals[self$ord.sVar]
      diff_bw <- self$get.sVar(name.sVar) - ord.sVar_leftint
      return(diff_bw)
    },

    # This function returns mat.sVar, which is a matrix that combines all sW and sA summary measures;
    # Odata is only needed for evaluating new sA (!is.null(f.g_fun));
    # When !is.null(f.g_fun) create p new datnetA.gstar's (n obs at a time), which are not saved separately (only combined);
    # When is.null(f.g_fun), returns combined cbind(sW, sA) for observed O.datnetW, O.datnetA;
    # TO ADD: Consider passing ahead a total number of sA that will be created by DatNet class (need it to pre-allocate self$dat.sWsA);
    make.dat.sWsA = function(p = 1, f.g_fun = NULL, new.sA.object = NULL, sA.object = NULL, DatNet.ObsP0 = NULL)  {
      Odata <- self$Odata
      datnetW <- self$datnetW
      datnetA <- self$datnetA
      assert_that(is.count(p))
      self$p <- p
      nobs <- datnetW$nOdata

      # Copy variable detected types (bin, cat or contin) from the observed data classes (datnetW, datnetA) to self:
      self$copy.sVar.types()

      # Setting df.sWsA to observed data (sW,sA) when both f.g_fun and new.sA.object are null:
      if (is.null(f.g_fun) & is.null(new.sA.object)) {
        # ... nothing to do since all the summary measures have been created and put into Odata$Odata_DT data.table
        # ... in the future might need to call this to re-construct A and sA under g0, after those were over-written by A under g.star
        self$dat.sVar <- Odata$OdataDT

      # Need to sample A under f.g_fun (gstar or known g0), possibly re-evaluate sW from O.datnetW
      } else {
        if (is.null(Odata$nodes$Anodes))
          stop("Anodes were not appropriately specified and are null; can't replace observed Anodes with those sampled from f.g0, interevene1.sA or interevene2.sA")
        assert_that(!is.null(DatNet.ObsP0))

        # datnetA.gstar <- DatNet$new(netind_cl = datnetW$netind_cl, nodes = self$nodes)
        datnetA.gstar <- datnetA$clone()

        # for (i in seq_len(p)) {

        i <- 1
        # if need to separately generate Anodes from a function (when not using new.sA.object)
        if (!is.null(f.g_fun)) {
          A.gstar.mat <- f.gen.A.star(data = DatNet.ObsP0$dat.sVar, f.g_fun = f.g_fun, Anodes = self$nodes$Anodes)
          if (is.matrix(A.gstar.mat)) {
            Odata$replaceManyAnodes(Anodes = self$nodes$Anodes, newAnodesMat = A.gstar.mat)
          } else {
            Odata$replaceOneAnode
          }
          # will re-build all the summary measures A^s under new Anodes (sampled from f.g_fun):
          new.sA.object <- sA.object
        }

        if (is.null(Odata$A_g0_DT)) warning("observed data Anodes were not backed-up and are being over-written with intervened Anodes values")
        # will modify OdataDT (data.table) in Odata object by reference:
        datnetA.gstar$make.sVar(Odata = Odata, sVar.object = new.sA.object) # create new summary measures sA (under g.star)

        # alternative way to do the same without datnetA.gstar (evaluate new summaries):
        # self$dat.sVar <- new.sA$eval.nodeforms(data.df = Odata$OdataDT, netind_cl = datnetW$netind_cl)
        Odata$curr_data_A_g0 <- FALSE
        self$dat.sVar <- Odata$OdataDT

        # Assigning the summary measures to one output data matrix:
        # df.sWsA[((i - 1) * nobs + 1):(nobs * i), ] <- cbind(datnetW$dat.sVar, datnetA.gstar$dat.sVar)[, ]
        # }
      }
      # self$dat.sVar <- df.sWsA
      return(invisible(self))
    }
  ),

  active = list(
    dat.sWsA = function() { self$mat.sVar }, # NO LONGER NEEDED, using only self$dat.sVar, KEPT FOR COMPATIBILITY
    dat.bin.sVar = function(dat.bin.sVar) {
      if (missing(dat.bin.sVar)) {
        return(self$mat.bin.sVar)
      } else {
        assert_that(is.matrix(dat.bin.sVar))
        self$mat.bin.sVar <- dat.bin.sVar
      }
    },
    # wipe out binirized mat.sVar:
    emptydat.bin.sVar = function() {
      self$mat.bin.sVar <- NULL
      self$active.bin.sVar <- NULL
    },
    names.sWsA = function() { c(self$datnetW$names.sVar, self$datnetA$names.sVar) },
    nobs = function() { nrow(self$dat.sVar) },
    noNA.Ynodevals = function(noNA.Yvals) {
      if (missing(noNA.Yvals)) return(private$protected.YnodeVals)
      else private$protected.YnodeVals <- noNA.Yvals
    },
    nodes = function(nodes) {
      if (missing(nodes)) {
        return(private$.nodes)
      } else {
        assert_that(is.list(nodes))
        if (length(self$nodes)>0) message("warning: overwriting non-empty self$nodes in DatNet.sWsA")
        private$.nodes <- nodes
        # propagate new nodes to parent objects:
        if (length(self$datnetW$nodes) < 1) self$datnetW$nodes <- nodes
        if (length(self$datnetA$nodes) < 1) self$datnetA$nodes <- nodes
      }
    }
  ),
  private = list(
    protected.YnodeVals = NULL  # Actual observed values of the binary outcome (Ynode), along with deterministic vals
  )
)
osofr/tmlenet documentation built on May 24, 2019, 4:58 p.m.