#----------------------------------------------------------------------------------
# Class for defining, parsing and evaluating the summary measures.
# Expressions sVar.exprs are evaluated in the environment of a given data.frame.
#----------------------------------------------------------------------------------
is.DefineSummariesClass <- function(obj) "DefineSummariesClass" %in% class(obj)
# Useful function for testing if a name is a valid R object name:
isValidAndUnreservedName <- function(string) {
make.names(string) == string # make.names converts any string into a valid R object name
}
capture.exprs <- function(...) {
sVar.exprs <- eval(substitute(alist(...)))
if (length(sVar.exprs)>0) { # deparse into characters when expr is.call, but keep as-is otherwise
sVar.exprs <- lapply(sVar.exprs, function(x) if (is.character(x)) {x} else {deparse(x)})
}
# if not a single argument was named, names attribute of sVar.exprs will be null => add names attribute
if (is.null(names(sVar.exprs))) names(sVar.exprs) <- rep_len("", length(sVar.exprs))
if (length(sVar.exprs)!=0 && any(names(sVar.exprs)%in%"")) {
message("Some summary measures were not named, automatic column name(s) will be generated during evaluation")
}
return(sVar.exprs)
}
#' Define Summary Measures sA and sW
#'
#' Define and store summary measures \code{sW} and \code{sA} that can be later processed inside
#' \code{\link{eval.summaries}} or \code{\link{tmlenet}} functions.
#' \code{def_sW} and \code{def_sA} return an \code{R6} object of class \code{\link{DefineSummariesClass}}
#' which stores the user-defined summary measure functions of the baseline covariates \code{W}
#' and exposure \code{A}, which can be later evaluated inside the environment of the input \code{data} data frame.
#' Note that calls to \code{def_sW} must be used for defining the summary measures that are functions
#' of \strong{only the baseline covariates \code{W}}, while calls to \code{def_sA} must be used
#' for defining the summary measures that are functions of both, \strong{the baseline covariates \code{W}
#' and exposure \code{A}}.
#' Each summary measure is specified as an evaluable R expression or a string that can be parsed into
#' an evaluable R expression. Any variable name that exists as a named column in the input \code{data}
#' data frame can be used as part of these expressions.
#' Separate calls to \code{def_sW/def_sA} functions can be aggregated into a single collection with '+' function,
#' e.g., \code{def_sW(W1)+def_sW(W2)}.
#' A special syntax is allowed inside these summary expressions:
#' \itemize{
#' \item \code{'Var[[index]]'} - will index the friend covariate values of the variable \code{Var}, e.g.,
#' \code{'Var[[1]]'} will pull the covariate value of \code{Var} for the first friend, \code{'Var[[Kmax]]'}
#' of the last friend, and
#' \code{'Var[[0]]'} is equivalent to writing \code{'Var'} itself (indexes itself).
#' }
#' A special argument named \code{replaceNAw0} can be also passed to the \code{def_sW}, \code{def_sA} functions:
#' \itemize{
#' \item \code{replaceNAw0 = TRUE} - automatically replaces all the missing network covariate values
#' (\code{NA}) with \code{0}.
#' }
#' One can then test the evaluation of these summary measures by either passing the returned
#' \code{\link{DefineSummariesClass}} object to function \code{\link{eval.summaries}} or by calling the
#' internal method \code{eval.nodeforms(data.df, netind_cl)} on the result returned by \code{def_sW} or \code{def_sA}.
#' Each separate argument to \code{def_sW} or \code{def_sA} represents a new summary measure.
#' The user-specified argument name defines the name of the corresponding summary measure
#' (where the summary measure represents the result of the evaluation of the corresponding R expression specified by the argument).
#' When a particular argument is unnamed, the summary measure name
#' will be generated automatically (see Details, Naming Conventions and Examples below).
#'
#' @section Details:
#'
#' The R expressions passed to these functions are evaluated later inside \code{\link{tmlenet}} or
#' \code{\link{eval.summaries}} functions,
#' using the environment of the input data frame, which is enclosed within the user-calling environment.
#'
#' Note that when observation \code{i} has only \code{j-1} friends, the \code{i}'s value of \code{"W_netFj"} is
#' automatically set to \code{NA}.
#' This can be an undersirable behavior in some circumstances, in which case one can automatically replace all such
#' \code{NA}'s with \code{0}'s by setting the argument \code{replaceMisVal0 = TRUE} when calling functions
#' \code{def_sW} or \code{def_sA}, i.e., \code{def_sW(W[[1]], replaceMisVal0 = TRUE)}.
#'
#' @section Naming conventions:
#' Naming conventions for summary measures with no user-supplied name (e.g., \code{def_sW(W1)}).
#'
#' ....................................
#' \itemize{
#' \item If only one unique variable name is used in the summary expression (only one parent), use the variable
#' name itself to name the summary measure;
#' \item If there is more than 1 unique variable name (e.g., \code{"W1+W2"}) in the summary expression, throw an exception
#' (user must always supply summary measure names for such expressions).
#' }
#'
#' Naming conventions for the evaluation results of summary measures defined by \code{def_sW} & \code{def_sA}.
#'
#' ....................................
#' \itemize{
#' \item When summary expression evaluates to a vector result, the vector is first converted to a 1 col matrix,
#' with column name set equal to the summary expression name;
#' \item When the summary measure evaluates to a matrix result and the expression has only one unique variable
#' name (one parent), the matrix column names are generated as follows: for the expressions such as \code{"Var"}
#' or \code{"Var[[0]]"}, the column names \code{"Var"} are assigned
#' and for the expressions such as \code{"Var[[j]]"}, the column names \code{"Var_netFj"} are assigned.
#' \item When the summary measure (e.g., named \code{"SummName"}) evaluates to a matrix and either: 1) there is
#' more than one unique variable name used inside the expression (e.g., \code{"A + 2*W"}),
#' or 2) the resulting matrix has empty (\code{""}) column names, the column names are assigned according to the
#' convention:
#' \code{"SummName.1"}, ..., \code{"SummName.ncol"},
#' where \code{"SummName"} is replaced by the actual summary measure name and \code{ncol} is the number of columns
#' in the resulting matrix.
#' }
#' @param ... Named R expressions or character strings that specify the formula for creating the summary measures.
#' @return R6 object of class \code{DefineSummariesClass} which can be passed as an argument to \code{eval.summaries}
#' and \code{tmlenet} functions.
#' @seealso \code{\link{eval.summaries}} for
#' evaluation and validation of the summary measures,
#' \code{\link{tmlenet}} for estimation,
#' \code{\link{DefineSummariesClass}} for details on how the summary measures are stored and evaluated.
#' @example tests/examples/2_defsWsA_examples.R
#' @export
def_sW <- function(...) {
# call outside fun that parses ... and assigns empty names "" if the names attribute not set:
sVar.exprs <- capture.exprs(...)
sVar.exprs <- c(sVar.exprs, list(nF = "nF")) # add nF node (vector with counts of friends):
node_evaluator <- DefineSummariesClass$new(type = "sW")
node_evaluator$set.user.env(user.env = parent.frame())
node_evaluator$set.new.exprs(exprs_list = sVar.exprs)
return(node_evaluator)
}
#' @rdname def_sW
#' @export
def_sA <- function(...) {
# call outside fun that parses ... and assigns empty names "" if the names attribute not set:
sVar.exprs <- capture.exprs(...)
node_evaluator <- DefineSummariesClass$new(type = "sA")
node_evaluator$set.user.env(user.env = parent.frame())
node_evaluator$set.new.exprs(exprs_list = sVar.exprs)
return(node_evaluator)
}
#' @rdname def_sW
#' @export
def_new_sA <- function(...) {
# call outside fun that parses ... and assigns empty names "" if the names attribute not set:
sVar.exprs <- capture.exprs(...)
node_evaluator <- DefineSummariesClass$new(type = "new.sA")
node_evaluator$set.user.env(user.env = parent.frame())
node_evaluator$set.new.exprs(exprs_list = sVar.exprs)
return(node_evaluator)
}
# S3 method '+' for adding two DefineSummariesClass objects
# Summary measure lists in both get added as c(,) into the summary measures in sVar1 object
#' @rdname def_sW
#' @param sVar1 An object returned by a call to \code{def_sW} or \code{def_sA} functions.
#' @param sVar2 An object returned by a call to \code{def_sW} or \code{def_sA} functions.
#' @export
`+.DefineSummariesClass` <- function(sVar1, sVar2) {
assert_that(is.DefineSummariesClass(sVar1))
assert_that(is.DefineSummariesClass(sVar2))
assert_that(all.equal(sVar1$type, sVar2$type))
# Copy the content of both objects into a new instance, so as not to ruin the objects sVar1 & sVar2 by reference
# node_evaluator <- DefineSummariesClass$new(type = sVar1$type)
sVar1.clone <- sVar1$clone()
# remove duplicate nF node from sVar1 (keep the one in sVar2)
if (sVar1.clone$type %in% "sW") {
sVar1.clone <- sVar1.clone$remove.expr(SummaryName = "nF")
}
sVar1.clone$add.new.exprs(NewSummaries = sVar2)
return(sVar1.clone)
}
# ------------------------------------------------------------------------------------------
# Standardize all names (and fill-in the empty names) according TO THE *SAME* *NAMING* *CONVENTION*;
# ------------------------------------------------------------------------------------------
eval.standardize.expr <- function(expr.idx, self, data.df) {
# -------------------------------------------------------
# First evaluate the expression result:
# -------------------------------------------------------
evalres <- eval.nodeform.out(expr.idx = expr.idx, self = self, data.df = data.df)
expr_char <- self$exprs_list[[expr.idx]] # expression itself as string
expr_nm <- names(self$exprs_list)[expr.idx] # current expression name
expr_parents <- evalres[["par.nodes"]] # names of parents vars for this expression
# -------------------------------------------------------
# no user-supplied argument name, hence need to name this expression:
# -------------------------------------------------------
# flag TRUE if user did not provide an argument name for self$exprs_list[expr.idx]:
expr_noname <- (names(self$exprs_list)[expr.idx] %in% "")
if (expr_noname && (length(expr_parents)>1)) {
stop("must name complex expressions that involve more than one variable: " %+% expr_char)
} else if (expr_noname && !is.null(expr_parents) && is.character(expr_parents)) {
if (gvars$verbose) {
message("assigning a name '" %+% expr_parents %+% "' to expression: " %+% expr_char)
}
expr_nm <- expr_parents
} else if (expr_noname) stop(expr_char%+% ": parents are null or not a character vector")
# -------------------------------------------------------
# convert vectors to columns, name the matrix columns according to the same naming convention:
# -------------------------------------------------------
# evaluation result:
# if result a vector: convert to one-col matrix assign a name: names(self$exprs_list)[expr.idx] = expr_parents
if (is.vector(evalres[["evaled_expr"]])) {
expr_res <- matrix(data = evalres[["evaled_expr"]], ncol = 1)
colnames(expr_res) <- expr_nm
return(list(new_expr_name = expr_nm, evaled_expr = expr_res, par.nodes = evalres[["par.nodes"]]))
# for matrix results: if column names exist (!is.null(colnames(expr_res))): DO NOTHING
# if column names don't exist (is.null(colnames(expr_res))) or some are empty strings "":
} else if (is.matrix(evalres[["evaled_expr"]])) {
if (is.null(colnames(evalres[["evaled_expr"]])) || (any(colnames(evalres[["evaled_expr"]])%in%"")) || (length(expr_parents)>1)) {
# assign names by convention: <- expr_nm%+%"."%+%c(1:ncol(expr_res))
colnames(evalres[["evaled_expr"]]) <- expr_nm%+%"."%+%c(1:ncol(evalres[["evaled_expr"]]))
}
return(list(new_expr_name = expr_nm, evaled_expr = evalres[["evaled_expr"]], par.nodes = evalres[["par.nodes"]]))
} else {
# if result is not a vector or matrix: throw an exception
stop(expr_char%+% ": summary measure result type is " %+%class(evalres[["evaled_expr"]])%+%"; only matrix or a vector results are supported")
}
}
# -----------------------------------------------------------------------------------------------------------------------
# Special functions for network/time subsetting.
# Kept in a list, these functions over-ride the standard function `[` and `[[` when evaluating the summary measures
# -----------------------------------------------------------------------------------------------------------------------
node_fun <- list(
vecapply = function(X, idx, func) { # custom wrapper for apply that turns a vector X into one column matrix
if (is.vector(X)) dim(X) <- c(length(X), 1) # returns TRUE only if the object is a vector with no attributes apart from names
# if (is.atomic(x) || is.list(x)) dim(X) <- c(length(X), 1) # alternative way to test for vectors
x <- parse(text = deparse(func))[[1]]
nargs <- length(x[[2]])
if (nargs>1) {
funline <- deparse(func)[1]
stop(funline%+%
". Node formulas cannot call non-vectorized functions with more than one named argument. If this is a vectorized function, pass its name to set.DAG(, vecfun=).")
}
apply(X, idx, func)
},
cbind_mod = function(...) { # cbind wrapper for c(,) calls in node formulas, turns one row matrix into repeat Nsamp row matrix
env <- parent.frame()
cbind_res <- do.call("cbind", eval(substitute(alist(...)), envir = env) , envir = env)
if (nrow(cbind_res)==1) {
# Nsamp <- get("Nsamp", envir = env)
Nsamp <- env$self$Nsamp
dprint("env$self$Nsamp:"); dprint(env$self$Nsamp)
assert_that(!is.null(Nsamp))
cbind_res <- matrix(cbind_res, nrow = Nsamp, ncol = ncol(cbind_res), byrow = TRUE)
}
dprint("cbind_res"); dprint(cbind_res)
cbind_res
},
# custom function for vector look up '['
# function takes the name of the TD var and index vector => creates a vector of time-varying column names in df
# returns matrix TD_var[indx]
# ***NOTE: current '[' cannot evalute subsetting that is based on values of other covariates such as A1C[ifelse(BMI<5, 1, 2)]
`[` = function(var, indx, ...) {
env <- parent.frame()
t <- env$t # t <- get("t", envir = env)
var <- substitute(var)
var.chr <- as.character(var)
if (missing(indx)) stop("missing tindex when using Var[tindex] inside the node formula")
if (identical(class(indx),"logical")) indx <- which(indx)
if (is.null(t)) stop("references, s.a. Var[t] are not allowed when t is undefined")
if (max(indx)>t) stop(paste0(var, "[", max(indx),"] cannot be referenced in node formulas at t = ", t)) # check indx<= t
# ******* NOTE *******
# Don't like the current implementation that defines TDvars as characters and then returns a matrix by cbinding
# the existing columins in existing data.frame. This is possibly wasteful. Could we instead subset the existing data.frame?
TDvars <- var.chr%+%"_"%+%indx
# Checking the variables paste0(var, "_", indx) exist in simulated data.frame environment:
dprint("ANCHOR_ALLVARNMS_VECTOR_0:"); dprint(env[["ANCHOR_ALLVARNMS_VECTOR_0"]])
# TO DO: ****
# EXTEND TO CHECKING FOR TDvar IN ENCLOSING ENVIRONMENT (user.env) AS WELL IF TDvar_t doesn't exist in the data
# IF TDvar exists check that its a vector of appropriate length, index it accordinly (using which(t%in%tvec))
# will need to first eval such vector the variable as in:
# var.val <- eval(var, envir = env)
existsTDVar <- function(TDvar_t) TDvar_t %in% env[["ANCHOR_ALLVARNMS_VECTOR_0"]]
check_exist <- sapply(TDvars, existsTDVar)
if (!all(check_exist)) stop("undefined time-dependent variable(s): "%+%TDvars[which(!check_exist)])
# THIS STEP COULD BE MORE MEMORY EFFICIENT IF WAS SUBSETTING INSTEAD (BY COLS) ON EXISTING data MATRIX:
TDvars_eval <- eval(parse(text=paste0("cbind(",paste0(TDvars, collapse=","),")")), envir = env)
return(TDvars_eval)
},
# Builds netVar matrix by using matrix env$NetIndobj$NetInd_k, cbind on result
# For W[[0]] to work without if else below need to do this:
# NetInd_k <- cbind(c(1:n), NetInd_k) and then netidx <- netidx + 1
`[[` = function(var, netidx, ...) {
env <- parent.frame()
t <- env$t # t <- get("t", envir = env)
if (!is.null(t)) stop("simultaneous time varying node references Var[t] and network references Var[[netidx]] are currently not supported")
if (missing(netidx)) stop("network index (netidx) must be specified when using Var[[netidx]]")
netind_cl <- env$netind_cl
if (is.null(netind_cl)) stop("Network must be defined when using Var[[netidx]] syntax")
Kmax <- netind_cl$Kmax
var <- substitute(var)
var.chr <- as.character(var)
if (! (var.chr %in% env[["ANCHOR_ALLVARNMS_VECTOR_0"]])) stop("variable " %+% var.chr %+% " doesn't exist")
var.val <- eval(var, envir = env)
n <- length(var.val)
if (identical(class(netidx),"logical")) netidx <- which(netidx)
netVars_eval <- matrix(0L, nrow = n, ncol = length(netidx))
colnames(netVars_eval) <- netvar(var.chr, netidx)
# make_net_mat_time <- system.time(
for (neti in seq_along(netidx)) {
if (netidx[neti] %in% 0L) {
netVars_eval[, neti] <- var.val
} else {
netVars_eval[, neti] <- var.val[netind_cl$NetInd_k[, netidx[neti]]]
# opting for replace on entire netVars_eval, will need to do benchmarks later to compare:
# netVars_eval[is.na(netVars_eval[, neti]), neti] <- env$misXreplace
}
}
# )
# print("make_net_mat_time: "); print(make_net_mat_time)
# Don't need to do this if env$misXreplace==gvars$misval (i.e., when want to leave NAs as is)
# replaceNA_time <- system.time(
netVars_eval[is.na(netVars_eval)] <- env$misXreplace
# )
# print("replaceNA_time"); print(replaceNA_time)
return(netVars_eval)
}
)
update.intervention.sA <- function(new.sA, sA) {
assert_that(is.DefineSummariesClass(new.sA))
# ------------------------------------------------------------------------------------------------
# DEFINE AND EVALUATE INTERVENTION SUMMARIES:
# ------------------------------------------------------------------------------------------------
# 1. Copy (clone) sA object since we are going to modify it (over-write some summaries in sA$exprs_list with new intervention summaries)
intervened.sA <- sA$clone()
intervened.sA$type <- new.sA$type
# 2. Replace summaries/nodes in observed sA that were also defined in new.sA with their new (intervened) expressions:
for (expr.name.sA in names(new.sA$exprs_list)) {
intervened.sA$replace.expr(SummaryName = expr.name.sA, newSummaries = new.sA)
}
new.sA <- intervened.sA
print("updated expression list for new.sA: "); print(new.sA$exprs_list)
Anodes <- new.sA$Anodes
print("detected Anodes based on new.sA:"); print(Anodes)
# 3. Evaluate a copy of new summaries ->
# Note that this will require making a copy of OdataDT in OdataDT_R6, since calling
# datnetA$make.sVar(Odata = OdataDT_R6, sVar.object = new.sA) will modify OdataDT IN PLACE
# in all instances at the same time: datnetW$dat.sVar, datnetA$dat.sVar & DatNet.ObsP0$dat.sVar
# OdataDT_R6$OdataDT
# OdataDT_R6$backupAnodes(Anodes = Anodes)
# OdataDT_R6$A_g0_DT
# new.sA.dat <- new.sA$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
# OdataDT_R6$restoreAnodes(Anodes = Anodes)
# OdataDT_R6$OdataDT
# obs.sA.dat <- sA$eval.nodeforms(data.df = OdataDT_R6$OdataDT, netind_cl = netind_cl)
# 4. Clean-up input data: remove all variables not mentioned in sW/sA from OdataDT_R6$OdataDT
# This could be a problem if a formula, such as def_sw(PA0 = (PA==0)) exists, but PA itself wasn't defined
# The code below will then remove PA from the observed data (which should have been kept)
# *** NEED TO ALSO ADD Y TO THIS (outcome)
# allvars <- as.vector(c(unlist(sW$sVar.names.map), unlist(sA$sVar.names.map), "Y"))
# allDatNames <- names(OdataDT_R6$OdataDT)
# remove_vars <- allDatNames[!(allDatNames %in% allvars)]
# OdataDT_R6$OdataDT[, (remove_vars):=NULL]
return(new.sA)
}
## ---------------------------------------------------------------------
#' R6 class for parsing and evaluating user-specified summary measures (in \code{exprs_list})
#'
#' This \pkg{R6} class that inherits from \code{Define_sVar} and can parse and evaluate (given the input data frame) the summary measures defined by functions
#' \code{\link{def_sW}} and \code{\link{def_sA}}.
#' The object of this class is generally instantiated by calling functions \code{def_sA} or \code{def_sW}.
#' The summary expressions (stored in \code{exprs_list}) are evaluated in the environment of the input data.frame.
#' Note that the evaluation results of the summary measures are never stored inside this class,
#' data can be stored only inside \code{\link{DatNet}} and \code{\link{DatNet.sWsA}} \pkg{R6} classes.
#'
#' @docType class
#' @format An \code{\link{R6Class}} generator object
#' @keywords R6 class
#' @details
#' \itemize{
#' \item{\code{type}} - Type of the summary measure, \code{sW} or \code{sA}, determined by the calling functions \code{\link{def_sW}} or \code{\link{def_sA}}.
#' \item{\code{exprs_list}} - Deparsed list of summary expressions (as strings).
#' \item{\code{new_expr_names}} - The summary measure names, if none were provided by the user these will be
#' evaluated on the basis of variable names used in the summary expression itself.
#' \item{\code{sVar.names.map}} - Named list that maps the user specified summary measure names to the corresponding matrix column names
#' from the summary measure evaluation result.
#' }
#' @section Methods:
#' \describe{
#' \item{\code{new(type)}}{Instantiate a new object of class \code{DefineSummariesClass} by providing a type, \code{"sW"} or \code{"sA"}.}
#' \item{\code{set.new.exprs(exprs_list)}}{Sets the internal summary measure expressions to the list provided in \code{exprs_list}.}
#' \item{\code{add.new.exprs(NewSummaries)}}{Adds new internal summary measure expressions to the existing ones, \code{NewSummaries}
#' must be an object of class \code{DefineSummariesClass} (to enable \code{Object1 + Object2} syntax).}
#' item{\code{remove.expr(SummaryName)}}{Remove expression by name (for removing duplicate 'nF' expressions for repeated calls with def_sW()+def_sW()).}
#' \item{\code{eval.nodeforms(data.df, netind_cl)}}{Evaluate the expressions one by one, standardize all names according to one naming
#' convention (described in \code{\link{def_sW}}), \code{cbind}ing results together into one output matrix. \code{data.df} is the input
#' data.frame and \code{netind_cl} is the input network stored in an object of class \code{\link[simcausal]{NetIndClass}}.}
#' \item{\code{df.names(data.df)}}{List of variables in the input data \code{data.df} gets assigned to a special
#' variable (\code{ANCHOR_ALLVARNMS_VECTOR_0}).}
#' }
#' @importFrom assertthat assert_that
#' @export
DefineSummariesClass <- R6Class("DefineSummariesClass",
class = TRUE,
portable = TRUE,
# inherit = Define_sVar,
public = list(
type = NA, # "sW" or "sA" depending on which functions called the constructor: def_sW or def_sA
exprs_list = list(), # list of expressions in character strings, with attribute "names" set to the user-supplied names of the expressions
user.env = NULL, # user environment to be used as enclos arg to eval(sVar)
cur.node = list(), # current evaluation node (set by self$eval.nodeforms())
asis.flags = list(), # list of flags, TRUE for "as is" node expression evaluation
ReplMisVal0 = FALSE, # vector of indicators, for each TRUE sVar.expr[[idx]] will replace all NAs with gvars$misXreplace (0)
sVar.misXreplace = NULL, # replacement values for missing sVar, vector of length(exprs_list)
netind_cl = NULL,
Kmax = NULL,
Nsamp = NULL, # sample size (nrows) of the simulation dataset
new_expr_names = list(), # re-evaluated summary measure names, if non provided by the user these will be evaluated on the basis of variable names used in the expression
sVar.names.map = list(), # the map between user-supplied expression (argument names) to the column names of each expression in self$exprs_list
Anodes = character(), # vector with unique intervention node/summary names (from new.sA)
node_fun = node_fun,
evaled_before = FALSE, # Indicator that these summaries have been evaluated on the real data at least once
initialize = function(type) {
self$type <- type
invisible(self)
},
# define new summary measures to be evaluated later:
# will define 1) self$exprs_list; 2) self$asis.flags; 3) self$ReplMisVal0; 4) self$sVar.misXreplace
# initialize the map from self$exprs_list to variable names in each expression (column names)
set.new.exprs = function(exprs_list) {
self$exprs_list <- exprs_list
self$asis.flags <- attributes(exprs_list)[["asis.flags"]]
# check for special argument replaceNAw0, if exists, remove it from the list of expressions:
if (any(names(self$exprs_list) %in% "replaceNAw0")) {
ReplMisVal0.idx <- which(names(self$exprs_list) %in% "replaceNAw0")
self$ReplMisVal0 <- as.logical(self$exprs_list[[ReplMisVal0.idx]])
self$exprs_list <- self$exprs_list[-ReplMisVal0.idx]
if (gvars$verbose) {
print("Detected replaceNAw0 flag with value: " %+% self$ReplMisVal0);
}
}
# If doesn't already exist, init setting for the names attribute of self$exprs_list:
if (is.null(names(self$exprs_list))) names(self$exprs_list) <- rep_len("", length(self$exprs_list))
# Error when un-named expressions (name(self$exprs_list[[i]])=="") are defined for intervention summaries:
if ((self$type %in% "new.sA") && length(self$exprs_list) != 0 && (is.null(names(self$exprs_list)) || any(names(self$exprs_list)==""))) {
stop("must provide a name for each node expression")
}
if (is.null(self$asis.flags)) {
self$asis.flags <- as.list(rep.int(FALSE, length(self$exprs_list)))
names(self$asis.flags) <- names(self$exprs_list)
}
self$ReplMisVal0 <- rep_len(self$ReplMisVal0, length(self$exprs_list))
self$sVar.misXreplace <- ifelse(self$ReplMisVal0, gvars$misXreplace, gvars$misval)
# self$sVar.noname <- rep_len(self$sVar.noname, length(self$exprs_list))
if (gvars$verbose) {
print("Final node expression(s) list: "); print(self$exprs_list)
}
self$sVar.names.map <- vector(mode="list", length = length(self$exprs_list))
invisible(self)
},
# add summary measures to existing ones (to enable Object1 + Object2 syntax):
add.new.exprs = function(NewSummaries) {
assert_that(is.DefineSummariesClass(NewSummaries))
self$exprs_list <- c(self$exprs_list, NewSummaries$exprs_list)
self$asis.flags <- c(self$asis.flags, NewSummaries$asis.flags)
self$ReplMisVal0 <- c(self$ReplMisVal0, NewSummaries$ReplMisVal0)
self$sVar.misXreplace <- c(self$sVar.misXreplace, NewSummaries$sVar.misXreplace)
self$sVar.names.map <- c(self$sVar.names.map, NewSummaries$sVar.names.map)
return(self)
},
# remove existing summary measure
# to enable overwriting nF summary measure in Object1 with nF summary measure from Object2
# when doing Object1 + Object2 syntax.
remove.expr = function(SummaryName) {
assert_that(is.character(SummaryName) && (length(SummaryName)==1L) && (!SummaryName%in%""))
if (any(names(self$exprs_list) %in% SummaryName)) {
remove_idx <- which(names(self$exprs_list)%in% SummaryName)
self$exprs_list <- self$exprs_list[-remove_idx]
self$asis.flags <- self$asis.flags[-remove_idx]
self$ReplMisVal0 <- self$ReplMisVal0[-remove_idx]
self$sVar.misXreplace <- self$sVar.misXreplace[-remove_idx]
self$sVar.names.map <- self$sVar.names.map[-remove_idx]
}
return(self)
},
# Replace old summary measure with its new definition (give an error if the summary doesn't exist)
# to enable overwriting observed summaries/nodes in def_sA with new intervention summaries/nodes.
replace.expr = function(SummaryName, newSummaries) {
assert_that(is.character(SummaryName) && (length(SummaryName)==1L) && (!SummaryName%in%""))
if (!self$evaled_before) stop("Can't replace existing summaries which haven't yet been evaluated on the real data at least once")
newSummaries_idx <- which(names(newSummaries$exprs_list) %in% SummaryName)
if (any(names(self$exprs_list) %in% SummaryName)) {
replace_idx <- which(names(self$exprs_list) %in% SummaryName)
self$exprs_list[replace_idx] <- newSummaries$exprs_list[newSummaries_idx]
self$asis.flags[replace_idx] <- newSummaries$asis.flags[newSummaries_idx]
self$ReplMisVal0[replace_idx] <- newSummaries$ReplMisVal0[newSummaries_idx]
self$sVar.misXreplace[replace_idx] <- newSummaries$sVar.misXreplace[newSummaries_idx]
# self$sVar.names.map[replace_idx] <- newSummaries$sVar.names.map[newSummaries_idx] # should not be replacing the map
# mark the summary/node name(s) that was replaced:
newAnodes <- self$sVar.names.map[[SummaryName]]
self$Anodes <- c(self$Anodes, newAnodes)
# self$Anodes <- c(self$Anodes, SummaryName)
} else {
stop("the intervention summary measure name has not been previously defined: " %+% SummaryName)
}
return(self)
},
# Evaluate the expressions one by one, standardize all names according to one naming convention,
# cbinding results together into one output matrix
eval.nodeforms = function(data.df, netind_cl) {
assert_that(is.data.frame(data.df) | is.data.table(data.df))
if (is.data.frame(data.df)) setDT(data.df)
if (missing(netind_cl) && is.null(self$netind_cl)) stop("must specify netind_cl arg at least once")
if (!missing(netind_cl)) self$netind_cl <- netind_cl
self$Kmax <- self$netind_cl$Kmax
self$Nsamp <- nrow(data.df)
sVar.res_l <- self$new_expr_names <- vector(mode = "list", length = length(self$exprs_list))
if (!is.null(self$sVar.names.map[[1]])) {
preserved.sVar.names.map <- self$sVar.names.map
} else {
preserved.sVar.names.map <- NULL
}
self$sVar.names.map <- self$new_expr_names
for (i in seq_along(self$exprs_list)) {
saved.col.names <- NULL
if (!is.null(preserved.sVar.names.map) && (self$exprs_list[[i]] %in% names(preserved.sVar.names.map))) {
saved.col.names <- preserved.sVar.names.map[[which(names(preserved.sVar.names.map) %in% self$exprs_list[[i]])]]
self$exprs_list[[i]] <- saved.col.names
if (length(self$exprs_list[[i]]) > 1) self$exprs_list[[i]] <- "c(" %+% paste0(self$exprs_list[[i]], collapse = ",") %+% ")"
}
sVar.eval.res <- eval.standardize.expr(i, self = self, data.df = data.df)
# sVar.res_l[[i]] <- sVar.eval.res
if (!is.null(preserved.sVar.names.map) && !is.null(saved.col.names)) colnames(sVar.eval.res$evaled_expr) <- saved.col.names
self$new_expr_names[[i]] <- sVar.eval.res$new_expr_name
self$sVar.names.map[[i]] <- colnames(sVar.eval.res$evaled_expr)
for (colname in colnames(sVar.eval.res$evaled_expr))
data.df[, (colname):= sVar.eval.res$evaled_expr[,colname]]
}
# print("eval_and_addDT"); print(eval_and_addDT)
# eval_only <- system.time(sVar.res_l <- lapply(seq_along(self$exprs_list), eval.standardize.expr, self = self, data.df = data.df))
# print("eval_only: "); print(eval_only)
# self$new_expr_names <- lapply(sVar.res_l, '[[', 'new_expr_name')
names(self$new_expr_names) <- unlist(self$new_expr_names)
names(self$exprs_list) <- names(self$new_expr_names)
# names(sVar.res_l) <- names(self$new_expr_names)
# assign self$sVar.names.map based on newly standardized summary names:
# self$sVar.names.map <- lapply(sVar.res_l, function(x) colnames(x[["evaled_expr"]]))
names(self$sVar.names.map) <- names(self$new_expr_names)
# ************************************************************************************
# CHANGE THIS TO MERGING DUPLICATE SUMMARY MEASRURES INTO ONE
# ************************************************************************************
# 1) remove all duplicate summary measures (by name), keeping the ones that were added last:
if (length(unique(names(self$new_expr_names))) < length(names(self$new_expr_names))) {
duplic_idx <- duplicated(self$new_expr_names, fromLast = TRUE)
message("warning: detected duplicate summary measure names, (" %+%
paste0(self$new_expr_names[duplic_idx], collapse=",") %+%
"), all duplicates starting from first to last will be removed...")
# sVar.res_l <- sVar.res_l[-duplic_idx]
self$sVar.names.map <- self$sVar.names.map[-duplic_idx]
self$new_expr_names <- self$new_expr_names[-duplic_idx]
self$exprs_list <- self$exprs_list[-duplic_idx]
}
# mat.sVar <- do.call("cbind", lapply(sVar.res_l, function(x) x[["evaled_expr"]]))
# Indicator that these summaries have been evaluated on the real data at least once:
self$evaled_before <- TRUE
return(invisible(data.df))
},
# This user.env is used for eval'ing each sVar exprs (enclos = user.env)
set.user.env = function(user.env) {
assert_that(!is.null(user.env))
assert_that(is.environment(user.env))
self$user.env <- user.env
},
# List of variable names from data.df with special var name (ANCHOR_ALLVARNMS_VECTOR_0):
df.names = function(data.df) {
return(list(ANCHOR_ALLVARNMS_VECTOR_0 = colnames(data.df)))
}
),
active = list(
placeholder = function() {}
),
private = list(
privplaceholder = function() {}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.