#-----------------------------------------------------------------------------
# Fit and Predict the IPTW (clever covariate) for summary measures (sW,sA):
# P_{\bar{g}^*}(sA | sW)/P_{\bar{g}0}(sA | sW)
#-----------------------------------------------------------------------------
# @title Predict h weights under g_0 and g_star using existing m.h.fit model fit
# @name pred.hbars
# @export
# fit models for m_gAi
predict.hbars <- function(newdatnet = NULL, m.h.fit) {
lbound <- m.h.fit$lbound
# netA_names <- m.h.fit$m.gAi_vec_g$sA_nms
# determ_cols_Friend <- m.h.fit$determ_cols_Friend # Should this be saved in m.gAi_vec_g and m.gAi_vec_gstar objects instead?
# if (!is.null(newdatnet)) {
# NetInd_k <- newdatnet$netind_cl$NetInd_k
# determ.g_user <- newdatnet$determ.g
# determ_cols_user <- .f.allCovars(k, NetInd_k, determ.g_user, "determ.g_true")
# determ_cols <- (determ_cols_user | determ_cols_Friend)
# }
# use original fitted data for prediction
if (is.null(newdatnet)) {
stop("newdatnet argument must be not null; this feature is not implemented")
# newdatnet <- m.h.fit$cY_mtx_fitted
# determ_cols <- m.h.fit$determ_cols_fitted
}
# PASS ENTIRE newdatnet which will get subset, rather than constructing cY_mtx...
# if (h_user==FALSE) {
h_gN <- m.h.fit$summeas.g0$predictAeqa(newdata = newdatnet)
h_gstar <- m.h.fit$summeas.gstar$predictAeqa(newdata = newdatnet)
# }
h_gstar_h_gN <- h_gstar / h_gN
h_gstar_h_gN[is.nan(h_gstar_h_gN)] <- 0 # 0/0 detection
h_gstar_h_gN <- bound(h_gstar_h_gN, c(0,1/lbound))
return(h_gstar_h_gN)
}
# @title Defining and fitting the clever covariate h under g_0 and g_star, i.e. models P(sA[j] | sW,sA[j])
# @name fit.hbars
# @importFrom assertthat assert_that is.count
# @export
# fit models for m_gAi
#---------------------------------------------------------------------------------
fit.hbars <- function(DatNet.ObsP0, est_params_list) {
# .f.mkstrNet <- function(Net) apply(Net, 1, function(Net_i) paste(Net_i, collapse=" ")) # defining the vector of c^A`s that needs evaluation under h(c)
#---------------------------------------------------------------------------------
# PARAMETERS FOR LOGISTIC ESTIMATION OF h
#---------------------------------------------------------------------------------
Odata <- DatNet.ObsP0$Odata # R6 object of class "OdataDT" that contains observed data.table in $OdataDT
O.datnetW <- DatNet.ObsP0$datnetW # R6 object of class "DatNet" that points to observed data.table (but doesn't store anything)
O.datnetA <- DatNet.ObsP0$datnetA # R6 object of class "DatNet" that points to observed data.table (but doesn't store anything)
lbound <- est_params_list$lbound
max_npwt <- est_params_list$max_npwt # NOT IMPLEMENTED
ng.MCsims <- est_params_list$ng.MCsims # replace with p adaptive to k: p <- 100*(2^k)
sW <- est_params_list$sW
sA <- est_params_list$sA
new.sA <- est_params_list$new.sA # intervention summaries
h.g0.sVars <- est_params_list$h.g0.sVars
h.gstar.sVars <- est_params_list$h.gstar.sVars
f.gstar <- est_params_list$f.gstar
f.g0 <- est_params_list$f.g0
h_g0_SummariesModel <- est_params_list$h_g0_SummariesModel
if (!is.null(h_g0_SummariesModel)) {
message("NOTE: Predictions for P(sA|sW) under g0 will be based on the model fit in h_g0_SummariesModel," %+%
"all modeling settings will be ignored")
}
h_gstar_SummariesModel <- est_params_list$h_gstar_SummariesModel
if (!is.null(h_gstar_SummariesModel)) {
message("NOTE: Predictions for P(sA^*|sW^*) under f_gstar will be based on the model fit in h_g0_SummariesModel," %+%
" all modeling settings will be ignored")
}
h_logit_sep_k <- est_params_list$h_logit_sep_k # NOT IMPLEMENTED
# h_user=est_params_list$h_user; h_user_fcn=est_params_list$h_user_fcn; NOT IMPLEMENTED
#---------------------------------------------------------------------------------
# Getting OBSERVED sW
#---------------------------------------------------------------------------------
# Summary measure names / expressions:
sW.g0_nms <- h.g0.sVars$predvars
sW.gstar_nms <- h.gstar.sVars$predvars
# *****
# Check that these summary measures exist in O.datnetW$names.sVar
check.sW.g0.exist <- unlist(lapply(unlist(sW.g0_nms), function(sWname) sWname %in% c(O.datnetW$names.sVar,O.datnetA$names.sVar)))
if (!all(check.sW.g0.exist)) stop("the following predictors from hform.g0 regression could not be located in sW/sA summary measures: " %+%
paste0(sW.g0_nms[!check.sW.g0.exist], collapse = ","))
check.sW.gstar.exist <- unlist(lapply(sW.gstar_nms, function(sWname) sWname %in% c(O.datnetW$names.sVar,O.datnetA$names.sVar)))
if (!all(check.sW.gstar.exist)) stop("the following predictors from hform.gstar regression could not be located in sW/sA summary measures: " %+%
paste0(sW.gstar_nms[!check.sW.gstar.exist], collapse = ","))
#---------------------------------------------------------------------------------
# Getting OBSERVED sA
#---------------------------------------------------------------------------------
# Summary measure names / expressions:
sA_nms_g0 <- h.g0.sVars$outvars
sA_nms_gstar <- h.gstar.sVars$outvars
# ***********
# Check that the outcome summary measures defined by h.g0.sVars$outvars and h.gstar.sVars$outvars are equivalent:
# NOTE: might comment out in the future and allow different summary measures for sA_nms_g0 and sA_nms_gstar.
# ***********
for (idx in seq_along(sA_nms_g0)) {
if (!all(sA_nms_g0[[idx]] %in% sA_nms_gstar[[idx]])) {
stop("the outcome variable names defined by regressions hform.g0 & hform.gstar are not identical;" %+%
" current implementation requires these to be the same.")
}
}
# ***********
# Check that these summary measures exist in O.datnetA$names.sVar
check.sAg0.exist <- unlist(lapply(sA_nms_g0, function(sAname) sAname %in% O.datnetA$names.sVar))
if (!all(check.sAg0.exist)) stop("the following outcomes from hform.g0 regression could not be located in sA summary measures: " %+%
paste0(sA_nms_g0[!check.sAg0.exist], collapse = ","))
check.sAgstar.exist <- unlist(lapply(sA_nms_gstar, function(sAname) sAname %in% O.datnetA$names.sVar))
if (!all(check.sAgstar.exist)) stop("the following outcomes from hform.gstar regression could not be located in sA summary measures: " %+%
paste0(sA_nms_gstar[!check.sAgstar.exist], collapse = ","))
##########################################################
# *********** SUBSETTING ALGORITHM ***********
##########################################################
# NOTE: Subsetting works by var name only (which automatically evaluates as !gvars$misval(var)) for speed & memory efficiency
# Determine subsetting by !gvars$misval (which observations to include in each univariate regression)
# (1) For each UNIVARIATE regression (e.g., "A ~ W") specifies a VECTOR of variable names which should be
# jointly non-missing in the data, this then defined the observations that go into the design matrix.
# (2) For each MULTIVARIATE outcome regression with shared predictors (e.g., "A + sumA ~ W"),
# this should be a list of length equal to the number of outcomes.
# (3) For separate regressions with different predictors, i.e., different time-points (e.g., "A_1 + sumA_1 ~ W", "A_2 + sumA_2 ~ W + L_1"),
# this should be a list of lists of length equal to the total number of such regressions.
subsets_expr <- lapply(sA_nms_g0, function(var) lapply(var, function(var) {var}))
# subsets_expr <- lapply(sA_nms_g0, function(var) {var})
##########################################################
# **** DEPRECATED **** DEFINING SUBSETING EXPRESSIONS (FOR DETERMINISTIC / DEGENERATE sA)
##########################################################
# (1 subset expr per regression P(sA[j]|sA[j-1:0], sW))
# Old examples of subsetting expressions:
# based on the variable of gvars$misval (requires passing gvars envir for eval)
# subset_exprs <- lapply(netvar("determ.g_Friend", c(0:Kmax)), function(var) {var%+%" != "%+%"misval"})
# based on existing logical determ_g columns (TRUE = degenerate/determ):
# subset_exprs <- lapply(netvar("determ.g_true", c(0:Kmax)), function(var) {var%+%" != "%+%TRUE})
#-----------------------------------------------------------
##########################################################
# Summary class params:
##########################################
sA_class <- lapply(sA_nms_g0, function(sA_nms) O.datnetA$type.sVar[sA_nms])
# sA_class <- O.datnetA$type.sVar[sA_nms_g0[[1]]]
if (gvars$verbose) {
message("================================================================")
message("fitting h_g0 with summary measures: ", "P(" %+% paste(sA_nms_g0, collapse = ",") %+% " | " %+% paste(sW.g0_nms, collapse = ",") %+% ")")
message("================================================================")
}
# Make sure the observed Anodes are always backed-up prior to messing with Odata:
Odata$backupAnodes(Anodes = Odata$nodes$Anodes, sA = sA)
p_h0 <- ifelse(is.null(f.g0), 1, ng.MCsims)
if (!is.null(f.g0)) {
if (gvars$verbose) message("generating DatNet.g0 under known g0")
DatNet.g0 <- DatNet.sWsA$new(Odata = Odata, datnetW = O.datnetW, datnetA = O.datnetA)
DatNet.g0$make.dat.sWsA(p = p_h0, f.g_fun = f.g0, sA.object = sA, DatNet.ObsP0 = DatNet.ObsP0)
if (gvars$verbose) {
print("new DatNet.g0$dat.sWsA from known g0: "); print(head(DatNet.g0$dat.sWsA))
}
} else {
DatNet.g0 <- DatNet.ObsP0
}
regclass.g0 <- RegressionClass$new(sep_predvars_sets = TRUE,
outvar.class = sA_class,
outvar = sA_nms_g0,
predvars = sW.g0_nms,
subset = subsets_expr)
regclass.g0$S3class <- "generic"
# using S3 method dispatch on regclass.g0:
summeas.g0 <- newsummarymodel(reg = regclass.g0, DatNet.sWsA.g0 = DatNet.g0)
# summeas.g0 <- SummariesModel$new(reg = regclass.g0, DatNet.sWsA.g0 = DatNet.g0)
if (!is.null(h_g0_SummariesModel)) {
# 1) verify h_g0_SummariesModel is consistent with summeas.g0
assert_that(inherits(h_g0_SummariesModel, "SummariesModel"))
# 2) deep copy model fits in h_g0_SummariesModel to summeas.g0
summeas.g0 <- h_g0_SummariesModel$clone(deep=TRUE)
} else {
summeas.g0$fit(data = DatNet.g0)
}
# *********
# NEED TO PASS obsdat.sW.sA (observed data sWsA) to predict() funs.
# If !is.null(f.g_fun) then DatNet.g0$dat.sWsA IS NOT THE OBSERVED data (sWsA), but rather sWsA data sampled under known g_0.
# Option 1: Wipe out DatNet.g0$dat.sWsA with actually observed data - means that we can't use DatNet.g0$dat.sWsA in the future.
# Option 2: Create a new class DatNet.Obs of DatNet.sWsA (will be painful)
# Going with OPTION 1 for now:
# Already generated DatNet.ObsP0 in tmlenet:
h_gN <- summeas.g0$predictAeqa(newdata = DatNet.ObsP0)
if (length(h_gN)!=DatNet.ObsP0$nobs) stop("the IPW weight prediction under g0 return invalid vector length: " %+% length(h_gN))
# ------------------------------------------------------------------------------------
# to obtain the decomposed of the above probability by each Anode (marginals):
# (directly analogous to the g0 component of gmat in ltmle package)
# gmat.g0 <- matrix(nrow = length(h_gN), ncol = length(summeas.g0$getPsAsW.models()))
# for (i in seq_along(summeas.g0$getPsAsW.models())) {
# gmat.g0[,i] <- summeas.g0$getPsAsW.models()[[i]]$getcumprodAeqa()
# }
# cum.gmat.g0 <- matrixStats::rowCumprods(gmat.g0)
# # message("sum(cum.gmat.g0-h_gN): " %+% sum(cum.gmat.g0[,ncol(cum.gmat.g0)]-h_gN))
# ------------------------------------------------------------------------------------
if (gvars$verbose) {
message("================================================================")
message("fitting h_gstar based on summary measures: ", "P(" %+% paste(sA_nms_gstar, collapse = ",") %+% " | " %+% paste(sW.gstar_nms, collapse = ",") %+% ")")
message("================================================================")
}
regclass.gstar <- RegressionClass$new(sep_predvars_sets = TRUE,
outvar.class = sA_class,
outvar = sA_nms_gstar,
predvars = sW.gstar_nms,
subset = subsets_expr)
regclass.gstar$S3class <- "generic"
# Define Intervals Under g_star to Be The Same as under g0:
summeas.gstar <- newsummarymodel(reg = regclass.gstar, DatNet.sWsA.g0 = DatNet.g0)
# summeas.gstar <- SummariesModel$new(reg = regclass.gstar, DatNet.sWsA.g0 = DatNet.g0)
# Define Intervals Under g_star Based on Summary Measures Generated under g_star:
# summeas.gstar <- SummariesModel$new(reg = regclass.gstar, DatNet.sWsA.g0 = DatNet.gstar)
# Define Intervals Under g_star Based on Union of Summary Measures under g_star and g0:
# summeas.gstar <- SummariesModel$new(reg = regclass.gstar, DatNet.sWsA.g0 = DatNet.g0, datnet.gstar = DatNet.gstar)
DatNet.gstar <- DatNet.sWsA$new(Odata = Odata, datnetW = O.datnetW, datnetA = O.datnetA)
# f.g_fun to be replaced with new.sA:
DatNet.gstar$make.dat.sWsA(p = ng.MCsims, f.g_fun = f.gstar, new.sA.object = new.sA, sA.object = sA, DatNet.ObsP0 = DatNet.ObsP0)
if (gvars$verbose) {
print("Generated new summary measures by sampling A from f_gstar (DatNet.gstar): ")
print(head(DatNet.gstar$dat.sWsA))
}
if (!is.null(h_gstar_SummariesModel)) {
# 1) verify h_gstar_SummariesModel is consistent with summeas.gstar
assert_that(inherits(h_gstar_SummariesModel, "SummariesModel"))
# 2) deep copy the object with model fits to summeas.gstar
summeas.gstar <- h_gstar_SummariesModel$clone(deep=TRUE)
} else {
summeas.gstar$fit(data = DatNet.gstar)
}
# All data is now stored in a single global data.table, which is ALWAYS modified IN PLACE
# For that reason, DatNet.gstar$make.dat.sWsA() call above over-wrote Anodes and sA in Odata$Odata_DT
# These now need to be restored to the original Anodes and sA under g0:
# 1) back-up Anodes and sA generated under f.g.star so that we don't have to re-generate them again & then restore old Anodes and sA (under g.0)
DatNet.gstar$Odata$swapAnodes()
# 2) verify sA's were also restored and if not, regenerate them
if (!DatNet.ObsP0$datnetA$Odata$restored_sA_Vars)
DatNet.ObsP0$datnetA$make.sVar(sVar.object = sA)
h_gstar <- summeas.gstar$predictAeqa(newdata = DatNet.ObsP0)
if (length(h_gstar)!=DatNet.ObsP0$nobs) stop("the IPW weight prediction under gstar return invalid vector length: " %+% length(h_gstar))
# ------------------------------------------------------------------------------------
# to obtain the decomposed probability by each Anode (marginals):
# (directly analogous to the gstar component of gmat in ltmle package)
# gmat.gstar <- matrix(nrow = length(h_gstar), ncol = length(summeas.gstar$getPsAsW.models()))
# for (i in seq_along(summeas.gstar$getPsAsW.models())) {
# gmat.gstar[,i] <- summeas.gstar$getPsAsW.models()[[i]]$getcumprodAeqa()
# }
# cum.gmat.gstar <- matrixStats::rowCumprods(gmat.gstar)
# message("sum(gmat.gstar-h_gstar): " %+% sum(cum.gmat.gstar[,ncol(cum.gmat.gstar)]-h_gstar))
# ------------------------------------------------------------------------------------
###########################################
# 3A) Calculate final h_bar (h_tilde) as ratio of h_gstar / h_gN and bound it
##########################################
h_gstar_h_gN <- h_gstar / h_gN
h_gstar_h_gN[is.nan(h_gstar_h_gN)] <- 0 # 0/0 detection
h_gstar_h_gN <- bound(h_gstar_h_gN, c(0, 1/lbound))
###########################################
# 3B) Directly evaluating IPTW for static interventions (doesn't need DatNet.gstar)
##########################################
# Ag0mat <- DatNet.ObsP0$get.dat.sWsA(covars = DatNet.ObsP0$nodes$Anodes)
# Agstarmat <- f.gen.A.star(data = DatNet.ObsP0$dat.sVar, f.g_fun = f.gstar, Anodes = DatNet.ObsP0$nodes$Anodes)
# h_gstar_I <- Agstarmat == Ag0mat
# h_gstar_cum <- h_gstar_I[,1]
# for (i in seq(ncol(h_gstar_I))[-1]) {
# h_gstar_cum <- h_gstar_cum*h_gstar_I[,i]
# }
# h_gstar_h_gN <- h_gstar_cum / h_gN
# h_gstar_h_gN[is.nan(h_gstar_h_gN)] <- 0 # 0/0 detection
# h_gstar_h_gN <- bound(h_gstar_h_gN, c(0, 1/lbound))
# m.h.fit <- list(summeas.g0 = summeas.g0, summeas.gstar = NULL, lbound = lbound)
# return(list(h_gstar_h_gN = h_gstar_h_gN, m.h.fit = m.h.fit, DatNet.gstar = NULL))
###########################################
# IPTW BY TIME POINT:
# NEED TO IMPLEMENT:
# (1) Apply the bound for each column of the final iptw matrix
# (2) Be able to easily replace summeas.gstar object with KNOWN probabilities (when gstar is a known user-supplied stochastic intervention)
# gmat <- cum.gmat.gstar / cum.gmat.g0
###########################################
m.h.fit <- list(summeas.g0 = summeas.g0, summeas.gstar = summeas.gstar, lbound = lbound)
return(list(h_gstar_h_gN = h_gstar_h_gN, m.h.fit = m.h.fit, DatNet.gstar = DatNet.gstar))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.