# ---------------------------------------------------------------------------------
# TEST SET 4
# ---------------------------------------------------------------------------------
# network TMLE fit with continous sA
# Run one TMLE simulation for network data generated igraph::sample_k_regular (all nodes have the same degree k)
# estimate psi0 under trunced g.star (same as in iid)
# sA is the same as in iid example w sAnet=(sA, rowMeans(sA[[1:Kmax]])).
# ---------------------------------------------------------------------------------------------------------
# TMLE for causal effect in network data with continuous exposure under continuous stochastic intervention;
# intervention g.star is defined by shifting the normal density of observed sA until g.star/g.0 >= 10,
# then its truncated to be equal to g.0
# ---------------------------------------------------------------------------------------------------------
`%+%` <- function(a, b) paste0(a, b)
# f.gstar,
run.net.1sim.tmlenet <- function(datO, NetInd_mat, sW, sA, Kmax, Qform, newA.gstar, psi0) {
datO_input <- datO[,c("W1", "W2", "W3", "sA", "Y")]
res <- tmlenet(data = datO_input,
# Anodes = "sA",
Ynode = "Y",
Kmax = Kmax,
NETIDmat = NetInd_mat,
# f_gstar1 = f.gstar,
intervene1.sA = newA.gstar,
sW = sW, sA = sA,
Qform = Qform,
hform.g0 = "sA + net.mean.sA ~ W1 + W2 + W3",
hform.gstar = "sA + net.mean.sA ~ W1 + W2 + W3",
optPars = list(
bootstrap.var = FALSE, n.bootstrap = 1
))
# correct Q:
# Qform = "Y ~ W1 + W2 + W3 + sA + net.mean.sA",
# misspecified Q:
# Qform = "Y ~ W2 + W3 + net.mean.sA",
CIs <- res$EY_gstar1$IC.CIs
print("CIs: "); print(CIs)
(tmle_B.CI <- CIs[rownames(CIs)%in%"tmle_B",])
(h_iptw.CI <- CIs[rownames(CIs)%in%"h_iptw",])
cover.tmle_B <- ((psi0 <= tmle_B.CI[2]) && (psi0 >= tmle_B.CI[1]))
cover.h_iptw <- ((psi0 <= h_iptw.CI[2]) && (psi0 >= h_iptw.CI[1]))
est_mat <- res$EY_gstar1$estimates
est <- as.vector(est_mat)
names(est) <- rownames(est_mat)
cover <- apply(CIs, 1, function(row) ((psi0 <= row[2]) && (psi0 >= row[1])))
cover2 <- c(tmle_B = cover.tmle_B, h_iptw = cover.h_iptw)
return(list(est = est, cover = cover))
}
#------------------------------------------------------------------------------------------------------------
# The user-defined network sampler(s) from igraph (regular graph model)
# Generate regular random graphs with same degree for each node
# Kmax - degree of each node
make_netDAG <- function(Kmax, trunc, shift) {
generate.igraph.k.regular <- function(n, Kmax, ...) {
if (n < 20) Kmax <- 5
igraph.reg <- igraph::sample_k_regular(no.of.nodes = n, k = Kmax, directed = TRUE, multiple = FALSE)
# From igraph object to sparse adj. matrix:
sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(igraph.reg)
# From igraph object to simcausal/tmlenet input (NetInd_k, nF, Kmax):
NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat)
print("old Kmax:" %+% Kmax)
print("new Kmax:" %+% NetInd_out$Kmax)
print("NetInd_k"); print(head(NetInd_out$NetInd_k))
if (Kmax < NetInd_out$Kmax) message("new network has larger Kmax value than requested, new Kmax = " %+% NetInd_out$Kmax)
return(NetInd_out$NetInd_k)
}
# graph <- igraph::sample_k_regular(no.of.nodes = 50, k = 10, directed = TRUE, multiple = FALSE)
# par(mar=c(.1,.1,.1,.1))
# igraph::plot.igraph(graph,
# layout=igraph::layout.fruchterman.reingold,
# vertex.size=7,
# vertex.label.cex=.5,
# edge.arrow.size=.5)
D <- DAG.empty()
# Adding the ER model network generator from igraph:
D <- D + network("NetInd_k", Kmax = Kmax, netfun = "generate.igraph.k.regular")
D <- D +
node("W1", distr = "rbern", prob = 0.5) +
node("W2", distr = "rbern", prob = 0.3) +
node("W3", distr = "rbern", prob = 0.3) +
node("sA.mu", distr = "rconst", const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)) +
node("sA", distr = "rnorm", mean = sA.mu, sd = 1) +
node("net.mean.sA", distr = "rconst", const = mean(sA[[1:Kmax]])) +
node("untrunc.sA.gstar", distr = "rconst", const = sA + shift) +
node("r.new.sA", distr = "rconst", const =
exp(shift * (untrunc.sA.gstar - sA.mu - shift / 2))) +
node("tr.sA.gstar", distr = "rconst", const = ifelse(r.new.sA > trunc, sA, untrunc.sA.gstar)) +
node("probY", distr = "rconst", const =
plogis(-0.35 * sA - 0.20 * mean(sA[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
node("Y", distr = "rbern", prob = probY) +
node("probY.gstar", distr = "rconst", const =
plogis(-0.35 * tr.sA.gstar - 0.20 * mean(tr.sA.gstar[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
node("Y.gstar", distr = "rbern", prob = probY.gstar)
Dset <- set.DAG(D, n.test = 10)
return(Dset)
}
# Function that returns a stochastic intervention function intervening on sA, for given shift
# create_f.gstar <- function(shift, trunc) {
# shift <- shift
# trunc <- trunc
# f.gstar <- function(data, ...) {
# sA.mu <- 0.98 * data[,"W1"] + 0.58 * data[,"W2"] + 0.33 * data[,"W3"]
# sA <- data[,"sA"]
# untrunc.sA.gstar <- sA + shift
# # ratio of P_g^*(sA=sa|W)/P_g0(sA=sa|W) for sa=sA generated under g^*:
# r.new.sA <- exp(shift * (untrunc.sA.gstar - sA.mu - shift / 2))
# trunc.sA.gstar <- ifelse(r.new.sA > trunc, sA, untrunc.sA.gstar)
# return(trunc.sA.gstar)
# }
# return(f.gstar)
# }
test.onesim.net.tmlefit <- function() {
require(simcausal)
#------------------------------------------------------------------------------------------------------------
Kmax <- 5
trunc <- 4
shift <- 1
Dset <- make_netDAG(Kmax, trunc, shift)
#------------------------------------------------------------------------------------------------------------
# # plotDAG(Dset)
# rndseed2 <- 54321
# nsamp <- 50000
# datFull <- sim(Dset, n = nsamp, rndseed = rndseed2)
# print(head(datFull, 50))
# # netind_cl <- attributes(datFull)$netind_cl # to get the network object from sim data:
# # NetInd_mat <- attributes(datFull)$netind_cl$NetInd # to get the network matrix from sim data:
# print("mean(datFull$Y): " %+% mean(datFull$Y));
# # [1] "mean(datFull$Y): 0.301425"
# psi0 <- mean(datFull$Y.gstar)
# print("psi0: " %+% psi0)
# # [1] "psi0: 0.22062" for 50K sample (trunc = 4, shift = 1, Kmax = 5)
# # [1] "psi0: 0.19184" for 50K sample (trunc = 7, shift = 1, Kmax = 10)
# f.gstar <- create_f.gstar(shift = shift, trunc = trunc)
shift <- shift
trunc <- trunc
newA.gstar <- def_new_sA(sA = ifelse(exp(shift * (sA + shift - (0.98*W1 + 0.58*W2 + 0.33*W3) - shift/2)) > trunc,
sA,
sA + shift))
# DEFINE SUMMARY MEASURES:
sW <- def_sW(W1 = "W1", W2 = "W2", W3 = "W3")
sA <- def_sA(sA = "sA", net.mean.sA = rowMeans(sA[[1:Kmax]]), replaceNAw0 = TRUE)
seed <- 12345
datO <- sim(Dset, n = 10000, rndseed = seed)
mean(datO$Y.gstar) # [1] 0.2136
checkTrue(abs(mean(datO$Y.gstar) - 0.2136) < 10^-6)
netind_cl <- attributes(datO)$netind_cl
NetInd_mat <- attributes(datO)$netind_cl$NetInd
dim(NetInd_mat)
getOption("tmlenet.verbose")
options(tmlenet.verbose = FALSE)
# tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
# tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
print_tmlenet_opts()
#------------------------------------------------------------------------------------------------------------
# TEST MISSPECIFIED Q:
#------------------------------------------------------------------------------------------------------------
tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
print_tmlenet_opts()
Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
timerun <- system.time(
estres <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
sW = sW, sA = sA, Kmax = Kmax,
Qform = Qform.mis,
newA.gstar = newA.gstar,
# f.gstar = f.gstar,
psi0 = 0)
)
timerun
# user system elapsed
# 1.659 0.121 1.791
estres
# tmle h_iptw gcomp
# 0.2159311 0.2181338 0.2648796
# [1] "CIs: "
# LBCI_0.025 UBCI_0.975
# tmle 0.1930713 0.2387909
# h_iptw 0.1886270 0.2476407
# gcomp NA NA
# test 1:
checkTrue(abs(estres$est["tmle"] - 0.2159311) < 10^-6)
# test 2:
checkTrue(abs(estres$est["h_iptw"] - 0.2181338) < 10^-6)
# test 3:
checkTrue(abs(estres$est["gcomp"] - 0.2648796) < 10^-6)
#------------------------------------------------------------------------------------------------------------
# TEST EQUAL LENGTH INTERVAL FITTING METHOD (bin.method = "equal.len") (MISSPECIFIED Q):
#------------------------------------------------------------------------------------------------------------
Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
tmlenet_options(maxNperBin = 1000, bin.method = "equal.len", nbins = 10)
# tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
# tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
print_tmlenet_opts()
timerun <- system.time(
estres_eqlen <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
sW = sW, sA = sA, Kmax = Kmax,
Qform = Qform.mis,
newA.gstar = newA.gstar,
# f.gstar = f.gstar,
psi0 = 0)
)
timerun
estres_eqlen
# tmle h_iptw gcomp
# 0.2257509 0.2299967 0.2648796
# LBCI_0.025 UBCI_0.975
# tmle 0.2001600 0.2513419
# h_iptw 0.1961125 0.2638809
# test 1:
checkTrue(abs(estres_eqlen$est["tmle"] - 0.2257509) < 10^-6)
# test 2:
checkTrue(abs(estres_eqlen$est["h_iptw"] - 0.2299967) < 10^-6)
# test 3:
checkTrue(abs(estres_eqlen$est["gcomp"] - 0.2648796) < 10^-6)
#------------------------------------------------------------------------------------------------------------
# TEST CORRECT Q:
#------------------------------------------------------------------------------------------------------------
tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
Qform.corr <- "Y ~ W1 + W2 + W3 + sA + net.mean.sA"
estres2 <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
sW = sW, sA = sA, Kmax = Kmax,
Qform = Qform.corr,
newA.gstar = newA.gstar,
# f.gstar = f.gstar,
psi0 = 0)
estres2
# tmle h_iptw gcomp
# 0.2150487 0.2181338 0.2140926
# [1] "CIs: "
# LBCI_0.025 UBCI_0.975
# tmle 0.1925783 0.2375190
# h_iptw 0.1886270 0.2476407
# gcomp NA NA
# test 1:
checkTrue(abs(estres2$est["tmle"] - 0.2150487) < 10^-6)
# test 2:
checkTrue(abs(estres2$est["h_iptw"] - 0.2181338) < 10^-6)
# test 3:
checkTrue(abs(estres2$est["gcomp"] - 0.2140926) < 10^-6)
#------------------------------------------------------------------------------------------------------------
# TEST PARALLEL FITTING METHOD FOR LOGISTIC REGRESSIONS (MISSPECIFIED Q):
# FAILS ON win build (COMMENTING OUT):
#------------------------------------------------------------------------------------------------------------
# require(foreach)
# require(doParallel)
# registerDoParallel(cores = 2)
# # registerDoParallel(cores = 1)
# tmlenet_options(maxNperBin = 1000, bin.method="equal.mass", parfit = TRUE)
# # tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
# # tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
# print_tmlenet_opts()
# estres_par <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
# sW = sW, sA = sA, Kmax = Kmax,
# Qform = Qform.mis, f.gstar = f.gstar, psi0 = 0)
# # test 1:
# checkTrue(abs(estres_par$est["tmle"] - 0.2159311) < 10^-6)
# # test 2:
# checkTrue(abs(estres_par$est["h_iptw"] - 0.2181338) < 10^-6)
# # test 3:
# checkTrue(abs(estres_par$est["gcomp"] - 0.2648796) < 10^-6)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.