if (case == "setup") {
cl_export = c("SL.gam3","screen.Main","screen10","screen6","SL.glmnet_1","SL.glmnet_2",
"SL.glmnet_3","xgbFull","xgbMain","screen.Main",
"xgb6","screen6","xgb10","screen10","rpartPrune","nnetFull", "rpartPruneSL",
"nnetMain","screen.Main","earthFull","All","screen10","screen6",
"earthMain","screen.Main","rangerFull","All","screen.Main",
"ranger10","screen10","screen6","SL.glm","screen6","screen10",
"SL.stepAIC","SL.hal","glm.mainint","xgb2")
SL.library1 = list(c("SL.gam3","screen.Main","screen6","screen10","All"),"SL.glmnet_1",
"SL.glmnet_2","SL.glmnet_3",
c("nnetMain","screen.Main"), c("rpartPruneSL", "screen.Main"),
c("earthMain","screen.Main"),
c("SL.glm","screen.Main","screen6","screen10","All"),
"SL.stepAIC", c("SL.hal","screen.Main"),"SL.mean","glm.mainint")
SL.library2 = list(c("SL.gam3","screen.Main","screen6","screen10","All"),
"SL.glmnet_1","SL.glmnet_2","SL.glmnet_3",
c("rpartPruneSL", "screen.Main"),"xgbFull",c("xgbMain","screen.Main"),
c("nnetMain","screen.Main"), c("earthMain","screen.Main"),
"rangerFull",c("ranger10", "screen10"),"nnetFull",
c("SL.glm","screen.Main","screen6","screen10","All"),
"SL.stepAIC", c("SL.hal","screen.Main"),"SL.mean","glm.mainint")
SL.library3 = list(c("SL.gam3","screen.Main","screen6","screen10","All"),
"SL.glmnet_1","SL.glmnet_2","SL.glmnet_3",
c("rpartPruneSL", "screen.Main"),"xgbFull",c("xgbMain","screen.Main"),
c("xgb6","screen6"), c("xgb10","screen10"),
c("nnetMain","screen.Main"), c("earthMain","screen.Main"),
"earthFull",
c("ranger10", "screen10", "All"),"nnetFull",
c("SL.glm","screen.Main","screen6","screen10","All"),
"SL.stepAIC", c("SL.hal","screen.Main"),"SL.mean","glm.mainint")
SL.libraryG = list("nnetMain","SL.mean","SL.hal", "rpartPruneSL",
"SL.earth","SL.glm","SL.step.interaction",
"SL.glm.interaction")
SL.libraryD2 = list("SL.nnet","SL.mean","SL.hal", "SL.rpartPrune", "glm.mainint",
"SL.earth","SL.glm","SL.step.interaction",
"SL.glm.interaction","xgb2","xgb6","SL.ranger")
SL.libraryGD2 = list("SL.nnet","SL.mean","SL.hal", "SL.rpartPrune",
"SL.earth","SL.glm","SL.step.interaction",
"SL.glm.interaction","xgb2","xgb6","SL.ranger")
} else {
if (case == "LRcase2a") {
g0 = g0_linear
Q0 = Q0_trig1
testdata=gendata(1e6, g0 = g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
Qform = paste(colnames(gendata(1,g0 = g0, Q0 = Q0))[2:5], collapse = "+")
Qform = paste0("Y ~ A*(", Qform, ")")
# SL.library = c("glm.mainint")
# SL.libraryG = c("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
# run this on a 24 core node
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0, Q0, gform=formula("A~."),
Qform = formula(Qform), V=10)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
} else {
# reuse the previously stored results when the simulation is not re-run
results = results_LRcase2a
}
B = nrow(results)
type= c(rep("tmle with lr initial",B),rep("lr initial",B))
types = c("tmle with lr initial","lr initial")
inds = c(1,22)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red","blue")
plotdf = data.frame(ests=ests, type=type)
ggover = ggplot(plotdf,aes(x=ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle("blip variance sampling distributions", subtitle=
"tmle with logistic regression initial estimates")
ggover = ggover+geom_vline(xintercept = var0,color="black")
assign(paste0("gg_",case), ggover)
assign(paste0("results_",case), results)
}
if (case == "LRcase2b"){
g0 = g0_linear
Q0 = Q0_trig
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
Qform = paste(colnames(gendata(1,g0 = g0, Q0 = Q0))[2:5], collapse = "+")
Qform = paste0("Y ~ A*(", Qform, ")")
# SL.library = c("glm.mainint")
# SL.libraryG = c("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
# run this on a 24 core node
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0, Q0, gform=formula("A~."),
Qform = formula(Qform), V=10)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
type= c(rep("tmle with lr initial",B),rep("lr initial",B))
types = c("tmle with lr initial","lr initial")
inds = c(1,22)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red","blue")
plotdf = data.frame(ests=ests, type=type)
ggover = ggplot(plotdf,aes(x=ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle("blip variance sampling distributions", subtitle=
"tmle with logistic regression initial estimates")
ggover = ggover+geom_vline(xintercept = var0,color="black")
assign(paste0("gg_",case), ggover)
assign(paste0("results_",case), results)
}
####
####
####
# hal
if (case == "HALcase2a") {
g0 = g0_linear
Q0 = Q0_trig1
testdata=gendata(1e6, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = list(c("SL.hal","screen.Main"))
SL.libraryG = c("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
# run this on a 24 core node
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0 = g0, Q0 = Q0, gform = formula("A~."))}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle HAL" = 1,"init est HAL" = 22)
type= c(rep("TMLE HAL",B), rep("initial est HAL",B))
types = c("TMLE HAL", "initial est HAL")
inds = varind
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red","blue")
plotdf = data.frame(ests=ests, type=type)
ggover = ggplot(plotdf,aes(x=ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle("blip variance sampling distributions", subtitle=
"tmle with hal initial estimates")
ggover = ggover+geom_vline(xintercept = var0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])
capt = "Truth is at black vline."
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_",case), ggover)
coverage = vapply(varind[1],
FUN = function(x) cov.check(results, var0, x),
FUN.VALUE = 1, USE.NAMES = TRUE)
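# Oracle check: SE_true below is the Monte Carlo SD of the B estimates (sd() rescaled
# to the population formula), and ci_true rebuilds Wald CIs around each estimate using
# that true sampling SD instead of the IC-based SE.
# A minimal sketch of the same construction as a reusable helper (an illustration only;
# not called anywhere in this script):
oracle_ci = function(est) {
  se = sd(est) * sqrt((length(est) - 1) / length(est))  # population-SD version of sd()
  data.frame(psi = est, l = est - 1.96 * se, r = est + 1.96 * se)
}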
SE_true = sd(results[,varind[1]])*sqrt(B-1)/sqrt(B)
ci_true = data.frame(results[,varind[1]],
results[,varind[1]] - 1.96*SE_true,
results[,varind[1]] + 1.96*SE_true)
cov_true = cov.check(ci_true, var0, 1)
coverage = c(coverage, cov_true)
names(coverage)[2] = paste0(names(varind)[1], " TRUE VAR")
performance.sig = t(apply(results[,varind], 2, perf,var0))
rownames(performance.sig) = names(varind)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("coverage_",case), coverage)
}
if (case == "HALcase2b") {
g0 = g0_linear
Q0 = Q0_trig
testdata=gendata(1e6, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = list(c("SL.hal","screen.Main"))
SL.libraryG = c("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
# run this on a 24 core node
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0 = g0, Q0 = Q0, gform = formula("A~."))}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle HAL" = 1,"init est HAL" = 22)
type= c(rep("TMLE HAL",B), rep("initial est HAL",B))
types = c("TMLE HAL", "initial est HAL")
inds = varind
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red","blue")
plotdf = data.frame(ests=ests, type=type)
ggover = ggplot(plotdf,aes(x=ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle("blip variance sampling distributions", subtitle=
"tmle with hal initial estimates")
ggover = ggover+geom_vline(xintercept = var0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])
capt = "Truth is at black vline."
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_",case), ggover)
coverage = vapply(varind[1],
FUN = function(x) cov.check(results, var0, x),
FUN.VALUE = 1, USE.NAMES = TRUE)
SE_true = sd(results[,varind[1]])*sqrt(B-1)/sqrt(B)
ci_true = data.frame(results[,varind[1]],
results[,varind[1]] - 1.96*SE_true,
results[,varind[1]] + 1.96*SE_true)
cov_true = cov.check(ci_true, var0, 1)
coverage = c(coverage, cov_true)
names(coverage)[2] = paste0(names(varind)[1], " TRUE VAR")
performance.sig = t(apply(results[,varind], 2, perf,var0))
rownames(performance.sig) = names(varind)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("coverage_",case), coverage)
}
if (case == "case2a") {
g0 = g0_linear
Q0 = Q0_trig1
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library1
SL.libraryG = list("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv=FALSE
)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28), cov.check(results, ATE0, 34))
# getting coverage using the true variance of the estimator
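# dd lays out two CIs per run: columns 1-3 are (blip-variance estimate, lower, upper)
# and columns 4-6 are (ATE estimate, lower, upper), each built as estimate +/- 1.96
# times the true (Monte Carlo) SE; cov.check(dd, var0, 1) and cov.check(dd, ATE0, 4)
# then check those two CIs against the truth.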
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance SL1", "Simultaneous TMLE SL1", "TMLE ATE SL1",
"TMLE ATE LR", "TMLE Blip Var using true Var", "TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
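# LL counts one column per learner-screen pairing in SL.library: a bare learner name
# contributes 1 and a c(learner, screen1, ...) entry contributes length - 1, so the
# SuperLearner columns summarized below are results[, 65:(64+LL)].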
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
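# A vectorized sketch equivalent to the loop above (illustration only; not called
# anywhere in this script). 'lib' is assumed to be a list mixing bare learner names
# and c(learner, screen, ...) vectors, as in SL.library1-3.
count_learners = function(lib) {
  sum(vapply(lib, function(x) max(length(x) - 1L, 1L), FUN.VALUE = integer(1)))
}
# e.g. count_learners(SL.library) gives the same value as LL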
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("TMLE SL1",B), rep("init. est SL1",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL1","iter TMLE SL1","init. est SL1","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
capt = paste0("Truth is at black vline.\n",
"\ninitial est using SuperLearner library 1 is excellent",
"\nTMLE for ATE using IC approx for variance covers at ",
100*round(coverage[3,1],3),"%",
"\ninit est. LR which is Logistic Reg. with main terms and interactions ",
"\nplug-in clearly biased but TMLE corrects it and it covers at ",
100*round(coverage[4,],3),"%\n")
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_ATE", case), ggover)
type = c(rep("TMLE SL1",B), rep("init est SL1",B),
rep("init est LR",B), rep("TMLE LR",B))
types = c("TMLE SL1","init est SL1","init est LR", "TMLE LR")
inds = c(1, 37, 38, 16)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
cap = paste0("truth is black line\n",
"tmle SL1, which used Superlearner Library 1 for initial ests\n",
"attains near nominal coverage at ", 100*round(coverage[1,1],3),"%\n",
"and debiases initial SuperLearner lib 1 estimate.\n",
"tmle LR used logistic regression with main terms and interactions\n",
"has very biased initial estimates and leads to bad targeting for computing\n",
"the accompanying TMLE.")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_BV", case), ggover2)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("performance.ate_",case), performance.ate)
assign(paste0("coverage_",case), coverage)
assign(paste0("SL_results_",case), SL_results)
}
if (case == "case2aCV") {
g0 = g0_linear
Q0 = Q0_trig1
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library1
SL.libraryG = list("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv = TRUE
)}
}
}
if (case == "combo_LRandSL1case2a") {
g0 = g0_linear
Q0 = Q0_trig1
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library1
SL.libraryG = list("SL.glm")
varind = c("1step tmle LR" = 1,"1step tmle SL1" = 5,
"init est LR" = 22,"init est tmle SL1" = 41)
performance.sig = lapply(results[varind],perf,var0)
performance.sig = t(as.data.frame(performance.sig))
rownames(performance.sig) = names(varind)
coverage_tmle = cov.check(results_case2a, var0, 1)
coverage_tmle
coverage_tmlesimul = cov.simul(results_case2a, c(ATE0, var0), c(25,7))
coverage_tmlesimul
coverage_LR = cov.check(results_LRcase2a, var0, 1)
coverage_LR
coverage_LRsimul = cov.simul(results_LRcase2a, c(ATE0, var0), c(16,7))
coverage_LRsimul
coverage = c(coverage_halglm, coverage_hal, .434, NA, NA)
cov.check(results_LRcase2a, ATE0, 16)
MSE_cov = cbind(performance.sig, coverage)
MSE_cov
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results_halglm[,65:(65+LL-1)]))
rownames(SL_results) = colnames(results)[65:(65+LL-1)]
LG=0
for (i in 1:length(SL.libraryG)) {
if (length(SL.libraryG[[i]]) > 1) {
LG = LG + length(SL.libraryG[[i]])-1} else
{LG = LG + 1}
}
SL_resultsG = data.frame(colMeans(results_halglm[,(65+LL):(65+LL+LG-1)]))
rownames(SL_resultsG) = colnames(results)[(65+LL):(65+LL+LG-1)]
B = nrow(results_halglm)
B1 = nrow(results_hal)
type = c(rep(names(varind)[1],B), rep(names(varind)[2],B1), rep(names(varind)[3],B),
rep(names(varind)[4],B), rep(names(varind)[5],B1), rep(names(varind)[6],B))
# types = c("1step TMLE LR","1step TMLE HAL","1step TMLE HAL+glm")
types = names(varind)
inds = varind
ests = unlist(lapply(inds, FUN = function(x) results[[x]]))
inds = inds[order(types)]
colors = c("blue","green","orange","red", "purple", "yellow")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[[inds[1]]]),color = colors[1])+
geom_vline(xintercept=mean(results[[inds[2]]]),color = colors[2])+
geom_vline(xintercept=mean(results[[inds[3]]]),color = colors[3])+
geom_vline(xintercept=mean(results[[inds[4]]]),color = colors[4])+
geom_vline(xintercept=mean(results[[inds[5]]]),color = colors[5])+
geom_vline(xintercept=mean(results[[inds[6]]]),color = colors[6])
cap = paste0("truth at black line.\n",
"tmle LR uses glm with interactions for outcome model and glm for\n",
"treatment mechanism initial estimates. tmle LR CI's cover at",
round(MSE_cov[3,4] ,1),"\n",
"hal tmle uses highly adaptive lasso for initial estimates of both\n",
"outcome and treatment mech initial estimates and these cover\n",
"at", round(MSE_cov[3,4] ,2),
"\ntmle hal+glm SL uses a SuperLearner with hal and glm for\n",
"outcome and treatment mechanism initial estimates and cover at ",
round(MSE_cov[3,4] ,3),"\n",
"All coverage here is using infuence curve approx for inference.")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("results_",case), results)
assign(paste0("gg_BV",case), ggover2)
assign(paste0("MSE_cov_",case), MSE_cov)
assign(paste0("SL_results_",case), SL_results)
assign(paste0("SL_resultsG_",case), SL_resultsG)
}
if (case == "case2bSL1") {
g0 = g0_linear
Q0 = Q0_trig
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library1
SL.libraryG = list("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv=FALSE
)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28), cov.check(results, ATE0, 34))
# getting coverage using the true variance of the estimator
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance SL1", "Simultaneous TMLE SL1", "TMLE ATE SL1",
"TMLE ATE LR", "TMLE Blip Var using true Var", "TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("TMLE SL1",B), rep("init. est SL1",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL1","iter TMLE SL1","init. est SL1","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
capt = paste0("Truth is at black vline.\n",
"\ninitial est using SuperLearner library 1 is excellent",
"\nTMLE for ATE using IC approx for variance covers at ",
100*round(coverage[3,1],3),"%",
"\ninit est. LR which is Logistic Reg. with main terms and interactions ",
"\nplug-in clearly biased but TMLE corrects it and it covers at ",
100*round(coverage[4,],3),"%\n")
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_ATE",case), ggover)
type = c(rep("TMLE SL1",B), rep("init est SL1",B), rep("init est LR",B))
types = c("TMLE SL1","init est SL1","init est LR")
inds = c(1, 37, 38)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])
cap = paste0("truth is black line\n",
"tmle SL1, which used Superlearner Library 1 for initial ests\n",
"attains near nominal coverage at ", 100*round(coverage[1,1],3),"%\n",
"and debiases initial SuperLearner lib 1 estimate.\n",
"init est LR used logistic regression with main terms and interactions\n",
"plug-in estimator and is a disaster, which TMLE cannot help.\n")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_BV",case), ggover2)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("performance.ate_",case), performance.ate)
assign(paste0("coverage_",case), coverage)
assign(paste0("SL_results_",case), SL_results)
}
if (case == "case2bCVSL2") {
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28), cov.check(results, ATE0, 34))
# getting coverage using the true variance of the estimator
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance SL2", "Simultaneous TMLE SL2", "TMLE ATE SL2",
"TMLE ATE LR", "TMLE Blip Var using true Var", "TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("TMLE SL2",B), rep("init. est SL2",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL2","iter TMLE SL2","init. est SL2","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
capt = paste0("Truth is at black vline.\n",
"\ninitial est using SuperLearner library 2 is excellent",
"\nTMLE for ATE using IC approx for variance covers at ",
100*round(coverage[3,1],3),"%",
"\ninit est. LR which is Logistic Reg. with main terms and interactions ",
"\nplug-in clearly biased but TMLE corrects it and it covers at ",
100*round(coverage[4,],3),"%\n")
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_ATE",case), ggover)
type = c(rep("TMLE SL2",B), rep("init est SL2",B), rep("init est LR",B))
types = c("TMLE SL2","init est SL2","init est LR")
inds = c(1, 37, 38)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])
cap = paste0("truth is black line\n",
"tmle SL2, which used Superlearner Library 2 for initial ests\n",
"is normally dist, hasn't many outliers, covers at ", 100*round(coverage[1,1],3),"%\n",
"and slightly debiases initial estimate using Superlearner Lib 2 \n",
"which defies the donsker condition but such is not needed for\n",
"CV-TMLE. The TMLE coverage using the True variance was ",
100*round(coverage[5,1], 3), "%, \nso the IC approx was a bit anticonservative.\n",
"init est LR used logistic regression with main terms and interactions\n",
"plug-in estimator and is a disaster, which TMLE cannot help.\n")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_BV",case), ggover2)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("performance.ate_",case), performance.ate)
assign(paste0("coverage_",case), coverage)
assign(paste0("SL_results_",case), SL_results)
}
if (case == "case2bSL2") {
g0 = g0_linear
Q0 = Q0_trig
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library2
SL.libraryG = list("SL.glm")
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv=FALSE
)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28), cov.check(results, ATE0, 34))
# getting coverage using the true variance of the estimator
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance SL2", "Simultaneous TMLE SL2", "TMLE ATE SL2",
"TMLE ATE LR", "TMLE Blip Var using true Var", "TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("TMLE SL2",B), rep("init. est SL2",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL2","iter TMLE SL2","init. est SL2","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
capt = paste0("Truth is at black vline.\n",
"\ninitial est using SuperLearner library 2 is excellent",
"\nTMLE for ATE using IC approx for variance covers at ",
100*round(coverage[3,1],3),"%",
"\ninit est. LR which is Logistic Reg. with main terms and interactions ",
"\nplug-in clearly biased but TMLE corrects it and it covers at ",
100*round(coverage[4,],3),"%\n")
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
gg_ATEcase2bSL2 = ggover
type = c(rep("TMLE SL2",B), rep("init est SL2",B), rep("init est LR",B))
types = c("TMLE SL2","init est SL2","init est LR")
inds = c(1, 37, 38)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])
cap = paste0("truth is black line\n",
"tmle SL2, which used Superlearner Library 2 for initial ests\n",
"is skewed, has many outliers and covers at ", 100*round(coverage[1,1],3),"%\n",
"and does not debias initial estimate due to overfit outcome regressions\n",
"which defy the donsker condition--need CV-TMLE!\n",
"init est LR used logistic regression with main terms and interactions\n",
"plug-in estimator and is a disaster, which TMLE cannot help.\n")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_BV",case), ggover2)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("performance.ate_",case), performance.ate)
assign(paste0("coverage_",case), coverage)
assign(paste0("SL_results_",case), SL_results)
}
if (case == "combo_case2b") {
g0 = g0_linear
Q0 = Q0_trig
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
coverage_case2b = c(cov.check(results_case2bSL1, var0, c(1)),
cov.check(results_case2bSL2, var0, c(1)),
cov.check(results_case2bCVSL2, var0, c(1)))
coverage_simulcase2b = c(cov.simul(results_case2bSL1, c(var0,ATE0), c(7,25)),
cov.simul(results_case2bSL2, c(var0,ATE0), c(7,25)),
cov.simul(results_case2bCVSL2, c(var0,ATE0), c(7,25)))
coverage_case2b = c(coverage_case2b, coverage_simulcase2b,
rep(NA,3))[c(1,4,7,2,5,7,3,6,7)]
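# The reindexing interleaves, for each of SL1, SL2 and CV-SL2: blip-variance coverage,
# simultaneous coverage, and an NA placeholder (the initial plug-in estimates carry no
# CI), so the vector lines up with the rows of MSE_case2b bound below.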
MSE_case2b = rbind(t(apply(results_case2bSL1[,c(1,7,37)],2,perf,var0)),
MSE_of = t(apply(results_case2bSL2[,c(1,7,37)],2,perf,var0)),
MSE_cv = t(apply(results_case2bCVSL2[,c(1,7,37)],2,perf,var0)))
MSE_cov = cbind(MSE_case2b, coverage_case2b)
rownames(MSE_cov) = c("TMLE SL1","Simul. TMLE SL1","init est SL1",
"TMLE SL2","Simul. TMLE SL2","init est SL2",
"CV-TMLE SL2","Simul. CV-TMLE SL2","init est CV-SL1")
# df_trueSL1 = data.frame(results_case2bSL1[,1],
# results_case2bSL1[,1]-1.96*sqrt(MSE_cov[1,1]),
# results_case2bSL1[,1]+1.96*sqrt(MSE_cov[1,1]))
# df_trueSL2 = data.frame(results_case2bSL1[,1],
# results_case2bSL1[,1]-1.96*sqrt(MSE_cov[4,1]),
# results_case2bSL1[,1]+1.96*sqrt(MSE_cov[4,1]))
# df_trueCVSL2 = data.frame(results_case2bSL1[,1],
# results_case2bSL1[,1]-1.96*sqrt(MSE_cov[7,1]),
# results_case2bSL1[,1]+1.96*sqrt(MSE_cov[7,1]))
#
# cov_true = unlist(lapply(list(df_trueSL1, df_trueSL2, df_trueCVSL2),
# FUN = function(x) cov.check(x, var0, c(1))))
# names(cov_true) = c("TMLE SL1", "TMLE SL2", "CV-TMLE SL2")
B1 = nrow(results_case2bSL1)
B2 = nrow(results_case2bSL2)
B3 = nrow(results_case2bCVSL2)
type = c(rep("tmle SL1",B1), rep("tmle SL2",B2),
rep("cv-tmle SL2",B3))
types = c("tmle SL1","tmle SL2",
"cv-tmle SL2")
colors = c("red","green","blue")
ests = c(results_case2bSL1[,1], results_case2bSL2[,1],
results_case2bCVSL2[,1])
plotdf = data.frame(ests=ests, type=type)
ggover = ggplot(plotdf, aes(x=ests, fill = type, color = type))+geom_density(alpha=.5)
ggover = ggover + scale_fill_manual(values=colors)
ggover = ggover + scale_color_manual(values=colors)
ggover = ggover + geom_vline(xintercept = var0, color= "black")+
ggtitle("The virtue of cv-tmle, case 2b",
subtitle = "cv-tmle maintains normality, eliminates skewing, bad outliers")+
theme(plot.title = element_text(size=12), plot.subtitle = element_text(size=10))+
geom_vline(xintercept = mean(as.numeric(results_case2bCVSL2[,1])), color = colors[1])+
geom_vline(xintercept = mean(as.numeric(results_case2bSL1[,1])), color = colors[2])+
geom_vline(xintercept = mean(as.numeric(results_case2bSL2[,1])), color = colors[3])
cap = paste0("truth is black line\n",
"tmle SL2, which used Superlearner Library 2 for initial ests\n",
"is skewed, has many outliers and covers at ", 100*round(MSE_cov[4,4],3),"%\n",
"has over double the MSE of the other two and is less efficient.\n",
"tmle SL1 uses a non-overfitting SuperLearner and covers near nominally at ",
100*round(MSE_cov[1,4],3), "%\n",
"cv-tmle SL2 uses the overfitting SuperLearner library which defies the donsker\n",
"condition for initial ests, but cv-tmle does not require the donsker condition.\n",
"cv-tmle has lowest MSE, no skewing and covers respectably at ",
100*round(MSE_cov[8,4],3),"%.")
ggover=ggdraw(add_sub(ggover,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
gg_cvadvert = ggover
}
if (case == "case4_hal") {
g0 = g0_1
Q0 = Q0_1
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL_hal=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0 = g0, Q0 = Q0)}
results_hal = data.matrix(data.frame(do.call(rbind, ALL_hal)))
}
if (case == "case4_halglm"){
g0 = g0_1
Q0 = Q0_1
SL.library = list(c("glm.mainint", "screen.Main"),
c("SL.hal", "screen.Main"))
SL.libraryG = list("SL.glm", "SL.hal")
cl = makeCluster(detectCores(), type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL_halglm=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv=FALSE
)}
results_halglm = data.matrix(data.frame(do.call(rbind, ALL_halglm)))
}
if (case == "case4_LR") {
g0 = g0_1
Q0 = Q0_1
testdata=gendata(1e6, g0 = g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
Qform = paste(colnames(gendata(1,g0 = g0, Q0 = Q0))[2:5], collapse = "+")
Qform = paste0("Y ~ A*(", Qform, ")")
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
# run this on a 24 core node
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_hal(n, g0, Q0, gform=formula("A~."),
Qform = formula(Qform), V=10)}
results_case4_LR = data.matrix(data.frame(do.call(rbind, ALL)))
}
if (case == "case4"){
g0 = g0_1
Q0 = Q0_1
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = list(c("glm.mainint", "screen.Main"),
c("SL.hal", "screen.Main"))
SL.libraryG = list("SL.glm", "SL.hal")
varind = c("1step tmle LR" = 1,"1step tmle HAL" = 5, "1step tmle HAL+glm" = 9,
"init est LR" = 4,"init est HAL" = 8, "init est HAL+glm" = 45)
performance.sig = lapply(results[varind],perf,var0)
performance.sig = t(as.data.frame(performance.sig))
rownames(performance.sig) = names(varind)
coverage_halglm = cov.check(results_halglm, var0, c(1))
coverage_halglm
coverage_hal = cov.check(results_hal, var0, 1)
coverage_hal
coverage_LR = cov.check(results_case4_LR, var0, 1)
# align coverage with the rows of performance.sig (LR, HAL, HAL+glm, then initial ests)
coverage = c(coverage_LR, coverage_hal, coverage_halglm, .434, NA, NA)
MSE_cov = cbind(performance.sig, coverage)
MSE_cov
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results_halglm[,65:(65+LL-1)]))
rownames(SL_results) = colnames(results)[65:(65+LL-1)]
LG=0
for (i in 1:length(SL.libraryG)) {
if (length(SL.libraryG[[i]]) > 1) {
LG = LG + length(SL.libraryG[[i]])-1} else
{LG = LG + 1}
}
SL_resultsG = data.frame(colMeans(results_halglm[,(65+LL):(65+LL+LG-1)]))
rownames(SL_resultsG) = colnames(results)[(65+LL):(65+LL+LG-1)]
B = nrow(results_halglm)
B1 = nrow(results_hal)
B2 = nrow(results_case4_LR)
type = c(rep(names(varind)[1],B2), rep(names(varind)[2],B1), rep(names(varind)[3],B),
rep(names(varind)[4],B2), rep(names(varind)[5],B1), rep(names(varind)[6],B))
# types = c("1step TMLE LR","1step TMLE HAL","1step TMLE HAL+glm")
types = names(varind)
inds = varind
ests = unlist(lapply(inds, FUN = function(x) results[[x]]))
inds = inds[order(types)]
colors = c("blue","green","orange","red", "purple", "yellow")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[[inds[1]]]),color = colors[1])+
geom_vline(xintercept=mean(results[[inds[2]]]),color = colors[2])+
geom_vline(xintercept=mean(results[[inds[3]]]),color = colors[3])+
geom_vline(xintercept=mean(results[[inds[4]]]),color = colors[4])+
geom_vline(xintercept=mean(results[[inds[5]]]),color = colors[5])+
geom_vline(xintercept=mean(results[[inds[6]]]),color = colors[6])
cap = paste0("truth at black line.\n",
"tmle LR uses glm with interactions for outcome model and glm for\n",
"treatment mechanism initial estimates. tmle LR CI's cover at",
round(MSE_cov[3,4] ,1),"\n",
"hal tmle uses highly adaptive lasso for initial estimates of both\n",
"outcome and treatment mech initial estimates and these cover\n",
"at", round(MSE_cov[3,4] ,2),
"\ntmle hal+glm SL uses a SuperLearner with hal and glm for\n",
"outcome and treatment mechanism initial estimates and cover at ",
round(MSE_cov[3,4] ,3),"\n",
"All coverage here is using infuence curve approx for inference.")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("results_",case), results)
assign(paste0("gg_BV",case), ggover2)
assign(paste0("MSE_cov_",case), MSE_cov)
assign(paste0("SL_results_",case), SL_results)
assign(paste0("SL_resultsG_",case), SL_resultsG)
}
if (case == "case3") {
g0 = g0_1
Q0 = Q0_2
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
SL.library = SL.library1
SL.libraryG = SL.libraryG
if (!resultsGotten) {
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{sim_cv(n, g0 = g0, Q0 = Q0, SL.library=SL.library,
SL.libraryG=SL.libraryG,method = "method.NNloglik",cv=FALSE
)}
results = data.matrix(data.frame(do.call(rbind, ALL)))
}
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28), cov.check(results, ATE0, 34),
cov.check(results, var0, 16))
# getting coverage using the true variance of the estimator
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance SL1", "Simultaneous TMLE SL1", "TMLE ATE SL1",
"TMLE ATE LR", "TMLE Blip Variance LR",
"TMLE Blip Var using true Var",
"TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("TMLE SL1",B), rep("init. est SL1",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL1","iter TMLE SL1","init. est SL1","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
capt = paste0("Truth is at black vline.\n",
"\ninitial est using SuperLearner library 1 is excellent",
"\nTMLE for ATE using IC approx for variance covers at ",
100*round(coverage[3,1],3),"%",
"\ninit est. LR which is Logistic Reg. with main terms and interactions ",
"\nplug-in unbiased and TMLE keeps it as is, covering at ",
100*round(coverage[4,],3),"%\n")
ggover=ggdraw(add_sub(ggover,capt, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
gg_ATEcase3 = ggover
type = c(rep("TMLE SL1",B), rep("init est SL1",B), rep("init est LR",B),
rep("TMLE LR",B))
types = c("TMLE SL1","init est SL1","init est LR","TMLE LR")
inds = c(1, 37, 38, 16)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case),
subtitle = "Recovering both treatment mech and outcome")
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
cap = paste0("truth is black line\n",
"tmle SL1, which used Superlearner Library 1 for initial ests\n",
"attains very good coverage at ", 100*round(coverage[1,1],3),"%\n",
"and debiases initial SuperLearner lib 1 estimate.\n",
"Simultaneous TMLE covers both ATE and blip var at ",
100*round(coverage[2,1],3),
"%\n",
"init est LR used logistic regression with main terms and\n",
"interactions. Corresponding TMLE was much more biased, covering at ",
100*round(coverage[5,1],3),"%",
"\nThis underscores the importance of machine learning in making\n",
"the initial estimates for both propensity score and outcome here.")
ggover2=ggdraw(add_sub(ggover2,cap, x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "",
fontface = "plain",colour = "black", size = 10, angle = 0,
lineheight = 0.9))
assign(paste0("gg_BV",case), ggover2)
assign(paste0("results_",case), results)
assign(paste0("performance.sig_",case), performance.sig)
assign(paste0("performance.ate_",case), performance.ate)
assign(paste0("coverage_",case), coverage)
assign(paste0("SL_results_",case), SL_results)
}
if (case == "noise"|case =="noise_neg"){
g0 = g0_linear
Q0 = Q0_noise
testdata=gendata_noise(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
propensity = with(testdata, g0(W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
rate = 1/3
if (case == "noise") {
biasQ = function(A,W1,W2,W3,W4,n,rate) {
n^-rate*1.5*(-.2+1.5*A+.2*W1+1*W2-A*W3+ 1*W4)
}
# hist(with(truth,biasQ(A,W1,W2,W3,W4,n=n,rate=rate)))
sdQ = function(A,W1,W2,W3,W4,n,rate) {
(n^-rate)*.8*(abs(3.5+.5*W1+.15*W2+.33*W3*W4-W4))
}
N = 20000
} else {
biasQ = function(A,W1,W2,W3,W4,n,rate) {
-n^-rate*.8*(+.2+1.5*A+.2*W1+1*W2+5*A*W3^2+ 1*W4)
}
sdQ = function(A,W1,W2,W3,W4,n,rate) {
(n^-rate)*.8*(abs(3.5+.5*W1+.15*W2+.33*W3*W4-W4))
}
N = 50000
}
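# In both scenarios the injected bias and noise shrink at rate n^(-rate) = n^(-1/3),
# which is o_p(n^(-1/4)), the rate requirement quoted in the plot captions below.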
cl = makeCluster(no.cores, type = "SOCK")
registerDoSNOW(cl)
L = list()
i=1
sizes = seq(250,N,250)
for (n in sizes){
rate = 1/3
print(i)
time = proc.time()
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm", "Simulations"))%dopar%
{simBlipvar(n = n, rate = rate, g0 = g0, Q0 = Q0, biasQ, sdQ)}
L[[i]] = ALL
print(proc.time()-time)
i=i+1
}
res_noise = vapply(1:length(sizes), FUN = function(x) {
res = getRes(L[[x]],B, ATE0=ATE0, var0=var0)
bias_init = res[[2]][2,2]
bias_tmle = res[[2]][1,2]
mse_init = res[[2]][2,3]
mse_tmle = res[[2]][1,3]
coverage = res[[3]][2]
return(c(bias_init, bias_tmle, mse_init, mse_tmle, coverage))
}, FUN.VALUE = c(1,1,1,1,1))
res_noise = t(res_noise)
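# res_noise: one row per sample size, with columns (init bias, tmle bias, init MSE,
# tmle MSE, coverage) as returned above.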
plotdf_biasmse = data.frame(n = rep(sizes,2),
bias = c(res_noise[,1], res_noise[,2]),
mse = c(res_noise[,3], res_noise[,4]),
type = c(rep("init", length(sizes)),
rep("tmle", length(sizes))))
capt=paste0("sample size n=250, blip bias has L2 norm = O_p(n^{-1/3})",
"\no_p(n^{-.25}) as required")
p250 = ggdraw(add_sub(getRes(L[[1]],1000, ATE0=ATE0, var0=var0)$ggover2,capt,
x= .05, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"),
fontfamily = "", fontface = "plain",
colour = "black", size = 9, angle = 0, lineheight = 0.9))
capt=paste0("sample size n=1000, blip bias has L2 norm = O_p(n^{-1/3})",
"\no_p(n^{-.25}) as required")
p1000 = ggdraw(add_sub(getRes(L[[4]],1000, ATE0=ATE0, var0=var0)$ggover2,capt,
x= .05, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"),
fontfamily = "", fontface = "plain",
colour = "black", size = 9, angle = 0, lineheight = 0.9))
capt=paste0("sample size n=5000, blip bias has L2 norm = O_p(n^{-1/3})",
"\no_p(n^{-.25}) as required")
p5000 = ggdraw(add_sub(getRes(L[[20]],1000, ATE0=ATE0, var0=var0)$ggover2,capt,
x= .05, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"),
fontfamily = "", fontface = "plain",
colour = "black", size = 9, angle = 0, lineheight = 0.9))
capt=paste0("sample size n=10000, blip bias has L2 norm = O_p(n^{-1/3})",
"\no_p(n^{-.25}) as required")
p10000 = ggdraw(add_sub(getRes(L[[40]],1000, ATE0=ATE0, var0=var0)$ggover2,capt,
x= .05, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"),
fontfamily = "", fontface = "plain",
colour = "black", size = 9, angle = 0, lineheight = 0.9))
ml=marrangeGrob(list(p250,p1000,p5000,p10000),ncol=2,nrow=2,
widths = c(3.5,3.5),
heights = c(1,1))
if (case=="noise"){
truth = gendata_noise(1e6, g0, Q0)
grobs = lapply(c(1,4,10,20), FUN = function(x) {
coverage = res_noise[x,5]
noise_analysis(sizes[x],1/3,truth,coverage=coverage,
Q0, biasQ, sdQ)[[5]]
})
ml1=marrangeGrob(grobs,ncol=2,nrow=2, widths = c(3.5,3.5),
heights=c(1,1))
} else {
truth = gendata_noise(1e6, g0, Q0)
grobs = lapply(c(1,4,10,160), FUN = function(x) {
coverage = res_noise[x,5]
noise_analysis(sizes[x],1/3,truth,coverage=coverage,
Q0, biasQ, sdQ)[[5]]
})
ml1=marrangeGrob(grobs,ncol=2,nrow=2, widths = c(3.5,3.5),
heights=c(1,1))
}
}
if (case == "wells") {
a = seq(0,15,.5)
b = seq(0,15,.5)
truevars = c()
trueates = c()
gg_wells = list()
oc_list = list()
# initialize the accumulators filled inside the loops below
coverage = NULL
bias = NULL
count = 1
for (i in 1:31){
Q0 = function (A, W1, W2, W3, W4)
{
plogis(.14*(2* A + W1 + a[i]*A * W1 - b[i]*A*W2 + W2 - W3+ W4))
}
testdata=gendata(1000000, g0=g0_1, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
truevars = c(truevars, var(blip_true))
trueates = c(trueates, mean(blip_true))
oc_list = append(oc_list,Q0)
}
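# oc_list now holds 31 outcome regressions whose A:W1 and A:W2 interaction strengths
# sweep over seq(0, 15, .5); truevars and trueates store the corresponding true blip
# variances and ATEs.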
for (nn in c(250,500,1000)){
cl = makeCluster(detectCores(), type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,c("a","b","oc_list"))
for (i in seq(1,31,2)){
print(i)
B = 1000
n = nn
Q0 = oc_list[[i]]
ALL = foreach(rep=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations"),
.export = c("a","b","i"))%dopar%
{sim_lr(n, g0 = g0_linear, Q0 = Q0,
formQ = formula("Y~A*(W1 + W2) +W3 +W4"),
formG = formula("A~."))}
results = do.call(rbind,ALL)
cnames = colnames(results)
results = apply(results,2,as.numeric)
colnames(results) = cnames
var0 = truevars[i]
ATE0 = trueates[i]
cov = vapply(c(1,4),FUN = function(x){
covs = results[,x+1]<=var0&results[,x+2]>=var0
mean(covs)
}, FUN.VALUE = 1)
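# for each index x in c(1, 4), columns x+1 and x+2 are read as the lower and upper
# CI bounds for the blip-variance estimate in column x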
cov_simulsig = results[,8] <= var0 & results[,9] >= var0
cov_simulATE = results[,11] <= ATE0 & results[,12] >= ATE0
cov_simul = mean(cov_simulsig * cov_simulATE)
cov_ate = mean(results[,14] <= ATE0 & results[,15] >= ATE0)
cov = c(cov, cov_simul, cov_ate)
coverage = rbind(coverage,cov)
MSE = apply(results[,c(1,4,7,16)], 2, perf, var0)
MSE = t(MSE)
bias1 = MSE[,2]
bias = rbind(bias, bias1)
type = c(rep("1step tmle",1000),
rep("initial",1000))
types = c("1step tmle", "initial")
inds = c(1,16)[order(types)]
types = types[order(types)]
colors = c("red", "blue","green","orange")
ests = c(results[,1],results[,16])
plotdf = data.frame(ests=ests, type=type)
gg_hal = ggplot(plotdf, aes(x=ests, fill = type, color = type))+geom_density(alpha=.5)
gg_hal = gg_hal + scale_fill_manual(values=colors)
gg_hal = gg_hal + scale_color_manual(values=colors)
gg_hal = gg_hal + geom_vline(xintercept = var0, color= "black")+
theme(plot.title = element_text(size=12))+
ggtitle(paste0("tmle sampling dists \nwell-spec models, n=",nn))+
geom_vline(xintercept = mean(as.numeric(results[,inds[1]])), color = colors[1])+
geom_vline(xintercept = mean(as.numeric(results[,inds[2]])), color = colors[2])
caption = paste0("truth at black line, true blip variance = ",round(var0,6),
"\ntmle bias is ",round(MSE[2,2],6))
gg_hal=ggdraw(add_sub(gg_hal,caption,x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "", fontface = "plain",
colour = "black", size = 10, angle = 0, lineheight = 0.9))
gg_hal
gg_wells[[count]] = gg_hal
count = count+1
}
stopCluster(cl)
}
data = gendata(1e6, g0_linear, Q0_trig1)
pscores = with(data, g0_linear(W1,W2,W3,W4))
df = data.frame(pscores=pscores, type = rep("p", 1e6))
gg_pscoresWell = ggplot(df, aes(x = pscores, fill = type)) + geom_density()+
scale_fill_manual(values=c("blue"))+
scale_color_manual(values=c("blue"))+
theme(legend.position="none")
gg_pscoresWell = ggdraw(add_sub(gg_pscoresWell,"no positivity issues here",
x= 0, y = 0.5, hjust = 0, vjust = 0.5,
vpadding = grid::unit(1, "lines"), fontfamily = "", fontface = "plain",
colour = "black", size = 10, angle = 0, lineheight = 0.9))
}
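# Hedged sketch (assumption, not from the original source): perf(), used in the
# wells block above and again in the case2b_2G block below, is taken to return
# the Monte Carlo variance, bias and MSE of a column of estimates against the
# truth, consistent with how performance.sig[1,1] and MSE[,2] are used; a
# minimal stand-in would be:
perf_sketch = function(ests, truth) {
  c(var = var(ests), bias = mean(ests) - truth, mse = mean((ests - truth)^2))
}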
if (case%in%list("CI_LRcase2a", "CI_LRcase2b", "CI_LRcase3", "CI_LRcase4")) {
if(case=="CI_LRcase2a"){
g0 = g0_linear
Q0 = Q0_trig1
}
if(case=="CI_LRcase2b"){
g0 = g0_linear
Q0 = Q0_trig
}
if(case=="CI_LRcase3"){
g0 = g0_1
Q0 = Q0_2
}
if(case=="CI_LRcase4"){
g0 = g0_1
Q0 = Q0_1
}
# get the truth
testdata=gendata(1000000, g0=g0, Q0 = Q0)
blip_true = with(testdata,Q0(1,W1,W2,W3,W4)-Q0(0,W1,W2,W3,W4))
ATE0 = mean(blip_true)
var0 = var(blip_true)
cl = makeCluster(detectCores(), type = "SOCK")
registerDoSNOW(cl)
clusterExport(cl,cl_export)
ALL=foreach(i=1:B,.packages=c("gentmle2","mvtnorm","hal","Simulations","SuperLearner"),
.errorhandling = "remove")%dopar%
{LR.BVinference(n, g0 = g0, Q0 = Q0)}
stopCluster(cl)
resultsLR = data.matrix(data.frame(do.call(rbind, ALL)))
coverageLR = c(cov.check(resultsLR, var0, 1),
cov.check(resultsLR, ATE0, 7),
cov.simul(resultsLR, c(ATE0,var0), c(10,4)))
names(coverageLR) = c("blip variance", "ATE", "simultaneous")
assign(paste0("coverageLR_",case),coverageLR)
}
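# Hedged sketch (assumption, not from the original source): cov.check() and
# cov.simul() come from the Simulations package; stand-ins with the interface
# implied by their use above (point estimate in column `col`, CI bounds in the
# two columns to its right) would look like:
cov.check_sketch = function(res, truth, col) {
  mean(res[, col + 1] <= truth & res[, col + 2] >= truth)
}
cov.simul_sketch = function(res, truths, cols) {
  # simultaneous coverage: every parameter must fall inside its own interval
  hits = sapply(seq_along(cols), function(j)
    res[, cols[j] + 1] <= truths[j] & res[, cols[j] + 2] >= truths[j])
  mean(apply(hits, 1, all))
}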
if (case == "case2b_2G") {
B = nrow(results)
varind = c("1step tmle" = 1,"simultaneous tmle" = 7, "init est" = 37)
ateind = c("1step tmle" = 28,"simultaneous tmle" = 25, "init est" = 39)
performance.sig = t(apply(results[,varind], 2, perf,var0))
performance.ate = t(apply(results[,ateind], 2, perf,ATE0))
rownames(performance.sig) = names(varind)
rownames(performance.ate) = names(ateind)
coverage = c(cov.check(results, var0, 1),
cov.simul(results, c(var0, ATE0), c(7,25)),
cov.check(results, ATE0, 28))
# getting coverage using the true (Monte Carlo) variance of the estimator
dd = data.frame(psi = results[,1],l = results[,1]-1.96*sqrt(performance.sig[1,1]),
r = results[,1]+1.96*sqrt(performance.sig[1,1]), psi = results[,28],
l = results[,28]-1.96*sqrt(performance.ate[1,1]),
r = results[,28]+1.96*sqrt(performance.ate[1,1]))
cov.sig.1step = cov.check(dd, var0, 1)
cov.ate = cov.check(dd, ATE0, 4)
cov = c(coverage, cov.sig.1step, cov.ate)
names(cov) = c("TMLE Blip Variance", "Simultaneous TMLE", "TMLE ATE",
"TMLE Blip Var using true Var", "TMLE ATE using true Var")
coverage = data.frame(coverage = cov)
rownames(coverage) = names(cov)
# getting superlearner results
LL = 0
for (i in 1:length(SL.library)) {
if (length(SL.library[[i]]) > 1) {
LL = LL + length(SL.library[[i]])-1} else
{LL = LL + 1}
}
SL_results = data.frame(colMeans(results[,65:(64+LL)]))
rownames(SL_results) = colnames(results)[65:(64+LL)]
type = c(rep("iter TMLE SL1",B), rep("init. est SL1",B),
rep("TMLE LR",B), rep("init est. LR",B))
types = c("simul TMLE SL1","iter TMLE SL1","init. est SL1","init est. LR")
inds = c(25, 39, 34, 40)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("red", "blue","green","orange")
ateests = data.frame(ests=ests,type=type)
ggover = ggplot(ateests,aes(ests, fill=type,color=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("ATE sampling distributions, ", case))
ggover = ggover+geom_vline(xintercept = ATE0,color="black")+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])+
geom_vline(xintercept=mean(results[,inds[4]]),color = colors[4])
ggover_ATEcase2b2G = ggover
type = c(rep("TMLE SL1",B), rep("init est SL1",B), rep("init est LR",B))
types = c("TMLE SL1","init est SL1","init est LR")
inds = c(1, 37, 38)
ests = unlist(lapply(inds, FUN = function(x) results[,x]))
inds = inds[order(types)]
colors = c("blue","green","orange","red")
varests = data.frame(ests=ests,type=type)
ggover2 = ggplot(varests,aes(ests, color = type, fill=type)) +
geom_density(alpha=.5)+
scale_fill_manual(values=colors)+
scale_color_manual(values=colors)+
theme(axis.title.x = element_blank())+
ggtitle(paste0("Blip Variance sampling distributions, ", case))
ggover2 = ggover2+geom_vline(xintercept = var0,color="black")+
theme(plot.title = element_text(size=12),
plot.subtitle = element_text(size=10))+
geom_vline(xintercept=mean(results[,inds[1]]),color = colors[1])+
geom_vline(xintercept=mean(results[,inds[2]]),color = colors[2])+
geom_vline(xintercept=mean(results[,inds[3]]),color = colors[3])
ggover_BVcase2b2G = ggover2
}
if (case == "example"){
# get rid of a few with missing data
nas = apply(wcgs, 1, FUN = function(x) any(is.na(x)))
bads = which(nas)
goods = which(!nas)
#select covariates as per the paper
data = wcgs[goods, c(2:7,9:11)]
# assign typical names to treatment and outcome
colnames(data)[8:9] = c("A","Y")
#####
#####
X = data
X$Y = NULL
W = X
W$A = NULL
A = X$A
mainform = paste0(paste(colnames(data)[1:6],"+",collapse=""),colnames(data)[7])
squares = paste0(paste0("I(",colnames(data)[1:7]),"^2)")
squares = paste0(paste(squares[1:6],"+",collapse=""),squares[7])
mainsq = paste0(mainform,"+",squares)
mainsq.int = paste0("Y~A*(",mainsq,")")
mainsq.int = formula(mainsq.int)
X = model.matrix(mainsq.int,data)
X = as.data.frame(X[,-1])
colnames(X)[2:ncol(X)] = paste0("X",2:ncol(X))
head(X)
X1 = X0 = X
X0$A = 0
X1$A = 1
Y = data$Y
newdata = rbind(X,X1,X0)
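# Hedged sketch (assumption, not from the original source): `newdata` follows
# the usual stacked-prediction convention -- rows 1:n are the observed (A,W),
# rows (n+1):2n set A = 1 and rows (2n+1):3n set A = 0 -- so predictions from
# any fitted outcome regression split into QAW, Q1W, Q0W, whose difference is
# the plug-in blip. A hypothetical helper (not in the package) for the split:
split_counterfactuals_sketch = function(pred, n) {
  list(QAW = pred[1:n],
       Q1W = pred[(n + 1):(2 * n)],
       Q0W = pred[(2 * n + 1):(3 * n)],
       blip = pred[(n + 1):(2 * n)] - pred[(2 * n + 1):(3 * n)])
}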
SL.library = list(c("nnetMain","screen.Main"),c("nnetMain1","screen.Main"),
c("earthFull", "screen6", "screen12","All"),
c("SL.earth", "screen.Main"),c("xgboostFull","screen12","All"),
c("xgboostMain", "screen.Main"),
c("gamFull", "screen6", "screen12","All"),
c("SL.gam", "screen.Main"),"SL.rpartPrune",
c("rangerMain", "screen.Main"), c("rpartMain", "screen.Main"),
"SL.stepAIC", c("SL.glm","screen6", "screen12","screen.Main","All"),
c("SL.hal", "screen.Main"), "SL.mean")
SL.libraryG = list("nnetMainG","nnetMainG1","SL.earth","SL.rpartPrune",
"SL.gam", "rpartMain", "SL.step.interaction", "SL.glm",
"SL.hal", "SL.mean","xgboostG")
# SL.library = SL.libraryG = c("SL.mean","SL.glm")
}
}