#! /usr/bin/env Rscript
# This file was used to submit the simulation files to
# a slurm-based Unix system. Using the sce.sh shell script
# one can submit each simulation in sequence. First, data files are
# created for each simulation. Those data files are then analyzed
# in the 'run' execution. Then the results are collated in the 'merge'
# execution.
# get environment variables
MYSCRATCH <- Sys.getenv('MYSCRATCH')
RESULTDIR <- Sys.getenv('RESULTDIR')
STEPSIZE <- as.numeric(Sys.getenv('STEPSIZE'))
TASKID <- as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID'))
# set defaults if nothing comes from environment variables
MYSCRATCH[is.na(MYSCRATCH)] <- '.'
RESULTDIR[is.na(RESULTDIR)] <- '.'
STEPSIZE[is.na(STEPSIZE)] <- 1
TASKID[is.na(TASKID)] <- 0
# get command lines arguments
args <- commandArgs(trailingOnly = TRUE)
if(length(args) < 1){
stop("Not enough arguments. Please use args 'listsize', 'prepare', 'run <itemsize>' or 'merge'")
}
# # load packages
library(arm)
library(plyr)
# library(gam, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(caret)
library(haltmle.sim, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(Rsolnp, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(future, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(cvma, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(hal9001, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(drtmle, lib.loc = "/home/dbenkese/R/x86_64-unknown-linux-gnu-library/3.2")
library(SuperLearner)
# library(truncnorm, lib.loc = "/home/dbenkese/R/x86_64-pc-linux-gnu-library/3.4")
# full parm
ns <- c(250, 500, 1000, 2000)
bigB <- 1000
# # # simulation parameters
parm_values <- expand.grid(seed=1:bigB,
n=ns)
# parm <- find_missing_files(tag = "ksfinalorig",
# # parm needs to be in same order as
# # file saves -- should make this more general...
# parm = c("n", "seed"),
# full_parm = parm_values)
# save(parm, file = "~/haltmle.sim/scratch/remain_ksfinalmod_sims.RData")
# load("~/haltmle.sim/scratch/remain_ksfinalmod_sims.RData")
# directories to save in
saveDir <- "~/haltmle.sim/out/"
scratchDir <- "~/haltmle.sim/scratch/"
# get the list size #########
if (args[1] == 'listsize') {
cat(nrow(parm))
# for testing, useful to just run the first job
# to make sure everything saves correctly
# cat(1)
}
make_ks_mod <- function(n){
z1 <- runif(n, 0.5, 2)
z2 <- runif(n, -2, 2)
z3 <- runif(n, -2, 2)
z4 <- runif(n, -2, 2)
w1 <- exp(z1/2)
w2 <- z2/(1+exp(z1)) + 10
w3 <- (z1*z3/25 + 0.6)^3
w4 <- (z2 + z4 + 20)^2
Y <- 210 + 27.4*z1 + 13.7*z2 + 13.7*z3 + 13.7*z4 + rnorm(n)
g0 <- plogis(-z1 + 0.5*z2 - 0.25*z3 - 0.1*z4)
A <- rbinom(n, 1, g0)
return(list(Y = Y, A = A, W = data.frame(W1 = w1, W2 = w2, W3 = w3, W4 = w4),
Z = data.frame(Z1 = z1, Z2 = z2, Z3 = z3, Z4 = z4)))
}
make_ks <- function(n){
z1 <- rnorm(n)
z2 <- rnorm(n)
z3 <- rnorm(n)
z4 <- rnorm(n)
w1 <- exp(z1/2)
w2 <- z2/(1+exp(z1)) + 10
w3 <- (z1*z3/25 + 0.6)^3
w4 <- (z2 + z4 + 20)^2
Y <- 210 + 27.4*z1 + 13.7*z2 + 13.7*z3 + 13.7*z4 + rnorm(n)
g0 <- plogis(-z1 + 0.5*z2 - 0.25*z3 - 0.1*z4)
A <- rbinom(n, 1, g0)
return(list(Y = Y, A = A, W = data.frame(W1 = w1, W2 = w2, W3 = w3, W4 = w4),
Z = data.frame(Z1 = z1, Z2 = z2, Z3 = z3, Z4 = z4)))
}
get_var_eif <- function(n = 1e6){
z1 <- rnorm(n)
z2 <- rnorm(n)
z3 <- rnorm(n)
z4 <- rnorm(n)
w1 <- exp(z1/2)
w2 <- z2/(1+exp(z1)) + 10
w3 <- (z1*z3/25 + 0.6)^3
w4 <- (z2 + z4 + 20)^2
Q0 <- 210 + 27.4*z1 + 13.7*z2 + 13.7*z3 + 13.7*z4
Y <- Q0 + rnorm(n)
g0 <- plogis(-z1 + 0.5*z2 - 0.25*z3 - 0.1*z4)
A <- rbinom(n, 1, g0)
EIF <- (2*A - 1)/ifelse(A==1, g0, 1-g0) * (Y - Q0)
return(var(EIF))
}
# execute prepare job ##################
if (args[1] == 'prepare') {
}
# execute parallel job #################################################
if (args[1] == 'run') {
if (length(args) < 2) {
stop("Not enough arguments. 'run' needs a second argument 'id'")
}
id <- as.numeric(args[2])
print(paste(Sys.time(), "arrid:" , id, "TASKID:",
TASKID, "STEPSIZE:", STEPSIZE))
for (i in (id+TASKID):(id+TASKID+STEPSIZE-1)) {
print(paste(Sys.time(), "i:" , i))
# load parameters
print(parm[i,])
set.seed(parm$seed[i])
dat <- make_ks_mod(n=parm$n[i])
algo <- c("SL.mean",
"SL.hal9002",
"SL.earth.cv",
"SL.glm",
"SL.bayesglm",
# "SL.earth",
"SL.step.interaction",
# "SL.gam",
"SL.dbarts.mod",
"SL.gbm.caretMod",
"SL.rf.caretMod")
# algo <- c("SL.glm","SL.bayesglm","SL.gam")
# fit super learner with all algorithms
set.seed(12314)
out <- get_all_ates(Y = dat$Y, A = dat$A, W = dat$W, gtol = 0.025,
V = 6, learners = algo, remove_learner = "SL.hal9002",
which_dr_tmle = c("full_sl", "cv_full_sl", "SL.hal9002", "cv_SL.hal9002")) #,
# which_dr_tmle = "full_sl")
save(out, file=paste0(saveDir,"ksmod_n=",parm$n[i],"_seed=",parm$seed[i],
".RData"))
}
}
# merge job ###########################
if (args[1] == 'merge') {
# get_mean_rslt <- function(est_rslt){
# mean_rslt <- plyr::ddply(est_rslt, .(n), colMeans)
# return(mean_rslt)
# }
format_result <- function(out){
tmle_os_list <- lapply(out, function(l){
# browser()
cv_est <- grepl("cv_", names(l))
for(i in 1:length(l)){
l[[i]] <- data.frame(as.list(l[[i]]))
}
for(i in 1:length(l)){
if(cv_est[i]){
l[[i]]$cv_ci_l <- l[[i]]$est - qnorm(0.975)*l[[i]]$se
l[[i]]$cv_ci_u <- l[[i]]$est + qnorm(0.975)*l[[i]]$se
l[[i]]$cov_cv_ci <- l[[i]]$cv_ci_l < truth & l[[i]]$cv_ci_u > truth
}else{
l[[i]]$ci_l <- l[[i]]$est - qnorm(0.975)*l[[i]]$se
l[[i]]$ci_u <- l[[i]]$est + qnorm(0.975)*l[[i]]$se
l[[i]]$cov_ci <- l[[i]]$ci_l < truth & l[[i]]$ci_u > truth
l[[i]]$cv_ci_l <- l[[i]]$est - qnorm(0.975)*l[[i+1]]$se
l[[i]]$cv_ci_u <- l[[i]]$est + qnorm(0.975)*l[[i+1]]$se
l[[i]]$cov_cv_ci <- l[[i]]$cv_ci_l < truth & l[[i]]$cv_ci_u > truth
}
}
return(l)
})
return(tmle_os_list)
}
truth <- 0
all_files <- list.files("~/haltmle.sim/out")
ks_files <- all_files[grepl("ksmod",all_files)]
logistic_tmle_rslt <- matrix(nrow = length(ks_files), ncol = 143 + 1)
linear_tmle_rslt <- matrix(nrow = length(ks_files), ncol = 143 + 1)
onestep_rslt <- matrix(nrow = length(ks_files), ncol = 143 + 1)
drtmle_rslt <- matrix(nrow = length(ks_files), ncol = 26 + 1)
for(i in seq_along(ks_files)){
# get sample size
this_n <- as.numeric(strsplit(strsplit(ks_files[i], "_")[[1]][2], "n=")[[1]][2])
# this_n <- as.numeric(strsplit(strsplit(ks_files[i], "_")[[1]][3], "n=")[[1]][2])
# load file
load(paste0("~/haltmle.sim/out/",ks_files[i]))
# format this file
tmp <- format_result(out)
if(length(unlist(tmp[[1]])) == 143){
# add results to rslt
logistic_tmle_rslt[i,] <- c(this_n, unlist(tmp[[1]]))
linear_tmle_rslt[i,] <- c(this_n,unlist(tmp[[2]]))
onestep_rslt[i,] <- c(this_n,unlist(tmp[[3]]))
drtmle_rslt[i,] <- c(this_n,unlist(tmp[[4]]))
}else{
cat(i, "\n")
file.remove(ks_files[i])
# logistic_tmle_rslt <- logistic_tmle_rslt[-i,]
# linear_tmle_rslt <- linear_tmle_rslt[-i,]
# onestep_rslt <- onestep_rslt[-i,]
# drtmle_rslt <- drtmle_rslt[-i,]
}
}
col_names_1 <- c("n", names(unlist(tmp[[1]],use.names = TRUE)))
col_names_2 <- c("n", names(unlist(tmp[[4]],use.names = TRUE)))
logistic_tmle_rslt <- data.frame(logistic_tmle_rslt)
colnames(logistic_tmle_rslt) <- col_names_1
linear_tmle_rslt <- data.frame(linear_tmle_rslt)
colnames(linear_tmle_rslt) <- col_names_1
onestep_rslt <- data.frame(onestep_rslt)
colnames(onestep_rslt) <- col_names_1
drtmle_rslt <- data.frame(drtmle_rslt)
colnames(drtmle_rslt) <- col_names_2
rslt <- list(log_tmle = logistic_tmle_rslt,
lin_tmle = linear_tmle_rslt,
onestep = onestep_rslt,
drtmle = drtmle_rslt)
save(rslt, file = "~/haltmle.sim/out/allOut_mod.RData")
# coverage
by(rslt$lin_tmle, rslt$lin_tmle$n, function(x){
tmp <- colMeans(x[, grep("cov",colnames(x))])
tmp[order(-tmp)]
})
# ci width
by(rslt$lin_tmle, rslt$lin_tmle$n, function(x){
cove <- colMeans(x[, grep("cov",colnames(x))])
tmp <- colMeans(x[, grep("ci_u",colnames(x))] - x[,grep("ci_l",colnames(x))])
cbind(cove, tmp)[order(cove),]
})
# sqrt(n) * bias
by(rslt$log_tmle, rslt$log_tmle$n, function(x){
return(colMeans(x[, grep(".est",colnames(x))] * sqrt(x$n)))
})
# mse
by(rslt$log_tmle, rslt$log_tmle$n, function(x){
tmp <- colMeans(x[, grep(".est",colnames(x))]^2 )
return(tmp[order(tmp)])
})
est <- "SL.rf.caretMod"
by(linear_tmle_rslt, logistic_tmle_rslt$n, function(x){
colMeans(x[,paste0(c("","cv_"),est,".est")])
})
# by(logistic_tmle_rslt, logistic_tmle_rslt$n, function(x){
# c(sd(x$full_sl.est), mean(x$full_sl.se), sd(x$cv_full_sl.est), mean(x$cv_full_sl.se))
# })
# by(logistic_tmle_rslt, logistic_tmle_rslt$n, function(x){
# # colMeans(x[,c("full_sl.est","cv_full_sl.est",
# # "SL.hal9001.est","cv_SL.hal9001.est"
# # )])
# colMeans(x[,grep(".est",colnames(x))])
# })
# by(logistic_tmle_rslt, logistic_tmle_rslt$n, function(x){
# colMeans(x)
# })
# by(linear_tmle_rslt, linear_tmle_rslt$n, function(x){
# colMeans(x[,c("full_sl.cov_ci","full_sl.cov_cv_ci","cv_full_sl.cov_cv_ci",
# "SL.hal9001.cov_ci","SL.hal9001.cov_cv_ci","cv_SL.hal9001.cov_cv_ci"
# )])
# })
# by(onestep_rslt, onestep_rslt$n, function(x){
# colMeans(x[,c("full_sl.cov_ci","full_sl.cov_cv_ci","cv_full_sl.cov_cv_ci",
# "SL.hal9001.cov_ci","SL.hal9001.cov_cv_ci","cv_SL.hal9001.cov_cv_ci"
# )])
# })
# by(drtmle_rslt, drtmle_rslt$n, function(x){
# colMeans(x[,c("full_sl.cov_ci","full_sl.cov_cv_ci","cv_full_sl.cov_cv_ci",
# "SL.hal9001.cov_ci","SL.hal9001.cov_cv_ci","cv_SL.hal9001.cov_cv_ci"
# )])
# })
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.