knitr::opts_chunk$set(echo = TRUE)
library(causalglm)
n <- 1500 # sample size large enough to be in the (deep) asymptotic regime
nsims <- 100 # coverage estimates are quite noisy; larger simulation counts give more accurate coverage probabilities
learning_method <- "mars" # recommended machine-learning algorithm (barring computational constraints)
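
Each simulation block below records, per replicate, whether each true coefficient lies inside its reported 95% confidence interval, accumulating the indicators column-wise in a logical matrix. A minimal helper for summarizing such a matrix, together with the binomial Monte Carlo standard error of each coverage estimate (summarize_coverage is illustrative, not part of causalglm):

# Illustrative helper (not part of causalglm): empirical coverage per coefficient
# from a logical matrix with one row per coefficient and one column per replicate.
summarize_coverage <- function(passes) {
  coverage <- rowMeans(passes) # empirical coverage of the nominal 95% CIs
  mc_se <- sqrt(coverage * (1 - coverage) / ncol(passes)) # binomial Monte Carlo SE
  data.frame(coverage = coverage, mc_se = mc_se)
}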

Simulation 95% CI coverage test for semiparametric and nonparametric CATE and CATT

require(doMC)
doMC::registerDoMC(10) # register 10 cores for the parallel HAL fits (HAL_fit_control = list(parallel = TRUE))

passes <- c()
passes1 <- c()
passes2 <- c()
for(i in 1:nsims){
  print(i)
  formula <- ~ 1 + W1
  data_list <- sim.CATE(n = n, p = 3, sigma = NULL, formula_estimand = formula, formula_A = ~., formula_Y0W = ~., beta = c(1,1), beta_A = 0.5*c(0,1,1,1), beta_Y = 0.5*c(1,1,1,1))

  data <- data_list$data
  W <- data_list$W
  A <- data_list$A
  Y <- data_list$Y
  beta0 <- data_list$beta_CATE # true coefficient vector of the CATE working model

  out <- spglm(formula, data, W, A, Y, estimand = "CATE", learning_method = learning_method, verbose = TRUE, HAL_fit_control = list(parallel = TRUE))
  passes <- cbind(passes, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)

  out <- npglm(formula, data, W, A, Y, estimand = "CATE", learning_method = learning_method, verbose = TRUE, HAL_fit_control = list(parallel = TRUE))
  passes1 <- cbind(passes1, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)

  # Under this data-generating process the CATE is exactly linear in (1, W1),
  # so the CATT working-model coefficients coincide with the CATE coefficients
  # and beta0 is also the truth for the CATT fit.
  out <- npglm(formula, data, W, A, Y, estimand = "CATT", learning_method = learning_method, verbose = TRUE, HAL_fit_control = list(parallel = TRUE))
  passes2 <- cbind(passes2, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)

  print(rowMeans(passes))
  print(rowMeans(passes1))
  print(rowMeans(passes2))
}
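
With nsims = 100 replicates, the Monte Carlo standard error of an estimated coverage near the nominal 0.95 level is about sqrt(0.95 * 0.05 / 100) ≈ 0.022, so observed coverage roughly between 0.91 and 0.99 is consistent with nominal coverage. The illustrative helper defined above summarizes this directly:

summarize_coverage(passes)  # spglm, CATE
summarize_coverage(passes1) # npglm, CATE
summarize_coverage(passes2) # npglm, CATT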

Simulation 95% CI coverage test for semiparametric and nonparametric OR

passes <- c()
passes1 <- c()
for(i in 1:nsims){
  print(i)
  formula <- ~ 1 + W1
  data_list <- sim.OR(n = n, p = 2, formula_estimand = formula, formula_A = ~., formula_Y0W = ~., beta = c(1,0.5), beta_A = 0.5*c(0,1,1), beta_Y = c(0,1,1))

  data <- data_list$data
  W <- data_list$W
  A <- data_list$A
  Y <- data_list$Y
  beta0 <- data_list$beta_logOR # true coefficient vector of the log odds ratio working model

  # A smaller delta_epsilon (targeting step size) is passed for the OR estimand for stability.
  out <- spglm(formula, data, W, A, Y, estimand = "OR", learning_method = learning_method, verbose = TRUE, delta_epsilon = 0.05, HAL_fit_control = list(parallel = TRUE))
  passes <- cbind(passes, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)
  print(rowMeans(passes))

  out <- npglm(formula, data, W, A, Y, estimand = "OR", learning_method = learning_method, verbose = TRUE, delta_epsilon = 0.05, HAL_fit_control = list(parallel = TRUE))
  passes1 <- cbind(passes1, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)
  print(rowMeans(passes1))
}
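
Because the estimand is a log odds ratio, estimates and confidence limits can be mapped onto the odds ratio scale by exponentiating. A minimal sketch using only the lower and upper columns of out$coefs referenced above (the same transformation applies to the log relative risk estimates in the next section):

# Illustrative: map log-OR confidence limits onto the odds ratio scale.
exp(cbind(lower = out$coefs$lower, upper = out$coefs$upper))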

Simulation 95% CI coverage test for semiparametric and nonparametric RR

learning_method <- "glm"
 passes <- c()
passes1 <- c()
for(i in 1:nsims){
  print(i)
  formula <- ~ 1 + W1
  # Note: a larger, hardcoded sample size (2500 rather than n) is used for the RR simulation.
  data_list <- sim.RR(n = 2500, p = 3, formula_estimand = formula, formula_A = ~., formula_Y0W = ~., beta = c(1,1), beta_A = 0.5*c(0,1,1,1), beta_Y = 0.5*c(0,1,1,1))

  data <- data_list$data
  W <- data_list$W
  A <- data_list$A
  Y <- data_list$Y
  beta0 <- data_list$beta_logRR # true coefficient vector of the log relative risk working model

  out <- spglm(formula, data, W, A, Y, estimand = "RR", learning_method = learning_method, verbose = FALSE, HAL_fit_control = list(parallel = TRUE))
  passes <- cbind(passes, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)

  out <- npglm(formula, data, W, A, Y, estimand = "RR", learning_method = learning_method, verbose = FALSE, HAL_fit_control = list(parallel = TRUE))
  passes1 <- cbind(passes1, out$coefs$lower <= beta0 & beta0 <= out$coefs$upper)

  print(rowMeans(passes))
  print(rowMeans(passes1))
}

Simulation 95% CI coverage test for the nonparametric Cox proportional hazards estimator (npCOXph)

stop("no")
passes<-c()
require(simcausal)
for(i in 1:200){
  print(i)
  D <- DAG.empty()
  D <- D + node("W1", distr = "runif", min = -1, max = 1) +
    node("W2", distr = "runif", min = -1, max = 1) +
    node("A", distr = "rbinom", size = 1, prob = plogis(W1 + W2)) +
    # Event process on t = 1:10 with hazard 0.35 * plogis(W1 + W2) * exp(0.5 * A),
    # so the true log hazard ratio for A is 0.5.
    node("dNt", t = 1:10, EFU = TRUE, distr = "rbinom", size = 1, prob = exp(0.5*A)*0.35*plogis(W1 + W2)) +
    # Censoring process with probability zero, i.e., no censoring.
    node("dCt", t = 1:10, EFU = TRUE, distr = "rbinom", size = 1, prob = 0*plogis(W1 + W2 + t))
  D <- set.DAG(D)
  data <- sim(D, n = 800)

  # Convert the wide counting-process columns (dNt, dCt) to right-censored
  # survival format: observed time Ttilde and event indicator Delta.
  data_N <- data[, grep("[d][N].+", colnames(data))]
  data_C <- data[, grep("[d][C].+", colnames(data))]

  data_surv <- as.data.frame(do.call(rbind, lapply(1:nrow(data), function(i) {
    rowN <- data_N[i,]
    rowC <- data_C[i,]
    t <- which(rowN == 1)  # event time, if an event occurred
    tc <- which(rowC == 1) # censoring time, if censoring occurred
    if(length(tc) == 0){
      tc <- 10 # no censoring: administrative censoring at the end of follow-up
    }
    if(length(t) == 0){
      t <- 12 # no event by t = 10: any value past the last follow-up time gives Delta = FALSE
    }
    Ttilde <- min(t, tc)
    Delta <- t <= tc
    return(matrix(c(Ttilde, Delta), nrow = 1))
  })))
  colnames(data_surv) <- c("Ttilde", "Delta")
  data$Ttilde <- data_surv$Ttilde
  data$Delta <- data_surv$Delta
  data <- data[, -grep("[d][C].+", colnames(data))]
  data <- data[, -grep("[d][N].+", colnames(data))]

  print(table(data$Ttilde))
  print(table(data$Delta))

  doMC::registerDoMC(10)
  out <- npCOXph(~1, data, learning_method = "HAL", W = c("W1", "W2"), A = "A", Ttilde = "Ttilde", Delta = "Delta", formula_N = ~., formula_HAL_T = NULL, HAL_args_T = list(max_degree = 2, smoothness_orders = 1, num_knots = c(10, 5)), HAL_fit_control = list(parallel = TRUE))
  print(out$coefs)
  # The true log hazard ratio is 0.5 (see the dNt node above).
  passes <- c(passes, out$coefs$lower <= 0.5 & out$coefs$upper >= 0.5)
  print(mean(passes))
}
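
The event hazard in the DAG is multiplicative in exp(0.5 * A), i.e., it has proportional hazards form with true log hazard ratio 0.5, which is the value whose coverage is checked above. The empirical coverage and its binomial Monte Carlo standard error follow directly (a minimal sketch; passes here is a logical vector rather than a matrix):

coverage <- mean(passes) # empirical coverage of the nominal 95% CI for the log hazard ratio 0.5
mc_se <- sqrt(coverage * (1 - coverage) / length(passes)) # binomial Monte Carlo SE
c(coverage = coverage, mc_se = mc_se)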

