knitr::opts_chunk$set(echo = TRUE)
# devtools::install_github("Larsvanderlaan/npOddsRatio) library(npOddsRatio) n <- 1000 # Baseline variables X1 <- runif(n, min = -1, max = 1) X2 <- runif(n, min = -1, max = 1) # Binary treatment A <- rbinom(n,size = 1, prob = plogis(X1 + X2) ) # Binary outcome Y <- rbinom(n,size = 1, prob = plogis(-1+A + A*(X1 + X2) + X1 + X2 + X1*X2)) # Binary missingness indicator Delta <- rbinom(n,size = 1, prob =1- 0.1*plogis(X1 + X2 +A-0.5 )) # construct data.frame data <- data.frame(X1,X2,A,Delta, Y = Delta*Y) print(head(data))
# devtools::install_github("tlverse/sl3) library(sl3) # useful learners lrnr_gam <- Lrnr_gam$new() lrnr_glmnet <- Lrnr_glmnet$new() lrnr_hal <- Lrnr_hal9001$new(max_degree = 2, smoothness_orders =1, num_knots = c(5,3), fit_control = list(parallel = T)) lrnr_xgboost <- Lrnr_xgboost$new(max_depth = 4) # Create a stack/ensemble learner stack <- make_learner(Stack, lrnr_gam, lrnr_glmnet, lrnr_xgboost) # Create a superlearner lrnr_SL <- Lrnr_sl$new(stack) # Just use cross-validation selection lrnr_cv <- Lrnr_cv$new(stack) lrnr_cv <- make_learner(Pipeline, lrnr_cv, Lrnr_cv_selector$new(loss_loglik_binomial)) ###### true_coefs <- c(1, 1,1) library(doMC) registerDoMC() # Specify formula for odds ratio logOR_formula <- ~ 1 + X1 + X2 # Fit odds ratio fit_sp <- spOR(logOR_formula, W = data[,c("X1", "X2")], A = data$A, Y = data$Y , Delta = data$Delta, # Specify HAL estimator for P(Y=1|A=0,W): max_degree_Y0W = 2, # Generate only up to two-way interaction spline basis functions num_knots_Y0W = c(10,5,0), # Specify number of knots used per covariate for each spline basis function of each interaction degree (deg = 1,2,3). sl3_learner_A = lrnr_cv) # learner to estimate P(A=1|X,Delta=1) # Or be totally nonparametric and get correct inference even when parametric model is incorrect and just an approximation fit_np <- npOR(logOR_formula, W = data[,c("X1", "X2")], A = data$A, Y = data$Y , Delta = data$Delta, sl3_learner_OR = lrnr_hal, # Learner for true OR sl3_learner_Y0W = lrnr_hal, # Learner for true P(Y=1|A=0,W) nuisance sl3_learner_A = lrnr_cv, #Learner for P(A=1|W) sl3_learner_Delta = lrnr_cv) #Learner for P(Delta=1|W,A)
#### Coefficients
# print(coef(fit_sp))

#### Summarize coefficients
print("semiparametric")
print(summary(fit_sp))
print("nonparametric")
print(summary(fit_np))
# The estimates are similar since the semiparametric model is correctly specified.
# The nonparametric method has larger uncertainty.
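To see the extra uncertainty quantitatively, one can compare confidence-interval widths directly. A minimal sketch, assuming (as in the simulation code further below) that `fit$coefs` stores the lower and upper confidence limits in columns 4 and 5:

# Column positions are an assumption borrowed from the simulation code below.
width_sp <- apply(fit_sp$coefs[, c(4, 5)], 1, diff)  # CI width per coefficient
width_np <- apply(fit_np$coefs[, c(4, 5)], 1, diff)
round(cbind(semiparametric = width_sp, nonparametric = width_np), 3)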
#### Get predictions and inference for the log odds ratio at observations
predict(fit_sp, data[1:10, c("X1", "X2")])
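The predictions are reported on the log odds ratio scale. A minimal sketch of moving to the odds ratio scale; the column names below are hypothetical, so inspect the returned object first:

preds <- predict(fit_sp, data[1:10, c("X1", "X2")])
str(preds)  # confirm which columns hold the estimate and CI limits
# Exponentiate the log-OR estimate and CI columns (names here are assumptions):
# exp(preds[, c("estimate", "lower_CI", "upper_CI")])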
# Specify formula for the log odds ratio
logOR_formula <- ~ 1 + X1 + X2
# glm formula for P(A = 1 | W, Delta = 1)
glm_formula_A <- ~ 1 + X1 + X2 + X1 * X2
# glm formula for P(Y = 1 | A = 0, W, Delta = 1)
glm_formula_Y0W <- ~ 1 + X1 + X2 + X1 * X2

# Fit the odds ratio model with glm-based nuisance estimators
fit_glm <- spOR(
  logOR_formula,
  W = data[, c("X1", "X2")], A = data$A, Y = data$Y, Delta = data$Delta,
  glm_formula_A = glm_formula_A,
  glm_formula_Y0W = glm_formula_Y0W
)
#### Coefficients
# print(coef(fit_glm))

#### Summarize coefficients
print(summary(fit_glm))

#### Get predictions and inference for the log odds ratio at observations
predict(fit_glm, data[1:10, c("X1", "X2")])
passes1 <- c()
passes2 <- c()
library(doMC)

for (i in 1:1000) {
  print(i)
  n <- 5000
  X1 <- runif(n, min = -1, max = 1)
  X2 <- runif(n, min = -1, max = 1)
  A <- rbinom(n, size = 1, prob = plogis(X1 + X2))
  Y <- rbinom(n, size = 1, prob = plogis(-1 + A + A * (X1 + X2) + X1 + X2 + X1 * X2))
  Delta <- rbinom(n, size = 1, prob = 1 - 0.3 * plogis(X1 + X2))
  data <- data.frame(X1, X2, A, Delta, Y = Y, DeltaY = Delta * Y)
  true <- c(1, 1, 1)

  # Fit with outcome missingness
  fit <- spOR(
    ~ 1 + X1 + X2,
    W = data[, c("X1", "X2")], A = data$A, Y = data$DeltaY, Delta = data$Delta,
    num_knots_Y0W = c(5, 3, 0), max_degree_Y0W = 2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(5, 2), fit_control = list(parallel = TRUE)
    ),
    parallel = TRUE, ncores = 4
  )
  ci <- fit$coefs
  passes1 <- cbind(passes1, ci[, 4] <= true & ci[, 5] >= true)
  print(rowMeans(passes1))

  # Fit with fully observed outcomes
  fit <- spOR(
    ~ 1 + X1 + X2,
    W = data[, c("X1", "X2")], A = data$A, Y = data$Y, Delta = rep(1, n),
    num_knots_Y0W = c(5, 3, 0), max_degree_Y0W = 2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(5, 2), fit_control = list(parallel = TRUE)
    ),
    parallel = TRUE, ncores = 4
  )
  ci <- fit$coefs
  passes2 <- cbind(passes2, ci[, 4] <= true & ci[, 5] >= true)
  print(rowMeans(passes2))
}
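After the loop finishes, `passes1` and `passes2` are logical matrices with one column per replication, so row means give Monte Carlo coverage. A small sketch (assumes the loop above has run) adding a rough binomial standard error to the coverage estimates:

B <- ncol(passes1)                            # number of completed replications
coverage <- rowMeans(passes1)                 # empirical coverage per coefficient
mc_se <- sqrt(coverage * (1 - coverage) / B)  # Monte Carlo standard error
round(cbind(coverage, mc_se), 3)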
# Average confidence-interval widths across simulation replications
rowMeans(ci)
rowMeans(ci1)
rowMeans(ci2)
library(simcausal)

ci <- c(); ci1 <- c(); ci2 <- c()
passes <- c(); passesb <- c(); passesc <- c()
passes1 <- c()
ests <- c(); ests1 <- c()

for (i in 1:200) {
  print(i)
  D <- DAG.empty()
  D <- D +
    node("W1", distr = "runif", min = -1, max = 1) +
    node("W2", distr = "runif", min = -1, max = 1) +
    node("Tfull", distr = "rweibull", shape = 4, scale = exp(1)) +
    node("T", distr = "rconst", const = min(Tfull / 2, 5)) +
    node("A", distr = "rbinom", size = 1, prob = plogis(0.7 * (W1 + W2 + T - 1))) +
    node("R", distr = "rbinom", size = 1, prob = 1) +
    node("mu", distr = "rconst",
         const = plogis(0.8 * (-1.25 + A + T + 0.8 * (T + W1 + W2 - W1 * W2 + T * W2 + T * W1)))) +
    node("J", distr = "rbinom", size = 1, prob = mu) +
    node("G", distr = "rconst", const = 1 - 0.25 * plogis(W1 + W2)) +
    node("Delta", distr = "rbinom", size = 1, prob = 1) +
    node("Jobs", distr = "rconst", const = J * Delta * R)
  setD <- set.DAG(D)

  # data <- sim(setD, n = 250000)
  # fit_glm <- glm(Jobs ~ T + W1 + W2 + A + A*T, family = binomial(),
  #                data = data[data$Delta == 1, ])
  # coef(fit_glm)

  data <- sim(setD, n = 150)
  print(quantile(data$mu))
  table(data$Delta)
  true <- 0.8 * c(1)

  library(doMC)
  registerDoMC(10)

  fit <- spOR(
    ~ 1,
    W = data[, c("W1", "W2", "T")], A = data$A, Y = data$Jobs,
    data = data, Delta = data$Delta,
    num_knots_Y0W = c(20, 10, 0), max_degree_Y0W = 2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(20, 10, 2),
      fit_control = list(parallel = TRUE, n_folds = 20)
    ),
    fit_control = list(parallel = TRUE, n_folds = 20),
    targeting_method = "universal", smoothness_order_Y0W = 1,
    fit_Q0W_separate = FALSE
  )
  tmpci <- fit$coefs
  print(summary(fit))
  passes <- cbind(passes, tmpci[, 4] <= true & tmpci[, 5] >= true)

  fit <- spOR(
    ~ 1,
    W = data[, c("W1", "W2", "T")], A = data$A, Y = data$Jobs,
    data = data, Delta = data$Delta,
    num_knots_Y0W = c(20, 10, 0), max_degree_Y0W = 2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(20, 10, 2),
      fit_control = list(parallel = TRUE, n_folds = 10)
    ),
    fit_control = list(parallel = TRUE, n_folds = 10),
    targeting_method = "universal", smoothness_order_Y0W = 1
  )
  tmpci <- fit$coefs
  print(summary(fit))
  passes1 <- cbind(passes1, tmpci[, 4] <= true & tmpci[, 5] >= true)

  print(rowMeans(passes))
  print(rowMeans(passes1))
  ########
}
# ci <- c(); ci1 <- c(); ci2 <- c()
# passes <- c(); passesb <- c(); passesc <- c()
# passes1 <- c()
# ests <- c(); ests1 <- c()

for (i in 1:100) {
  print(i)
  n <- 5000
  D <- DAG.empty()
  D <- D +
    node("W1", distr = "runif", min = -1, max = 1) +
    node("W2", distr = "runif", min = -1, max = 1) +
    node("Tfull", distr = "rweibull", shape = 4, scale = exp(1)) +
    node("T", distr = "rconst", const = min(Tfull / 2, 5)) +
    node("A", distr = "rbinom", size = 1, prob = plogis(0.5 * (W1 + W2 + T - 1))) +
    node("R", distr = "rbinom", size = 1, prob = 1) +
    node("mu", distr = "rconst",
         const = plogis(0.8 * (-1.25 + A - 1 * A * T + 0.8 * (T + W1 + W2 - W1 * W2 + T * W2 + T * W1)))) +
    node("J", distr = "rbinom", size = 1, prob = mu) +
    node("G", distr = "rconst", const = 1 - 0.3 * plogis(W1 + W2)) +
    node("Delta", distr = "rbinom", size = 1, prob = G) +
    node("Jobs", distr = "rconst", const = J * Delta * R)
  setD <- set.DAG(D)

  # data <- sim(setD, n = 250000)
  # fit_glm <- glm(Jobs ~ T + W1 + W2 + A + A*T, family = binomial(),
  #                data = data[data$Delta == 1, ])
  # coef(fit_glm)

  data <- sim(setD, n = n)
  print(quantile(data$mu))
  table(data$Delta)
  true <- 0.8 * c(1, -1)

  library(doMC)
  registerDoMC(10)

  # Semiparametric fit with HAL nuisance estimators
  fit <- spOR(
    ~ 1 + T,
    W = data[, c("W1", "W2", "T")], A = data$A, Y = data$Jobs, Delta = data$Delta,
    num_knots_Y0W = c(15, 10, 0), max_degree_Y0W = 2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(10, 5, 2),
      fit_control = list(parallel = TRUE, n_folds = 10)
    ),
    fit_control = list(parallel = TRUE, n_folds = 10),
    targeting_method = "universal", smoothness_order_Y0W = 1,
    fit_Q0W_separate = FALSE
  )
  tmpci <- fit$coefs
  print(tmpci)
  ests <- cbind(ests, fit$coefs[, 1])
  ci <- cbind(ci, apply(tmpci[, c(4, 5)], 1, diff))
  passes <- cbind(passes, tmpci[, 4] <= true & tmpci[, 5] >= true)
  print(rowMeans(passes))

  ##########
  # Semiparametric fit with a glm nuisance for P(Y = 1 | A = 0, W)
  fit <- spOR(
    ~ 1 + T,
    W = data[, c("W1", "W2", "T")], A = data$A, Y = data$Jobs, Delta = data$Delta,
    glm_formula_Y0W = ~ .^2,
    sl3_learner_A = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(10, 5, 2),
      fit_control = list(parallel = TRUE, n_folds = 10)
    )
  )
  tmpci <- fit$coefs
  passesb <- cbind(passesb, tmpci[, 4] <= true & tmpci[, 5] >= true)
  print(rowMeans(passesb))

  ########
  # Nonparametric fit
  fit <- npOR(
    ~ 1 + T,
    W = data[, c("W1", "W2", "T")], A = data$A, Y = data$Jobs, Delta = data$Delta,
    sl3_learner_default = Lrnr_hal9001_custom$new(
      max_degree = 2, num_knots = c(10, 5, 0), fit_control = list(parallel = TRUE)
    )
  )
  tmpci <- fit$coefs
  print(tmpci)
  ci2 <- cbind(ci2, apply(tmpci[, c(4, 5)], 1, diff))
  passesc <- cbind(passesc, tmpci[, 4] <= true & tmpci[, 5] >= true)
  print(rowMeans(passesc))

  ########
  # Complete-case glm for comparison
  fit_glm <- glm(Jobs ~ T + W1 + W2 + A + A * T, family = binomial(),
                 data = data[data$Delta == 1, ])
  tmpci1 <- confint(fit_glm)[5:6, , drop = FALSE]
  ci1 <- cbind(ci1, apply(tmpci1, 1, diff))
  ests1 <- cbind(ests1, coef(fit_glm)[5:6])
  passes1 <- cbind(passes1, tmpci1[, 1] <= true & tmpci1[, 2] >= true)
  print(rowMeans(passes1))
}