knitr::opts_chunk$set(echo = TRUE)
library(ISLR)
library(xgboost)
library(tidyverse)
library(Metrics)

# Data
df <- ISLR::Hitters %>%
  select(Salary, AtBat, Hits, HmRun, Runs, RBI, Walks, Years, CAtBat, CHits,
         CHmRun, CRuns, CRBI, CWalks, PutOuts, Assists, Errors)
df <- df[complete.cases(df), ]
train <- df[1:150, ]
test <- df[151:nrow(df), ]

# XGBoost matrices
dtrain <- xgb.DMatrix(data = as.matrix(train[, -1]), label = as.matrix(train[, 1]))
dtest <- xgb.DMatrix(data = as.matrix(test[, -1]), label = as.matrix(test[, 1]))
watchlist <- list(eval = dtest)

# Custom objective function (squared error): gradient and Hessian of
# 0.5 * (pred - label)^2 with respect to the prediction
myobjective <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  grad <- preds - labels
  hess <- rep(1, length(labels))
  list(grad = grad, hess = hess)
}

# Custom evaluation metric (mean squared error)
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- (preds - labels)^2
  list(metric = "MyError", value = mean(err))
}

# Model parameters (the seed is set via set.seed(), not an xgboost parameter)
set.seed(2020)
param1 <- list(booster = "gbtree",
               learning_rate = 0.1,
               objective = myobjective,
               eval_metric = evalerror)

# Train model
xgb1 <- xgb.train(params = param1,
                  data = dtrain,
                  nrounds = 500,
                  watchlist = watchlist,
                  maximize = FALSE,
                  early_stopping_rounds = 5)

# Predict
pred1 <- predict(xgb1, dtest)
mae1 <- mae(test$Salary, pred1)

## XGB model with the standard loss/metric

# Model parameters
set.seed(2020)
param2 <- list(booster = "gbtree",
               learning_rate = 0.1,
               objective = "reg:squarederror")

# Train model
xgb2 <- xgb.train(params = param2,
                  data = dtrain,
                  nrounds = 500,
                  watchlist = watchlist,
                  maximize = FALSE,
                  early_stopping_rounds = 5)

# Predict
pred2 <- predict(xgb2, dtest)
mae2 <- mae(test$Salary, pred2)
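# Quick sanity check (not in the original chunk): the custom objective should
# reproduce the built-in squared-error model, so the two test-set MAEs should
# be close.
cat(sprintf("MAE (custom objective):  %.2f\n", mae1))
cat(sprintf("MAE (reg:squarederror): %.2f\n", mae2))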
# DONT CHANGE
library(data.table)
library(kableExtra)

set.seed(123456)
n <- 5
W1 <- rt(n, df = 5)
pi0 <- plogis(W1)
A <- rbinom(n, 1, pi0)            # treatment must be drawn before pi is formed
pi <- ifelse(A == 1, pi0, 1 - pi0)
mu0 <- 0.35 + 0.65 * plogis(W1 - 2)
mu1 <- mu0 + plogis(2 * W1 + 2) - plogis(W1 - 2) - 0.349
mu <- ifelse(A == 1, mu1, mu0)
Y <- rbinom(n, 1, mu)

theta <- W1
betas <- seq(-2, 2, length = 100)
risks <- sapply(betas, function(a) {
  theta <- a * theta
  loss <- (mu1 + mu0 + 1/pi * (Y - mu)) * log(1 + exp(theta)) -
    (mu1 + A/pi0 * (Y - mu1)) * theta
  mean(loss, na.rm = TRUE)
})

weights <- round(mu1 + mu0 + 1/pi * (Y - mu), 2)
outcome <- round((mu1 + A/pi0 * (Y - mu1)) / weights, 2)
quantile(weights)
quantile(outcome)
plot(betas, risks)

dat <- data.frame(Covariate = round(W1, 3), Weight = weights, Outcome = outcome)
dat[1:5, ]
kab <- kableExtra::kable(dat, booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position", "scale_down"))
kab
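# Small follow-up diagnostic (not in the original): report the coefficient
# minimizing the empirical risk curve plotted above, to make the plot easier
# to read.
beta_min <- betas[which.min(risks)]
cat(sprintf("Risk-minimizing beta: %.3f\n", beta_min))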
library(sl3)
library(data.table)

DR_learner <- function(CATE_library, W, A, Y, EY1W, EY0W, pA1W, lrnr_A, lrnr_Y) {
  # If nuisance estimates are not supplied, fit them with the given learners.
  if (missing(EY1W)) {
    data <- data.table(W, A, Y)
    covariates <- colnames(W)
    task_A <- sl3_Task$new(data, covariates = covariates, outcome = "A")
    task_Y <- sl3_Task$new(data, covariates = c(covariates, "A"), outcome = "Y")
    # Counterfactual tasks with treatment set to 1 and to 0
    data1 <- copy(data); data1$A <- 1
    task_Y1 <- sl3_Task$new(data1, covariates = c(covariates, "A"), outcome = "Y")
    data0 <- copy(data); data0$A <- 0
    task_Y0 <- sl3_Task$new(data0, covariates = c(covariates, "A"), outcome = "Y")
    lrnr_Y <- lrnr_Y$train(task_Y)
    lrnr_A <- lrnr_A$train(task_A)
    pA1W <- lrnr_A$predict(task_A)
    EY1W <- lrnr_Y$predict(task_Y1)
    EY0W <- lrnr_Y$predict(task_Y0)
    # Truncate the propensity score away from 0 and 1
    pA1W <- pmax(pmin(pA1W, 0.98), 0.02)
  }
  EY <- ifelse(A == 1, EY1W, EY0W)
  pA0 <- 1 - pA1W
  # AIPW (doubly robust) pseudo-outcome for the CATE
  Ytilde <- EY1W - EY0W + (A/pA1W - (1 - A)/pA0) * (Y - EY)
  data <- data.table(W, Y = Ytilde)
  task_onestep <- sl3_Task$new(data, covariates = colnames(W), outcome = "Y")
  # Regress the pseudo-outcome on covariates with each candidate learner
  lrnr <- Stack$new(CATE_library)
  lrnr_1 <- lrnr$train(task_onestep)
  CATEonestep <- lrnr_1$predict(task_onestep)
  return(CATEonestep)
}
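# A minimal, self-contained usage sketch (hypothetical `_demo` objects and
# learner choices are illustrative assumptions, not part of the original
# analysis). Here the nuisance estimates are fit internally.
set.seed(1)
n_demo <- 500
W_demo <- data.frame(W1 = rnorm(n_demo))
A_demo <- rbinom(n_demo, 1, plogis(W_demo$W1))
Y_demo <- rbinom(n_demo, 1, plogis(W_demo$W1 + 0.5 * A_demo))
cate_demo <- DR_learner(
  CATE_library = Stack$new(Lrnr_glm$new(), Lrnr_ranger$new(min.node.size = 30)),
  W = W_demo, A = A_demo, Y = Y_demo,
  lrnr_A = Lrnr_glm$new(), lrnr_Y = Lrnr_ranger$new()
)
head(cate_demo)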
set.seed(12345)
library(sl3)
library(ggplot2)

n <- 1500
W1 <- rt(n, df = 5)
pi0 <- plogis(W1)
A <- rbinom(n, 1, pi0)
mu0 <- 0.35 + 0.65 * plogis(W1 - 2)
mu1 <- mu0 + plogis(2 * W1 + 2) - plogis(W1 - 2) - 0.349
# Alternative (unused) outcome regressions kept from earlier drafts:
# mu0 <- plogis(W1 - 2); mu1 <- plogis(2 * W1 + 2)
# mu1 <- pmax(mu0 + plogis(2 * W1 + 2) - plogis(W1 - 2) - 0.35, 0.001)
mu <- ifelse(A == 1, mu1, mu0)
Y <- rbinom(n, 1, mu)
trueCATE <- mu1 - mu0

# Stack of random forests of increasing depth
lrnr_stack <- Stack$new(
  Lrnr_ranger$new(min.node.size = 30, max.depth = 1),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 2),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 3),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 4),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 5),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 6),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 7),
  Lrnr_ranger$new(min.node.size = 30, max.depth = 8)
)
lrnr <- Lrnr_sl$new(lrnr_stack, metalearner = Lrnr_cv_selector$new())

# Cross-fitted nuisance estimates
initial_likelihood <- npcausalML:::estimate_initial_likelihood(
  W = as.data.frame(W1), A, Y, weights = rep(1, n), lrnr, lrnr, folds = 10)
EY1W_est <- initial_likelihood$EY1W
EY0W_est <- initial_likelihood$EY0W
pA1W_est <- initial_likelihood$pA1W
EY1W_est_full <- initial_likelihood$EY1W_full
EY0W_est_full <- initial_likelihood$EY0W_full

# T-learner predictions (full-sample and cross-validated fits)
T_learner <- initial_likelihood$internal$sl3_Learner_EYAW_trained$fit_object$full_fit$fit_object$learner_fits$Stack
T_learner_preds <- T_learner$predict(initial_likelihood$internal$task_EY1W) -
  T_learner$predict(initial_likelihood$internal$task_EY0W)
T_learner <- initial_likelihood$internal$sl3_Learner_EYAW_trained$fit_object$cv_fit
T_learner_preds_cv <- T_learner$predict(initial_likelihood$internal$task_EY1W) -
  T_learner$predict(initial_likelihood$internal$task_EY0W)

# AIPW pseudo-outcome
pseudo_outcome <- EY1W_est - EY0W_est +
  (2 * A - 1) * (1 / ifelse(A == 1, pA1W_est, 1 - pA1W_est)) *
  (Y - ifelse(A == 1, EY1W_est, EY0W_est))

ggplot(data.frame(X = pseudo_outcome)) +
  geom_density(aes(x = X)) +
  scale_x_continuous(limits = c(-5, 5), breaks = seq(-6, 6, length = 13)) +
  ggtitle("Density of observed pseudo-outcome values")
ggsave("HistogramDR.pdf")
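# Sanity check (not in the original): the AIPW pseudo-outcome has conditional
# mean equal to the CATE, so its sample mean should approximate the ATE.
cat(sprintf("Mean pseudo-outcome: %.3f; mean true CATE: %.3f\n",
            mean(pseudo_outcome), mean(trueCATE)))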
library(stringr)
library(data.table)

# DR-learner benchmarks (cross-validated and full-sample fits)
CATEonestepbench_cv <- DR_learner(Lrnr_cv$new(lrnr_stack), as.data.frame(W1), A, Y,
                                  EY1W_est, EY0W_est, pA1W_est, NULL, NULL)
CATEonestepbench <- DR_learner(lrnr_stack, as.data.frame(W1), A, Y,
                               EY1W_est, EY0W_est, pA1W_est, NULL, NULL)

# EP-learner over a grid of Fourier sieve bases
fit_npcausalML <- EP_learn(
  lrnr_stack, V = as.data.frame(W1), A = A, Y = Y,
  EY1W = EY1W_est, EY0W = EY0W_est, pA1W = pA1W_est,
  sieve_basis_generator_list = list(
    fourier_basis$new(c(1, 0)), fourier_basis$new(c(2, 0)),
    fourier_basis$new(c(3, 0)), fourier_basis$new(c(4, 0)),
    fourier_basis$new(c(5, 0))),
  EP_learner_spec = EP_learner_spec_CATE, cross_validate = TRUE, nfolds = 10)

# Efficient cross-validated risk for a candidate CATE estimate
risk_fun <- function(theta) {
  loss <- efficient_loss_function_CATE(data.frame(W1), theta, A, Y,
                                       EY1W_est, EY0W_est, pA1W_est)
  mean(loss)
}
# Oracle alternative:
# risk_fun <- function(theta) mean((theta - trueCATE)^2)

# Stack predictions from all three methods in long format
lrnr_names <- colnames(fit_npcausalML$cv_predictions)
data <- data.table(CATE = unlist(fit_npcausalML$full_predictions),
                   CATE_cv = unlist(fit_npcausalML$cv_predictions),
                   lrnr = rep(lrnr_names, each = n), Method = "EP")
data <- rbind(data,
              data.table(CATE = unlist(CATEonestepbench),
                         CATE_cv = unlist(CATEonestepbench_cv),
                         lrnr = rep(names(CATEonestepbench_cv), each = n),
                         Method = "DR"))
data <- rbind(data,
              data.table(CATE = unlist(T_learner_preds),
                         CATE_cv = unlist(T_learner_preds_cv),
                         lrnr = rep(names(T_learner_preds), each = n),
                         Method = "T-Learner"))
data$sieve <- str_match(data$lrnr, "fourier_basis_([0-9])")[, 2]
data$depth <- str_match(data$lrnr, "ranger_500_TRUE_none_1_30_([0-9])")[, 2]
data$lrnr <- NULL

# CV-select the sieve dimension per depth, then the depth per method
data[, cv_risks := risk_fun(CATE_cv), by = c("Method", "sieve", "depth")]
data[, best_sieve := sieve[which.min(cv_risks)], by = c("Method", "depth")]
data$best_sieve[data$Method == "DR"] <- "DR"
data$best_sieve[data$Method == "T-Learner"] <- "T-Learner"
data <- data[sieve == best_sieve | Method %in% c("DR", "T-Learner"), ]
data[, best_depth_cv := depth[which.min(cv_risks)], by = c("Method")]
data_cv <- data[depth == best_depth_cv, c("CATE", "Method", "depth"), with = FALSE]
data_cv$depth <- "CV-Selected Max Depth"
# The T-learner stack's depth labels are off by one
data[Method == "T-Learner", depth := as.character(as.numeric(depth) - 1)]
data$depth <- paste0("Max Depth = ", data$depth)
data <- rbind(data[, c("CATE", "Method", "depth"), with = FALSE], data_cv)

# Append the true CATE as a reference curve
data_true <- data[Method == "DR"]
data_true$CATE <- rep(trueCATE, length(lrnr_stack$params$learners) + 1)
data_true$Method <- "Truth"
data <- rbind(data, data_true)
colnames(data) <- c("CATE_est", "Method", "Tuning")

# MSEs against the true CATE (methods are still labeled "DR" and "EP" here)
mean((trueCATE - (EY1W_est_full - EY0W_est_full))^2)
mean((trueCATE - data_cv$CATE[data_cv$Method == "DR"])^2)
mean((trueCATE - data_cv$CATE[data_cv$Method == "EP"])^2)

data <- as.data.frame(data)
data$Method[data$Method == "EP"] <- "EP-Learner"
data$Method[data$Method == "DR"] <- "DR-Learner"
data$X <- W1
data_1 <- data[data$Tuning %in% paste0("Max Depth = ", c(1, 2, 4, 7)), ]
data_cv <- data[data$Tuning == "CV-Selected Max Depth", ]

library(ggplot2)

# Helper for the faceted per-depth CATE plots used repeatedly below
plot_cate <- function(df) {
  ggplot(df) +
    geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1), -1),
                  color = Method, linetype = Method, size = Method)) +
    scale_x_continuous(limits = c(-3, 3)) +
    labs(x = "Covariate (W)", y = "CATE") +
    scale_y_continuous(limits = c(-1, 1)) +
    theme_bw() +
    facet_grid(~ Tuning) +
    scale_colour_manual(values = c("red", "blue", "black")) +
    theme(legend.position = "none") +
    scale_linetype_manual(values = c("dashed", "dotdash", "solid")) +
    scale_size_manual(values = c(0.5, 0.75, 0.4))
}

p <- plot_cate(data_1[data_1$Method != "T-Learner", ])
ggsave(file = "boundViol.pdf", plot = p, width = 9, height = 4)
p <- plot_cate(data_1[data_1$Method != "EP-Learner", ])
ggsave(file = "boundViolNoEP.pdf", plot = p, width = 9, height = 4)

# Single-depth panels (same plot, one depth at a time)
width <- 4
height <- 7
for (d in c(1, 2, 4, 7)) {
  p <- plot_cate(data_1[data_1$Method != "EP-Learner" &
                          data_1$Tuning == paste0("Max Depth = ", d), ])
  ggsave(file = paste0("boundViolNoEP_", d, ".pdf"), plot = p,
         width = width, height = height)
}

# Shared legend/axis styling for the CV-selected plots
legend_theme <- theme(legend.key.size = unit(1.5, "cm"),
                      legend.key.height = unit(1.5, "cm"),
                      legend.key.width = unit(1.5, "cm"),
                      legend.title = element_blank(),
                      legend.text = element_text(size = 13),
                      axis.text = element_text(size = 15),
                      axis.title = element_text(size = 20))

# CV-selected depth: DR-learner and truth only
p <- ggplot(data_cv[data_cv$Method != "EP-Learner", ]) +
  geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1.1), -1.1),
                color = Method, linetype = Method), size = 1) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "Covariate (W)", y = "CATE") +
  theme_bw() +
  scale_colour_manual(values = c("red", "blue", "black")) +
  theme(legend.margin = margin(t = 0, r = 1, b = -1.5, l = 1, unit = "cm")) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  facet_grid(~ Tuning) +
  legend_theme +
  scale_linetype_manual(values = c("dashed", "dotdash", "solid"))
ggsave(file = "boundViol_cv_noEP.pdf", plot = p, width = 9, height = 5)

# CV-selected depth: all methods, including the T-learner
p <- ggplot(data_cv) +
  geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1.1), -1.1),
                color = Method, linetype = Method), size = 1) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "Covariate (W)", y = "CATE") +
  theme_bw() +
  scale_colour_manual(values = c("red", "blue", "darkgreen", "black")) +
  theme(legend.margin = margin(t = 0, r = 1, b = -1.5, l = 1, unit = "cm")) +
  facet_grid(~ Tuning) +
  legend_theme +
  scale_linetype_manual(values = c("twodash", "dotdash", "dotted", "solid"))
ggsave(file = "boundViol_cv_T.pdf", plot = p, width = 9, height = 5)

# CV-selected depth: drop the T-learner and overlay the covariate density
data_cv <- data_cv[data_cv$Method != "T-Learner", ]
p <- ggplot(data_cv) +
  geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1.1), -1.1),
                color = Method, linetype = Method), size = 1) +
  geom_density(aes(x = X, y = after_stat(density)),
               color = NA, fill = "grey", alpha = 0.3) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "Covariate (W)", y = "CATE") +
  theme_bw() +
  scale_colour_manual(values = c("red", "blue", "black")) +
  theme(legend.margin = margin(t = 0, r = 1, b = -1.5, l = 1, unit = "cm")) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  facet_grid(~ Tuning) +
  legend_theme +
  scale_linetype_manual(values = c("dashed", "dotdash", "solid"))
ggsave(file = "boundViol_cv.pdf", plot = p, width = 9, height = 5)
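# Numeric companion to the "boundViol" figures (not in the original): how
# often does each method's CATE estimate leave the feasible range [-1, 1]
# for a bounded outcome?
as.data.table(data)[, mean(CATE_est < -1 | CATE_est > 1), by = "Method"]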
# Per-learner CV risks, broken out by sieve dimension and tree depth
sieve <- str_match(lrnr_names, "fourier_basis_([0-9])")[, 2]
depth <- str_match(lrnr_names, "ranger_500_TRUE_none_1_30_([0-9])")[, 2]
data <- data.table(CATE = unlist(fit_npcausalML$cv_predictions),
                   sieve = rep(sieve, each = n),
                   depth = rep(depth, each = n))
risks <- data[, risk_fun(CATE), by = c("depth", "sieve")]
risks
# Sieve dimension minimizing the CV risk for each depth
sieve_dim <- risks[, sieve[which.min(V1)], by = c("depth")]
fit_npcausalML$full_predictions[, str_detect(lrnr_names, paste0("fourier_basis_", sieve_dim[[2]])) &
                                  str_detect(lrnr_names, paste0("ranger_500_TRUE_none_1_30_", sieve_dim[[1]]))]
# Overall CV-selected sieve dimension and depth
risks[, sieve[which.min(V1)]]
risks[, depth[which.min(V1)]]
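# The chunks below predict on `data_big`, which is never constructed in this
# file. The following is an assumed reconstruction (not from the original
# source): a dense evaluation grid over W1 carrying the true CATE implied by
# the data-generating process above, plus a placeholder outcome column so it
# can be wrapped in an sl3_Task.
data_big <- data.frame(W1 = seq(-4, 4, length.out = 1000))
data_big$CATE <- plogis(2 * data_big$W1 + 2) - plogis(data_big$W1 - 2) - 0.349
data_big$zeta0 <- 0  # placeholder outcome for sl3_Task construction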
# Cross-fitted nuisance estimates using the super learner `lrnr` from above
data <- as.data.frame(cbind(W1, A, Y))
taskY <- sl3_Task$new(data, c("W1"), "Y")
taskA <- sl3_Task$new(data, c("W1"), "A")
lrnr_Y1 <- lrnr$train(taskY[which(A == 1)])
lrnr_Y0 <- lrnr$train(taskY[which(A == 0)])
mu1 <- lrnr_Y1$predict_fold(taskY, "validation")
mu0 <- lrnr_Y0$predict_fold(taskY, "validation")
pi0 <- lrnr$train(taskA)$predict_fold(taskA, "validation")
mu <- ifelse(A == 1, mu1, mu0)

# T-learner CATE on the evaluation grid
task_big <- sl3_Task$new(data_big, c("W1"), "zeta0")
CATE_T <- lrnr_Y1$predict(task_big) - lrnr_Y0$predict(task_big)

# Fourier sieve design, signed by treatment arm
X <- (2 * A - 1) * cbind(1, sin(W1), cos(W1), sin(2 * W1), cos(2 * W1))
colnames(X) <- paste0("W", seq_len(ncol(X)))
X1 <- (2 * 1 - 1) * cbind(1, sin(W1), cos(W1), sin(2 * W1), cos(2 * W1))
colnames(X1) <- paste0("W", seq_len(ncol(X)))
X0 <- (2 * 0 - 1) * cbind(1, sin(W1), cos(W1), sin(2 * W1), cos(2 * W1))
colnames(X0) <- paste0("W", seq_len(ncol(X)))

# Inverse-propensity-weighted GLM on the sieve basis, with the outcome
# regression entering as an offset (the EP-learner targeting step)
data <- data.frame(W1, A, Y, mu, weights = 1 / ifelse(A == 1, pi0, 1 - pi0))
data1 <- data.frame(W1, A = 1, Y, mu = mu1, weights = 1 / ifelse(A == 1, pi0, 1 - pi0))
data0 <- data.frame(W1, A = 0, Y, mu = mu0, weights = 1 / ifelse(A == 1, pi0, 1 - pi0))
data <- cbind(data, as.data.frame(X))
data0 <- cbind(data0, as.data.frame(X0))
data1 <- cbind(data1, as.data.frame(X1))
taskY0 <- sl3_Task$new(data0, c(colnames(X), "A"), "Y", weights = "weights", offset = "mu")
taskY1 <- sl3_Task$new(data1, c(colnames(X), "A"), "Y", weights = "weights", offset = "mu")
taskY <- sl3_Task$new(data, c(colnames(X), "A"), "Y", weights = "weights", offset = "mu")
lrnr_glm <- Lrnr_glm$new(family = gaussian())
lrnr_glm <- lrnr_glm$train(taskY)
mu0n <- lrnr_glm$predict(taskY0)
mu1n <- lrnr_glm$predict(taskY1)
mun <- ifelse(A == 1, mu1n, mu0n)
# Empirical score equations; both should be approximately zero
colMeans(X / ifelse(A == 1, pi0, 1 - pi0) * (Y - mun))
mean(A / ifelse(A == 1, pi0, 1 - pi0) * (Y - mun))

# Plug-in EP pseudo-outcome and oracle DR (AIPW) pseudo-outcome
EP_outcome <- mu1n - mu0n
zeta0 <- mu1 - mu0 + (2 * A - 1) / ifelse(A == 1, pi0, 1 - pi0) * (Y - mu)
data <- data.frame(W1, zeta0, EP_outcome)

# Quantile bins of W1 for the stratified-mean ("histogram") learner
bins <- findInterval(W1, quantile(W1, seq(0, 1, length = n / 50)), all.inside = TRUE)
data$bins <- bins
data_big$bins <- findInterval(data_big$W1, quantile(W1, seq(0, 1, length = n / 50)),
                              all.inside = TRUE)
task <- sl3_Task$new(data, c("W1", "bins"), "zeta0")
task_big <- sl3_Task$new(data_big, c("W1", "bins"), "zeta0")
lrnr <- Lrnr_stratified$new(Lrnr_mean$new(), "bins")

# CV-selected random forest on the DR pseudo-outcome ...
task <- sl3_Task$new(data, c("W1"), "zeta0")
task_big <- sl3_Task$new(data_big, c("W1"), "zeta0")
lrnr <- Lrnr_sl$new(list(Lrnr_ranger$new(min.node.size = 30, max.depth = 1),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 2),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 3),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 4),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 5),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 6),
                         Lrnr_ranger$new(min.node.size = 30, max.depth = 7)),
                    metalearner = Lrnr_cv_selector$new())
CATE_hist <- lrnr$train(task)$predict(task_big)

# ... and on the EP pseudo-outcome
task <- sl3_Task$new(data, c("W1"), "EP_outcome")
# lrnr <- Lrnr_stratified$new(Lrnr_mean$new(), "bins")
# lrnr <- Lrnr_xgboost$new(max_depth = 3, nrounds = 20)
CATE_hist_EP <- lrnr$train(task)$predict(task_big)

library(ggplot2)

# Per-depth random forest stacks on both pseudo-outcomes
task <- sl3_Task$new(data, c("W1"), "zeta0")
task_big <- sl3_Task$new(data_big, c("W1", "bins"), "zeta0")
lrnr <- Stack$new(Lrnr_ranger$new(min.node.size = 30, max.depth = 1),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 2),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 3),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 4),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 5),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 6),
                  Lrnr_ranger$new(min.node.size = 30, max.depth = 7))
CATE_rf <- lrnr$train(task)$predict(task_big)
task <- sl3_Task$new(data, c("W1"), "EP_outcome")
CATE_rf_EP <- lrnr$train(task)$predict(task_big)

# Collect per-depth predictions into long format
CATE_rf <- as.data.frame(cbind(CATE_hist, CATE_rf))
colnames(CATE_rf) <- c("CV-Selected Max Depth", paste0("Max Depth = ", 1:7))
library(data.table)
CATE_rf <- rbindlist(lapply(seq_len(ncol(CATE_rf)), function(j) {
  data.table(CATE_est = CATE_rf[[j]], Tuning = colnames(CATE_rf)[j])
}))
CATE_rf$Method <- "DR-Learner"

CATE_rf_EP <- as.data.frame(cbind(CATE_hist_EP, CATE_rf_EP))
colnames(CATE_rf_EP) <- c("CV-Selected Max Depth", paste0("Max Depth = ", 1:7))
CATE_rf_EP <- rbindlist(lapply(seq_len(ncol(CATE_rf_EP)), function(j) {
  data.table(CATE_est = CATE_rf_EP[[j]], Tuning = colnames(CATE_rf_EP)[j])
}))
CATE_rf_EP$Method <- "EP-Learner (Ours)"

CATE_rf_T <- data.frame(CATE_est = CATE_T, Tuning = "CV-Selected Max Depth",
                        Method = "T-Learner")
data <- rbind(CATE_rf, CATE_rf_EP, CATE_rf_T)
data <- as.data.frame(data)

# True CATE replicated across the tuning settings
CATE_true <- as.data.frame(CATE_rf)
CATE_true$Method <- "Truth"
CATE_true$CATE_est <- data_big$CATE
CATE_true <- CATE_true[, colnames(CATE_rf)]
data <- rbind(data, CATE_true)
data$X <- data_big$W1
data_1 <- data[data$Tuning %in% paste0("Max Depth = ", c(1, 2, 4, 7)), ]
data_cv <- data[data$Tuning == "CV-Selected Max Depth", ]

# MSE against the truth, by method and tuning
as.data.table(data)[, mean((CATE_est - data_big$CATE)^2), by = c("Method", "Tuning")]

# Per-depth comparison (plot_cate helper defined above)
p <- plot_cate(data_1)
ggsave(file = "boundViol.pdf", plot = p, width = 9, height = 4)

# CV-selected depth, all methods (legend_theme defined above)
p <- ggplot(data_cv) +
  geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1.1), -1.1),
                color = Method, linetype = Method), size = 1) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "Covariate (W)", y = "CATE") +
  theme_bw() +
  scale_colour_manual(values = c("red", "blue", "darkgreen", "black")) +
  theme(legend.margin = margin(t = 0, r = 1, b = -1.5, l = 1, unit = "cm")) +
  facet_grid(~ Tuning) +
  legend_theme +
  scale_linetype_manual(values = c("twodash", "dotdash", "dotted", "solid"))
ggsave(file = "boundViol_cv_T.pdf", plot = p, width = 9, height = 5)

# CV-selected depth without the T-learner
data_cv <- data_cv[data_cv$Method != "T-Learner", ]
p <- ggplot(data_cv) +
  geom_line(aes(x = X, y = pmax(pmin(CATE_est, 1.1), -1.1),
                color = Method, linetype = Method), size = 1) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "Covariate (W)", y = "CATE") +
  theme_bw() +
  scale_colour_manual(values = c("red", "blue", "black")) +
  theme(legend.margin = margin(t = 0, r = 1, b = -1.5, l = 1, unit = "cm")) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  facet_grid(~ Tuning) +
  legend_theme +
  scale_linetype_manual(values = c("dashed", "dotdash", "solid"))
ggsave(file = "boundViol_cv.pdf", plot = p, width = 9, height = 5)
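# Quick comparison (not in the original): the DR pseudo-outcome zeta0 inherits
# extreme values from the inverse weights, while the plug-in EP outcome is a
# GLM contrast and stays in a much narrower range, which suggests why the
# EP-learner is less sensitive to practical positivity violations.
range(zeta0)
range(EP_outcome)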
# NOTE: this chunk appears to predate the reshaping above; it assumes
# CATE_hist_EP and CATE_rf_EP are still per-grid prediction vectors
# (the original referenced `CATE_EP_rf`, which matches no object above).
data <- rbind(
  data.frame(Method = "EP-Learner (Ours)", Learner = "Histogram",
             X = data_big$W1, CATE_true = data_big$CATE, CATE_est = CATE_hist_EP),
  data.frame(Method = "EP-Learner (Ours)", Learner = "Random forests",
             X = data_big$W1, CATE_true = data_big$CATE, CATE_est = CATE_rf_EP),
  data.frame(Method = "T-Learner", Learner = "Histogram",
             X = data_big$W1, CATE_true = data_big$CATE, CATE_est = CATE_T),
  data.frame(Method = "T-Learner", Learner = "Random forests",
             X = data_big$W1, CATE_true = data_big$CATE, CATE_est = CATE_T)
)
p <- ggplot(data) +
  geom_line(aes(x = X, y = CATE_est, color = Method, linetype = Method)) +
  scale_x_continuous(limits = c(-3, 3)) +
  labs(x = "W", y = "CATE") +
  geom_line(aes(x = X, y = CATE_true), color = "black", alpha = 0.4) +
  theme_bw() +
  facet_grid(~ Learner) +
  scale_colour_manual(values = c("blue", "red"))
ggsave(file = "boundViol_T.pdf", plot = p)

# Earlier drafts of the comparison and pseudo-outcome histogram plots, kept
# for reference (the trailing ggsave is commented out as well, since it
# would otherwise save the wrong plot):
# ggplot(data) +
#   geom_line(aes(x = X, y = AIPW_strat, color = "DR-Learner: Binned")) +
#   geom_line(aes(x = X, y = CATE_rf, color = "DR-Learner: random forests")) +
#   geom_smooth(aes(x = X, y = CATE, color = "Truth")) +
#   scale_x_continuous(limits = c(-3, 3)) +
#   labs(x = "W", y = "CATE") +
#   scale_y_continuous(limits = c(-1.5, 2)) +
#   scale_colour_manual("",
#     breaks = c("DR-Learner: Binned", "DR-Learner: random forests", "True CATE"),
#     values = c("red", "blue", "black")) +
#   theme_bw()
#
# ggplot(data.frame(X = data$W1, Y = data$zeta0), aes(x = Y)) +
#   geom_histogram() + theme_bw() +
#   scale_x_continuous(breaks = seq(-6, 6, 1)) +
#   labs(x = "Oracle pseudo-outcome for DR-Learner")
# ggsave(file = "boundViolHistogramOutcome.pdf")
# Quick diagnostics (CATE here is assumed to be a vector of CATE estimates
# left in the workspace by one of the fits above)
plot(W1, CATE)
plot(W1[CATE <= 1 & CATE >= 0], CATE[CATE <= 1 & CATE >= 0])
# Treated units with small propensity scores (near-positivity violations)
keep <- A == 1 & pi0 <= 0.2
sum(keep)
sum(zeta0[keep]) / sum(keep)
quantile(CATE)
library(sl3)

n <- 1500
W1 <- rt(n, df = 7)
W2 <- runif(n, -1, 1)
pi0 <- plogis(2 * W1 * sin(5 * W2))
hist(pi0)
A <- rbinom(n, 1, pi0)
mu0 <- plogis(W2 - 1)
mu1 <- plogis(2 * W2 + 1)
mu <- ifelse(A == 1, mu1, mu0)
Y <- rbinom(n, 1, mu)
data <- data.frame(W1, W2, A, Y)

# Candidate learners; the xgboost pipeline below is immediately superseded by
# the random forest on the following line, matching the original code
lrnr <- Lrnr_cv$new(Stack$new(Lrnr_xgboost$new(max_depth = 3, nrounds = 20),
                              Lrnr_xgboost$new(max_depth = 4, nrounds = 20),
                              Lrnr_xgboost$new(max_depth = 5, nrounds = 20)))
lrnr <- make_learner(Pipeline, lrnr, Lrnr_cv_selector$new())
lrnr <- Lrnr_ranger$new(min.node.size = 30, max.depth = 10)

task_A <- sl3_Task$new(data, c("W1", "W2"), "A")
task_Y <- sl3_Task$new(data, c("W1", "W2", "A"), "Y")
data1 <- data0 <- data
data1$A <- 1; data0$A <- 0
task_Y1 <- sl3_Task$new(data1, c("W1", "W2", "A"), "Y")
task_Y0 <- sl3_Task$new(data0, c("W1", "W2", "A"), "Y")
pin <- lrnr$train(task_A)$predict(task_A)
mun <- lrnr$train(task_Y)$predict(task_Y)
mu1n <- lrnr$train(task_Y)$predict(task_Y1)
mu0n <- lrnr$train(task_Y)$predict(task_Y0)

# Estimated and oracle AIPW pseudo-outcomes
zetan <- mu1n - mu0n + (2 * A - 1) / ifelse(A == 1, pin, 1 - pin) * (Y - mun)
zeta0 <- mu1 - mu0 + (2 * A - 1) / ifelse(A == 1, pi0, 1 - pi0) * (Y - mu)

# Average pseudo-outcome among treated units with small propensity scores
keep <- A == 1 & pi0 <= 0.2
sum(keep)
sum(zetan[keep]) / sum(keep)
sum(zeta0[keep]) / sum(keep)
quantile(pin)
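# Extra positivity diagnostic (not in the original; the 0.05/0.95 thresholds
# are an assumed convention): the share of propensity scores near 0 or 1
# gives a rough sense of how severe the practical positivity violations are.
mean(pin < 0.05 | pin > 0.95)
mean(pi0 < 0.05 | pi0 > 0.95)  # same diagnostic with the true propensities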