# Example code extracted from a 'personalized' package usage vignette.
## ----loadpkg, message=FALSE, warning=FALSE, echo=FALSE------------------------
# Attach the 'personalized' package (subgroup identification / CATE estimation).
library(personalized)
## ----sim_data_1, message = FALSE, warning = FALSE-----------------------------
# NOTE(review): the package is loaded again so this chunk is self-contained
# when run on its own; library() is idempotent, so this is harmless.
library(personalized)
set.seed(1)
n.obs  <- 500  # number of observations
n.vars <- 10   # number of covariates

# Covariate matrix with i.i.d. standard-normal entries.
x <- matrix(rnorm(n.obs * n.vars, sd = 1), nrow = n.obs, ncol = n.vars)

# Non-randomized (confounded) treatment assignment: the true propensity
# score follows a logistic model in covariates 1 and 5.
xbetat   <- 0.5 + 0.25 * x[, 1] - 0.25 * x[, 5]
trt.prob <- exp(xbetat) / (1 + exp(xbetat))
trt      <- rbinom(n.obs, 1, prob = trt.prob)

# True conditional average treatment effect (CATE): a deliberately complex,
# non-linear function of the covariates.
delta <- 2 * (0.25 + x[, 1] * x[, 2] - x[, 3] ^ (-2) * (x[, 3] > 0.35) +
                (x[, 1] < x[, 3]) - (x[, 1] < x[, 2]))

# Main (prognostic) effects g(X), plus the treatment interaction with the
# treatment coded on the +/- 1 scale.
xbeta <- x[, 1] + x[, 2] + x[, 4] - 0.2 * x[, 4] ^ 2 + x[, 5] +
  0.2 * x[, 9] ^ 2
xbeta <- xbeta + delta * (2 * trt - 1) * 0.5

# Continuous outcomes with unit-variance Gaussian noise.
y <- drop(xbeta) + rnorm(n.obs)
## -----------------------------------------------------------------------------
# Propensity-score model fit by cross-fitted cv.glmnet; extra arguments
# reach cv.glmnet through `cv.glmnet.args`. The fold counts are kept small
# here only so the example runs quickly -- use more folds in practice.
prop.func <- create.propensity.function(
  crossfit        = TRUE,
  nfolds.crossfit = 4,
  cv.glmnet.args  = list(type.measure = "auc", nfolds = 3)
)
## -----------------------------------------------------------------------------
# Outcome-augmentation (prognostic) model for a continuous ("gaussian")
# response, also cross-fitted and tuned via cv.glmnet.
aug.func <- create.augmentation.function(
  family          = "gaussian",
  crossfit        = TRUE,
  nfolds.crossfit = 4,
  cv.glmnet.args  = list(type.measure = "mse", nfolds = 3)
)
## -----------------------------------------------------------------------------
# Subgroup-identification model with a lasso-penalized squared-error loss;
# `nfolds` is forwarded to cv.glmnet when tuning the ITR estimate.
subgrp.model.linear <- fit.subgroup(
  x = x, y = y,
  trt = trt,
  propensity.func = prop.func,
  augment.func    = aug.func,
  loss            = "sq_loss_lasso",
  nfolds          = 3  # option for cv.glmnet (for ITR estimation)
)
summary(subgrp.model.linear)
## -----------------------------------------------------------------------------
## Tuning parameters handed through to xgboost:
param <- list(max_depth = 5, eta = 0.01, nthread = 1,
              booster = "gbtree", subsample = 0.623, colsample_bytree = 1)

# Same subgroup model, but the benefit score is estimated flexibly with
# gradient-boosted trees; `nfold`, `params`, `nrounds`, and
# `early_stopping_rounds` are passed on to xgb.cv.
subgrp.model.xgb <- fit.subgroup(
  x = x, y = y,
  trt = trt,
  propensity.func = prop.func,
  augment.func = aug.func,
  ## specify xgboost via the 'loss' argument
  loss = "sq_loss_xgboost",
  nfold = 3,
  params = param, verbose = 0,
  nrounds = 5000, early_stopping_rounds = 50
)
subgrp.model.xgb
## ----eval=FALSE---------------------------------------------------------------
# Bootstrap training/test validation of the linear fit (not evaluated in the
# vignette because B = 100 replications are slow):
# valmod.lin <- validate.subgroup(subgrp.model.linear, B = 100,
# method = "training_test",
# train.fraction = 0.75)
# valmod.lin
## ----eval=FALSE---------------------------------------------------------------
# Same validation, applied to the xgboost-based fit:
# valmod.xgb <- validate.subgroup(subgrp.model.xgb, B = 100,
# method = "training_test",
# train.fraction = 0.75)
# valmod.xgb
## ---- fig.height=10-----------------------------------------------------------
## RMSE (note: this is still on the in-sample data so
## out-of-sample RMSE is preferred to evaluate methods)

# Extract each model's estimated CATE once and reuse it; the original code
# called treatment.effects() separately for the RMSE and again for each plot.
cate.lin <- treatment.effects(subgrp.model.linear)$delta
cate.xgb <- treatment.effects(subgrp.model.xgb)$delta

sqrt(mean((delta - cate.lin) ^ 2))
sqrt(mean((delta - cate.xgb) ^ 2))

# Stack the two calibration plots (true vs. estimated CATE, with the y = x
# reference line). Save the previous graphical parameters and restore them
# afterwards so the global plotting state is not left modified.
oldpar <- par(mfrow = c(2, 1))

plot(delta ~ cate.lin,
     ylab = "True CATE", xlab = "Estimated Linear CATE")
abline(a = 0, b = 1, col = "blue")

plot(delta ~ cate.xgb,
     ylab = "True CATE", xlab = "CATE via xgboost")
abline(a = 0, b = 1, col = "blue")

par(oldpar)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.