Description

import_lift_method is a function that imports a list of functions to serve as a user-defined method for the rpart function. See the examples below for more details on its usage.
Usage

import_lift_method(f_n = NULL)
Arguments

f_n: a function that takes as input the child-node sample sizes of a candidate split and returns a scalar or a vector of the same length. This value is used to weight the competing sub-populations when making the split. See the examples below for a use case. If NULL, the sample size is ignored when comparing splits.
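For illustration only (these particular choices are assumptions, not package defaults), any function of the child-node sample sizes can be supplied, for example the identity or a square root:

# weight competing splits in proportion to the child-node sample sizes
lift_method <- import_lift_method(f_n = function(n) n)
# an illustrative milder alternative: weight by the square root of the sample sizes
lift_method_sqrt <- import_lift_method(f_n = function(n) sqrt(n))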
Details

The rpart function accepts a user-defined list in its method argument. This function imports the list that implements a segment tree. The y variable in the input data.frame must be a two-column matrix whose first column contains the response variable values (either binary or numeric; categorical is not supported) and whose second column is a binary treatment indicator.
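A minimal sketch of the expected input format, mirroring the construction used in the examples below (y is the response vector and Tr a 0/1 treatment indicator):

y_mat <- cbind(y, Tr)                        # column 1: response, column 2: treatment indicator
dat <- data.frame(y = I(y_mat), X1, X2, X3)  # I() keeps the two-column matrix intact as a single y column
fit <- rpart(y ~ ., data = dat, method = import_lift_method())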
Value

A list containing eval, split and init functions.
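A quick way to inspect the returned list (assuming its elements are named after the three functions, as rpart's user-defined split interface expects):

lift_method <- import_lift_method()
names(lift_method)  # expected to include "eval", "split" and "init"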
Examples

## ----generate example file, eval=F, echo=F------------------------------
## knitr::purl("README.Rmd", output = "examples/segmenTree_example.R")
## ---- echo=FALSE--------------------------------------------------------
library(segmenTree)
library(rpart) # rpart(), rpart.control() and prune() are called directly below
## ----generate a dataset-------------------------------------------------
set.seed(1) # vary seed, n and effect_size below to get a sense of the model's performance sensitivity
effect_size <- 0.25
p_x <- function(Tr, X1, X2, X3){
  lp <- 2*X1 + 0.2*X2 + as.numeric(X3)/6
  effect_size + (effect_size/0.125)*Tr*X1^3 +
    (1 - 2*effect_size)*exp(lp)/(1+exp(lp))
}
n <- 10000
Tr <- rbinom(n, 1, 0.3)
X1 <- runif(n, -0.5, 0.5)
X2 <- rnorm(n)
X3 <- factor(sample(LETTERS[1:3], size = n, replace = T))
p <- p_x(Tr, X1, X2, X3)
y <- sapply(p, function(x) rbinom(1, 1, x))
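# pack the response and the treatment indicator into a two-column matrix;
# I() below keeps that matrix as a single y column inside the data.frame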
y_mat <- cbind(y, Tr)
dat <- data.frame(y = I(y_mat), X1, X2, X3)
## ----fit a segment tree-------------------------------------------------
lift_method <- import_lift_method()
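# cp = 0 grows the tree without a complexity penalty;
# minbucket = 1000 requires at least 1000 observations in each terminal node (segment)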
segment_tree <- rpart(y ~ ., data = dat,
                      method = lift_method,
                      control = rpart.control(cp = 0, minbucket = 1000),
                      x = T)
## ----explore resulting tree---------------------------------------------
segment_tree
## ---- warning=F, message=F----------------------------------------------
segments <- extract_segments(segment_tree, alpha = 0.15)
print(segments)
## ----predict treatment effect and compare with actual treatment effect----
tau <- predict(segment_tree, dat)
p_treat <- p_x(rep(1, n), X1, X2, X3)
p_cont <- p_x(rep(0, n), X1, X2, X3)
cate <- p_treat - p_cont
y_lim <- c(min(tau, cate), max(tau, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "segmenTree",
     xlab = "X1", ylab = "true (red) vs predicted (black) lift")
points(dat$X1, cate, col = "red")
points(dat$X1, tau)
## ----prune tree using tunecp, warning=FALSE-----------------------------
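# tune_cp returns a data-driven choice of the complexity parameter,
# which prune() then uses to cut back the full tree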
optimal_cp_cv <- tune_cp(segment_tree)
optimal_cp_cv <- optimal_cp_cv$optimal_cp
pruned_segment_tree <- prune(segment_tree, cp = optimal_cp_cv)
## ----predict treatment effect pruned tree-------------------------------
tau <- predict(pruned_segment_tree, dat)
y_lim <- c(min(tau, cate), max(tau, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "segmenTree",
     xlab = "X1", ylab = "true (red) vs predicted (black) lift")
points(dat$X1, cate, col = "red")
points(dat$X1, tau)
## ----fit segment tree with n weights------------------------------------
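# f_n = function(x) x weights competing splits by the child-node sample sizes (see the f_n argument)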
lift_method <- import_lift_method(f_n = function(x) x)
weighted_segment_tree <- rpart(y ~ ., data = dat,
                               method = lift_method,
                               control = rpart.control(cp = 0, minbucket = 1000),
                               x = T)
## ----predict treatment effect weighted tree-----------------------------
tau <- predict(weighted_segment_tree, dat)
y_lim <- c(min(tau, cate), max(tau, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "segmenTree",
     xlab = "X1", ylab = "true (red) vs predicted (black) lift")
points(dat$X1, cate, col = "red")
points(dat$X1, tau)
## ----compare segmenTree with 2 other approaches-------------------------
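# three panels: the pruned segment tree, a single rpart model that includes Tr as a covariate,
# and two rpart models fit separately to the treatment and control units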
par(mfrow = c(1, 3))
# segmenTree pruned model
tau <- predict(pruned_segment_tree, dat)
p_treat <- p_x(rep(1, n), X1, X2, X3)
p_cont <- p_x(rep(0, n), X1, X2, X3)
cate <- p_treat - p_cont
y_lim <- c(min(tau, cate), max(tau, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "segmenTree",
     xlab = "X1", ylab = "true (red) vs predicted (black) lift")
points(dat$X1, cate, col = "red")
points(dat$X1, tau)
# Model the treatment and covariates jointly
dat2 <- data.frame(y, X1, X2, X3, Tr)
fit2 <- rpart(y ~ ., data = dat2)
dat2_treat <- dat2; dat2_cont <- dat2
dat2_treat$Tr <- 1L; dat2_cont$Tr <- 0L
tau2 <- predict(fit2, dat2_treat) - predict(fit2, dat2_cont)
y_lim <- c(min(tau2, cate), max(tau2, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "regular model",
     xlab = "X1", ylab = "")
points(dat$X1, cate, col = "red")
points(dat$X1, tau2)
# Train a model on the treatment units and a separate model on the control units
dat3_treat <- dat2[dat2$Tr == 1, -5]
dat3_cont <- dat2[dat2$Tr == 0, -5]
fit3_treat <- rpart(y ~ ., data = dat3_treat)
fit3_cont <- rpart(y ~ ., data = dat3_cont)
tau3 <- predict(fit3_treat, dat2) - predict(fit3_cont, dat2)
y_lim <- c(min(tau3, cate), max(tau3, cate))
plot(c(min(dat$X1), max(dat$X1)), y_lim, type = "n", main = "Two models",
     xlab = "X1", ylab = "")
points(dat$X1, cate, col = "red")
points(dat$X1, tau3)