Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
warning = FALSE,
message = FALSE
)
# save built-in output hook
hook_output = knitr::knit_hooks$get("output")
# set a new output hook to cut long output texts
knitr::knit_hooks$set(output = function(x, options) {
if (!is.null(n <- options$out.lines)) {
x = xfun::split_lines(x)
if (length(x) > n) {
# cut
x = c(head(x, n), '....\n')
}
x = paste(x, collapse = '\n')
}
hook_output(x, options)
})
## ----installation, eval=FALSE-------------------------------------------------
# install.packages('devtools')
# devtools::install_github('microfan1/hdcate')
## ----load package-------------------------------------------------------------
library(hdcate)
## ----message=TRUE-------------------------------------------------------------
set.seed(1) # set an arbitrary random seed to ensure the reproducibility
data <- HDCATE.get_sim_data(n_obs=500, n_var=100, n_rel_var=4, intercept=10) # generate data
## ----data, out.lines=6--------------------------------------------------------
data
## ----x_formula----------------------------------------------------------------
x_formula <- "X1+X2+X3+X4+X5+X6"
x_formula
## ----model--------------------------------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
## ----set conditional variable-------------------------------------------------
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
## ----fit----------------------------------------------------------------------
HDCATE.fit(model)
## ----get fitted ATE-----------------------------------------------------------
# ATE
model$ate
## ----get fitted CATE, out.lines=6---------------------------------------------
# CATE
model$hdcate
## ----inferences---------------------------------------------------------------
HDCATE.inference(model)
## ----two-sided test, out.lines=6----------------------------------------------
# two-sided bands
model$i_two_side
## ----left-sided test, out.lines=6---------------------------------------------
# left-sided bands
model$i_left_side
## ----right-sided test, out.lines=6--------------------------------------------
# right-sided bands
model$i_right_side
## ----plot, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (propensity function is misspecified)', dpi=200, out.width="80%"----
HDCATE.plot(model)
# Alternatively, if the model is not inferenced, use the code below:
# HDCATE.plot(model, include_band=FALSE)
# Alternatively, we may want to save the full plot as a high-definition PDF file:
# HDCATE.plot(model, output_pdf=TRUE, pdf_name='hdcate_plot.pdf',
# include_band=TRUE, test_side='both', y_axis_min='auto', y_axis_max='auto',
# display.hdcate='HDCATEF', display.ate='ATE', display.siglevel='sig_level'
# )
## ----change formula, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (expectation function is misspecified)', dpi=200, out.width="80%"----
x_formula = paste(paste0('X', c(90:100)), collapse ='+') # x_formula="X90+...+X100"
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)
## ----cross-fitting------------------------------------------------------------
# create the model first, or use an existing `model` and skip this line
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
# suppose we want to use the 5-fold cross-fitting approach:
HDCATE.use_cross_fitting(model, k_fold=5)
## ----manual fold--------------------------------------------------------------
# In this case, the param k_fold is auto detected, you can pass any value to it.
HDCATE.use_cross_fitting(model, k_fold=1, folds=list(c(1:250), c(251:500)))
## ----change to full sample----------------------------------------------------
HDCATE.use_full_sample(model)
## ----temp function------------------------------------------------------------
my_method_to_fit_expectation <- function(df) {
# fit the input data.frame (df), where the first column is outcome value (Y)
# and the rest are covariates in x_formula
# ...
# return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
# use the fitted object (returned by your function above) to predict the conditional expectation
# fucntion on the input data.frame (df)
# ...
# return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
# fit the input data.frame (df), where the first column is dummy treatment
# value (D) and the rest are covariates in x_formula
# ...
# return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
# use the fitted object (returned by your function above) to predict the propensity score
# fucntion on the input data.frame (df)
# ...
# return a vector of predicted values
}
## ----lasso example------------------------------------------------------------
x_formula <- "X1+X2+X3+X4+X5+X6" # propensity function is misspecified
# create a model
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
# using the template above, we can write:
my_method_to_fit_expectation <- function(df) {
# fit the input data.frame (df), where the first column is outcome value (Y)
# and the rest are covariates in x_formula
hdm::rlasso(as.formula(paste0('Y', "~", x_formula)), df)
# return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
# use the fitted object (fitted_model) to predict the conditional expectation
# fucntion on the input data.frame (df)
predict(fitted_model, df)
# return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
# fit the input data.frame (df), where the first column is dummy treatment
# variable (D) and the rest are covariates in x_formula
hdm::rlassologit(as.formula(paste0('D', "~", x_formula)), df)
# return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
# use the fitted object (fitted_model) to predict the propensity score
# fucntion on the input data.frame (df)
predict(fitted_model, df, type="response")
# return a vector of predicted values
}
## ----reset1, include=FALSE----------------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
## ----plug in------------------------------------------------------------------
HDCATE.set_first_stage(
model,
fit.treated=my_method_to_fit_expectation,
fit.untreated=my_method_to_fit_expectation,
fit.propensity=my_method_to_fit_propensity,
predict.treated=my_method_to_predict_expectation,
predict.untreated=my_method_to_predict_expectation,
predict.propensity=my_method_to_predict_propensity
)
## ----re estimate 1, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (using user-defined LASSO on the first stage)', dpi=200, out.width="80%"----
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)
## ----recover------------------------------------------------------------------
HDCATE.unset_first_stage(model)
## ----reset before rf, include=FALSE-------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
## ----ramdom forest example, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (using user-defined Random Forest model on the first stage)', dpi=200, out.width="80%"----
# propensity function is misspecified, for example
x_formula <- "X1+X2+X3+X4+X5+X6"
# create a model
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)
# =========== User-defined ML approach start ===========
# using the template above, we can write:
my_method_to_fit_expectation <- function(df) {
# fit the input data.frame (df), where the first column is outcome value (Y)
# and the rest are covariates in x_formula
randomForest::randomForest(as.formula(paste0('Y', "~", x_formula)), data = df)
# return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
# use the fitted object (fitted_model) to predict the conditional expectation
# fucntion on the input data.frame (df)
predict(fitted_model, df)
# return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
# fit the input data.frame (df), where the first column is dummy treatment
# variable (D) and the rest are covariates in x_formula
randomForest::randomForest(as.formula(paste0('D', "~", x_formula)), data = df)
# return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
# use the fitted object (fitted_model) to predict the propensity score
# fucntion on the input data.frame (df)
predict(fitted_model, df)
# return a vector of predicted values
}
HDCATE.set_first_stage(
model,
fit.treated=my_method_to_fit_expectation,
fit.untreated=my_method_to_fit_expectation,
fit.propensity=my_method_to_fit_propensity,
predict.treated=my_method_to_predict_expectation,
predict.untreated=my_method_to_predict_expectation,
predict.propensity=my_method_to_predict_propensity
)
# =========== User-defined ML approach end ===========
# set conditional variable in CATE, same as above
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
# fit, inference and plot
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)
## ----var importance in rf, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='Variable Importance Extracted from the User-defined Random Forest Model'----
# the list have only one element since we use full-sample estimator
rf_model <- model$treated_cond_exp_model_list[[1]]
# then, we can use the fitted object `rf_model` for futher analysis.
# the rest of codes are omitted since they are unconcerned for this package.
# ...
## ----plot rf, echo=FALSE, fig.align='center', fig.cap='Variable Importance Extracted from the User-defined Random Forest Model', fig.height=6, fig.width=6, dpi=200, fig.show='hold', out.width="80%"----
# the code below uses randomForest and ggplot2 to plot out variable importance
library('ggplot2') # visualization
library('ggthemes') # visualization
library('dplyr') # data manipulation
importance <- randomForest::importance(rf_model)
# varImportance <- data.frame(Variables = row.names(importance), Importance = round(importance[ ,'MeanDecreaseGini'],2)) # for classification problem
varImportance <- data.frame(Variables = row.names(importance), Importance = round(importance[ ,'IncNodePurity'],2)) # for regression problem
rankImportance <- varImportance %>%
mutate(Rank = paste0('#',dense_rank(desc(Importance))))
ggplot(rankImportance, aes(x = reorder(Variables, Importance),
y = Importance, fill = Importance)) +
geom_bar(stat='identity') +
# geom_text(aes(x = Variables, y = 0.5, label = Rank),
# hjust=0, vjust=0.55, size = 4, colour = 'red') +
labs(x = 'Variables', y= 'Importance (measured in squared OOB residuals)') +
coord_flip() +
theme_few()
## ----user-defined inf---------------------------------------------------------
HDCATE.inference(model, sig_level=0.05, n_rep_boot=3000)
## ----bw-----------------------------------------------------------------------
HDCATE.set_bw(model, 0.15) # for example, set bandwidth=0.15
## ----recover bw---------------------------------------------------------------
HDCATE.set_bw(model)
## ----help, eval=FALSE---------------------------------------------------------
# help(HDCATE)
# help(HDCATE.set_condition_var)
# help(HDCATE.fit)
# help(HDCATE.inference)
# help(HDCATE.plot)
# help(HDCATE.use_cross_fitting)
# help(HDCATE.use_full_sample)
# help(HDCATE.set_first_stage)
# help(HDCATE.unset_first_stage)
# help(HDCATE.set_bw)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.