user_manual.R
In hdcate: Estimation of Conditional Average Treatment Effects with High-Dimensional Data

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)

# save built-in output hook
hook_output = knitr::knit_hooks$get("output")

# set a new output hook to cut long output texts
knitr::knit_hooks$set(output = function(x, options) {
  if (!is.null(n <- options$out.lines)) {
    x = xfun::split_lines(x)
    if (length(x) > n) {
      # cut
      x = c(head(x, n), '....\n')
    }
    x = paste(x, collapse = '\n')
  }
  hook_output(x, options)
})

## ----installation, eval=FALSE-------------------------------------------------
#  install.packages('devtools')
#  devtools::install_github('microfan1/hdcate')

## ----load package-------------------------------------------------------------
library(hdcate)

## ----message=TRUE-------------------------------------------------------------
set.seed(1)  # set an arbitrary random seed to ensure the reproducibility
data <- HDCATE.get_sim_data(n_obs=500, n_var=100, n_rel_var=4, intercept=10)  # generate data

## ----data, out.lines=6--------------------------------------------------------
data

## ----x_formula----------------------------------------------------------------
x_formula <- "X1+X2+X3+X4+X5+X6"
x_formula

## ----model--------------------------------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

## ----set conditional variable-------------------------------------------------
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)

## ----fit----------------------------------------------------------------------
HDCATE.fit(model)

## ----get fitted ATE-----------------------------------------------------------
# ATE
model$ate

## ----get fitted CATE, out.lines=6---------------------------------------------
# CATE
model$hdcate

## ----inferences---------------------------------------------------------------
HDCATE.inference(model)

## ----two-sided test, out.lines=6----------------------------------------------
# two-sided bands
model$i_two_side

## ----left-sided test, out.lines=6---------------------------------------------
# left-sided bands
model$i_left_side

## ----right-sided test, out.lines=6--------------------------------------------
# right-sided bands
model$i_right_side

## ----plot, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (propensity function is misspecified)', dpi=200, out.width="80%"----
HDCATE.plot(model)

# Alternatively, if the model is not inferenced, use the code below:
# HDCATE.plot(model, include_band=FALSE)

# Alternatively, we may want to save the full plot as a high-definition PDF file:
# HDCATE.plot(model, output_pdf=TRUE, pdf_name='hdcate_plot.pdf', 
#   include_band=TRUE, test_side='both', y_axis_min='auto', y_axis_max='auto',
#   display.hdcate='HDCATEF', display.ate='ATE', display.siglevel='sig_level'
# )

## ----change formula, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (expectation function is misspecified)', dpi=200, out.width="80%"----
x_formula = paste(paste0('X', c(90:100)), collapse ='+')  # x_formula="X90+...+X100"

model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)

## ----cross-fitting------------------------------------------------------------
# create the model first, or use an existing `model` and skip this line
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

# suppose we want to use the 5-fold cross-fitting approach:
HDCATE.use_cross_fitting(model, k_fold=5)

## ----manual fold--------------------------------------------------------------
# In this case, the param k_fold is auto detected, you can pass any value to it.
HDCATE.use_cross_fitting(model, k_fold=1, folds=list(c(1:250), c(251:500)))

## ----change to full sample----------------------------------------------------
HDCATE.use_full_sample(model)

## ----temp function------------------------------------------------------------
my_method_to_fit_expectation <- function(df) {
  # fit the input data.frame (df), where the first column is outcome value (Y)
  #   and the rest are covariates in x_formula
  # ...
  # return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
  # use the fitted object (returned by your function above) to predict the conditional expectation
  #   fucntion on the input data.frame (df)
  # ...
  # return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
  # fit the input data.frame (df), where the first column is dummy treatment 
  #   value (D) and the rest are covariates in x_formula
  # ...
  # return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
  # use the fitted object (returned by your function above) to predict the propensity score 
  #   fucntion on the input data.frame (df)
  # ...
  # return a vector of predicted values
}

## ----lasso example------------------------------------------------------------
x_formula <- "X1+X2+X3+X4+X5+X6"  # propensity function is misspecified
# create a model
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

# using the template above, we can write:
my_method_to_fit_expectation <- function(df) {
  # fit the input data.frame (df), where the first column is outcome value (Y)
  #   and the rest are covariates in x_formula
  hdm::rlasso(as.formula(paste0('Y', "~", x_formula)), df)
  # return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
  # use the fitted object (fitted_model) to predict the conditional expectation
  #   fucntion on the input data.frame (df)
  predict(fitted_model, df)
  # return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
  # fit the input data.frame (df), where the first column is dummy treatment 
  #   variable (D) and the rest are covariates in x_formula
  hdm::rlassologit(as.formula(paste0('D', "~", x_formula)), df)
  # return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
  # use the fitted object (fitted_model) to predict the propensity score 
  #   fucntion on the input data.frame (df)
  predict(fitted_model, df, type="response")
  # return a vector of predicted values
}

## ----reset1, include=FALSE----------------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

## ----plug in------------------------------------------------------------------
HDCATE.set_first_stage(
  model,
  fit.treated=my_method_to_fit_expectation,
  fit.untreated=my_method_to_fit_expectation,
  fit.propensity=my_method_to_fit_propensity,
  predict.treated=my_method_to_predict_expectation,
  predict.untreated=my_method_to_predict_expectation,
  predict.propensity=my_method_to_predict_propensity
)

## ----re estimate 1, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (using user-defined LASSO on the first stage)', dpi=200, out.width="80%"----
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)

## ----recover------------------------------------------------------------------
HDCATE.unset_first_stage(model)

## ----reset before rf, include=FALSE-------------------------------------------
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

## ----ramdom forest example, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='HDCATE Estimation (using user-defined Random Forest model on the first stage)', dpi=200, out.width="80%"----
# propensity function is misspecified, for example
x_formula <- "X1+X2+X3+X4+X5+X6"
# create a model
model <- HDCATE(data=data, y_name='Y', d_name='D', x_formula=x_formula)

# =========== User-defined ML approach start ===========

# using the template above, we can write:
my_method_to_fit_expectation <- function(df) {
  # fit the input data.frame (df), where the first column is outcome value (Y)
  #   and the rest are covariates in x_formula
  randomForest::randomForest(as.formula(paste0('Y', "~", x_formula)), data = df)
  # return a fitted object
}
my_method_to_predict_expectation <- function(fitted_model, df) {
  # use the fitted object (fitted_model) to predict the conditional expectation
  #   fucntion on the input data.frame (df)
  predict(fitted_model, df)
  # return a vector of predicted values
}
my_method_to_fit_propensity <- function(df) {
  # fit the input data.frame (df), where the first column is dummy treatment 
  #   variable (D) and the rest are covariates in x_formula
  randomForest::randomForest(as.formula(paste0('D', "~", x_formula)), data = df)
  # return a fitted object
}
my_method_to_predict_propensity <- function(fitted_model, df) {
  # use the fitted object (fitted_model) to predict the propensity score 
  #   fucntion on the input data.frame (df)
  predict(fitted_model, df)
  # return a vector of predicted values
}

HDCATE.set_first_stage(
  model,
  fit.treated=my_method_to_fit_expectation,
  fit.untreated=my_method_to_fit_expectation,
  fit.propensity=my_method_to_fit_propensity,
  predict.treated=my_method_to_predict_expectation,
  predict.untreated=my_method_to_predict_expectation,
  predict.propensity=my_method_to_predict_propensity
)

# =========== User-defined ML approach end ===========

# set conditional variable in CATE, same as above
HDCATE.set_condition_var(model, 'X2', min=-1, max=1, step=0.01)

# fit, inference and plot
HDCATE.fit(model)
HDCATE.inference(model)
HDCATE.plot(model)

## ----var importance in rf, fig.show='hold' , fig.width=6, fig.height=6, fig.align='center', fig.cap='Variable Importance Extracted from the User-defined Random Forest Model'----
# the list have only one element since we use full-sample estimator
rf_model <- model$treated_cond_exp_model_list[[1]]

# then, we can use the fitted object `rf_model` for futher analysis.
# the rest of codes are omitted since they are unconcerned for this package.
# ...

## ----plot rf, echo=FALSE, fig.align='center', fig.cap='Variable Importance Extracted from the User-defined Random Forest Model', fig.height=6, fig.width=6, dpi=200, fig.show='hold', out.width="80%"----
# the code below uses randomForest and ggplot2 to plot out variable importance
library('ggplot2') # visualization
library('ggthemes') # visualization
library('dplyr') # data manipulation
importance    <- randomForest::importance(rf_model)
# varImportance <- data.frame(Variables = row.names(importance), Importance = round(importance[ ,'MeanDecreaseGini'],2))  # for classification problem
varImportance <- data.frame(Variables = row.names(importance), Importance = round(importance[ ,'IncNodePurity'],2))  # for regression problem

rankImportance <- varImportance %>%
  mutate(Rank = paste0('#',dense_rank(desc(Importance))))

ggplot(rankImportance, aes(x = reorder(Variables, Importance), 
    y = Importance, fill = Importance)) +
  geom_bar(stat='identity') + 
  # geom_text(aes(x = Variables, y = 0.5, label = Rank),
  #   hjust=0, vjust=0.55, size = 4, colour = 'red') +
  labs(x = 'Variables', y= 'Importance (measured in squared OOB residuals)') +
  coord_flip() + 
  theme_few()

## ----user-defined inf---------------------------------------------------------
HDCATE.inference(model, sig_level=0.05, n_rep_boot=3000)

## ----bw-----------------------------------------------------------------------
HDCATE.set_bw(model, 0.15)  # for example, set bandwidth=0.15

## ----recover bw---------------------------------------------------------------
HDCATE.set_bw(model)

## ----help, eval=FALSE---------------------------------------------------------
#  help(HDCATE)
#  help(HDCATE.set_condition_var)
#  help(HDCATE.fit)
#  help(HDCATE.inference)
#  help(HDCATE.plot)
#  help(HDCATE.use_cross_fitting)
#  help(HDCATE.use_full_sample)
#  help(HDCATE.set_first_stage)
#  help(HDCATE.unset_first_stage)
#  help(HDCATE.set_bw)
Any scripts or data that you put into this service are public.
hdcate documentation built on Dec. 16, 2022, 5:19 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
hdcate
Estimation of Conditional Average Treatment Effects with High-Dimensional Data

inst/doc/user_manual.R
In hdcate: Estimation of Conditional Average Treatment Effects with High-Dimensional Data

Try the hdcate package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hdcate Estimation of Conditional Average Treatment Effects with High-Dimensional Data

inst/doc/user_manual.R In hdcate: Estimation of Conditional Average Treatment Effects with High-Dimensional Data

Try the hdcate package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hdcate
Estimation of Conditional Average Treatment Effects with High-Dimensional Data

inst/doc/user_manual.R
In hdcate: Estimation of Conditional Average Treatment Effects with High-Dimensional Data