Nothing
#' Estimate event-study coefficients using TWFE and 5 proposed improvements.
#'
#' @description Uses the estimation procedures recommended from Borusyak, Jaravel, Spiess (2021); Callaway and Sant'Anna (2020); Gardner (2021); Roth and Sant'Anna (2021); Sun and Abraham (2020)
#'
#' @param data The dataframe containing all the variables
#' @param yname Variable name for outcome variable
#' @param idname Variable name for unique unit id
#' @param tname Variable name for calendar period
#' @param gname Variable name for unit-specific date of initial treatment
#' (never-treated should be zero or NA)
#' @param xformla A formula for the covariates to include in the model.
#' It should be of the form `~ X1 + X2`. Default is NULL.
#' @param weights Variable name for estimation weights. This is used in
#' estimating Y(0) and also augments treatment effect weights.
#' Implementation of Roth and Sant'Anna (2021) currently does not allow for weights.
#' @param estimator Estimator you would like to use. Use "all" to estimate all.
#' Otherwise see table to know advantages and requirements for each of these.
#' @param verbose Optional. Logical. Should information about the two-stage
#' procedure be printed back to the user?
#' Default is `TRUE`.
#'
#' @return `event_study` returns a data.frame of point estimates for each estimator
#'
#' @examples
#' \donttest{
#' out = event_study(
#' data = did2s::df_het, yname = "dep_var", idname = "unit",
#' tname = "year", gname = "g", estimator = "all"
#' )
#' plot_event_study(out)
#' }
#' @export
event_study = function(
data,
yname,
idname,
gname,
tname,
xformla = NULL,
weights = NULL,
estimator = c("all", "TWFE", "did2s", "did", "impute", "sunab", "staggered"),
verbose = TRUE
) {
# Check Parameters -------------------------------------------------------------
# Select estimator
estimator <- match.arg(estimator)
# Display message about estimator's different assumptions
if (estimator == "all" && isTRUE(verbose)) {
message(
"Note these estimators rely on different underlying assumptions. See Table 2 of `https://arxiv.org/abs/2109.05913` for an overview."
)
}
# Test that gname is in tname or 0/NA for untreated
if (
!all(
unique(data[[gname]]) %in% c(0, NA, unique(data[[tname]]))
)
) {
stop(sprintf(
"'%s' must denote which year treatment starts for each group. Untreated observations should have g = 0 or NA.",
gname
))
}
# Test that there exists never-treated units
if (
!any(
unique(data[[gname]]) %in% c(0, NA)
)
) {
stop(
"event_study only works when there is a never-treated groups. This will be updated in the future, though with fewer estimators."
)
}
# If `xformla` is included, note
if (!is.null(xformla)) {
if (estimator %in% c("all", "staggered")) {
warning(paste0(
"Warning: `",
xformla,
"` is ignored for the `staggered` estimator"
))
}
}
# Checking weights
if (!(is.null(weights) || is.character(weights))) {
stop("Argument `weights` must be `NULL` or a character string")
}
# Setup ------------------------------------------------------------------------
# Treat
data$zz000treat = 1 * (data[[tname]] >= data[[gname]]) * (data[[gname]] > 0)
data[is.na(data$zz000treat), "zz000treat"] = 0
# Set g to zero if NA
data[is.na(data[[gname]]), gname] = 0
# Create event time
data$zz000event_time = ifelse(
is.na(data[[gname]]) | data[[gname]] == 0 | data[[gname]] == Inf,
-Inf,
as.numeric(data[[tname]] - data[[gname]])
)
event_time = unique(data$zz000event_time)
event_time = event_time[!is.na(event_time) & is.finite(event_time)]
# Format xformla for inclusion
if (!is.null(xformla)) {
xformla_null = paste0("0 + ", as.character(xformla)[[2]])
} else {
xformla_null = "0"
}
# initialize empty arguments
tidy_twfe = NULL
tidy_did2s = NULL
tidy_did = NULL
tidy_sunab = NULL
tidy_impute = NULL
tidy_staggered = NULL
# TWFE -------------------------------------------------------------------------
if (estimator %in% c("TWFE", "all")) {
if (isTRUE(verbose)) {
message("Estimating TWFE Model")
}
try({
twfe_formula = stats::as.formula(
paste0(
yname,
" ~ 1 + ",
xformla_null,
" + i(zz000event_time, ref = c(-1, -Inf)) | ",
idname,
" + ",
tname
)
)
if (is.null(weights)) {
est_twfe = fixest::feols(twfe_formula, data = data, warn = F, notes = F)
} else {
est_twfe = fixest::feols(
twfe_formula,
data = data,
weights = as.matrix(data[weights]),
warn = F,
notes = F
)
}
# Extract coefficients and standard errors
tidy_twfe = broom::tidy(est_twfe)
# Extract zz000event_time
tidy_twfe = tidy_twfe[grep("zz000event_time::", tidy_twfe$term), ]
# Make event time into a numeric
tidy_twfe$term = as.numeric(gsub("zz000event_time::", "", tidy_twfe$term))
# Subset column
tidy_twfe = tidy_twfe[, c("term", "estimate", "std.error")]
})
if (is.null(tidy_twfe)) warning("TWFE Failed")
}
# did2s ------------------------------------------------------------------------
if (estimator %in% c("did2s", "all")) {
if (isTRUE(verbose)) {
message("Estimating using Gardner (2021)")
}
try({
did2s_first_stage = stats::as.formula(
paste0(
"~ 0 + ",
xformla_null,
" | ",
idname,
" + ",
tname
)
)
est_did2s = did2s::did2s(
data,
yname = yname,
first_stage = did2s_first_stage,
second_stage = ~ i(zz000event_time, ref = -Inf),
treatment = "zz000treat",
cluster_var = idname,
weights = weights,
verbose = FALSE
)
# Extract coefficients and standard errors
tidy_did2s = broom::tidy(est_did2s)
# Extract zz000event_time
tidy_did2s = tidy_did2s[grep("zz000event_time::", tidy_did2s$term), ]
# Make event time into a numeric
tidy_did2s$term = as.numeric(gsub(
"zz000event_time::",
"",
tidy_did2s$term
))
# Subset columns
tidy_did2s = tidy_did2s[, c("term", "estimate", "std.error")]
})
if (is.null(tidy_did2s)) warning("Gardner (2021) Failed")
}
# did --------------------------------------------------------------------------
if (estimator %in% c("did", "all")) {
if (isTRUE(verbose)) {
message("Estimating using Callaway and Sant'Anna (2020)")
}
try({
est_did = did::att_gt(
yname = yname,
tname = tname,
idname = idname,
gname = gname,
xformla = xformla,
data = data,
weightsname = weights,
allow_unbalanced_panel = FALSE
)
est_did = did::aggte(est_did, type = "dynamic", na.rm = TRUE)
# Extract es coefficients
tidy_did = broom::tidy(est_did)
# Subset columns
tidy_did$term = tidy_did$event.time
tidy_did = tidy_did[, c("term", "estimate", "std.error")]
})
if (is.null(tidy_did)) warning("Callaway and Sant'Anna (2020) Failed")
}
# sunab ------------------------------------------------------------------------
if (estimator %in% c("sunab", "all")) {
if (isTRUE(verbose)) {
message("Estimating using Sun and Abraham (2020)")
}
try({
# Format xformla for inclusion
if (is.null(xformla)) {
sunab_xformla = "1"
} else {
sunab_xformla = paste0("1 + ", as.character(xformla)[[2]])
}
sunab_formla = stats::as.formula(
paste0(
yname,
" ~ ",
sunab_xformla,
" + sunab(",
gname,
", zz000event_time, ref.c =0, ref.p = -1) | ",
idname,
" + ",
tname
)
)
if (is.null(weights)) {
est_sunab = fixest::feols(sunab_formla, data = data)
} else {
est_sunab = fixest::feols(
sunab_formla,
data = data,
weights = as.matrix(data[weights])
)
}
tidy_sunab = broom::tidy(est_sunab)
# Extract zz000event_time
tidy_sunab = tidy_sunab[grep("zz000event_time::", tidy_sunab$term), ]
# Make event time into a numeric
tidy_sunab$term = as.numeric(gsub(
"zz000event_time::",
"",
tidy_sunab$term
))
# Subset columns
tidy_sunab = tidy_sunab[, c("term", "estimate", "std.error")]
})
if (is.null(tidy_sunab)) warning("Sun and Abraham (2020) Failed")
}
# did_imputation ---------------------------------------------------------------
if (estimator %in% c("impute", "all")) {
if (isTRUE(verbose)) {
message("Estimating using Borusyak, Jaravel, Spiess (2021)")
}
try({
impute_first_stage = stats::as.formula(
paste0(
"~ 1 + ",
xformla_null,
"+ i(",
tname,
") | ",
idname
)
)
tidy_impute = didimputation::did_imputation(
data,
yname = yname,
gname = gname,
tname = tname,
idname = idname,
wname = weights,
first_stage = impute_first_stage,
horizon = TRUE,
pretrends = TRUE
)
# Subset columns
tidy_impute = tidy_impute[, c("term", "estimate", "std.error")]
tidy_impute = tidy_impute[grep("^(-)?[0-9]+$", tidy_impute$term), ]
# Make event time into a numeric
tidy_impute$term = as.numeric(tidy_impute$term)
})
if (is.null(tidy_impute)) warning("Borusyak, Jaravel, Spiess (2021) Failed")
}
# staggered --------------------------------------------------------------------
if (estimator %in% c("staggered", "all")) {
# Waiting for staggered on CRAN
if (isTRUE(verbose)) {
message("Estimating using Roth and Sant'Anna (2021)")
}
try({
# Make untreated g = Inf
data_staggered = data
data_staggered[, gname] = ifelse(
data_staggered[[gname]] == 0,
Inf,
data_staggered[[gname]]
)
event_time_staggered = event_time[
is.finite(event_time) & event_time != -1
]
event_time_staggered = event_time_staggered[
event_time_staggered != min(event_time_staggered)
]
tidy_staggered = staggered::staggered(
data_staggered,
i = idname,
t = tname,
g = gname,
y = yname,
estimand = "eventstudy",
eventTime = event_time_staggered
)
# Subset columns
tidy_staggered$term = tidy_staggered$eventTime
tidy_staggered$std.error = tidy_staggered$se
tidy_staggered = tidy_staggered[, c("term", "estimate", "std.error")]
})
if (is.null(tidy_staggered)) {
warning("Roth and Sant'Anna (2021) Failed")
}
if (!is.null(weights)) {
warning("Roth and Sant'Anna (2021) Does Not Allow Weights")
}
}
# Bind results together --------------------------------------------------------
out = data.table::rbindlist(
list(
"TWFE" = tidy_twfe,
"Gardner (2021)" = tidy_did2s,
"Callaway and Sant'Anna (2020)" = tidy_did,
"Sun and Abraham (2020)" = tidy_sunab,
"Roth and Sant'Anna (2021)" = tidy_staggered,
"Borusyak, Jaravel, Spiess (2021)" = tidy_impute
),
idcol = "estimator"
)
return(out)
}
#' Plot results of [event_study()]
#' @param out Output from [event_study()]
#' @param separate Logical. Should the estimators be on separate plots? Default is TRUE.
#' @param horizon Numeric. Vector of length 2. First element is min and
#' second element is max of event_time to plot
#'
#' @return `plot_event_study` returns a ggplot object that can be fully customized
#'
#' @rdname event_study
#'
#' @importFrom rlang .data
#' @export
plot_event_study = function(out, separate = TRUE, horizon = NULL) {
# Get list of estimators
estimators = unique(out$estimator)
# Subset factor levels
levels = c(
"TWFE",
"Borusyak, Jaravel, Spiess (2021)",
"Callaway and Sant'Anna (2020)",
"Gardner (2021)",
"Roth and Sant'Anna (2021)",
"Sun and Abraham (2020)"
)
levels = levels[levels %in% estimators]
# Make estimator into factor
out$estimator = factor(out$estimator, levels = levels)
# Subset color scales
color_scale = c(
"TWFE" = "#374E55",
"Gardner (2021)" = "#DF8F44",
"Callaway and Sant'Anna (2020)" = "#00A1D5",
"Sun and Abraham (2020)" = "#B24745",
"Roth and Sant'Anna (2021)" = "#79AF97",
"Borusyak, Jaravel, Spiess (2021)" = "#6A6599"
)
color_scale = color_scale[names(color_scale) %in% estimators]
# create confidence intervals
out$ci_lower = out$estimate - 1.96 * out$std.error
out$ci_upper = out$estimate + 1.96 * out$std.error
# position depending on separate
if (separate) {
position = "identity"
} else {
position = ggplot2::position_dodge(width = 0.5)
}
# Subset plot if horizon is specified
if (!is.null(horizon)) {
out = out[out$term >= horizon[1] & out$term <= horizon[2], ]
}
# max and min of limits
y_lims = c(min(out$ci_lower), max(out$ci_upper)) * 1.05
x_lims = c(min(out$term) - 1, max(out$term) + 1)
ggplot2::ggplot(
data = out,
mapping = ggplot2::aes(
x = .data$term,
y = .data$estimate,
color = .data$estimator,
ymin = .data$ci_lower,
ymax = .data$ci_upper
)
) +
{
if (separate) ggplot2::facet_wrap(~estimator, scales = "free")
} +
ggplot2::geom_point(position = position) +
ggplot2::geom_errorbar(position = position) +
ggplot2::geom_vline(xintercept = -0.5, linetype = "dashed") +
ggplot2::geom_hline(yintercept = 0, linetype = "dashed") +
ggplot2::labs(
y = "Point Estimate and 95% Confidence Interval",
x = "Event Time",
color = "Estimator"
) +
{
if (separate) ggplot2::scale_y_continuous(limits = y_lims)
} +
{
if (separate) ggplot2::scale_x_continuous(limits = x_lims)
} +
ggplot2::theme_minimal(base_size = 16) +
ggplot2::scale_color_manual(values = color_scale) +
ggplot2::guides(
color = ggplot2::guide_legend(title.position = "top", nrow = 2)
) +
ggplot2::theme(legend.position = "bottom")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.