Nothing
#' Estimates Equation (2) in Freyaldenhoven et al. (2021)
#'
#' @description `EventStudy` uses regression methods to estimate the effect of a policy on a given outcome.
#'
#' @param estimator Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609).
#' @param data Data frame containing the variables of interest.
#' @param outcomevar Character indicating column of outcome variable y.
#' @param policyvar Character indicating column of policy variable z.
#' @param idvar Character indicating column of units.
#' @param timevar Character indicating column of time periods.
#' @param controls Optional character vector indicating a set of control variables q.
#' @param proxy Character indicating column of variable that is thought to be affected by the confound but not by the policy.
#' Should be specified if and only if estimator is specified as "FHS".
#' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS".
#' If NULL, defaults to the strongest lead of the policy variable based on the first stage.
#' @param FE Logical indicating whether unit fixed-effects should be included. Defaults to TRUE.
#' @param TFE Logical indicating whether time fixed-effects should be included. Defaults to TRUE.
#' @param cluster Logical indicating whether to use clustered errors by units. If FALSE, will use unclustered heteroskedasticity-robust standard errors.
#' Defaults to TRUE. Must be TRUE if FE is TRUE.
#' @param post Whole number indicating the number of periods in the past before which the past values of the policy
#' are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of
#' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170).
#' @param overidpost Optional whole number indicating the number of event times after "post" to be included in estimation.
#' Defaults to 1.
#' Corresponds to L_M in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170).
#' @param pre Whole number indicating the number of periods in the future after which the future values of the policy are
#' not supposed to affect the value of the outcome today. Corresponds to G in equation (2) of
#' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170).
#' @param overidpre Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation.
#' Defaults to "post" + "pre".
#' Corresponds to L_G in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170).
#' @param normalize Specifies the event-time coefficient to be normalized. Defaults to - pre - 1.
#' @param anticipation_effects_normalization If set to TRUE, runs the default process and switches coefficient to be normalized to 0
#' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.
#'
#' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function.
#' @import dplyr
#' @import estimatr
#' @importFrom stats reformulate
#' @importFrom data.table setorderv as.data.table .SD
#' @export
#'
#' @examples
#'
#' # A minimal example
#' eventstudy_model <-
#' EventStudy(
#' estimator = "OLS",
#' data = example_data,
#' outcomevar = "y_base",
#' policyvar = "z",
#' idvar = "id",
#' timevar = "t",
#' pre = 0, post = 3,
#' normalize = -1
#' )
#'
#' ### Access estimated model
#' eventstudy_model$output
#'
#' summary(eventstudy_model$output)
#'
#' ### data.frame of estimates
#' estimatr::tidy(eventstudy_model$output)
#'
#' ### Access arguments
#' eventstudy_model$arguments
#'
#' # A dynamic OLS model with anticipation effects and controls
#' eventstudy_model_dyn <-
#' EventStudy(
#' estimator = "OLS",
#' data = example_data,
#' outcomevar = "y_base",
#' policyvar = "z",
#' idvar = "id",
#' timevar = "t",
#' controls = "x_r",
#' FE = TRUE, TFE = TRUE,
#' post = 3, overidpost = 5,
#' pre = 2, overidpre = 4,
#' normalize = - 3,
#' cluster = TRUE,
#' anticipation_effects_normalization = TRUE
#' )
#'
#' summary(eventstudy_model_dyn$output)
#'
#' # A static model
#' eventstudy_model_static <-
#' EventStudy(
#' estimator = "OLS",
#' data = example_data,
#' outcomevar = "y_jump_m",
#' policyvar = "z",
#' idvar = "id",
#' timevar = "t",
#' FE = TRUE, TFE = TRUE,
#' post = 0, overidpost = 0,
#' pre = 0, overidpre = 0,
#' cluster = TRUE
#' )
#'
#' summary(eventstudy_model_static$output)
#'
#' # A dynamic model with an unbalanced panel
#' data_unbal <- example_data[1:(nrow(example_data)-1),] # drop last row to make unbalanced
#'
#' eventstudy_model_unbal <-
#' EventStudy(
#' estimator = "OLS",
#' data = data_unbal,
#' outcomevar = "y_base",
#' policyvar = "z",
#' idvar = "id",
#' timevar = "t",
#' pre = 0, post = 3,
#' normalize = -1
#' )
#'
#' summary(eventstudy_model_unbal$output)
#'
#' # A dynamic model estimated using IV
#' eventstudy_model_iv <-
#' EventStudy(
#' estimator = "FHS",
#' data = example_data,
#' outcomevar = "y_base",
#' policyvar = "z",
#' idvar = "id",
#' timevar = "t",
#' proxy = "x_r",
#' FE = TRUE, TFE = TRUE,
#' post = 2, overidpost = 1,
#' pre = 0, overidpre = 3,
#' normalize = -1,
#' cluster = TRUE
#' )
#'
#' summary(eventstudy_model_iv$output)
#'
EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL,
proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre,
normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE) {
# Check for errors in arguments
if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")}
if (! is.data.frame(data)) {stop("data should be a data frame.")}
if (! is.character(outcomevar)) {stop("outcomevar should be a character.")}
if (! is.character(policyvar)) {stop("policyvar should be a character.")}
if (! is.character(idvar)) {stop("idvar should be a character.")}
if (! is.character(timevar)) {stop("timevar should be a character.")}
if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")}
if ((estimator == "OLS" & ! is.null(proxy))) {stop("proxy should only be specified when estimator = 'FHS'.")}
if ((estimator == "FHS" & ! is.character(proxy))) {stop("proxy should be a character.")}
if ((estimator == "OLS" & ! is.null(proxyIV))) {stop("proxyIV should only be specified when estimator = 'FHS'.")}
if ((estimator == "FHS" &
! is.null(proxyIV) & ! is.character(proxyIV))) {stop("proxyIV should be a character.")}
if (estimator == "FHS" & pre == 0 & overidpre == 0 & is.null(proxyIV)) {
stop("When estimator is 'FHS' and there are no leads in the model, proxyIV must be specified explicitly.")
}
if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")}
if (! is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")}
if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")}
if (FE & !cluster) {stop("cluster=TRUE is required when FE=TRUE.")}
if (! is.logical(anticipation_effects_normalization)) {stop("anticipation_effects_normalization should be either TRUE or FALSE.")}
if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")}
if (! (is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")}
if (! (is.numeric(pre) & pre >= 0 & pre %% 1 == 0)) {stop("pre should be a whole number.")}
if (! (is.numeric(overidpre) & overidpre >= 0 & overidpre %% 1 == 0)) {stop("overidpre should be a whole number.")}
if (normalize == 0 & post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) {stop("normalize cannot be zero when post = overidpost = pre = overidpre = 0.")}
if (! (is.numeric(normalize) & normalize %% 1 == 0
& normalize >= -(pre + overidpre + 1) & normalize <= post + overidpost)) {
stop("normalize should be an integer between -(pre + overidpre + 1) and (post + overidpost).")
}
# Check for errors in data
if (! is.numeric(data[[timevar]])) {stop("timevar column in dataset should be numeric.")}
if (! all(data[[timevar]] %% 1 == 0)) {
stop("timevar column in dataset should be a vector of integers.")
}
data_ids <- as.data.frame(data)[, c(idvar, timevar)]
n_units <- length(base::unique(data[[idvar]]))
n_periods <- length(base::unique(data[[timevar]]))
n_unique_rows <- nrow(data[!base::duplicated(data_ids),])
if (n_unique_rows != n_units*n_periods) {
warning("Dataset is unbalanced.")
unbalanced <- TRUE
} else {
unbalanced <- FALSE
}
data.table::setorderv(data, c(idvar, timevar))
detect_holes <- function(dt, idvar, timevar) {
dt <- data.table::as.data.table(dt)
holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar)
][, list(holes = any(base::diff(base::get(timevar)) != 1)),
by = c(idvar)]
return(any(holes_per_id$holes))
}
if (detect_holes(data, idvar, timevar)) {
warning(paste0("Note: gaps of more than one unit in the time variable '", timevar, "' were detected. ",
"Treating these as gaps in the panel dimension."))
timevar_holes <- TRUE
} else {
timevar_holes <- FALSE
}
if (post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) {
static <- TRUE
} else {
static <- FALSE
}
num_evenstudy_coeffs <- overidpre + pre + post + overidpost
num_periods <- max(data[[timevar]], na.rm = T) - min(data[[timevar]], na.rm = T)
if (num_evenstudy_coeffs > num_periods - 1) {stop("overidpre + pre + post + overidpost cannot exceed the data window.")}
for (tag in c("_fd", "_lead", "_lag")) {
if (sum(grepl(paste0(policyvar, tag), colnames(data))) > 0) {
warning(paste0("Variables starting with ", policyvar, tag,
" should be reserved for usage by eventstudyr."))
}
}
# Compute shifts in policy variable
num_fd_lags <- post + overidpost - 1
num_fd_leads <- pre + overidpre
furthest_lag_period <- num_fd_lags + 1
if (static) {
message("post, overidpost, pre, and overidpre are set to 0. A static model will be estimated.")
} else {
data <- ComputeFirstDifferences(data, idvar, timevar, policyvar, timevar_holes)
if ((post + overidpost - 1 >= 1) & (pre + overidpre >= 1)) {
shift_values = c(-num_fd_leads:-1, 1:num_fd_lags)
} else if (pre + overidpre < 1) {
shift_values = 1:num_fd_lags
} else if (post + overidpost - 1 < 1) {
shift_values = -num_fd_leads:-1
}
data <- ComputeShifts(data, idvar, timevar,
shiftvar = paste0(policyvar, "_fd"),
shiftvalues = shift_values,
timevar_holes = timevar_holes)
}
if (!static) {
data <- ComputeShifts(data, idvar, timevar,
shiftvar = policyvar,
shiftvalues = c(-num_fd_leads, furthest_lag_period),
timevar_holes = timevar_holes)
lead_endpoint_var <- paste0(policyvar, "_lead", num_fd_leads)
data[lead_endpoint_var] <- 1 - data[lead_endpoint_var]
}
if (pre != 0 & normalize == -1 & anticipation_effects_normalization) {
normalize <- -pre - 1
warning(paste("You allowed for anticipation effects", pre,
"periods before the event, so the coefficient at", normalize,
"was selected to be normalized to zero.",
"To override this, change anticipation_effects_normalization to FALSE."))
}
if (normalize < 0) {
if (normalize == -(pre + overidpre + 1)) {
normalization_column <- paste0(policyvar, "_lead", (-1 * (normalize + 1)))
} else {
normalization_column <- paste0(policyvar, "_fd_lead", (-1 * normalize))
}
} else if (normalize == 0){
if (normalize == post + overidpost) {
normalization_column <- paste0(policyvar, "_lag", (normalize))
} else {
normalization_column <- paste0(policyvar, "_fd")
}
} else {
if (normalize == post + overidpost) {
normalization_column <- paste0(policyvar, "_lag", (normalize))
} else {
normalization_column <- paste0(policyvar, "_fd_lag", (normalize))
}
}
if (static) {
str_policy_vars = policyvar
} else {
all_vars <- names(data)[grepl(policyvar, names(data))]
lead_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lead"), all_vars)]
lead_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lead"), all_vars)]
fd_var <- paste0(policyvar, "_fd")
lag_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lag"), all_vars)]
lag_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lag"), all_vars)]
str_policy_vars <- c(lead_endpoint_var, lead_fd_vars, fd_var, lag_fd_vars, lag_endpoint_var)
str_policy_vars <- str_policy_vars[!(str_policy_vars %in% normalization_column)]
}
if (estimator == "OLS") {
event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars,
static, controls, proxy, proxyIV)
output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster)
coefficients <- str_policy_vars
}
if (estimator == "FHS") {
if (is.null(proxyIV)) {
Fstart <- 0
str_fd_leads <- str_policy_vars[grepl("^z_fd_lead", str_policy_vars)]
for (var in str_fd_leads) {
lm <- lm(data = data, formula = stats::reformulate(termlabels = var, response = proxy))
Floop <- summary(lm)$fstatistic["value"]
if (Floop > Fstart) {
Fstart <- Floop
proxyIV <- var
}
}
message(paste0("Defaulting to strongest lead of differenced policy variable: proxyIV = ", proxyIV,
". To specify a different proxyIV use the proxyIV argument."))
}
event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars,
static, controls, proxy, proxyIV)
output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster)
coefficients <- dplyr::setdiff(str_policy_vars, proxyIV)
}
event_study_args <- list("estimator" = estimator,
"data" = data,
"outcomevar" = outcomevar,
"policyvar" = policyvar,
"idvar" = idvar,
"timevar" = timevar,
"controls" = controls,
"proxy" = proxy,
"proxyIV" = proxyIV,
"FE" = FE,
"TFE" = TFE,
"post" = post,
"overidpost" = overidpost,
"pre" = pre,
"overidpre" = overidpre,
"normalize" = normalize,
"normalization_column" = normalization_column,
"cluster" = cluster,
"eventstudy_coefficients" = coefficients)
return(list("output" = output,
"arguments" = event_study_args))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.