R/did-main.R

Defines functions did

Documented in did

#' Double Difference-in-Differences Estimator
#'
#' Implement the double did estimator and compute the variance via block bootstrap.
#'
#' @export
#' @param formula A formula of the following form,
#' \itemize{
#'   \item When \code{is_panel = TRUE}: \code{y ~ treatment | x1 + x2} where
#'    \code{treatment} is a time-varying treatment indicator.
#'
#'   \item When \code{is_panel = FALSE}: \code{y ~ treat_group + post_treat | x1 + x2},
#'    where \code{treat_group} is a time-invariant treatment indicator,
#'    and \code{post_treat} is an indicator that takes 1 if the observations is from the
#'    post treatment periods and zero otherwise. The order is strict.
#'
#'    \item Covariates are specified after the vertical bar \code{|}.
#'     When there are no covariates, the formula should be given as
#'     \code{y ~ treatment} for \code{is_panel = TRUE} and
#'     \code{y ~ treat_group + post_treat} for \code{is_panel = FALSE}.
#' }
#' @param data A data frame.
#' @param id_unit A variable name of unit (e.g., country name, respondent id).
#'  When \code{is_panel = TRUE} (i.e., the panel data), a variable name of the unit index should be provided.
#'  When \code{is_panel = FALSE} (i.e., the repeated cross-section data), this can be left as \code{NULL}.
#' @param id_time A variable name of time (e.g., year).
#' @param design
#'    The design to be used: either \code{"did"} (the standard difference-in-differences design) or
#'    \code{"sa"} (the staggered adoption design).
#'    The default is \code{"did"}.
#' @param is_panel A boolean argument.
#'    This argument should be \code{TRUE} when the dataset is panel (i.e., the same units are repeatedly observed over time);
#'    This should be \code{FALSE} when the dataset is the repeated cross-section (RCS),
#'      where different sets of units are observed at each time point.
#' @param option A list of the following option parameters:
#' \describe{
#'   \item{n_boot}{The number of bootstrap iterations.}
#'   \item{se_boot}{A boolean argument.
#'                    If \code{TRUE} bootstrap-based critical value is used to construct the confidence intervals.
#'                    Default is \code{TRUE}. This option only affects the basic DID design.}
#'   \item{id_cluster}{A variable name of a cluster index used to conduct clustered bootstraps.
#'                     If left unspecified, the unit level bootstrap will be conducted.}
#'   \item{parallel}{Use multiple cores for computing the weighting matrix. \code{future} package is used.}
#'   \item{lead}{Lead parameter. Default is 0, which estimate the instantaneous treatment effect.}
#' }
#' @examples
#' \donttest{
#' ## The standard DID design -----------
#' ### (1) panel data
#' ### (2) repeated cross-section data
#'
#' ## (1) panel data --------------------
#' data(anzia2012)
#'
#' set.seed(1234)
#' fit_panel <- did(
#'   formula = lnavgsalary_cpi ~ oncycle | teachers_avg_yrs_exper +
#'                         ami_pc + asian_pc + black_pc + hisp_pc,
#'   data    = anzia2012,
#'   id_unit = "district",
#'   id_time = "year",
#'   option  = list(n_boot = 20, parallel = FALSE)
#' )
#'
#' summary(fit_panel)
#'
#' ## (2) repeated cross-section data ---
#' data(malesky2014)
#'
#' set.seed(1234)
#' ff_rcs <- did(
#'   formula = transport ~ treatment + post_treat | factor(city),
#'   data    = malesky2014,
#'   id_time = 'year',
#'   is_panel= FALSE,
#'   option  = list(n_boot = 20, id_cluster = "id_district", parallel = FALSE)
#' )
#'
#' summary(ff_rcs)
#'
#' ## The staggered adoption design ----
#' data(paglayan2019)
#' require(dplyr)
#' paglayan2019 <- paglayan2019 %>%
#'   filter(!(state %in% c("WI", "DC"))) %>%
#'   mutate(id_time = year,
#'          id_subject = as.numeric(as.factor(state)),
#'          log_expenditure = log(pupil_expenditure + 1),
#'          log_salary      = log(teacher_salary + 1))
#'
#' # estimate effects
#' set.seed(1234)
#' fit_sa <- did(
#'   formula = log_expenditure ~ treatment,
#'   data    = paglayan2019,
#'   id_unit = "id_subject",
#'   id_time = "id_time",
#'   design  = "sa",
#'   option  = list(n_boot = 20, parallel = FALSE, thres = 1, lead = 0)
#' )
#' summary(fit_sa)
#' }
#'
#' @return \code{did} returns an object of \code{DIDdesign} class, which is a list of following items:
#' \describe{
#'   \item{estimates}{A table of estimates in the tibble format.}
#'   \item{weights}{A list of weight information.}
#' }
#' The functions `summary` is used to obtain and print a summary of the results.
did <- function(
  formula, data, id_unit, id_time, design = "did",
  is_panel = TRUE, option = list()
) {

  ## set option
  option <- set_option(option)

  ## implement Double DID estimator
  if (design == "did") {
    ## standard design
    fit <- did_std(formula, data, id_unit, id_time, is_panel, option)
  } else if (design == "sa"){
    ## staggered adoption design
    if (isFALSE(is_panel)) stop("Only panel data is supported in the SA design.")
    fit <- did_sad(formula, data, id_unit, id_time, option)
  }

  class(fit) <- c(class(fit), "DIDdesign")
  return(fit)

}
naoki-egami/DIDdesign documentation built on June 4, 2024, 2:59 p.m.