Nothing
#' Post-estimation aggregation of ETWFE results
#'
#' @md
#' @description
#' Companion function to `etwfe`, enabling the recovery of aggregate treatment
#' effects along different dimensions of interest (e.g, an event study of
#' dynamic average treatment effects). `emfx` is a light wrapper around the
#' \code{\link[marginaleffects]{slopes}} function from the **marginaleffects**
#' package.
#'
#' @param object An `etwfe` model object.
#' @param type Character. The desired type of post-estimation aggregation.
#' @param by_xvar Logical. Should the results account for heterogeneous
#' treatment effects? Only relevant if the preceding `etwfe` call included a
#' specified `xvar` argument, i.e. interacted categorical covariate. The
#' default behaviour (`"auto"`) is to automatically estimate heterogeneous
#' treatment effects for each level of `xvar` if these are detected as part
#' of the underlying `etwfe` model object. Users can override by setting to
#' either `FALSE` or `TRUE.` See the "Heterogeneous treatment effects"
#' section below.
#' @param compress Logical. Compress the data by (period by cohort) groups
#' before calculating marginal effects? This trades off a slight loss in
#' precision (typically around the 1st or 2nd significant decimal point) for a
#' substantial improvement in estimation time for large datasets. The default
#' behaviour (`"auto"`) is to automatically compress if the original dataset
#' has more than 500,000 rows. Users can override by setting either `FALSE` or
#' `TRUE`. Note that collapsing by group is only valid if the preceding `etwfe`
#' call was run with `"ivar = NULL"` (the default). See the "Performance
#' tips" section below.
#' @param collapse Logical. An alias for `compress` (only used for backwards
#' compatability and ignored if both arguments are provided). The behaviour is
#' identical, but it will trigger a message nudging users to rather use the
#' `compress` argument.
#' @param predict Character. The type (scale) of prediction used to compute the
#' marginal effects. If `"response"` (the default), then the output is at the
#' level of the response variable, i.e. it is the expected predictor
#' \eqn{E(Y|X)}. If `"link"`, the value returned is the linear predictor of
#' the fitted model, i.e. \eqn{X\cdot \beta}. The difference should only
#' matter for nonlinear models. (Note: This argument is typically called
#' `type` when use in \code{\link[stats]{predict}} or
#' \code{\link[marginaleffects]{slopes}}, but we rename it here to avoid a
#' clash with the top-level `type` argument above.)
#' @param post_only Logical. Drop pre-treatment ATTs? Only evaluated if (a)
#' `type = "event"` and (b) the original `etwfe` model object was estimated
#' using the default `"notyet"` treated control group. If conditions (a) and
#' (b) are met then the pre-treatment effects will be zero as a mechanical
#' result of ETWFE's estimation setup. The default behaviour (`TRUE`) is
#' thus to drop these nuisance rows from the dataset. The `post_only` argument
#' recognises that you may still want to keep them for presentation purposes
#' (e.g., plotting an event study). Nevertheless, be forewarned that enabling
#' that behaviour via `FALSE` is _strictly_ performative: the "zero" treatment
#' effects for any pre-treatment periods is purely an artefact of the
#' estimation setup.
#' @param lean Logical. Default is `FALSE`. Switching to `TRUE` enforces a lean
#' return object; namely a simple data.frame of the main results, stripped of
#' ancillary attributes. Note that this will disable some advanced
#' `marginaleffects` post-processing features, but those are unlikely to be
#' used in the `emfx` context. The upside is a potentially dramatic reduction
#' in the size of the return object. Consequently, we may change the default
#' to `TRUE` in a future version of **etwfe**.
#' @param ... Additional arguments passed to
#' [`marginaleffects::slopes`]. For example, you can pass `vcov =
#' FALSE` to dramatically speed up estimation times of the main marginal
#' effects (but at the cost of not getting any information about standard
#' errors; see Performance tips below). Another potentially useful
#' application is testing whether heterogeneous treatment effects (i.e., the
#' levels of any `xvar` covariate) are equal by invoking the `hypothesis`
#' argument, e.g. `hypothesis = "b1 = b2"`.
#' @return A `data.frame` of aggregated treatment effects along the
#' dimension(s) of interested. Note that this data.frame will have been
#' overloaded with the \code{\link[marginaleffects]{slopes}} class, and so
#' will come with a special print method. But the underlying columns will
#' usually include:
#'
#' - `term`
#' - `contrast`
#' - `<type>` (i.e., the name of your `type` string)
#' - `estimate`
#' - `std.error`
#' - `statistic`
#' - `p.value`
#' - `s.value`
#' - `conf.low`
#' - `conf.high`
#' @references
#' Wong, Jeffrey _et al._ (2021). \cite{You Only Compress Once: Optimal Data
#' Compression for Estimating Linear Models}. Working paper (version: March 16,
#' 2021). Available:
#' https://doi.org/10.48550/arXiv.2102.11297
#' @seealso [marginaleffects::slopes] which does the heavily lifting behind the
#' scenes. [`etwfe`] is the companion estimating function that should be run
#' before `emfx`.
#' @inherit etwfe examples
#' @inheritSection etwfe Performance tips
#' @inheritSection etwfe Heterogeneous treatment effects
#' @importFrom data.table as.data.table setDT .N .SD
#' @importFrom marginaleffects slopes
#' @export
emfx = function(
object,
type = c("simple", "group", "calendar", "event"),
by_xvar = "auto",
compress = "auto",
collapse = compress,
predict = c("response", "link"),
post_only = TRUE,
lean = FALSE,
...
) {
dots = list(...)
## FIXME: uncomment when marginaleffects 0.25.0 is released and remove
## workaround at the boom of this function
## https://github.com/vincentarelbundock/marginaleffects/pull/1296
# if (isTRUE(lean)) {
# oldlean = getOption("marginaleffects_lean")
# options(marginaleffects_lean = TRUE)
# on.exit(options(marginaleffects_lean = oldlean))
# }
.Dtreat = NULL
type = match.arg(type)
predict = match.arg(predict)
etwfe_attr = attr(object, "etwfe")
etwfe_attr[["type"]] = type
gvar = etwfe_attr[["gvar"]]
tvar = etwfe_attr[["tvar"]]
ivar = etwfe_attr[["ivar"]]
xvar = etwfe_attr[["xvar"]]
gref = etwfe_attr[["gref"]]
tref = etwfe_attr[["tref"]]
cgroup = etwfe_attr[["cgroup"]]
if (!by_xvar %in% c("auto", TRUE, FALSE)) stop("\"by_xvar\" has to be \"auto\", TRUE, or FALSE.")
if (!collapse %in% c("auto", TRUE, FALSE)) stop("\"collapse\" has to be \"auto\", TRUE, or FALSE.")
# sanity check
if (isTRUE(by_xvar)) {
if(is.null(xvar)){
warning(
"An \"xvar\" attribute was not found as part of the supplied model object. ",
"(Did your original `etwfe()` call include a valid `xvar = ...` argument?)",
"Average margins are reported instead."
)
by_xvar = FALSE
}
}
if (by_xvar=="auto") by_xvar = !is.null(xvar)
dat = as.data.table(eval(object$call$data, object$call_env))
if ("group" %in% names(dat)) dat[["group"]] = NULL
# check compress argument
nrows = NULL
if (compress == "auto" && collapse != compress) {
compress = collapse
message(
"\nPlease note that the `collapse` argument has been superseded by `compress`. ",
"Both arguments have the identical effect, but we encourage users to use `compress` going forward.\n"
)
}
if (compress == "auto") {
nrows = nrow(dat)
if (nrows >= 5e5) {
compress = TRUE
} else {
compress = FALSE
}
}
if (cgroup == "never") {
dat = dat[dat[[gvar]] != gref, , drop = FALSE] # Drop never treated reference group
if (type != "event") {
# For non-event studies, we want to calculated ATTs for post-treatment only
dat = dat[dat[[".Dtreat"]], , drop = FALSE]
dat = dat[dat[[tvar]] >= dat[[gvar]], , drop = FALSE]
}
} else if (type=="event" & isFALSE(post_only)) {
dat = dat[dat[[gvar]] != gref, , drop = FALSE]
} else if (".Dtreat" %in% names(dat)) {
dat = dat[dat[[".Dtreat"]], , drop = FALSE]
}
# define formulas and calculate weights
if(compress & is.null(ivar)){
if(by_xvar){
dat_weights = dat[(.Dtreat)][, .N, by = c(gvar, tvar, xvar)]
} else {
dat_weights = dat[(.Dtreat)][, .N, by = c(gvar, tvar)]
}
if (!is.null(nrows) && nrows > 5e5) warning(
"\nNote: Dataset larger than 500k rows detected. The data will be ",
"compressed by period-cohort groups to reduce estimation times. ",
"However, this shortcut can reduce the accuracy of the reported ",
"marginal effects. ",
"To override this default behaviour, specify: ",
"`emfx(..., compress = FALSE)`\n"
)
# compress the data
dat = dat[(.Dtreat)][, lapply(.SD, mean), by = c(gvar, tvar, xvar, ".Dtreat")] # compress data
dat = setDT(dat)[, merge(.SD, dat_weights, all.x = TRUE)] # add weights
} else if (compress & !is.null(ivar)) {
warning("\"ivar\" is not NULL. Marginal effects are calculated without collapsing.")
dat$N = 1L
} else {
dat$N = 1L
}
# compress the data
if (type=="simple") {
by_var = ".Dtreat"
} else if (type=="group") {
by_var = gvar
} else if (type=="calendar") {
by_var = tvar
} else if (type=="event") {
dat[["event"]] = dat[[tvar]] - dat[[gvar]]
by_var = "event"
}
if (by_xvar) by_var = c(by_var, xvar)
mfx = slopes(
object,
newdata = dat,
wts = "N",
variables = ".Dtreat",
by = by_var,
type = predict,
...
)
## FIXME: remove when marginaleffects 0.25.0 is released
## https://github.com/vincentarelbundock/marginaleffects/pull/1296
if (isTRUE(lean)) {
atts = names(attributes(mfx))
for (a in setdiff(atts, c("names", "row.names", "class", "by", "conf_level", "lean"))) attr(mfx, a) = NULL
}
class(mfx) = c("emfx", class(mfx))
attr(mfx, "etwfe") = etwfe_attr
return(mfx)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.