#' Parse Fixed Effects
#'
#' Extract fixed effects from formula
#'
#' @param f Formula.
#' @param ... additional arguments.
#'
#' @return a character vector with fixed effects.
#'
parseFEs <- function(f, ...) {
gsub(
"[[:space:]]", "",
strsplit(
deparse(f[[3]][[3]]),
"\\+"
)[[1]]
)
}
#' Subset data according to function arguments
#'
#' @param data A data frame.
#' @param s A list containing model parameters.
#'
#' @return A data frame.
#'
subsetData <- function(data, s){
vars_to_keep <- list(all.vars(s$y0), s$time, s$cohort, s$unit, s$het)
if( is.character(s$w) ) {
vars_to_keep <- c(vars_to_keep, s$w)
}
vars_to_keep <- unlist(Filter(is.character, as.list(unique(vars_to_keep))))
return(subset(data, select = vars_to_keep))
}
#' Parse control variables
#'
#' @param f Formula.
#' @param ... additional arguments.
#'
#' @importFrom rlang parse_exprs
#'
#' @return a vector of expressions.
#'
parseControls <- function(f, ...) {
parse_exprs(
grep("^[^\\d].*",
gsub(
"[[:space:]]", "",
strsplit(
deparse(f[[3]][[2]]),
"\\+"
)[[1]]
),
perl = TRUE,
value = TRUE
)
)
}
#' Generate longitudinal data with staggered treatment
#'
#' @param i Numeric. Number of units
#' @param t Numeric. Number of periods
#' @param hdfe Logical. Should high dimensional fixed effect be added?
#' @param control Logical. Should continuous control be added?
#' @param attrition.rate Numerical. Default is 0. Probability of missing observation.
#' @param treatment one sided formula, default is ~ d * (k + 1). See details.
#' @param ig an optional one sided formula, default is `~ i %% 2`. Will be evaluated to define a new variable which can be used
#' in the treatment formula. By default, `ig` is `~ i %% 2`, that is a random unit invariant dummy variable.
#'
#'
#' @section Treatment effect formula:
#'
#' By default the treatment effect is `~ d * (k + 1)` where `d` is an internal
#' dummy equal to one if observation is treated and `k` is the relative time to
#' event. User can define its own formula to create complex treatment and test
#' the model effectiveness.
#'
#' User has access to the following hidden variables:
#' - `h`: a cohort-wise randomly generated number
#' - `g`: Cohort identifier
#' - `a`: Individual fixed effect
#' - `b`: Time fixed effect
#' - `t`: Calendar time
#' - `ig`: a random unit invariant value in {0, 1, 2}. Can be used to define heterogeneous effects.
#'
#' @section Default outcome:
#'
#' The default data generating process is:
#'
#' `y ~ a + b + hdfe + 2*x1 + d * (k + 1) + e`
#'
#' Variable description is available in previous section.
#'
#' @return a data.table with columns
#' \item{i}{Unit identifier.}
#' \item{t}{Calendar time.}
#' \item{g}{Time of treatment.}
#' \item{a}{Individual fixed effect. ~N(0,1)}
#' \item{b}{Time fixed effect. ~N(0,1)}
#' \item{x1}{A continuous, normally distributed control. ~N(0,2)}
#' \item{hdfe}{A random effect with \{i\}/50 modalities. ~ N(0,1)}
#' \item{e}{Residual. ~ N(0,2)}
#' \item{hdfefactor}{A fixed effect of size \{i\}/50.}
#' \item{k}{Relative time to treatment.}
#' \item{d}{Dummy variable defined as k >= 0}
#' \item{h}{Randomly generated variable at the level of the cohort.}
#' \item{y}{Outcome.}
#' \item{true_effect}{True average treatment effect on the treated.}
#'
#' @importFrom stats rnorm runif
#'
#' @export
#'
#' @examples
#' dt <- generateDidData(100,10)
#' head(dt)
#'
generateDidData <- function(i,
t,
hdfe = TRUE,
control = TRUE,
attrition.rate = 0,
treatment = ~ d * (k + 1),
ig = ~ i %% 2
){
N <- i*t
dt <- data.frame(i = rep(1:i, each = t),
t = rep(1:t, i),
g = rep(sample(c(2:(t - 1), Inf), i, replace = TRUE), each = t),
a = rep(rnorm(i), each = t),
b = rep(rnorm(t), i),
x1 = rnorm(N, sd = 2),
hdfe = sample(rnorm(i/50), N, replace = T),
e = rnorm(N, sd = 2))
# Naming hdfe
dt$hdfefactor <- as.factor(dt$hdfe)
levels(dt$hdfefactor) <- 1:length(levels(dt$hdfefactor))
# Time to event
dt$k <- dt$t - dt$g
# Treated dummy
dt$d <- as.numeric(dt$k >= 0)
# Heterogeneous effects
dt$h <- rnorm(1, sd = 0.1) * (dt$g - min(dt$g))
dt$h[!is.finite(dt$h)] <- 0
# Outcome
dt$k[!is.finite(dt$k)] <- 0
dt$y <- dt$e + dt$a + dt$b
# Group identifier
dt$ig <- eval(ig[[2]], envir = dt)
#compute the true causal effect
dt$trueffect <- eval(treatment[[2]], envir = dt)
dt$y <- dt$y + dt$trueffect
if (control) {
dt$y <- dt$y + 2 * dt$x1
}
if (hdfe) {
dt$y <- dt$y + dt$hdfe
}
dt$k <- dt$t - dt$g
if (attrition.rate > 0) {
dt <- dt[runif(N) >= attrition.rate,]
}
return(dt)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.