#' @title reset.sim
#' @description a function to create a "reasonable" set of parameters
#' to create simulated panel data that obeys a parallel trends assumption.
#' In particular, it provides parameters where the the effect of participating
#' in the treatment is equal to one in all post-treatment time periods.
#'
#' After calling this function, the user can change particular values of the
#' parameters in order to generate dynamics, heterogeneous effects across
#' groups, etc.
#'
#' @param time.periods The number of time periods to include
#' @param n The total number of observations
#' @param ipw If TRUE, sets parameters so that DGP is
#' compatible with recovering ATT(g,t)'s using IPW (i.e.,
#' where logit that just includes a linear term in X works). If
#' FALSE, sets parameters that will be incompatible with IPW.
#' Either way, these parameters can be specified by the user
#' if so desired.
#' @param reg If TRUE, sets parameters so that DGP is compatible
#' with recovering ATT(g,t)'s using regressions on untreated
#' untreated potential outcomes. If FALSE, sets parameters that
#' will be incompatible with using regressions (i.e., regressions
#' that include only linear term in X). Either way, these
#' parameters can be specified by the user if so desired.
#'
#' @return list of simulation parameters
#'
#' @export
reset.sim <- function(time.periods=4, n=5000, ipw=TRUE, reg=TRUE) {
#-----------------------------------------------------------------------------
# set parameters
#-----------------------------------------------------------------------------
# coefficient on X
bett <- seq(1:time.periods)
# time fixed effect
thet <- seq(1:time.periods)
# time fixed effect
theu <- thet # changing this creates violations of parallel trends
# covariate effect
betu <- bett # changing this creates violations of conditional parallel trends
#-----------------------------------------------------------------------------
# parameters for treated potential outcomes
#-----------------------------------------------------------------------------
te.bet.ind <- rep(1,time.periods) # no selective treatment timing
te.bet.X <- bett #no heterogeneous effects by X
te.t <- thet # no calendar time effects
te.e <- rep(0,time.periods) # no dynamic effects
te <- 1 # overall basic effect
# parameters in generalized propensity score
# don't make them too big otherwise can get divide by 0
gamG <- c(0,1:time.periods)/(2*time.periods)
# return list of parameters
list(time.periods=time.periods,
bett=bett,
thet=thet,
theu=theu,
betu=betu,
te.bet.ind=te.bet.ind,
te.bet.X=te.bet.X,
te.t=te.t,
te.e=te.e,
te=te,
n=n,
gamG=gamG,
ipw=ipw,
reg=reg
)
}
#' @title build_sim_dataset
#'
#' @description A function for building simulated data
#'
#' @param sp_list A list of simulation parameters. See `reset.sim` to generate
#' some default values for parameters
#' @param panel whether to construct panel data (the default) or repeated
#' cross sections data
#'
#' @return a data.frame with the following columns
#' \itemize{
#' \item G observations group
#' \item X value of covariate
#' \item id observation's id
#' \item cluster observation's cluster (by construction there is no within-cluster correlation)
#' \item period time period for current observation
#' \item Y outcome
#' \item treat whether or not this unit is ever treated
#' }
#'
#' @export
build_sim_dataset <- function(sp_list, panel=TRUE) {
#-----------------------------------------------------------------------------
# build dataset
#-----------------------------------------------------------------------------
time.periods <- sp_list$time.periods
nt <- sp_list$nt
bett <- sp_list$bett
thet=sp_list$thet
nu <- sp_list$nu
theu <- sp_list$theu
betu <- sp_list$betu
te.bet.ind <- sp_list$te.bet.ind
te.bet.X <- sp_list$te.bet.X
te.t <- sp_list$te.t
te.e <- sp_list$te.e
te <- sp_list$te
n <- sp_list$n
gamG <- sp_list$gamG
ipw <- sp_list$ipw
reg <- sp_list$reg
X <- rnorm(n)
if (ipw) {
pr <- exp(outer(X,gamG)) / apply( exp(outer(X,gamG)), 1, sum)
} else {
pr <- exp(outer((pnorm(X)+0.5)^2,gamG)) / apply( exp(outer((pnorm(X)+0.5)^2,gamG)), 1, sum)
}
G <- apply(pr, 1, function(pvec) sample(seq(0,time.periods), size=1, prob=pvec))
Gt <- G[G>0]
nt <- length(Gt)
if (reg) {
Xmodel <- X
} else {
Xmodel <- X^2
}
Xt <- Xmodel[G>0]
# draw individual fixed effect
Ct <- rnorm(nt, mean=G)
# generate untreated potential outcomes in each time period
Ynames <- paste0("Y",1:time.periods)
#Ynames <- paste0(1:time.periods)
Y0tmat <- sapply(1:time.periods, function(t) {
thet[t] + Ct + Xt*bett[t] + rnorm(nt)
})
Y0tdf <- as.data.frame(Y0tmat)
# generate treated potential outcomes
Y1tdf <- sapply(1:time.periods, function(t) {
te.t[t] + te.bet.ind[Gt]*Ct + Xt*te.bet.X[t] + (Gt <= t)*te.e[sapply(1:nt, function(i) max(t-Gt[i]+1,1))] + te + rnorm(nt) # hack for the dynamic effects but ok
})
# generate observed data
Ytdf <- sapply(1:time.periods, function(t) {
(Gt<=t)*Y1tdf[,t] + (Gt>t)*Y0tdf[,t]
})
colnames(Ytdf) <- Ynames
# store observed data for treated group
dft <- cbind.data.frame(G=Gt,X=X[G>0],Ytdf)
# untreated units
# draw untreated covariate
nu <- sum(G==0)
Xu <- Xmodel[G==0]
# draw untreated fixed effect
Cu <- rnorm(nu, mean=0)
# generate untreated potential outcomes
Y0umat <- sapply(1:time.periods, function(t) {
theu[t] + Cu + rnorm(nu) + Xu*betu[t]
})
Y0udf <- as.data.frame(Y0umat)
colnames(Y0udf) <- Ynames
# store dataset of observed outcomes for untreated units
dfu <- cbind.data.frame(G=0,X=X[G==0],Y0udf)
# store overall dataset
df <- rbind.data.frame(dft, dfu)
# generate id variable
df$id <- 1:nrow(df)
# generate clusters (there's no actual within-cluster correlation)
df$cluster <- sample(1:50, size=nrow(df), replace=TRUE)
# convert data from wide to long format
ddf <- tidyr::pivot_longer(df,
cols=tidyr::starts_with("Y"),
names_to="period",
names_prefix="Y",
values_to="Y")
ddf$period <- as.numeric(ddf$period)
ddf$treat <- 1*(ddf$G > 0)
ddf <- ddf[order(ddf$id, ddf$period),] # reorder data
if (!panel) { # repeated cross sections
n <- nt+nu
Time <- sample(1:time.periods, size=n, replace=TRUE, prob=rep(1/time.periods, time.periods))
right.row <- sapply( unique(ddf$id), function(i) {
which(ddf$id==i & ddf$period==Time[i])
})
ddf <- ddf[right.row,]
}
ddf <- subset(ddf, G != 1)
ddf
}
#' @title sim
#' @description An internal function that builds simulated data, computes
#' ATT(g,t)'s and some aggregations. It is useful for testing the inference
#' procedures in the `did` function.
#'
#' @inheritParams reset.sim
#' @inheritParams build_sim_dataset
#'
#' @param ret which type of results to return. The options are `Wpval` (returns
#' 1 if the p-value from a Wald test that all pre-treatment ATT(g,t)'s are equal
#' is less than .05),
#' `cband` (returns 1 if a uniform confidence band covers 0 for groups and times),
#' `simple` (returns 1 if, using the simple treatment effect aggregation results
#' in rejecting that this aggregated treatment effect parameter is equal to 0),
#' `dynamic` (returns 1 if the uniform confidence band from the dynamic treatment
#' effect aggregation covers 0 in all pre- and post-treatment periods). The default
#' value is NULL, and in this case the function will just return the results from
#' the call to `att_gt`.
#' @param bstrap whether or not to use the bootstrap to conduct inference (default is TRUE)
#' @param cband whether or not to compute uniform confidence bands in the call to `att_gt`
#' (the default is TRUE)
#' @param control_group Whether to use the "nevertreated" comparison group (the default)
#' or the "notyettreated" as the comparison group
#' @param xformla Formula for covariates in `att_gt` (default is `~X`)
#' @param est_method Which estimation method to use in `att_gt` (default is "dr")
#' @param clustervars Any additional variables which should be clustered on
#' @param panel whether to simulate panel data (the default) or otherwise repeated
#' cross sections data
#'
#' @return When `ret=NULL`, returns the results of the call to `att_gt`, otherwise returns
#' 1 if the specified test rejects or 0 if not.
#'
#' @export
sim <- function(sp_list,
ret=NULL,
bstrap=TRUE,
cband=TRUE,
control_group="nevertreated",
xformla=~X,
est_method="dr",
clustervars=NULL,
panel=TRUE) {
ddf <- build_sim_dataset(sp_list=sp_list,
panel=panel)
time.periods <- sp_list$time.periods
te.e <- sp_list$te.e
te <- sp_list$te
# get results
res <- att_gt(yname="Y", xformla=xformla, data=ddf, tname="period", idname="id",
gname="G",
bstrap=bstrap, cband=cband, control_group=control_group,
est_method=est_method,
clustervars=clustervars,
panel=panel)
if (is.null(ret)) {
return(res)
} else if (ret=="Wpval") {
rej <- 1*(res$Wpval < .05)
return(rej)
} else if (ret=="cband") {
cu <- res$att + res$c*res$se
cl <- res$att - res$c*res$se
covers0 <- 1*(all( (cu > 0) & (cl < 0)))
return(covers0)
} else if (ret=="simple") {
agg <- aggte(res)
rej <- 1*( abs(agg$overall.att / agg$overall.se) > qnorm(.975) )
return(rej)
} else if (ret=="dynamic") {
truth <- c(rep(0,(time.periods-2)),te+te.e[1:(time.periods-1)])
agg <- aggte(res, type="dynamic")
cu <- agg$att.egt + agg$crit.val.egt * agg$se.egt
cl <- agg$att.egt - agg$crit.val.egt * agg$se.egt
coverstruth <- 1*(all( (cu > truth) & (cl < truth)))
return(coverstruth)
} else if (ret=="notyettreated") {
agg <- aggte(res)
rej <- 1*( abs(agg$overall.att / agg$overall.se) > qnorm(.975) )
return(rej)
} else {
return(res)
}
}
## pretest_sim <- function(ret=NULL, bstrap=FALSE, cband=FALSE,
## control.group="nevertreated", panel=TRUE, xformla=~X, cores=1) {
## ddf <- build_ipw_dataset(panel=panel)
## # get results
## res <- conditional_did_pretest(yname="Y", xformla=xformla, data=ddf,
## tname="period", idname="id",
## first.treat.name="G", estMethod="ipw",
## printdetails=FALSE,
## bstrap=bstrap, cband=cband,
## control.group=control.group,
## panel=panel,
## pl=TRUE, cores=cores)
## res$CvMpval
## }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.