inst/doc/pre-testing.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- echo=FALSE, results="hide", warning=FALSE, message=FALSE----------------
library(did)
# Source the currently version of the did package (based on our Dropbox)
# fldr <- here::here("R/")
# sapply(paste0(fldr,list.files(fldr)), source)
# Source simulation designs
source(here::here("vignettes/setup_sims.R"))

## ----echo=FALSE---------------------------------------------------------------
time.periods <- 4
reset.sim()
bett <- betu <- rep(0, time.periods)
te <- 0
set.seed(1814)

## -----------------------------------------------------------------------------
# generate dataset with 4 time periods
time.periods <- 4

# generate dynamic effects
te.e <- time.periods:1

# generate data set with these parameters
# (main thing: it generates a dataset that satisfies
# parallel trends in all periods...including pre-treatment)
data <- build_sim_dataset()

head(data)

## ---- fig.width=8,fig.height=5, fig.align='center', out.width="90%", dpi = 200----
#-----------------------------------------------------------------------------
# modify the dataset a bit so that we can run an event study
#-----------------------------------------------------------------------------

# generate leads and lags of the treatment
Dtl <- sapply(-(time.periods-1):(time.periods-2), function(l) {
    dtl <- 1*( (data$period == data$G + l) & (data$G > 0) )
    dtl
})
Dtl <- as.data.frame(Dtl)
cnames1 <- paste0("Dtmin",(time.periods-1):1)
colnames(Dtl) <- c(cnames1, paste0("Dt",0:(time.periods-2)))
data <- cbind.data.frame(data, Dtl)
row.names(data) <- NULL

head(data)

#-----------------------------------------------------------------------------
# run the event study regression
#-----------------------------------------------------------------------------

# load plm package
library(plm)

# run event study regression
# normalize effect to be 0 in pre-treatment period
es <- plm(Y ~ Dtmin3 + Dtmin2 + Dt0 + Dt1 + Dt2, 
          data=data, model="within", effect="twoways",
          index=c("id","period"))

summary(es)

#-----------------------------------------------------------------------------
# make an event study plot
#-----------------------------------------------------------------------------

# some housekeeping for making the plot
# add 0 at event time -1
coefs1 <- coef(es)
ses1 <- sqrt(diag(summary(es)$vcov))
idx.pre <- 1:(time.periods-2)
idx.post <- (time.periods-1):length(coefs1)
coefs <- c(coefs1[idx.pre], 0, coefs1[idx.post])
ses <- c(ses1[idx.pre], 0, ses1[idx.post])
exposure <- -(time.periods-1):(time.periods-2)

cmat <- data.frame(coefs=coefs, ses=ses, exposure=exposure)

library(ggplot2)

ggplot(data=cmat, mapping=aes(y=coefs, x=exposure)) +
  geom_line(linetype="dashed") +
  geom_point() + 
  geom_errorbar(aes(ymin=(coefs-1.96*ses), ymax=(coefs+1.96*ses)), width=0.2) +
  ylim(c(-2,5)) +
  theme_bw()

## ---- fig.width=8,fig.height=10, fig.align='center', out.width="90%", dpi = 200----
# estimate group-group time average treatment effects
did_att_gt <- att_gt(yname="Y",
                     tname="period",
                     idname="id",
                     gname="G",
                     data=data,
                     bstrap=FALSE,
                     cband=FALSE)
summary(did_att_gt)

# plot them
ggdid(did_att_gt)


## ---- fig.width=8,fig.height=5, fig.align='center', out.width="90%", dpi = 200----
# aggregate them into event study plot
did_es <- aggte(did_att_gt, type="dynamic")

# plot the event study
ggdid(did_es)

## ---- echo=FALSE--------------------------------------------------------------
reset.sim()
bett <- betu <- rep(0, time.periods)
te <- 0
set.seed(1814)

## -----------------------------------------------------------------------------
# generate dataset with 4 time periods
time.periods <- 4

# generate dynamic effects
te.e <- time.periods:1

# generate selective treatment timing
# (*** this is what is different here ***)
te.bet.ind <- time.periods:1 / (time.periods/2)

# generate data set with these parameters
# (main thing: it generates a dataset that satisfies
# parallel trends in all periods...including pre-treatment)
data <- build_sim_dataset()

## -----------------------------------------------------------------------------
# run through same code as in earlier example...omitted

## ---- echo=FALSE--------------------------------------------------------------
#-----------------------------------------------------------------------------
# modify the dataset a bit so that we can run an event study
#-----------------------------------------------------------------------------

# generate leads and lags of the treatment
Dtl <- sapply(-(time.periods-1):(time.periods-2), function(l) {
    dtl <- 1*( (data$period == data$G + l) & (data$G > 0) )
    dtl
})
Dtl <- as.data.frame(Dtl)
cnames1 <- paste0("Dtmin",(time.periods-1):1)
colnames(Dtl) <- c(cnames1, paste0("Dt",0:(time.periods-2)))
data <- cbind.data.frame(data, Dtl)
row.names(data) <- NULL

#-----------------------------------------------------------------------------
# run the event study regression
#-----------------------------------------------------------------------------

# load plm package
library(plm)

## -----------------------------------------------------------------------------
# run event study regression
# normalize effect to be 0 in pre-treatment period
es <- plm(Y ~ Dtmin3 + Dtmin2 + Dt0 + Dt1 + Dt2, 
          data=data, model="within", effect="twoways", 
          index=c("id","period"))

summary(es)

## ---- echo=FALSE--------------------------------------------------------------
#-----------------------------------------------------------------------------
# make an event study plot
#-----------------------------------------------------------------------------

# some housekeeping for making the plot
# add 0 at event time -1
coefs1 <- coef(es)
ses1 <- sqrt(diag(summary(es)$vcov))
idx.pre <- 1:(time.periods-2)
idx.post <- (time.periods-1):length(coefs1)
coefs <- c(coefs1[idx.pre], 0, coefs1[idx.post])
ses <- c(ses1[idx.pre], 0, ses1[idx.post])
exposure <- -(time.periods-1):(time.periods-2)

cmat <- data.frame(coefs=coefs, ses=ses, exposure=exposure)

## ---- fig.width=8,fig.height=5, fig.align='center', out.width="90%", dpi = 200----
# run through same code as before...omitted

# new event study plot
ggplot(data=cmat, mapping=aes(y=coefs, x=exposure)) +
  geom_line(linetype="dashed") +
  geom_point() + 
  geom_errorbar(aes(ymin=(coefs-1.96*ses), ymax=(coefs+1.96*ses)), width=0.2) +
  ylim(c(-2,5)) +
  theme_bw()

## ---- fig.width=8,fig.height=10, fig.align='center', out.width="90%", dpi = 200----
# estimate group-group time average treatment effects
did.att.gt <- att_gt(yname="Y",
                     tname="period",
                     idnam="id",
                     gname="G",
                     data=data
                     )
summary(did.att.gt)

# plot them
ggdid(did.att.gt)

## ---- fig.width=8,fig.height=5, fig.align='center', out.width="90%", dpi = 200----
# aggregate them into event study plot
did.es <- aggte(did.att.gt, type="dynamic")

# plot the event study
ggdid(did.es)

## ----eval=FALSE---------------------------------------------------------------
#  # not run (this code can be substantially slower)
#  reset.sim()
#  set.seed(1814)
#  nt <- 1000
#  nu <- 1000
#  cdp <- conditional_did_pretest("Y", "period", "id", "G", xformla=~X, data=data)
#  cdp

Try the did package in your browser

Any scripts or data that you put into this service are public.

did documentation built on July 20, 2022, 5:06 p.m.