devtools::load_all()
library(dplyr)
library(ggplot2)
library(gsynth)
data(BRFSS)
seed <- 2018
BRFSS_panel <- BRFSS %>%
group_by(state, year) %>%
summarize(MEDCOST = weighted.mean(MEDCOST, wt, na.rm = TRUE),
female = weighted.mean(SEX==2, wt, na.rm = TRUE),
college = weighted.mean(EDUCA %in% c(5,6), wt, na.rm = TRUE),
married = weighted.mean(MARITAL==1, wt, na.rm = TRUE), wt = n()) %>%
create_panel() %>%
filter(year != 2017, !is.na(MEDCOST)) #%>%
# filter(year != 2001) %>%
# filter(MEDCOST < 0.2)
panelView(MEDCOST ~ treated, data = as.data.frame(BRFSS_panel), index = c("state", "year"), na.rm = TRUE, main = "BRFSS Data: Treatment Status") #+
ggsave("plot/BRFSS_status.pdf", width = 12, height = 8)
panelView(MEDCOST ~ treated, data = as.data.frame(BRFSS_panel), index = c("state", "year"), na.rm = TRUE, type="raw")
# DID regression
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = BRFSS_panel, weights = wt)
summary(fit)
# Clustered SE
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# With controls
fit <- lm(MEDCOST ~ state + factor(year) + female + college + married + treated, data = BRFSS_panel, weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
BRFSS_panel %>%
group_by(treated_states, treated) %>%
summarize(MEDCOST = weighted.mean(MEDCOST, wt, na.rm = TRUE))
BRFSS_panel$event_year <- factor(BRFSS_panel$event_year) %>% relevel(ref = "0")
# Event-study regressiom
fit <- lm(MEDCOST ~ state + factor(year) + event_year, data = BRFSS_panel, weights = wt)
summary(fit)
# Clustered SE
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# Event plot
event_plot(fit, "event_year", cluster = "state") + xlim(c(-9,10)) +
ylim(c(-0.015, 0.026)) +
labs(y = "MEDCOST", title = "Event Study Coefficients on MEDCOST") #+
ggsave("plot/BRFSS_event_plot.pdf", width = 9, height = 6)
# With controls
fit <- lm(MEDCOST ~ state + factor(year) + female + college + married + event_year, data = BRFSS_panel, weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
out <- gsynth(MEDCOST ~ treated, data = as.data.frame(filter(BRFSS_panel, year!=2001)), na.rm = TRUE,
index = c("state", "year"), force = "two-way", type = "fe", CV = TRUE, r = 2, se = TRUE,
inference = "parametric", nboots = 1000, parallel = TRUE, min.T0 = 5, seed = seed)
plot(out)
plot(out, type = "raw")
plot(out, type = "counterfactual", raw = "none", main = "Average of All Treated States", xlab = "years relative to the treatment",
ylab = "MEDCOST") #+ ggsave("plot/BRFSS_GSC_average.pdf", height = 9, width = 12)
plot(out, type = "counterfactual", raw = "band")
plot(out, type = "counterfactual", raw = "all")
for (state in out$id.tr) {
if (state == "DC") {
state_title <- "DC"
} else {
state_title <- state.name[state.abb == state]
}
assign(state, plot(out, type = "counterfactual", raw = "band", id = state)$plot + labs(title = state_title))
}
#pdf("plot/BRFSS_GSC.pdf", height = 12, width = 18)
multiplot(AL, AZ, CT, DC, DE, ME, MS, NY, VT, cols = 3)
#dev.off()
out$est.att
out$est.avg
data_frame(year = c(1991:2000,2002:2016), treated = out$Y.tr[,state_col], counterfactual = out$Y.ct[,state_col]) %>%
mutate(ci_lower = counterfactual - 1.96 * out$est.ind[,2,state_col],
ci_upper = counterfactual + 1.96 * out$est.ind[,2,state_col]) %>%
ggplot(aes(x = year)) +
geom_line(aes(y = treated), color = "red") +
geom_line(aes(y = counterfactual)) +
geom_line(aes(y = ci_lower), linetype = 2) +
geom_line(aes(y = ci_upper), linetype = 2)
# Robustness checks
# Drop Arizona
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, state!="AZ"), weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# Arizona Only
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, (state=="AZ" | treated_states==0)), weights = wt)
summary(fit)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# Drop Minnesota
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, state!="MN"), weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# MN as treated group
BRFSS_MN <- BRFSS_panel
BRFSS_MN$treated[BRFSS_MN$state=="MN"] <- BRFSS_MN$treated[BRFSS_MN$state=="AZ"]
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = BRFSS_MN, weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# Drop Alaska
arrange(BRFSS_panel, wt) %>% select(state, year, wt)
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, state!="AK"), weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# drop 2001
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, year!=2001), weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# Only 1991-2010
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = filter(BRFSS_panel, year<=2010), weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# College
BRFSS_panel_college <- BRFSS %>%
filter(EDUCA %in% c(5,6)) %>%
group_by(state, year) %>%
summarize(MEDCOST = weighted.mean(MEDCOST, wt, na.rm = TRUE), wt = n()) %>%
create_panel() %>%
filter(year != 2017 & !is.na(MEDCOST) & year != 2001)
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = BRFSS_panel_college, weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
# non-college
BRFSS_panel_noncollege <- BRFSS %>%
filter(EDUCA %in% c(1,2,3,4)) %>%
group_by(state, year) %>%
summarize(MEDCOST = weighted.mean(MEDCOST, wt, na.rm = TRUE), wt = n()) %>%
create_panel() %>%
filter(year != 2017 & !is.na(MEDCOST) & year != 2001)
fit <- lm(MEDCOST ~ state + factor(year) + treated, data = BRFSS_panel_noncollege, weights = wt)
lmtest::coeftest(fit, vcov = plm::vcovHC(fit, method = "arellano", type = "HC0", cluster = fit$model[["state"]]))
dataprep(BRFSS_panel, predictors = "MEDCOST" )
dataprep.out<-
dataprep(
foo = synth.data,
predictors = c("X1", "X2", "X3"),
predictors.op = "mean",
dependent = "Y",
unit.variable = "unit.num",
time.variable = "year",
special.predictors = list(
list("Y", 1991, "mean"),
list("Y", 1985, "mean"),
list("Y", 1980, "mean")
),
treatment.identifier = 7,
controls.identifier = c(29, 2, 13, 17, 32, 38),
time.predictors.prior = c(1984:1989),
time.optimize.ssr = c(1984:1990),
unit.names.variable = "name",
time.plot = 1984:1996
)
synth.out <- synth(dataprep.out)
gaps.plot(dataprep.res = dataprep.out,synth.res = synth.out)
path.plot(dataprep.res = dataprep.out,synth.res = synth.out)
out <- gsynth(MEDCOST ~ treated, data = as.data.frame(filter(BRFSS_panel, state!="AZ", year!=2001)), na.rm = TRUE,
index = c("state", "year"), force = "two-way", type = "fe", CV = TRUE, r = 0, se = TRUE,
inference = "parametric", nboots = 1000, parallel = TRUE, min.T0 = 5, seed = seed)
plot(out)
plot(out, type = "raw")
plot(out, type = "counterfactual", raw = "none", main = "Average of All Treated States", xlab = "years relative to the treatment",
ylab = "MEDCOST") #+ ggsave("plot/BRFSS_GSC_average.pdf", height = 9, width = 12)
plot(out, type = "counterfactual", raw = "band")
plot(out, type = "counterfactual", raw = "all")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.