### Header ---------------------------
###
### Title: reproduce_results.R
###
### Description: R script to reproduce results of Daljord, Pouliot, Hu, and Xiao (2021).
###
### Author: Omkar A. Katta
###
### Notes: User must change the information in the Preliminaries Section
### as needed. In particular, the `version` variable, the file paths,
### and the plotting parameters can be changed to fit the user's preferences.
###
### User needs to change file paths to source functions used to prepare and
### plot the data.
###
### Due to privacy restrictions, the original data set cannot be distributed.
### Hence, an anonymized version of the data set will be provided.
### The results of this script will therefore differ slightly from the paper's results.
###
### Preliminaries ---------------------------
# Install Packages (as needed)
if (!require(diftrans, quietly = T)){
if (!require(devtools, quietly = T)){
install.packages("devtools")
}
devtools::install_github("omkarakatta/diftrans")
}
if (!require(magrittr, quietly = T)){
install.packages("magrittr")
}
if (!require(ggplot2, quietly = T)){
install.packages("ggplot2")
}
if (!require(gridExtra, quietly = T)){
install.packages("gridExtra")
}
if (!require(dplyr, quietly = T)){
install.packages("dplyr")
}
if (!require(tidyr, quietly = T)){
install.packages("tidyr")
}
if (!require(stargazer, quietly = T)){
install.packages("stargazer")
}
if (!require(here, quietly = T)){
install.packages("here")
}
library(diftrans)
library(magrittr)
library(ggplot2)
library(gridExtra)
library(dplyr)
library(tidyr)
library(stargazer)
library(latex2exp)
# source functions: CHANGE FILE PATH
source(here::here("data-raw", "prepare_data.R"))
source(here::here("data-raw", "plotting_functions.R"))
# What data do you want to use?
# Option 1: "original" i.e. use entire data set
# Option 2: "v1" i.e. exclude December 2010
# Option 3: "v2" i.e. exclude December 2010, January 2011, and February 2012
version <- "original"
# Do you want to see figures?
show_fig <- TRUE
show_fig1 <- FALSE
show_fig2 <- FALSE
show_fig3 <- FALSE
show_footnote8 <- FALSE
show_fig4 <- FALSE
show_fig5 <- FALSE
show_fig6 <- FALSE
show_fig7 <- FALSE
show_fig8 <- FALSE
show_fig9 <- FALSE
show_fig10 <- FALSE
# Do you want to save figures?
save_fig <- FALSE
save_fig1 <- FALSE
save_fig2 <- FALSE
save_fig3 <- FALSE
save_fig4 <- FALSE
save_fig5 <- FALSE
save_fig6 <- FALSE
save_fig7 <- FALSE
save_fig8 <- FALSE
save_fig9 <- FALSE
save_fig10 <- FALSE
# Do you want to run diff-in-diff analysis
show_diff_in_diff <- FALSE
# Plotting Preferences
grayscale <- FALSE # toggle to TRUE for black-and-white plots
temp <- ifelse(grayscale, "grayscale", "color")
fontsize <- 20 # change font size of plot, axis, and legend titles
fontsizeaxis <- 15 # change font size of axis labels
linetype0 <- "solid" # main line type
linetype1 <- "dashed" # secondary line type
linetype2 <- "dotted" # tertiary line type
linetype3 <- "twodash"
linetype4 <- "longdash"
# Where will you store plots?
# img_path <- ... # uncomment and replace ... with destination file path
# suffix <- "_" # naming convention
# How large do you want your plots to be? (inches)
# default_width <- 7
# default_height <- 3
### Prepare Data ---------------------------
if (version == "original") {
Beijing <- Beijing_sample
Tianjin <- Tianjin_sample
Shijiazhuang <- Shijiazhuang_sample
seedplus <- 0
message(paste("Version: ", version, sep = ""))
} else if (version == "v1") {
# filter out Dec 2010
Beijing <- Beijing_sample %>%
filter(!(year == 2010 & month == 12))
Tianjin <- Tianjin_sample %>%
filter(!(year == 2010 & month == 12))
Shijiazhuang <- Shijiazhuang_sample %>%
filter(!(year == 2010 & month == 12))
seedplus <- 1
message(paste("Version: ", version, sep = ""))
} else if (version == "v2") {
# filter out Dec 2010, Jan 2011, and Feb 2011
Beijing <- Beijing_sample %>%
filter(!(year == 2010 & month == 12)) %>%
filter(!(year == 2011 & month %in% c(1, 2)))
Tianjin <- Tianjin_sample %>%
filter(!(year == 2010 & month == 12)) %>%
filter(!(year == 2011 & month %in% c(1, 2)))
Shijiazhuang <- Shijiazhuang_sample %>%
filter(!(year == 2010 & month == 12)) %>%
filter(!(year == 2011 & month %in% c(1, 2)))
seedplus <- 2
message(paste("Version: ", version, sep = ""))
} else {
stop("Specify a valid value for `version`.")
}
### Figure 1 ---------------------------
fignum <- 1
if (show_fig | show_fig1) {
BTS <- do.call("rbind", list(Beijing, Tianjin, Shijiazhuang))
fig1_prep <- BTS %>%
select(ym, city, swtprice) %>%
group_by(ym, city) %>%
summarize(swtprice = mean(swtprice, na.rm = T)) %>%
ungroup()
fig1_plot <- ggplot() +
bmp_vline(xint = as.Date("2011-01-01")) +
bmp_vline(xint = as.Date("2014-01-01")) +
bmp_point(x = ym,
y = swtprice,
data = fig1_prep,
color = factor(city,
levels = c("Beijing", "Tianjin", "Shijiazhuang")),
size = 1.5,
alpha = 0.5,
show.legend = F) +
bmp_plot(data = fig1_prep,
color = city,
size = 0.5,
xlab = "Date (months)", ylab = "Price (RMB 1000)",
xtype = "ym",
ytype = "continuous",
ybreaks = seq(100, 225, 20),
ylabels = seq(100, 225, 20),
legend.position = c(0.1, 0.85),
legend.direction = "vertical",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
geom_line(data = fig1_prep, aes(x = ym, y = swtprice,
color = factor(city,
levels = c("Beijing", "Tianjin", "Shijiazhuang")),
linetype = factor(city,
levels = c("Beijing", "Tianjin", "Shijiazhuang"))),
size = 0.5) +
scale_linetype_manual(values = c(linetype0, linetype1, linetype2),
name = "")
if (save_fig | save_fig1){
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height+1, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 2 ---------------------------
fignum <- 2
if (show_fig | show_fig2) {
pre <- prep_data(Beijing, prep = "pmf",
lowerdate = "2010-01-01", upperdate = "2011-01-01")
# find prices of new cars in 2011
post <- Beijing %>%
filter(ym < as.Date("2012-01-01")) %>%
pivot_wider(id_cols = c(month, color, noticenum),
names_from = year,
values_from = MSRP) %>%
filter(is.na(`2010`)) %>%
rename(MSRP = `2011`) %>%
select(MSRP)
fig2_plot <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
x = log(MSRP), scale = 1, binwidth = 1/6) +
stat_function(data = pre,
mapping = aes(x = log(MSRP)),
color = get_color_palette(2, grayscale)[1],
fun = dnorm,
args = list(mean = mean(log(pre$MSRP)),
sd = sd(log(pre$MSRP)))) +
stat_function(data = post,
mapping = aes(x = log(MSRP)),
color = get_color_palette(2, grayscale)[2],
fun = dnorm,
args = list(mean = mean(log(post$MSRP)),
sd = sd(log(post$MSRP)))) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xlab = "log MSRP (log RMB)",
ylab = "Density",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5))
if (save_fig | save_fig2) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height+1, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 3 ---------------------------
fignum <- 3
if (show_fig | show_fig3) {
pre <- prep_data(Beijing, prep = "dist",
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Beijing, prep = "dist",
lowerdate = "2011-01-01", upperdate = "2012-01-01")
fig3_plot <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
scale = 1000,
x = MSRP, binwidth = 20) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, 1400, by = 100),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.002),
xlab = "MSRP (RMB 1000)",
ylab = "Density",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5))
if (save_fig | save_fig3) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height+1, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Footnote 8 ---------------------------
if (show_fig | show_footnote8) {
support <- prep_data(data = Beijing, prep = "support")
pre <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
# create L1 penalty cost
Lpcost <- function(min_pre_support, min_post_support, p) {
costm <- matrix(NA_real_, nrow = length(min_pre_support), ncol = length(min_post_support))
for (i in seq_along(min_pre_support)) {
costm[i, ] <- abs(min_pre_support[i] - min_post_support)^p
}
costm
}
# initialize values
bandwidth_seq <- seq(0, 100000, 1000)
lambda_seq <- c(0, 0.01)
results <- matrix(NA_real_,
nrow = length(bandwidth_seq), ncol = length(lambda_seq))
d <- 25000
lambda <- 0.01
l1_cost <- Lpcost(support, support, 1)
l0_cost <- build_costmatrix(support = support, bandwidth = d)
cost_mat <- l0_cost + lambda + l1_cost
OT <- transport(
as.numeric(sum(post$count) / sum(pre$count) * pre$count),
as.numeric(post$count),
cost_mat
)
pre_support <- unique(pre$MSRP[pre$count != 0 & !is.na(pre$MSRP)])
post_support <- unique(post$MSRP[post$count != 0 & !is.na(post$MSRP)])
OT_revised <- OT %>%
dplyr::mutate(from = pre_support[from]) %>%
dplyr::mutate(to = post_support[to])
common <- unique(sort(c(OT_revised$from, OT_revised$to)))
OT_final <- OT_revised %>%
dplyr::mutate(abs_diff = abs(from - to)) %>%
dplyr::group_by(abs_diff) %>%
dplyr::summarise(total = sum(mass))
#~ running sum of penalized optimal transport at d* = 25000
total_mass <- sum(OT_final$total[OT_final$abs_diff > 25000])
OT_running_sum <- OT_final %>%
dplyr::arrange(desc(abs_diff)) %>%
dplyr::mutate(total_lag = dplyr::lag(total)) %>%
tidyr::replace_na(list(total_lag = 0)) %>%
dplyr::mutate(cumulative = cumsum(total_lag)) %>%
dplyr::mutate(percentage = cumulative / total_mass * 100) %>%
dplyr::arrange(abs_diff) %>%
dplyr::filter(abs_diff >= 25000)
# plot
ggplot(OT_running_sum, aes(x = abs_diff, y = percentage)) +
geom_line() +
theme_bmp(sizefont = (fontsize - 8), axissizefont = (fontsizeaxis - 5)) +
xlab("d") +
ylab("Mass above d (% of total mass > 25000)") +
scale_y_continuous(breaks = seq(0, max(OT_running_sum$percentage), 10)) +
scale_x_continuous(breaks = seq(25000, max(OT_running_sum$abs_diff), 100000))
# table
OT_running_sum %>%
dplyr::filter(abs_diff %% 5000 < 1000) %>%
dplyr::select(abs_diff, cumulative, percentage) %>%
knitr::kable(., format = "rst", booktabs = T, linesep = "", longtable = T, align = 'c')
}
### Figure 4 ---------------------------
fignum <- 4
if (show_fig | show_fig4) {
pre <- prep_data(Beijing, prep = "pmf",
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Beijing, prep = "pmf",
lowerdate = "2011-01-01", upperdate = "2012-01-01")
scalex <- 1000
color_fig4a <- ifelse(grayscale,
get_color_palette(2, grayscale)[[1]],
get_color_palette(2, grayscale)[[1]])
fig4a_OK <- ggplot() +
geom_segment(data = pre,
mapping = aes(x = MSRP / scalex, xend = MSRP / scalex,
y = 0, yend = count),
alpha = 0.5,
color = color_fig4a) +
bmp_plot(data = Beijing,
xtype = "continuous",
xbreaks = seq(0, max(Beijing$MSRP, na.rm = T)/scalex, by = 200000/scalex),
ytype = "continuous",
ybreaks = seq(0, max(pre$count, post$count), by = 5000),
ylims = c(0, max(pre$count, post$count)),
ylab = "Probability Mass Function",
xlab = "MSRP (RMB 1000)",
ggtitle = "Pre-Lottery",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5))
color_fig4b <- ifelse(grayscale,
get_color_palette(3, grayscale)[[2]],
get_color_palette(2, grayscale)[[2]])
fig4b_OK <- ggplot() +
geom_segment(data = post,
mapping = aes(x = MSRP / scalex, xend = MSRP / scalex,
y = 0, yend = count),
alpha = 0.5,
color = color_fig4b) +
bmp_plot(data = Beijing,
xtype = "continuous",
xbreaks = seq(0, max(Beijing$MSRP, na.rm = T)/scalex, by = 200000/scalex),
ytype = "continuous",
ybreaks = seq(0, max(pre$count, post$count), by = 5000),
ylims = c(0, max(pre$count, post$count)),
# ylab = "Probability Mass Function",
xlab = "MSRP (RMB 1000)",
ggtitle = "Post-Lottery",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5))
# fig4_plot_show <- grid.arrange(fig4a_OK, fig4b_OK, ncol = 2)
fig4_plot <- arrangeGrob(fig4a_OK, fig4b_OK, ncol = 2)
if (save_fig | save_fig4) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), fig4_plot, path = img_path,
width = default_width, height = default_height+1, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 5 ---------------------------
fignum <- 5
if (show_fig | show_fig5) {
pre <- prep_data(Beijing, prep = "dist",
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Beijing, prep = "dist",
lowerdate = "2011-01-01", upperdate = "2012-01-01")
upperxlim <- 1400000
scale <- 1000
da <- 30000 / scale
fig5a_OK <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
x = MSRP, scale = scale, binwidth = da) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, upperxlim / scale, by = 100000 / scale),
xlims = c(0, upperxlim/scale),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.002),
# xlab = "MSRP (RMB 1000)",
ylab = "Density",
xangle = 45,
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
ggtitle(TeX(paste("\\textit{d} = ", da*scale, sep = "")))
# fig5a_OK
db <- 50000 / scale
fig5b_OK <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
x = MSRP, scale = scale, binwidth = db) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, upperxlim / scale, by = 100000 / scale),
xlims = c(0, upperxlim/scale),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.002),
# xlab = "MSRP (RMB 1000)",
# ylab = "Density",
xangle = 45,
ggtitle = paste("d = ", db*scale, sep = ""),
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
ggtitle(TeX(paste("\\textit{d} = ", db*scale, sep = "")))
# fig5b_OK
dc <- 70000 / scale
fig5c_OK <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
x = MSRP, scale = scale, binwidth = dc) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, upperxlim / scale, by = 100000 / scale),
xlims = c(0, upperxlim/scale),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.001),
xlab = "MSRP (RMB 1000)",
ylab = "Density",
xangle = 45,
ggtitle = paste("d = ", dc*scale, sep = ""),
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
ggtitle(TeX(paste("\\textit{d} = ", dc*scale, sep = "")))
# fig5c_OK
dd <- 90000 / scale
fig5d_OK <- ggplot() +
bmp_twohist(data1 = pre, data2 = post,
x = MSRP, scale = scale, binwidth = dd) +
bmp_plot(data = Beijing,
color = policy_dummy,
fill = policy_dummy,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, upperxlim / scale, by = 100000 / scale),
xlims = c(0, upperxlim/scale),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.001),
xlab = "MSRP (RMB 1000)",
# ylab = "Density",
xangle = 45,
ggtitle = paste("d = ", dd*scale, sep = ""),
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
ggtitle(TeX(paste("\\textit{d} = ", dd*scale, sep = "")))
# fig5d_OK
# fig5_plot_show <- grid.arrange(fig5a_OK, fig5b_OK,
# fig5c_OK, fig5d_OK,
# nrow = 2)
fig5_plot <- arrangeGrob(fig5a_OK, fig5b_OK,
fig5c_OK, fig5d_OK,
nrow = 2)
if (save_fig | save_fig5) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), fig5_plot, path = img_path,
width = default_width, height = default_height+5, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 6 ---------------------------
fignum <- "6"
if (show_fig | show_fig6) {
set.seed(11 + seedplus)
support <- prep_data(data = Beijing, prep = "support")
pre <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
bandwidth_seq <- seq(0, 100000, 1000)
real <- diftrans(pre, post, bandwidth_seq = bandwidth_seq, conservative = F)
numsims <- 500
placebo_results <- matrix(NA, nrow = numsims, ncol = length(bandwidth_seq))
for (i in seq_len(numsims)) {
print(paste("Simulation Number", i, "out of", numsims, sep = " "))
# draw n_B,2010 samples from Beijing, 2010
synth_pre1 <- data.frame(MSRP = pre$MSRP,
count = rmultinom(1, sum(pre$count), pre$count))
# draw n_B,2011 samples from Beijing, 2010
synth_pre2 <- data.frame(MSRP = pre$MSRP,
count = rmultinom(1, sum(post$count), pre$count))
placebo <- diftrans(synth_pre1, synth_pre2,
bandwidth_seq = bandwidth_seq, conservative = F)
placebo_results[i, ] <- placebo$main
}
placebo_mean <- apply(placebo_results, 2, mean) * 100
placebo_sd <- apply(placebo_results, 2, sd) * 100
probs <- c(0.90, 0.95, 0.99)
placebo_quantiles <- apply(placebo_results, 2, quantile, prob = probs) * 100
plot_mat <- do.call(rbind, list(placebo_mean, placebo_sd, placebo_quantiles))
rownames(plot_mat) <- c("mean", "sd", paste("quantile", probs, sep = ""))
plot_table <- t(plot_mat) %>%
as.data.frame() %>%
mutate(bandwidth = bandwidth_seq,
real = real$main * 100) %>%
select(bandwidth, real, everything())
# close to mean placebo = 0.05%
lower <- 0.04
upper <- 0.06
valid <- plot_table$bandwidth[plot_table$mean < upper & plot_table$mean > lower]
# table 4
d_table <- plot_table %>%
filter(bandwidth %in% c(valid,
4000, 5000, 6000, 7000, 8000, 9000, 10000,
15000, 20000, 25000, 30000, 35000, 40000, 45000,
50000, 70000, 90000))
knitr::kable(d_table, format = "latex", booktabs = T, linesep = "")
# plot real cost and mean placebo cost - fig6
fig6_plot <- ggplot(data = plot_table, aes(x = bandwidth)) +
geom_line(aes(y = real, color = "0", linetype = "0")) +
geom_line(aes(y = mean, color = "5", linetype = "5")) +
scale_color_manual(values = get_color_palette(2, grayscale),
labels = c("real", "placebo"),
name = "") +
scale_linetype_manual(values = c(linetype0, linetype1, linetype2),
labels = c("real", "placebo"),
name = "") +
bmp_vline(xint = 25000) +
xlab(TeX("\\textit{d}")) +
ylab("Transport Cost (%)") +
theme_bmp(sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
scale_x_continuous(breaks = seq(0, 100000, 10000))
# simulation results only, no real cost (additional figure)
fig6_plot2 <- ggplot(data = plot_table, aes(x = bandwidth)) +
geom_line(aes(y = quantile0.99, color = "1", linetype = "1")) +
geom_line(aes(y = quantile0.95, color = "2", linetype = "2")) +
geom_line(aes(y = quantile0.9, color = "3", linetype = "3")) +
geom_line(aes(y = mean, color = "5", linetype = "5")) +
geom_line(aes(y = sd, color = "6", linetype = "6")) +
scale_color_manual(values = get_color_palette(5, grayscale),
labels = c("99%ile", "95%ile", "90%ile", "mean", "sd"),
name = "") +
scale_linetype_manual(values = c(linetype0, linetype1, linetype2, linetype3, linetype4),
labels = c("99%ile", "95%ile", "90%ile", "mean", "sd"),
name = "") +
bmp_vline(xint = 25000) +
xlab(TeX("\\textit{d}")) +
ylab("Transport Cost (%)") +
theme_bmp(sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5),
legend.position = c(0.8, 0.7)) +
scale_y_continuous(breaks = seq(0, 1.6, 0.1)) +
scale_x_continuous(breaks = seq(0, 100000, 10000))
if (save_fig | save_fig6) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""),
path = img_path, fig6_plot,
width = default_width, height = default_height, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig",
fignum, suffix, "OK.jpg", sep = ""))
ggsave(paste("fig", fignum, suffix,"zoomed", suffix, "OK.jpg", sep = ""),
path = img_path, fig6_plot2,
width = default_width, height = default_height, units = "in")
message(paste("fig", fignum, "_zoomed", " is saved in ", img_path, " as fig",
fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 7 ---------------------------
fignum <- 7
if (show_fig | show_fig7) {
pre <- prep_data(Tianjin, prep = "dist",
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post <- prep_data(Tianjin, prep = "dist",
lowerdate = "2011-01-01", upperdate = "2012-01-01")
fig7_plot <- ggplot() +
bmp_twohist(data1 = pre,
data2 = post,
x = MSRP,
scale = 1000,
binwidth = 20) +
bmp_plot(data = Tianjin,
color = postBeijing,
fill = postBeijing,
legendlabels = c("pre-lottery", "post-lottery"),
xtype = "continuous",
xbreaks = seq(0, 1400, by = 100),
ytype = "continuous",
ybreaks = seq(0, 0.008, by = 0.002),
xlab = "MSRP (RMB 1000)",
ylab = "Density",
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5))
if (save_fig | save_fig7) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 8 ---------------------------
fignum <- "8"
if (show_fig | show_fig8) {
set.seed(11 + seedplus)
support <- prep_data(data = Beijing, prep = "support")
preB <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
postB <- prep_data(Beijing, prep = "pmf",
support = support,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
preT <- prep_data(Tianjin, prep = "pmf",
support = support,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
postT <- prep_data(Tianjin, prep = "pmf",
support = support,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
bandwidth_seq <- seq(0, 100000, 1000)
realB <- diftrans(preB, postB, bandwidth_seq = bandwidth_seq, conservative = F)
realT <- diftrans(preT, postT, bandwidth_seq = bandwidth_seq, conservative = F)
realB_2d <- diftrans(preB, postB, bandwidth_seq = bandwidth_seq, conservative = T)
numsims <- 500
placebo_resultsB <- matrix(NA, nrow = numsims, ncol = length(bandwidth_seq))
placebo_resultsT <- matrix(NA, nrow = numsims, ncol = length(bandwidth_seq))
for (i in seq_len(numsims)) {
print(paste("Simulation Number", i, "out of", numsims, sep = " "))
# draw n_B,2010 samples from Beijing, 2010
synth_pre1B <- data.frame(MSRP = preB$MSRP,
count = rmultinom(1, sum(preB$count), preB$count))
# draw n_B,2011 samples from Beijing, 2010
synth_pre2B <- data.frame(MSRP = preB$MSRP,
count = rmultinom(1, sum(postB$count), preB$count))
placeboB <- diftrans(synth_pre1B, synth_pre2B,
bandwidth_seq = bandwidth_seq, conservative = F)
placebo_resultsB[i, ] <- placeboB$main
# draw n_T,2010 samples from Tianjin, 2010
synth_pre1T <- data.frame(MSRP = preT$MSRP,
count = rmultinom(1, sum(preT$count), preT$count))
# draw n_T,2011 samples from Tianjin, 2010
synth_pre2T <- data.frame(MSRP = preT$MSRP,
count = rmultinom(1, sum(postT$count), preT$count))
placeboT <- diftrans(synth_pre1T, synth_pre2T,
bandwidth_seq = bandwidth_seq, conservative = F)
placebo_resultsT[i, ] <- placeboT$main
}
placebo_meanB <- apply(placebo_resultsB, 2, mean) * 100
placebo_sdB <- apply(placebo_resultsB, 2, sd) * 100
probs <- c(0.90, 0.95, 0.99)
placebo_quantilesB <- apply(placebo_resultsB, 2, quantile, prob = probs) * 100
placebo_meanT <- apply(placebo_resultsT, 2, mean) * 100
placebo_sdT <- apply(placebo_resultsT, 2, sd) * 100
probs <- c(0.90, 0.95, 0.99)
placebo_quantilesT <- apply(placebo_resultsT, 2, quantile, prob = probs) * 100
plot_matB <- do.call(rbind, list(placebo_meanB, placebo_sdB, placebo_quantilesB))
rownames(plot_matB) <- c("mean", "sd", paste("quantile", probs, sep = ""))
plot_tableB <- t(plot_matB) %>%
as.data.frame() %>%
mutate(bandwidth = bandwidth_seq,
real = realB$main * 100) %>%
select(bandwidth, real, everything())
plot_matT <- do.call(rbind, list(placebo_meanT, placebo_sdT, placebo_quantilesT))
rownames(plot_matT) <- c("mean", "sd", paste("quantile", probs, sep = ""))
plot_tableT <- t(plot_matT) %>%
as.data.frame() %>%
mutate(bandwidth = bandwidth_seq,
real = realT$main * 100) %>%
select(bandwidth, real, everything())
# close to mean placebo = 0.05%
lower <- 0.04
upper <- 0.06
validB <- plot_tableB$bandwidth[plot_tableB$mean < upper & plot_tableB$mean > lower]
validT <- plot_tableT$bandwidth[plot_tableT$mean < upper & plot_tableT$mean > lower]
#~ # replicate table 4 -- Beijing
#~ d_tableB <- plot_tableB %>%
#~ filter(bandwidth %in% c(validB,
#~ 0,
#~ 4000, 5000, 6000, 7000, 8000, 9000, 10000,
#~ 15000, 20000, 25000, 30000, 35000, 40000, 45000,
#~ 50000, 70000, 90000))
#~ knitr::kable(d_tableB, format = "latex", booktabs = T, linesep = "")
#~
#~
#~ # replicate table 4 -- Tianjin
#~ d_tableT <- plot_tableT %>%
#~ filter(bandwidth %in% c(validT,
#~ 0,
#~ 4000, 5000, 6000, 7000, 8000, 9000, 10000,
#~ 15000, 20000, 25000, 30000, 35000, 40000, 45000,
#~ 50000, 70000, 90000))
#~ knitr::kable(d_tableT, format = "latex", booktabs = T, linesep = "")
# average of absolute difference of placebos, with 2d --- table 5
max_bw <- max(bandwidth_seq)
half_max_bw <- which(bandwidth_seq == max_bw / 2)
diff_2d <- matrix(NA_real_, nrow = half_max_bw, ncol = 6)
for (i in seq_len(half_max_bw)) {
bw <- bandwidth_seq[i]
B_2d <- placebo_resultsB[, which(bandwidth_seq == 2 * bw)]
T_d <- placebo_resultsT[, which(bandwidth_seq == bw)]
diff <- abs(B_2d - T_d)
diff_2d[i, ] <- c(bw,
mean(diff) * 100,
sd(diff) * 100,
quantile(diff, prob = probs) * 100)
}
knitr::kable(diff_2d[which(bandwidth_seq %in%
c(validB,
0,
4000, 5000, 6000, 7000, 8000, 9000, 10000,
15000, 20000, 25000, 30000, 35000, 40000, 45000,
50000)), ],
format = "latex", booktabs = T, linesep = "")
# empirical costs with 2d --- table 5
valid_d <- c(0,
4000, 5000, 6000, 7000, 8000, 9000, 10000,
15000, 20000,
23000, 24000,
25000, 30000, 35000, 40000, 45000,
50000)
real_costs <- diftrans(pre_main = preB, post_main = postB,
pre_control = preT, post_control = postT, bandwidth_seq = valid_d,
conservative = T)
}
### Figure 9 ---------------------------
fignum <- 9
if (show_fig | show_fig9) {
BTS <- do.call("rbind", list(Beijing, Tianjin, Shijiazhuang))
supportB <- prep_data(BTS %>% filter(city == "Beijing"),
prep = "support")
supportT <- prep_data(BTS %>% filter(city == "Tianjin"),
prep = "support")
post_Bpmf <- prep_data(Beijing, prep = "pmf",
support = supportB,
lowerdate = "2015-01-01", upperdate = "2016-01-01")
pre_Bpmf <- prep_data(Beijing, prep = "pmf",
support = supportB,
lowerdate = "2014-01-01", upperdate = "2015-01-01")
post_Tpmf <- prep_data(Tianjin, prep = "pmf",
support = supportT,
lowerdate = "2015-01-01", upperdate = "2016-01-01")
pre_Tpmf <- prep_data(Tianjin, prep = "pmf",
support = supportT,
lowerdate = "2014-01-01", upperdate = "2015-01-01")
bandwidth_seq = seq(0, 40000, 1000)
temp <- diftrans(pre_Bpmf, post_Bpmf, pre_Tpmf, post_Tpmf,
bandwidth_seq = bandwidth_seq,
conservative = F,
quietly = T)
bandwidth_selection <- temp %>%
pivot_longer(cols = c(main, control, diff),
names_to = "type",
values_to = "diffprop") %>%
mutate(type = case_when(type == "main" ~ "Beijing",
type == "control" ~ "Tianjin",
type == "diff" ~ "a"))
# bandwidth_selection %>% filter(type == "a") %>% mutate(percent_diffprop = 100*diffprop) %>% View()
fig9_plot <- ggplot(data = bandwidth_selection,
aes(x = bandwidth)) +
geom_line(aes(y = diffprop*100, color = type, linetype = type)) +
bmp_vline(xint = 2000) +
bmp_plot(data = bandwidth_selection,
color = type,
legendlabels = c("Difference", "Beijing placebo", "Tianjin placebo"),
xlab = TeX("\\textit{d}"),
ylab = "Transport Cost (%)",
ytype = "continuous",
ybreaks = seq(-50, 100, 10),
xtype = "continuous",
xbreaks = seq(0, 40000, 5000),
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
scale_linetype_manual(values = c(linetype0, linetype1, linetype2),
labels = c("Difference", "Beijing placebo", "Tianjin placebo"),
name = "")
if (save_fig | save_fig9) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Figure 10 ---------------------------
fignum <- 10
if (show_fig | show_fig10) {
# BTS <- do.call("rbind", list(Beijing, Tianjin, Shijiazhuang))
supportB <- prep_data(Beijing,
prep = "support")
supportT <- prep_data(Tianjin,
prep = "support")
post_Bpmf <- prep_data(Beijing, prep = "pmf",
support = supportB,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
pre_Bpmf <- prep_data(Beijing, prep = "pmf",
support = supportB,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
post_Tpmf <- prep_data(Tianjin, prep = "pmf",
support = supportT,
lowerdate = "2011-01-01", upperdate = "2012-01-01")
pre_Tpmf <- prep_data(Tianjin, prep = "pmf",
support = supportT,
lowerdate = "2010-01-01", upperdate = "2011-01-01")
bandwidth_seq = seq(0, 40000, 1000)
cons_dit <- diftrans(pre_Bpmf, post_Bpmf, pre_Tpmf, post_Tpmf,
bandwidth_seq = bandwidth_seq,
conservative = T,
save_dit = T)
dit <- diftrans(pre_Bpmf, post_Bpmf, pre_Tpmf, post_Tpmf,
bandwidth_seq = bandwidth_seq,
conservative = F,
save_dit = T)
bandwidth_selection <- cons_dit$out %>%
select(-main2d) %>%
pivot_longer(cols = c(main,
control,
diff,
diff2d),
names_to = "type",
values_to = "diffprop") %>%
mutate(type = case_when(type == "main" ~ "Beijing",
type == "control" ~ "Tianjin",
type == "diff" ~ "a",
type == "diff2d" ~ "b"))
mostinform <- dit$optimal_bandwidth
mostinform2 <- cons_dit$optimal_bandwidth
fig10_plot <- ggplot(data = bandwidth_selection %>%
filter(type != "a"), # control d-d or 2d-d
aes(x = bandwidth,
linetype = type)) +
bmp_vline(xint = mostinform) +
bmp_vline(xint = mostinform2) +
geom_line(aes(y = diffprop*100, color = type)) +
bmp_plot(data = bandwidth_selection %>%
filter(type != "a"),
color = type,
legendlabels = c("Difference", "Beijing", "Tianjin"),
xlab = TeX("\\textit{d}"),
ylab = "Transport Cost (%)",
ytype = "continuous",
ybreaks = seq(-50, 100, 10),
xtype = "continuous",
xbreaks = seq(0, 40000, 5000),
sizefont = (fontsize - 8),
axissizefont = (fontsizeaxis - 5)) +
scale_linetype_manual(values = c(linetype0, linetype1, linetype2),
labels = c("Difference", "Beijing", "Tianjin"),
name = "")
if (save_fig | save_fig10) {
ggsave(paste("fig", fignum, suffix, "OK.jpg", sep = ""), path = img_path,
width = default_width, height = default_height, units = "in")
message(paste("fig", fignum, " is saved in ", img_path, " as fig", fignum, suffix, "OK.jpg", sep = ""))
}
message(paste("Figure ", fignum, " is complete.", sep = ""))
}
### Miscellaneous ---------------------------
# Original Figure 3 (Comparing prices of car models between 2010 and 2011)
fig3_prep <- Beijing %>%
filter(ym < as.Date("2012-01-01")) %>%
pivot_wider(id_cols = c(month, color, noticenum),
names_from = year,
values_from = MSRP) %>%
filter(!(is.na(`2010`) | is.na(`2011`))) %>%
mutate(diff = `2011` - `2010`)
nrow(fig3_prep)
summary(fig3_prep$diff)
# Diff-in-diff
if (show_diff_in_diff) {
did_BT <- do.call("rbind", list(Beijing, Tianjin)) %>%
filter(ym < "2012-01-01") %>%
filter(city == "Beijing" | city == "Tianjin") %>%
mutate(Beijing = ifelse(city == "Beijing", 1, 0)) %>%
group_by(Beijing, postBeijing, MSRP) %>%
summarise(sum = sum(sales)) %>%
uncount(sum)
did_BT_reg <- lm(log(MSRP) ~ postBeijing + Beijing + postBeijing*Beijing, data = did_BT)
did_BS <- do.call("rbind", list(Beijing, Shijiazhuang)) %>%
filter(ym < "2012-01-01") %>%
filter(city == "Beijing" | city == "Shijiazhuang") %>%
mutate(Beijing = ifelse(city == "Beijing", 1, 0)) %>%
group_by(Beijing, postBeijing, MSRP) %>%
summarise(sum = sum(sales)) %>%
uncount(sum)
did_BS_reg <- lm(log(MSRP) ~ postBeijing + Beijing + postBeijing*Beijing, data = did_BS)
did_BST <- do.call("rbind", list(Beijing, Tianjin, Shijiazhuang)) %>%
filter(ym < "2012-01-01") %>%
filter(city == "Beijing" | city == "Shijiazhuang" | city == "Tianjin") %>%
mutate(Beijing = ifelse(city == "Beijing", 1, 0)) %>%
group_by(Beijing, postBeijing, MSRP) %>%
summarise(sum = sum(sales)) %>%
uncount(sum)
did_BST_reg <- lm(log(MSRP) ~ postBeijing + Beijing + postBeijing*Beijing, data = did_BST)
stargazer(did_BT_reg, did_BS_reg, did_BST_reg,
label = "tab:eight",
type = "latex",
order = c(2, 1, 3, 4),
covariate.labels = c("Beijing", "post", "Beijing $\\times$ post", "Constant"),
keep.stat = c("rsq","n"),
align = T,
p.auto = FALSE,
# se = list(summary(did_BT_reg)$coefficients[,"t value"],
# summary(did_BS_reg)$coefficients[,"t value"],
# summary(did_BST_reg)$coefficients[,"t value"]),
# notes = c("t-statistics are in parentheses.")
# single.row = TRUE,
column.labels = c("Tianjin", "Shijiazhuang", "Both"),
model.numbers = FALSE,
dep.var.caption = "",
dep.var.labels.include = FALSE,
report = "vcs"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.