Nothing
#' Plot the Regression Discontinuity
#'
#' \code{plot.rd} plots the relationship between the running variable and the outcome.
#' It is based on the \code{plot.RD} function in the "rdd" package.
#'
#' @method plot rd
#'
#' @param x An \code{rd} object, typically the result of \code{\link{rd_est}}.
#' @param preds An optional vector of predictions generated by \code{\link{predict.rd}}.
#' If not supplied, prediction is completed within the \code{plot.rd} function.
#' @param fit_line A character vector specifying models to be shown as fitted lines.
#' Options are \code{c("linear", "quadratic", "cubic", "optimal", "half", "double")}.
#' @param fit_ci A string specifying whether and how to plot prediction confidence intervals
#' around the fitted lines. Options are \code{c("area", "dot", "hide")}.
#' @param fit_ci_level A numeric value between 0 and 1 specifying the confidence level of prediction CIs. The default is 0.95.
#' @param bin_n An integer specifying the number of bins for binned data points. If \code{bin_n} is 0, raw data points are plotted.
#' If \code{bin_n} is < 0, data points are suppressed. The default is 20.
#' @param bin_level A numeric value between 0 and 1 specifying the confidence level for CIs around binned data points. The default is 0.95.
#' @param bin_size A string specifying how to plot the number of observations in each bin, by \code{"size"} or \code{"shape"}.
#' @param quant_bin A logical value indicating whether the data are binned by quantiles. The default is \code{TRUE}.
#' @param xlim An optional numeric vector containing the x-axis limits.
#' @param ylim An optional numeric vector containing the y-axis limits.
#' @param include_rugs A logical value indicating whether to include the 1d plot for both axes. The default is \code{FALSE}.
#' @param ... Additional graphic arguments passed to \code{plot}.
#'
#' @references Drew Dimmery (2016). rdd: Regression Discontinuity Estimation. R package
#' version 0.57. https://CRAN.R-project.org/package=rdd
#'
#' @importFrom stats qnorm qt quantile na.omit aggregate
#' @importFrom graphics lines arrows legend rug strwidth abline
#'
#' @include rd_est.R
#' @include predict.rd.R
#' @include treat_assign.R
#'
#' @export
#'
#' @examples
#' set.seed(12345)
#' dat <- data.frame(x = runif(1000, -1, 1), cov = rnorm(1000))
#' dat$tr <- as.integer(dat$x >= 0)
#' dat$y <- 3 + 2 * dat$x + 3 * dat$cov + 10 * (dat$x >= 0) + rnorm(1000)
#' rd <- rd_est(y ~ x + tr | cov, data = dat, cutpoint = 0, t.design = "geq")
#' plot(rd)
plot.rd <- function(x, preds = NULL,
fit_line = c("linear", "quadratic", "cubic", "optimal", "half", "double"),
fit_ci = c("area", "dot", "hide"), fit_ci_level = .95, bin_n = 20, bin_level = .95,
bin_size = c("shade", "size"), quant_bin = TRUE, xlim = NULL, ylim = NULL,
include_rugs = FALSE, ...) {
if (!inherits(x, "rd"))
stop("Not an object of class rd.")
if (is.factor(x$frame[, x$cov]) == TRUE)
stop("Plotting is currently not supported for covariates that are factors.")
if ("cutpoint" %in% names(x$call))
cut <- eval.parent(x$call$cutpoint)
else cut <- 0
if ("t.design" %in% names(x$call))
t.design <- eval.parent(x$call$t.design)
else t.design <- "l"
if (is.null(preds))
preds <- predict(x)
if (!"Z" %in% names(x$frame))
x$frame$Z <- treat_assign(x$frame$X, cut, t.design)
d <- as.data.frame(x$frame)
if (length(x$na.action) > 0)
d <- d[-x$na.action, ]
# CALCULATE CI
preds$Yhat.linear.ub = preds$Yhat.linear + qnorm((1 - fit_ci_level) / 2) * preds$YSE.linear
preds$Yhat.quadratic.ub = preds$Yhat.quadratic + qnorm((1 - fit_ci_level) / 2) * preds$YSE.quadratic
preds$Yhat.cubic.ub = preds$Yhat.cubic + qnorm((1 - fit_ci_level) / 2) * preds$YSE.cubic
preds$Yhat.optimal.ub = preds$Yhat.optimal + qnorm((1 - fit_ci_level) / 2) * preds$YSE.optimal
preds$Yhat.half.ub = preds$Yhat.half + qnorm((1 - fit_ci_level) / 2) * preds$YSE.half
preds$Yhat.double.ub = preds$Yhat.double + qnorm((1 - fit_ci_level) / 2) * preds$YSE.double
preds$Yhat.linear.lb = preds$Yhat.linear - qnorm((1 - fit_ci_level) / 2) * preds$YSE.linear
preds$Yhat.quadratic.lb = preds$Yhat.quadratic - qnorm((1 - fit_ci_level) / 2) * preds$YSE.quadratic
preds$Yhat.cubic.lb = preds$Yhat.cubic - qnorm((1 - fit_ci_level) / 2) * preds$YSE.cubic
preds$Yhat.optimal.lb = preds$Yhat.optimal - qnorm((1 - fit_ci_level) / 2) * preds$YSE.optimal
preds$Yhat.half.lb = preds$Yhat.half - qnorm((1 - fit_ci_level) / 2) * preds$YSE.half
preds$Yhat.double.lb = preds$Yhat.double - qnorm((1 - fit_ci_level) / 2) * preds$YSE.double
covs <- setdiff(names(d), c("X", "Y", "Z"))
## RESIDUALIZE DATA
# if (residualize & length(covs) > 0){
# d$Y <- d$Y + predict(lm(sprintf("Y ~ 1+%s", paste(c("", covs), collapse = " + ")), d),
# terms = covs, type = "term")
# # d$X <- resid(lm(sprintf("X ~ 1+%s", paste(c("", covs), collapse = " + ")), d))
# }
## BIN DATA
if (is.null(bin_n))
bin_n <- -1
if (bin_n > 0) {
if (quant_bin){
cut_ptile <- mean(d$X < cut)
ptiles_l <- seq(0, 1, length.out = ceiling((bin_n + 1) * cut_ptile))
ptiles_r <- seq(0, 1, length.out = ceiling((bin_n + 1) * (1 - cut_ptile)))
b_l <- quantile(d$X[d$X < cut], ptiles_l[-length(ptiles_l)], na.rm = TRUE, type = 1)
b_r <- quantile(d$X[d$X > cut], ptiles_r[-1], na.rm = TRUE, type = 1)
b <- c(b_l, cut, b_r)
bins <- within(data.frame(Xmid = b, bcode = 1:length(b)), {
Xmid = Xmid + c(diff(Xmid) / 2, 0)
})
bins <- bins[-nrow(bins), ]
} else {
bin_ratio <- (cut - min(d$X, na.rm = TRUE)) / (max(d$X, na.rm = TRUE) -
min(d$X, na.rm = TRUE))
bin_n_l <- ceiling((bin_n + 1) * bin_ratio)
bin_n_r <- ceiling((bin_n + 1) * (1 - bin_ratio))
b <- c(seq(min(d$X, na.rm = TRUE), cut, length.out = bin_n_l), # left
seq(cut, max(d$X, na.rm = TRUE), length.out = bin_n_r)[-1])
bins <- within(data.frame(Xmid = b[-1], bcode = 1:length(b[-1])), {
Xmid = Xmid - (Xmid[1] - b[1]) / 2
})
}
d$bcode <- .bincode(d$X, b, F, T)
bin_summ <- aggregate(Y ~ bcode + Z, d,
function(x) c(Ymean = mean(x), Ysd = sd(x), N = sum(!is.na(x))), na.action = na.omit)
bin_summ <- cbind(bin_summ[, 1:2], bin_summ[, 3])
bin_summ <- merge(bin_summ, bins, by = c("bcode"), all = TRUE)
bin_summ$Yse <- bin_summ$Ysd / sqrt(bin_summ$N)
bin_summ$Yub <- qt(1 - (1 - bin_level) / 2, bin_summ$N - 1) * bin_summ$Yse + bin_summ$Ymean
bin_summ$Ylb <- qt((1 - bin_level) / 2, bin_summ$N - 1) * bin_summ$Yse + bin_summ$Ymean
bin_summ <- bin_summ[!is.na(bin_summ$Ymean),]
} else {
bin_summ <- d
bin_summ$Xmid <- bin_summ$X
bin_summ$Ymean <- bin_summ$Y
bin_size <- setdiff(bin_size, "shade")
bin_level <- 0
}
## PLOT
cols <- c("red", "blue", "green4", "red4", "blue4", "darkgray")[1:length(fit_line)]
plot.new()
if(is.null(xlim))
xlim <- range(d$X)
if(is.null(ylim))
ylim <- range(d$Y)
dy <- 0.1
ylim <- c((1-dy)*ylim[1], (1+dy)*ylim[2])
plot.window(xlim = xlim, ylim = ylim)
# POINTS
if (bin_level > 0 & bin_level < 100)
arrows(bin_summ$Xmid, bin_summ$Ylb, y1 = bin_summ$Yub, angle = 90, code = 3, length = .05)
if (bin_n >= 0)
points(bin_summ$Xmid, bin_summ$Ymean, type = "p",
pch = 21,
# col = ifelse(bin_summ$Z, "white", "black"),
bg = ifelse(bin_summ$Z, "black", "white"),
cex = if (bin_n > 0 & "size" %in% bin_size & min(bin_summ$N) != max(bin_summ$N))
(bin_summ$N - min(bin_summ$N)) / (max(bin_summ$N) - min(bin_summ$N)) * 2 + 1
else .9
# bg = if("shade" %in% bin_size & min(bin_summ$N) != max(bin_summ$N))
# gray(1 - (bin_summ$N - min(bin_summ$N)) / (max(bin_summ$N) - min(bin_summ$N)))
# else "black"
)
# LINEAR
if ("linear" %in% fit_line) {
by(preds, preds$Tr,
function(df_p) {
color <- cols[which(fit_line == "linear")]
# fit line
lines(df_p$X, df_p$Yhat.linear, type = "l", lwd = 2, lty = 2, col = color)
# ci area
if ("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_p[order(df_p$X), c("X", "Yhat.linear.ub")], c("X", "Y")),
setNames(df_p[order(df_p$X, decreasing = TRUE), c("X", "Yhat.linear.lb")], c("X", "Y")))
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if("dot" %in% fit_ci) {
lines(df_p$X, df_p$Yhat.linear.lb, type = "l", lwd = 2, lty = 3, col = color)
lines(df_p$X, df_p$Yhat.linear.ub, type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# QUADRATIC
if ("quadratic" %in% fit_line) {
by(preds, preds$Tr,
function(df_p) {
color <- cols[which(fit_line == "quadratic")]
# fit line
lines(df_p$X, df_p$Yhat.quadratic, type = "l", lwd = 2, lty = 2, col = color)
# ci area
if("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_p[order(df_p$X), c("X", "Yhat.quadratic.ub")], c("X", "Y")),
setNames(df_p[order(df_p$X, decreasing = TRUE), c("X", "Yhat.quadratic.lb")], c("X", "Y")))
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if("dot" %in% fit_ci) {
lines(df_p$X, df_p$Yhat.quadratic.lb, type = "l", lwd = 2, lty = 3, col = color)
lines(df_p$X, df_p$Yhat.quadratic.ub, type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# CUBIC
if ("cubic" %in% fit_line) {
by(preds, preds$Tr,
function(df_p){
color <- cols[which(fit_line == "cubic")]
# fit line
lines(df_p$X, df_p$Yhat.cubic, type = "l", lwd=2, lty = 2, col = color)
# ci area
if ("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_p[order(df_p$X), c("X", "Yhat.cubic.ub")], c("X", "Y")),
setNames(df_p[order(df_p$X, decreasing = TRUE), c("X", "Yhat.cubic.lb")], c("X", "Y")))
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if ("dot" %in% fit_ci) {
lines(df_p$X, df_p$Yhat.cubic.lb, type = "l", lwd = 2, lty = 3, col = color)
lines(df_p$X, df_p$Yhat.cubic.ub, type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# OPTIMAL
if ("optimal" %in% fit_line) {
by(preds, preds$Tr,
function(df_np) {
color <- cols[which(fit_line == "optimal")]
# fit line
points(Yhat.optimal ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.optimal) | is.na(df_np$X))),
type = "l", lwd = 2, pch = 17, cex = .8, col = color)
# ci area
if ("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_np[order(df_np$X), c("X", "Yhat.optimal.lb")], c("X", "Y")),
setNames(df_np[order(df_np$X, decreasing = TRUE), c("X", "Yhat.optimal.ub")], c("X", "Y")))
ci_area <- na.omit(ci_area)
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if ("dot" %in% fit_ci) {
lines(Yhat.optimal.ub ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.optimal.ub) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
lines(Yhat.optimal.lb ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.optimal.lb) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# HALF
if ("half" %in% fit_line) {
by(preds, preds$Tr,
function(df_np) {
color <- cols[which(fit_line == "half")]
# fit line
points(Yhat.half ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.half) | is.na(df_np$X))),
type = "l", lwd = 2, pch = 3, cex = .8, col = color)
# ci area
if ("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_np[order(df_np$X), c("X", "Yhat.half.lb")], c("X", "Y")),
setNames(df_np[order(df_np$X, decreasing = TRUE), c("X", "Yhat.half.ub")], c("X", "Y")))
ci_area <- na.omit(ci_area)
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if ("dot" %in% fit_ci) {
lines(Yhat.half.ub ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.half.ub) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
lines(Yhat.half.lb ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.half.lb) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# DOUBLE
if ("double" %in% fit_line) {
by(preds, preds$Tr,
function(df_np) {
color <- cols[which(fit_line == "double")]
# fit line
points(Yhat.double ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.double) | is.na(df_np$X))),
type = "l", lwd = 2, pch = 4, cex = .8, col = color)
# ci area
if ("area" %in% fit_ci) {
ci_area <- rbind(setNames(df_np[order(df_np$X), c("X", "Yhat.double.lb")], c("X", "Y")),
setNames(df_np[order(df_np$X, decreasing = TRUE), c("X", "Yhat.double.ub")], c("X", "Y")))
ci_area <- na.omit(ci_area)
polygon(ci_area[,1], ci_area[,2], border = NA, col = adjustcolor(color, alpha.f = .2))
}
if ("dot" %in% fit_ci) {
lines(Yhat.double.ub ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.double.ub) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
lines(Yhat.double.lb ~ X,
data = subset(df_np, !(is.na(df_np$Yhat.double.lb) | is.na(df_np$X))),
type = "l", lwd = 2, lty = 3, col = color)
}
}
)
}
# LEGEND
pos <- legend(
x = "bottomright",
legend = c("raw data:", "treated", "untreated"),
text.width = strwidth("raw data:"),
pch = c(NA, 19, 1),
horiz = TRUE,
bty = "n"
)
if (bin_n > 0 & length(bin_size) > 0 & min(bin_summ$N) != max(bin_summ$N)) {
legend(x = pos$rect$left, y = pos$rect$top, yjust = .5,
pch = c(NA,21,21),
legend = as.expression(c("bin size:", bquote(italic(n) == .(min(bin_summ$N))),
bquote(italic(n) == .(max(bin_summ$N))))),
text.width = strwidth("raw data:"),
horiz = TRUE,
col = "black",
bty = "n",
# pt.bg = if("shade" %in% bin_size & min(bin_summ$N) != max(bin_summ$N))
# c(NA,gray(1:0))
# else "black"
pt.cex = if ("size" %in% bin_size & min(bin_summ$N) != max(bin_summ$N))
c(NA, 0, 1) * 2 + 1
else c(NA, .9, .9)
)
}
if (length(fit_line) > 0) {
pos0 <- legend(x = "topleft", legend = "", bty = "n")
if (any(fit_line %in% c("linear", "quadratic", "cubic"))) {
pos1 <- legend(x = pos0$rect$left + pos0$rect$w + strwidth("parametric"), y = pos0$rect$top,
legend = rep("", sum(fit_line %in% c("linear", "quadratic", "cubic")) + 1),
col = c(NA, cols[which(fit_line %in% c("linear", "quadratic", "cubic"))]),
lty = c(NA, rep(2,sum(fit_line %in% c("linear", "quadratic", "cubic")))),
lwd = 2, bty = "n", merge = FALSE)
text(pos1$rect$left, pos1$text$y, adj = 1,
labels = c("parametric:", fit_line[which(fit_line %in% c("linear", "quadratic", "cubic"))]))
} else {
pos1 <- pos0
}
if (any(fit_line %in% c("optimal", "half", "double"))) {
pos2 <- legend(x = pos1$rect$left + pos1$rect$w + strwidth("non-parametric"), y = pos0$rect$top,
legend = rep("", sum(fit_line %in% c("optimal", "half", "double")) + 1),
col = c(NA, cols[which(fit_line %in% c("optimal", "half", "double"))]),
# pch = c(NA, c(17,3,4)[which(c("optimal", "half", "double") %in% fit_line)]),
lty = c(NA, rep(1, sum(fit_line %in% c("optimal", "half", "double")))),
lwd = 2, bty = "n")
text(pos2$rect$left, pos2$text$y, adj = 1,
labels = c("non-parametric:", fit_line[which(fit_line %in% c("optimal", "half", "double"))]))
}
}
## OTHER ELEMENTS
if (include_rugs) {
rug(x = d$X, side = 1, ticksize = .01)
rug(x = d$Y, side = 2, ticksize = .01)
}
# plot the cutoff
abline(v = cut, col = "black", lty = 2)
box()
axis(1)
axis(2)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.