Nothing
## ---- include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, echo = FALSE-------------------------------------------------
library(bggum)
## ----read_in_raw_data----------------------------------------------------
roberts_court <- read.csv("roberts_court.csv", stringsAsFactors = FALSE)
head(roberts_court)
## ----reshape_data--------------------------------------------------------
## Recode the votes to be integers in $\{0, \ldots, K_j - 1\}$
roberts_court$vote <- ifelse(roberts_court$vote %in% c(2, 7), 0, 1)
## Reshape the data
library(dplyr)
library(tidyr)
responses <- roberts_court %>%
select(-caseId, -term) %>%
spread(lexisCite, vote)
## I like to keep the respondents' names as the rownames
rownames(responses) <- responses$justiceName
## Now we just eliminate the respondent ID column and turn it into a matrix
responses <- as.matrix(responses[ , -1])
## Eliminate unanimous items
unanimous <- apply(responses, 2, function(x) length(unique(na.omit(x))) == 1)
responses <- responses[ , !unanimous]
## Here are responses to a few items:
responses[ , c(1, floor(ncol(responses) / 2), ncol(responses))]
## ----tune_proposals, eval = FALSE----------------------------------------
# ## Load the package
# library(bggum)
# ## Set the seed for reproducibility
# set.seed(123)
# ## Tune the proposal densities
# proposal_sds <- tune_proposals(responses, tune_iterations = 5000)
# set.seed(456)
# ## Tune the temperature schedule
# temps <- tune_temperatures(responses, n_temps = 6, proposal_sds = proposal_sds)
## ----load_hypers, echo = FALSE-------------------------------------------
## (These are the results when I ran the above unevaluated code locally)
temps <- c(1, 0.933, 0.870, 0.812, 0.758, 0.706)
## ----check_temps---------------------------------------------------------
round(temps, 2)
## ----sample_posterior, eval = FALSE--------------------------------------
# ## We need the parallel package
# library(parallel)
# ## We'll set up the cluster with two cores (for two chains)
# cl <- makeCluster(2, type = "FORK", outfile = "bggum-log.txt")
# ## Deal with reproducibility
# clusterSetRNGStream(cl = cl, iseed = 789)
# ## Produce the chains
# chains <- parLapplyLB(cl = cl, X = 1:2, fun = function(x) {
# ggumMC3(data = responses,
# sample_iterations = 50000,
# burn_iterations = 5000,
# proposal_sds = proposal_sds,
# temps = temps)
# })
# ## Stop the cluster
# stopCluster(cl)
## ----post_process, eval = FALSE------------------------------------------
# constraint <- which(rownames(responses) == "RBGinsburg")
# processed_chains <- lapply(chains, post_process, constraint = constraint,
# expected_sign = "-")
## ----roberts_trace_plots_setup1, eval = FALSE----------------------------
# roberts <- which(rownames(responses) == "JGRoberts")
# trace_colors <- c("#0072b280", "#d55e0080")
# iters <- nrow(chains[[1]])
# idx <- seq(floor(iters / 1000), iters, floor(iters / 1000))
# chain1_raw <- chains[[1]][idx, roberts]
# chain2_raw <- chains[[2]][idx, roberts]
# chain1_pp <- processed_chains[[1]][idx, roberts]
# chain2_pp <- processed_chains[[2]][idx, roberts]
## ----roberts_trace_plots_setup2, echo = FALSE----------------------------
roberts <- which(rownames(responses) == "JGRoberts")
trace_colors <- c("#0072b280", "#d55e0080")
roberts_draws <- read.csv("roberts_draws.csv", stringsAsFactors = FALSE)
iters <- nrow(roberts_draws) * 50
idx <- 1:nrow(roberts_draws)
chain1_raw <- roberts_draws[idx, "chain1_raw"]
chain2_raw <- roberts_draws[idx, "chain2_raw"]
chain1_pp <- roberts_draws[idx, "chain1_pp"]
chain2_pp <- roberts_draws[idx, "chain2_pp"]
idx <- seq(floor(iters / 1000), iters, floor(iters / 1000))
## ----roberts_trace_plots, results = 'asis', fig.dim = c(6, 4)------------
ylims <- range(c(chain1_raw, chain2_raw, chain1_pp, chain2_pp))
opar <- par(mar = c(3, 3, 3, 1) + 0.1)
plot(idx, chain1_raw, type = "l", col = trace_colors[1], ylim = ylims,
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = "Raw Samples")
lines(idx, chain2_raw, col = trace_colors[2])
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75)
title(xlab = "Iteration", ylab = expression(theta[Roberts]), line = 1.5)
plot(idx, chain1_pp, type = "l", col = trace_colors[1], ylim = ylims,
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = "Processed Samples")
lines(idx, chain2_pp, col = trace_colors[2])
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75)
title(xlab = "Iteration", ylab = expression(theta[Roberts]), line = 1.5)
par(opar)
## ----convergence1, eval = FALSE------------------------------------------
# library(coda)
# ## Look at the Gelman-Rubin potential scale reduction factor:
# convergence_stats <- gelman.diag(processed_chains)
# ## See what estimates are:
# summary(convergence_stats$psrf[ , 1])
## ----convergence2, echo = FALSE------------------------------------------
convergence_stats <- read.csv("convergence.csv")
summary(convergence_stats[ , 1])
## ----summary, eval = FALSE-----------------------------------------------
# posterior_summary <- summary(processed_chains)
## ----load_summary, echo = FALSE------------------------------------------
posterior_summary <- readRDS("posterior_summary.rds")
## ----display_summary-----------------------------------------------------
str(posterior_summary$estimates, max.level = 1)
str(posterior_summary$sds, max.level = 1)
head(posterior_summary$statistics)
## ----plot_thetas, results = 'asis', fig.dim = c(6, 6)--------------------
theta <- posterior_summary$estimates$theta
names(theta) <- rownames(responses)
n <- length(theta)
ordering <- order(theta)
theta_sorted <- sort(theta)
opar <- par(mar = c(3, 6, 1, 1) + 0.1)
plot(theta_sorted, factor(names(theta_sorted), levels = names(theta_sorted)),
pch = 19, xlim = c(-3, 2.5), xaxt = "n", yaxt = "n", xlab = "", ylab = "")
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75, las = 1,
labels = names(theta_sorted), at = 1:n)
title(xlab = expression(theta), line = 1.5)
segments(x0 = posterior_summary$statistics[ordering, 1], y0 = 1:n,
x1 = posterior_summary$statistics[ordering, 4], y1 = 1:n)
par(opar)
## ----plot_items, results = 'asis', fig.dim = c(6, 4)---------------------
alpha <- posterior_summary$estimates$alpha
delta <- posterior_summary$estimates$delta
tau <- posterior_summary$estimates$tau
phillip_morris <- which(colnames(responses) == "2007 U.S. LEXIS 1332")
rucho <- which(colnames(responses) == "2019 U.S. LEXIS 4401")
icc(alpha[phillip_morris], delta[phillip_morris], tau[[phillip_morris]],
main_title = "Philip Morris USA, Inc. v. Williams",
plot_responses = TRUE, thetas = theta,
responses = responses[ , phillip_morris])
icc(alpha[rucho], delta[rucho], tau[[rucho]],
main_title = "Rucho v. Common Cause",
plot_responses = TRUE, thetas = theta,
responses = responses[ , rucho])
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.