Nothing
#' Iterative EI Estimation
#'
#' This function runs enables running iterative ecoligical inference (EI) to
#' estimate the proportion of votes by different race/ethnicity groups for
#' different political candidates.
#'
#' Iterative EI iterates through all possible race-candidate pairs. For each
#' pair, votes by other races and for other candidates are binned and 2x2
#' ecological inference is run.
#'
#' This function wraps around the ei function from the ei R package. This
#' function is unstable and can break in arbitrary ways. Errors
#' often emerge with particular values of the erho parameter. If the function
#' breaks, it will automatically try adjusting the erho parameter, first to 20,
#' then to 0.5.
#'
#' If problems persist, please submit an issue on the eiCompare github
#' repository and include the error message you receive.
#'
#' @param data A data.frame() object containing precinct-level turnout data by
#' race and candidate
#' @param cand_cols A character vector listing the column names for turnout for
#' each candidate
#' @param race_cols A character vector listing the column names for turnout by
#' race
#' @param totals_col The name of the column containing total votes cast in each
#' precinct
#' @param name A unique identifier for the outputted eiCompare object.
#' @param erho A number passed directly to ei::ei(). Defaulted to 10. Can also
#' pass in a vector of erho values
#' @param seed An integer seed value for replicating estimate results across
#' runs. If NULL, a random seed is chosen. Defaulted to NULL.
#' @param plots A boolean indicating whether or not to include density and
#' tomography plots
#' @param eiCompare_class default = TRUE
#' @param betas A boolean to return precinct-level betas for each 2x2 ei
#' @param par_compute A boolean to conduct ei using parallel processing
#' @param n_cores The number of cores to use in parallel computation. Defaulted to NULL, in which case parallel::detectCores() - 1 is used
#' @param verbose A boolean indicating whether to print out status messages.
#' @param plot_path A string to specify plot save location. If NULL, plot is not saved
#' @param ... Additional arguments passed directly to ei::ei()
#'
#' @return If eiCompare_class = TRUE, an object of class eiCompare is returned.
#' Otherwise, a dataframe is returned that matches the formatting of ei_est_gen
#' output.
#'
#' @importFrom doSNOW registerDoSNOW
#' @importFrom foreach getDoParWorkers %dopar% %do%
#' @importFrom parallel makeCluster stopCluster
#' @importFrom bayestestR ci
#' @importFrom purrr lift
#' @importFrom utils capture.output setTxtProgressBar
#'
#' @author Loren Collingwood <loren.collingwood@@ucr.edu>
#' @author Ari Decter-Frain <agd75@@cornell.edu>
#' @author Hikari Murayama <hikari_murayama@@berkeley.edu>
#'
#' @references eiPack. Gary King (1997). A Solution to the Ecological Inference
#' Problem. Princeton: Princeton University Press.
#'
#' @export
ei_iter <- function(
data,
cand_cols,
race_cols,
totals_col,
name = "",
erho = 10,
seed = NULL,
plots = FALSE,
eiCompare_class = TRUE,
betas = FALSE,
par_compute = FALSE,
n_cores = NULL,
verbose = FALSE,
plot_path = NULL,
...) {
# Preparation for parallel processing if user specifies parallelization
if (par_compute) {
# Detect the number of cores you have
parallel::detectCores()
if (parallel::detectCores() < 4) {
stop("It is not recommended to run parallel ei with less than 4 cores")
} else if (parallel::detectCores() == 4) {
warning("4 cores is the minimum recommended to run parallel ei")
}
# Standard to use 1 less core for clusters
if (is.null(n_cores)) {
clust <- parallel::makeCluster(parallel::detectCores() - 1)
} else {
clust <- parallel::makeCluster(n_cores)
}
# Register parallel processing cluster
doSNOW::registerDoSNOW(clust)
# Check to make sure that cores are set up correctly
foreach::getDoParWorkers()
}
# Set infix option for parallel or not parallel
`%myinfix%` <- ifelse(par_compute, `%dopar%`, `%do%`)
# Check for valid arguments
check_args(data, cand_cols, race_cols, totals_col)
# Save any additional arguments to pass into ei inside foreach
args_pass <- list(...)
# Subset data
data <- data[, c(cand_cols, race_cols, totals_col)]
# Check for missings
data <- remove_nas(data)
# Force data to be a dataframe
data <- as.data.frame(data)
# Get race and cand lengths
n_races <- length(race_cols)
n_cands <- length(cand_cols)
n_iters <- n_races * n_cands
# Create race-cand pairs for iteration
race_cand_pairs <- expand.grid(
"race" = race_cols,
"cand" = cand_cols,
stringsAsFactors = FALSE
)
# Get seed if seed is null
if (is.null(seed)) {
seed <- sample(1:10e5, 1)
if (verbose) {
message(paste("Setting random seed equal to", seed))
}
}
if (verbose) {
message(paste("Beginning", n_iters, "2x2 estimations..."))
}
# Init progressbar
pb <- utils::txtProgressBar(
min = 0,
max = n_iters,
style = 3
)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress = progress)
# Loop through each 2x2 ei
ei_results <- foreach::foreach(
i = seq_len(n_iters),
.inorder = FALSE,
.packages = c("ei", "stats", "utils", "mvtnorm"),
.options.snow = opts
) %myinfix% {
cand <- race_cand_pairs[i, "cand"]
race <- race_cand_pairs[i, "race"]
# Get formula
formula <- stats::formula(paste(cand, "~", race), sep = " ")
# Set seed
set.seed(seed)
# Run 2x2 ei
# This loop tries three different erho values before returning an error.
# It first tries the provided erho value, then the default for ei (0.5),
# then 20.
# If a vector of erhos was entered, it will go through all of these before
# failing
n_erhos <- length(erho)
if (n_erhos > 1) {
erhos <- erho
} else {
erhos <- c(erho, 0.5, 20)
n_erhos <- 3
}
ii <- 1
while (ii < (n_erhos + 1)) {
tryCatch(
{
utils::capture.output({
ei_out <-
suppressMessages(
purrr::lift(ei::ei)(
data = data,
formula = formula,
total = totals_col,
erho = erhos[ii],
simulate = TRUE,
args_pass
)
)
})
break
# This was meant to enable parameterization of the ei importance sample
# size, but its inclusion changes results dramatically.
# utils::capture.output({
# ei_out <- suppressMessages(ei_sim(ei_out, samples))
# })
},
error = function(cond) {
if (ii == n_erhos) {
stop(
message(
paste(
"\n", format(formula), "iteration failed", n_erhos,
"times.\nType ?ei_iter for guidance on how to proceed.\n",
"Error on final failure:\n", cond
)
)
)
} else {
message(
paste(
"\n",
format(formula),
"iteration failed. Retrying with erho =",
as.character(erhos[ii]),
"..."
)
)
}
}
)
# Advance iteration
ii <- ii + 1
}
if (plots & !is.null(plot_path)) {
# Create tomography plots
grDevices::png(paste0(plot_path, "tomography_", cand, "_", race, ".png"),
units = "in", height = 6, width = 6, res = 500
)
plot(ei_out, "tomogE")
graphics::mtext(paste(cand, race, sep = " "),
outer = T, line = -1
)
grDevices::dev.off()
# Create denity plots
grDevices::png(paste0(plot_path, "density_", cand, "_", race, ".png"),
units = "in", height = 6, width = 6, res = 500
)
plot(ei_out, "betab", "betaw")
graphics::mtext(paste(cand, race, sep = " "),
outer = T, line = -1
)
grDevices::dev.off()
}
# Extract mean, standard error for each precinct and district-wide
res <- ei::eiread(
ei.object = ei_out,
"betab",
"sbetab",
"betaw",
"sbetaw",
"aggs",
"maggs"
)
# get aggregate means
betab_district_mean <- mean(res$aggs[, 1], na.rm = TRUE)
betaw_district_mean <- mean(res$aggs[, 2], na.rm = TRUE)
# Get aggregate ses
# This works according to the aggregate formula in King, 1997, section 8.3
aggs <- res$aggs
ses <- c()
for (k in 1:ncol(aggs)) {
aggs_col <- aggs[, k]
m <- mean(aggs_col)
nsims <- length(aggs_col)
devs <- m - aggs_col
sq_devs <- devs^2
sum_sq_devs <- sum(sq_devs)
se <- sqrt(sum_sq_devs / nsims)
ses <- append(ses, se)
}
# Put district-wide estimates in dataframe
district_res <- data.frame(
c(cand, "se"),
c(betab_district_mean, ses[1]),
c(betaw_district_mean, ses[2])
)
colnames(district_res) <- c("Candidate", race, "other")
# Put precinct betas in dataframe
precinct_res <- cbind(res$betab, res$betaw)
colnames(precinct_res) <- paste(c("betab", "betaw"), cand, race, sep = "_")
# Save out aggs
aggs_b <- eiread(ei_out, "aggs")
setTxtProgressBar(pb, i)
list(district_res, precinct_res, aggs_b, list(race, cand, ei_out))
}
# Stop clusters as soon as done parallel processing
if (par_compute) {
# Stop clusters (always done between uses)
parallel::stopCluster(clust)
# Garbage collection (in case of leakage)
gc()
}
# close progress bar
close(pb)
# Extract results objects
district_results <- sapply(ei_results, function(x) x[1])
precinct_results <- sapply(ei_results, function(x) x[2])
agg_results <- sapply(ei_results, function(x) x[3])
ei_objects <- sapply(ei_results, function(x) x[4])
# Put results in dataframe
results_table <- get_results_table(
district_results,
cand_col = cand_cols,
race_col = race_cols,
n_cand = n_cands,
n_race = n_races,
n_iter = n_iters
)
# Plots moved to here
# Density plots
if (plots) {
message("Creating density plots")
# Combine aggregate results for district level values into one data frame
agg_race <- sapply(ei_results, function(x) colnames(x[[1]])[2])
agg_cand <- sapply(ei_results, function(x) x[[1]]$Candidate[1])
agg_race_cand <- paste0(agg_race, "_", agg_cand)
agg_race_cand_2 <- rep(agg_race_cand, each = 2)
agg_colnames <- as.vector(sapply(agg_results, function(x) colnames(x)))
new_colnames <- paste(tolower(agg_colnames), agg_race_cand_2, sep = "_")
agg_betas <- data.frame(do.call(cbind, agg_results))
colnames(agg_betas) <- new_colnames
# Create density plots
density_plots <- overlay_density_plot(
agg_betas,
results_table,
race_cols,
cand_cols,
plot_path,
ei_type = "ei"
)
# Create degree of racially polarized voting
rpv_distribution <- rpv_density(agg_betas, plot_path)
}
if (eiCompare_class) {
# Set up containers
races <- c()
cands <- c()
means <- c()
ses <- c()
sds <- c()
ci_lowers <- c()
ci_uppers <- c()
district_samples <- as.data.frame(matrix(ncol = 0, nrow = 99))
precinct_samples <- list()
for (i in 1:length(ei_objects)) {
ei_object <- ei_objects[[i]][[3]]
cand <- ei_objects[[i]][[2]]
race <- ei_objects[[i]][[1]]
cands <- append(cands, cand)
races <- append(races, race)
# Get estimates
aggs <- ei::eiread(ei_object, "aggs")[, 1]
# Both CIs
suppressMessages({
suppressWarnings({
cis <- bayestestR::ci(aggs, ci = 0.95, method = "HDI")
})
})
ci_lowers <- append(ci_lowers, cis$CI_low)
ci_uppers <- append(ci_uppers, cis$CI_high)
# Mean
mean <- mean(aggs, na.rm = TRUE)
means <- append(means, mean)
# Standard deviation
sd <- sd(aggs, na.rm = TRUE)
sds <- append(sds, sd)
# Add district chains to dataframe
district_name <- paste(cand, race, sep = "_")
district_samples[[district_name]] <- as.numeric(aggs)
# Get samples of precinct-level estimates
prec_res <- ei::eiread(
ei.object = ei_object,
"betab",
"sbetab",
"betaw",
"sbetaw"
)
# Put precinct betas in dataframe
precinct_res <- cbind(
prec_res$betab,
prec_res$sbetab,
prec_res$betaw,
prec_res$sbetaw
)
colnames(precinct_res) <- c("betab", "sbetab", "betaw", "sbetaw")
precinct_samples[[district_name]] <- precinct_res
}
# Make estimates table
estimates <- data.frame(cbind(means, sds, ci_lowers, ci_uppers))
estimates <- cbind(cands, races, estimates)
colnames(estimates) <- c(
"cand", "race", "mean", "sd", "ci_95_lower", "ci_95_upper"
)
output <- list(
"type" = "iter",
"estimates" = estimates,
"district_samples" = district_samples,
"precinct_samples" = precinct_samples,
"stat_objects" = ei_objects,
"name" = name
)
class(output) <- "eiCompare"
return(output)
} else {
message(
paste(
"This results output is deprecated by the eiCompare class",
"object. It will be removed in the near future."
)
)
# If betas == TRUE, return a list with results plus df of betas
if (betas) {
df_betas <- betas_for_return(precinct_results, race_cand_pairs)
to_return <- list(
"race_group_table" = results_table,
"all_betas" = df_betas
)
return(to_return)
} else {
return(results_table)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.