knitr::opts_chunk$set( collapse = TRUE, comment = "#>", cache = TRUE )
To install the package use devtools (devtools is available on CRAN).
# devtools::install_github("tereom/quickcountmx") library(quickcountmx) library(tidyverse)
The package includes the results of the 2012 Guanajuato Governor election, which will be used to exemplify the functions.
library(dplyr) data("gto_2012") dplyr::glimpse(gto_2012)
The variables are described in the package documentation ?gto_2012
.
counts <- mrp_party_estimation(gto_2012, party = pan_na, stratum = distrito_loc_17, frac = 0.04, seed = 211871, n_chains = 2, n_burnin = 500, n_iter = 1000, model_string = "model_bern_t") qplot(counts$n_votes, binwidth = 1000) + geom_vline(xintercept = sum(gto_2012$pan_na), colour ='red')
R2jags::traceplot(counts$fit, varname = 'deviance')
Calibration run for one party (pan):
gto_pan <- calibration_party(gto_2012, party = pan_na, frac = 0.075, stratum = distrito_loc_17, n_iter = 1500, n_burnin = 500, cl_cores = 5, n_chains = 1, seed = 19112, n_rep = 3) pan <- summary_calibration_party(gto_pan)
pan$plot pan$coverage
Calibration run for one party (prd):
gto_prd <- calibration_party(gto_2012, party = prd, frac = 0.075, stratum = distrito_loc_17, n_iter = 1500, n_burnin = 500, cl_cores = 5, n_chains = 1, seed = 19112, n_rep = 3) prd <- summary_calibration_party(gto_prd)
prd$plot prd$coverage
calibration_gto <- calibration_prop(gto_2012, pri_pvem:otros, frac = 0.075, stratum = distrito_loc_17, n_iter = 1500, n_burnin = 500, n_chains = 1, seed = 191127, cl_cores = 1, n_rep = 3, model_string = "model_bern_t")
calib_summary <- summary_calibration(calibration_gto) calib_summary
calibration_gto_miss <- calibration_prop(gto_2012, pri_pvem:otros, frac = 0.075, stratum = distrito_loc_17, n_iter = 1500, n_burnin = 500, n_chains = 1, seed = 19331, cl_cores = 1, n_rep = 10, model_string = "model_bern_t", num_missing_strata = 2)
calib_summary <- summary_calibration(calibration_gto_miss) calib_summary
set.seed(1211) gto_stratum_sizes <- gto_2012 %>% dplyr::group_by(distrito_loc_17) %>% dplyr::mutate(n_stratum = n()) calib_ratio <- parallel::mclapply(1:200, function(i) { gto_sample <- select_sample_prop(gto_stratum_sizes, stratum = distrito_loc_17, 0.075) ratio <- ratio_estimation(gto_sample, stratum = distrito_loc_17, n_stratum = n_stratum, ... = pri_pvem:otros) ratio <- ratio %>% mutate(n_sim = i) ratio }, mc.cores = 4) %>% bind_rows
gto_gather <- gto_2012 %>% dplyr::select(casilla_id, pri_pvem:otros) %>% tidyr::gather(party, votes, pri_pvem:otros) actual <- gto_gather %>% group_by(party) %>% summarise(n_votes = sum(votes, na.rm = T)) %>% mutate(prop_votes = 100*n_votes/sum(n_votes, na.rm=T)) calib_ratio <- calib_ratio %>% left_join(actual) %>% ungroup() %>% mutate(coverage = r - 2*std_error < prop_votes & r + 2*std_error > prop_votes, precision = 2 * std_error)
ggplot(filter(calib_ratio, n_sim <= 200), aes(x = n_sim, y = r, ymin = r - 2*std_error, ymax = r + 2*std_error )) + geom_linerange(colour='red') + geom_hline(data = actual, aes(yintercept = prop_votes)) + facet_wrap(~party, scales="free_y")
calib_summary <- calib_ratio %>% group_by(party) %>% summarise(coverage = mean(coverage), precision_media = mean(precision), n_sims = n()) calib_summary
Randomly collapsing to observed strata
set.seed(1211) gto_stratum_sizes <- gto_2012 %>% dplyr::group_by(distrito_loc_17) %>% dplyr::mutate(n_stratum = n()) calib_ratio_miss <- parallel::mclapply(1:200, function(i) { # select sample gto_sample <- select_sample_prop(gto_stratum_sizes, stratum = distrito_loc_17, 0.075) %>% mutate(distrito_coll = distrito_loc_17) %>% select(-n_stratum) strata <- unique(gto_2012$distrito_loc_17) missing <- sample(strata, 3) present <- setdiff(strata, missing) gto_sample_miss <- gto_sample %>% filter(distrito_loc_17 %in% present) collapse_to <- sample(present, length(missing)) new_strata <- data_frame(distrito_loc_17 = c(missing, present), distrito_coll = c(collapse_to, present)) gto_2012_collapsed <- left_join(gto_2012, new_strata, by = "distrito_loc_17") gto_sample_miss <- gto_sample_miss %>% left_join(gto_2012_collapsed %>% group_by(distrito_coll) %>% summarise(n_stratum = n()), by ="distrito_coll") ratio <- ratio_estimation(gto_sample_miss, stratum = distrito_coll, n_stratum = n_stratum, ... = pri_pvem:otros) ratio <- ratio %>% mutate(n_sim = i) ratio }, mc.cores = 4) %>% bind_rows
gto_gather <- gto_2012 %>% dplyr::select(casilla_id, pri_pvem:otros) %>% tidyr::gather(party, votes, pri_pvem:otros) actual <- gto_gather %>% group_by(party) %>% summarise(n_votes = sum(votes, na.rm = T)) %>% mutate(prop_votes = 100*n_votes/sum(n_votes, na.rm=T)) calib_ratio_miss <- calib_ratio_miss %>% left_join(actual) %>% ungroup() %>% mutate(coverage = r - 2*std_error < prop_votes & r + 2*std_error > prop_votes, precision = 2 * std_error)
ggplot(filter(calib_ratio_miss, n_sim <= 200), aes(x = n_sim, y = r, ymin = r - 2*std_error, ymax = r + 2*std_error )) + geom_linerange(colour='red') + geom_hline(data = actual, aes(yintercept = prop_votes)) + facet_wrap(~party, scales="free_y")
calib_summary <- calib_ratio_miss %>% group_by(party) %>% summarise(coverage = mean(coverage), precision_media = mean(precision), n_sims = n()) calib_summary
calib_ratio <- calib_ratio %>% mutate(int_l = r - 2*std_error, int_r = r + 2*std_error) calibration_gto <- calibration_gto %>% mutate(coverage = prop_votes < int_r & prop_votes > int_l)
ratio <- select(calib_ratio, n_sim, party, int_l, int_r) %>% mutate(method = 'ratio') bayesian <- select(calibration_gto, n_sim, party, int_l, int_r) %>% mutate(method = 'bayesian') intervals <- bind_rows(ratio, bayesian)
# also try with median and mean instad of min/max intervals_max <- intervals %>% group_by(n_sim, party) %>% mutate(int_l = min(int_l), int_r = max(int_r)) %>% left_join(actual)
intervals_max %>% ungroup %>% mutate(coverage = int_l < prop_votes & int_r > prop_votes) %>% group_by(party, method) %>% mutate(n = max(n_sim)) %>% filter(n_sim < min(n)) %>% group_by(party) %>% summarise(coverage = mean(coverage))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.