#devtools::install_github("tereom/quickcountmx", force = TRUE) library(quickcountmx) library(tidyverse) model_string <- eval(parse(text=deparse(quickcountmx:::model_bern_t)[3])) model_string %>% cat
Sample coverage report for one party (vote counts):
data("gto_2012") gto_pan <- calibration_party(gto_2012, party = pan_na, frac = 0.075, stratum = distrito_loc_17, n_iter = 1500, n_burnin = 500, cl_cores = 36, n_chains = 1, seed = 19112, n_rep = 36) pan <- summary_calibration_party(gto_pan) pan$plot pan$coverage
calibration_gto <- calibration_prop(gto_2012, pri_pvem:otros, frac = 0.075, stratum = distrito_loc_17, n_iter = 3000, n_burnin = 1500, n_chains = 1, seed = 1933210, cl_cores = 6, n_rep = 100, model_string = "model_bern_t", num_missing_strata = 2) saveRDS(calibration_gto, file = "report_calib.rds")
Report and plot
report <- summary_calibration(calibration_gto) report$plot
report$coverage
set.seed(1211) gto_stratum_sizes <- gto_2012 %>% dplyr::group_by(distrito_loc_17) %>% dplyr::mutate(n_stratum = n()) calib_ratio <- parallel::mclapply(1:200, function(i) { gto_sample <- select_sample_prop(gto_stratum_sizes, stratum = distrito_loc_17, 0.075) ratio <- ratio_estimation(gto_sample, stratum = distrito_loc_17, n_stratum = n_stratum, ... = pri_pvem:otros) ratio <- ratio %>% mutate(n_sim = i) ratio }, mc.cores = 30) %>% bind_rows
gto_gather <- gto_2012 %>% dplyr::select(casilla_id, pri_pvem:otros) %>% tidyr::gather(party, votes, pri_pvem:otros) actual <- gto_gather %>% group_by(party) %>% summarise(n_votes = sum(votes)) %>% mutate(prop_votes = 100*n_votes/sum(n_votes)) calib_ratio <- calib_ratio %>% left_join(actual) %>% ungroup() %>% mutate(coverage = r - 2*std_error < prop_votes & r + 2*std_error > prop_votes, precision = 2 * std_error)
ggplot(filter(calib_ratio, n_sim <= 200), aes(x = n_sim, y = r, ymin = r - 2*std_error, ymax = r + 2*std_error )) + geom_linerange(colour='red') + geom_hline(data = actual, aes(yintercept = prop_votes)) + facet_wrap(~party, scales="free_y")
calib_summary <- calib_ratio %>% group_by(party) %>% summarise(coverage = mean(coverage), precision_media = mean(precision)) calib_summary
Exluded strata randomly selected for each run.
calibration_gto_miss <- calibration_prop(gto_2012, pri_pvem:otros, frac = 0.075, stratum = distrito_loc_17, n_iter = 3000, n_burnin = 1500, n_chains = 1, seed = 1933210, cl_cores = 6, n_rep = 200, model_string = "model_bern_t", num_missing_strata = 2) saveRDS(calibration_gto_miss, file = "report_calib_missing.rds")
Report and plot
report <- summary_calibration(calibration_gto_miss) report$plot report$coverage
set.seed(12121) gto_stratum_sizes <- gto_2012 %>% dplyr::group_by(distrito_loc_17) %>% dplyr::mutate(n_stratum = n()) calib_ratio <- parallel::mclapply(1:200, function(i) { # select sample gto_sample <- select_sample_prop(gto_stratum_sizes, stratum = distrito_loc_17, 0.075) %>% mutate(distrito_coll = distrito_loc_17) %>% select(-n_stratum) strata <- unique(gto_2012$distrito_loc_17) missing <- sample(strata, 3) present <- setdiff(strata, missing) gto_sample_miss <- gto_sample %>% filter(distrito_loc_17 %in% present) collapse_to <- sample(present, length(missing)) new_strata <- data_frame(distrito_loc_17 = c(missing, present), distrito_coll = c(collapse_to, present)) gto_2012_collapsed <- left_join(gto_2012, new_strata, by = "distrito_loc_17") gto_sample_miss <- gto_sample_miss %>% left_join(gto_2012_collapsed %>% group_by(distrito_coll) %>% summarise(n_stratum = n()), by ="distrito_coll") ratio <- ratio_estimation(gto_sample_miss, stratum = distrito_coll, n_stratum = n_stratum, ... = pri_pvem:otros) ratio <- ratio %>% mutate(n_sim = i) ratio }, mc.cores = 30) %>% bind_rows
gto_gather <- gto_2012 %>% dplyr::select(casilla_id, pri_pvem:otros) %>% tidyr::gather(party, votes, pri_pvem:otros) actual <- gto_gather %>% group_by(party) %>% summarise(n_votes = sum(votes)) %>% mutate(prop_votes = 100*n_votes/sum(n_votes)) calib_ratio <- calib_ratio %>% left_join(actual) %>% ungroup() %>% mutate(coverage = r - 2 * std_error < prop_votes & r + 2 * std_error > prop_votes, precision = 2 * std_error)
ggplot(filter(calib_ratio, n_sim <= 50), aes(x = n_sim, y = r, ymin = r - 2*std_error, ymax = r + 2*std_error )) + geom_linerange(colour='red') + geom_hline(data = actual, aes(yintercept = prop_votes)) + facet_wrap(~party, scales="free_y")
calib_summary <- calib_ratio %>% group_by(party) %>% summarise(coverage = mean(coverage), precision_media = mean(precision), n_sims = n()) calib_summary
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.