# Default knitr chunk options for this document: `collapse = TRUE` and
# "#>" as the prefix for printed output lines.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Attach the package used throughout this document.
library(quickcountmx)

# Install CmdStan through cmdstanr (called with cores = 2 and quiet = TRUE).
cmdstanr::install_cmdstan(cores = 2, quiet = TRUE)
library(dplyr)
library(tidyr)

# Load the `conteo_2018` dataset and show how many rows it has.
data("conteo_2018")
nrow(conteo_2018)
Select a state
# Keep only the rows of `conteo_2018` whose NOMBRE_ESTADO matches `state`.
state <- "ZACATECAS"
conteo <- conteo_2018 %>%
  filter(NOMBRE_ESTADO == state)
The final computations can be calculated as follows:
# Drop rows with zero computed votes or district id 0, then derive the
# stratum factor, the pooled OTROS count, a row id and the covariate columns.
conteo <- conteo %>%
  filter(TOTAL_VOTOS_CALCULADOS != 0) %>%
  filter(ID_DISTRITO != 0) %>%
  mutate(distrito_f = interaction(ID_ESTADO, ID_DISTRITO)) %>%
  mutate(OTROS = VN + CNR + CAND_IND_01) %>%
  mutate(no_casilla = row_number()) %>%
  mutate(ln = LISTA_NOMINAL_CASILLA) %>%
  # covariates
  # NOTE(review): tipo_2 is coded 2/0 while tipo_1 and casilla_e are coded
  # 1/0 -- confirm this is intended.
  mutate(tipo_1 = ifelse(tipo_seccion == 1, 1, 0)) %>%
  mutate(tipo_2 = ifelse(tipo_seccion == 2, 2, 0)) %>%
  mutate(casilla_e = ifelse(tipo_casilla == "E", 1, 0))

# Column totals of the selected vote columns, each divided by the total of
# TOTAL_VOTOS_CALCULADOS, reshaped to one row per column.
# NOTE(review): presumably relies on TOTAL_VOTOS_CALCULADOS being the last
# selected column so it is divided by itself only after the others -- confirm.
final_computos <- conteo %>%
  select(any_of(c(
    "AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS", "TOTAL_VOTOS_CALCULADOS"
  ))) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
  mutate(across(where(is.numeric), ~ .x / TOTAL_VOTOS_CALCULADOS)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "candidato",
    values_to = "prop"
  )
final_computos

# Sum of computed votes divided by the sum of the nominal list.
sum(conteo$TOTAL_VOTOS_CALCULADOS) / sum(conteo$LISTA_NOMINAL_CASILLA)
To select a proportional sample
# Select a sample from `conteo` using `distrito_f` as stratum, a sampling
# fraction of 0.25 and a fixed seed, then show its number of rows.
sample <- select_sample_prop(
  conteo,
  stratum = distrito_f,
  frac = 0.25,
  seed = 1010
)
nrow(sample)
Bayesian estimates (the first run takes longer because the model has to be compiled). The values of `chains` and `num_iter` used in the following example are small and should be increased for real use:
# Fit the "mlogit-corr" model on the sample, using `conteo` as the sampling
# frame.
# NOTE(review): `.fittedPC1` is not created in this document; presumably
# hb_estimation() derives it internally -- confirm.
fit_estimates <- hb_estimation(
  sample,
  stratum = distrito_f,
  id_station = no_casilla,
  sampling_frame = conteo,
  parties = all_of(c("AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS")),
  covariates = .fittedPC1,
  #covariates = c(.fittedPC1, .fittedPC2, tipo_1, tipo_2, ln_log_c),
  model = "mlogit-corr",
  chains = 2,
  num_warmup = 200,
  num_iter = 200,
  adapt_delta = 0.8,
  return_fit = FALSE,
  nominal_max = 1200,
  # increase chains and num_iter
  seed = 123
)
# Extract the `estimates` element of the fit and display it with numeric
# columns rounded to 4 decimal places.
estimates <- fit_estimates$estimates
estimates %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))
We can also work with a time-censored sample. The correction to the interval width is based on arrival modeling for the 2018 sample, so it should be used with this in mind:
# consider observed arrival times to do censoring
#data("arrivals_tbl")
# Here the censoring is imitated by keeping a random half of the sample.
sample_censored <- sample %>%
  sample_frac(0.5)

# 3 hours later gives around 50% of sample:
prop_obs <- 0.50

# NOTE(review): `.fittedPC1` and `ln_log_c` are not created in this document;
# presumably hb_estimation() derives them internally -- confirm.
fit_estimates <- hb_estimation(
  sample_censored,
  stratum = distrito_f,
  id_station = no_casilla,
  sampling_frame = conteo,
  parties = all_of(c("AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS")),
  covariates = c(.fittedPC1, tipo_1, tipo_2, ln_log_c),
  prop_obs = prop_obs,
  model = "mlogit-corr",
  chains = 2,
  num_iter = 200,
  # increase chains and num_iter
  max_treedepth = 11,
  seed = 123
)
# Extract the `estimates` element of the censored-sample fit and display it
# with numeric columns rounded to 4 decimal places.
estimates <- fit_estimates$estimates
estimates %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))
# Load the `nay_2015` dataset and show how many rows it has.
data("nay_2015")
nrow(nay_2015)
# Build the total vote count from the candidate columns plus OTROS, drop rows
# with zero votes or district id 0, and derive the stratum factor and row id.
# A nominal list of 0 is replaced by 1200 in `ln`.
conteo <- nay_2015 %>%
  mutate(
    TOTAL_VOTOS_CALCULADOS = CAND1 + CAND2 + CAND3 + CAND4 + CAND5 + CAND6 +
      CAND7 + CAND8 + OTROS
  ) %>%
  mutate(ID_DISTRITO = ID_DISTRITO_15) %>%
  filter(TOTAL_VOTOS_CALCULADOS != 0) %>%
  filter(ID_DISTRITO != 0) %>%
  mutate(distrito_f = interaction(ID_ESTADO, ID_DISTRITO)) %>%
  mutate(no_casilla = row_number()) %>%
  mutate(ln = ifelse(LISTA_NOMINAL_15 == 0, 1200, LISTA_NOMINAL_15))

# Column totals of the selected vote columns, each divided by the total of
# TOTAL_VOTOS_CALCULADOS, reshaped to one row per column.
# NOTE(review): presumably relies on TOTAL_VOTOS_CALCULADOS being the last
# selected column so it is divided by itself only after the others -- confirm.
final_computos <- conteo %>%
  select(any_of(c(
    "CAND1", "CAND2", "CAND3", "CAND4", "CAND5", "CAND6", "CAND7", "CAND8",
    "OTROS", "TOTAL_VOTOS_CALCULADOS"
  ))) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
  mutate(across(where(is.numeric), ~ .x / TOTAL_VOTOS_CALCULADOS)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "candidato",
    values_to = "prop"
  )
final_computos
To select a proportional sample
# Select a sample from `conteo` with a sampling fraction of 0.15 and a fixed
# seed, then show its number of rows.
# NOTE(review): `estrato_df` is not created in this document (the data
# preparation builds `distrito_f`); presumably it is a column shipped with
# `nay_2015` -- confirm.
sample <- select_sample_prop(
  conteo,
  stratum = estrato_df,
  frac = 0.15,
  seed = 100
)
nrow(sample)
Bayesian estimates (the first run takes longer because the model has to be compiled). The values of `chains` and `num_iter` used in the following example are small and should be increased for real use:
# Fit the "mlogit-corr" model on the sample, using `conteo` as the sampling
# frame. Parties are every column whose name contains "CAND", plus OTROS.
# NOTE(review): `.fittedPC1` is not created in this document; presumably
# hb_estimation() derives it internally -- confirm.
fit_estimates <- hb_estimation(
  sample,
  stratum = estrato_df,
  id_station = no_casilla,
  sampling_frame = conteo,
  parties = c(all_of(contains("CAND")), OTROS),
  covariates = .fittedPC1,
  model = "mlogit-corr",
  chains = 2,
  num_iter = 200,
  # increase chains and num_iter
  seed = 123
)
# Extract the `estimates` element of the fit and display it with numeric
# columns rounded to 3 decimal places.
estimates <- fit_estimates$estimates
estimates %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.