knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(quickcountmx)
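# CmdStan only needs to be installed once; skip this step if it is already available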
cmdstanr::install_cmdstan(cores = 2, quiet = TRUE)

Estimation and testing with 2018 results

library(dplyr)
library(tidyr)
data("conteo_2018")
nrow(conteo_2018)
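# inspect the available station-level variables (uses dplyr's glimpse)
glimpse(conteo_2018)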

Select a state

state <- "ZACATECAS"
conteo <- conteo_2018 %>% filter(NOMBRE_ESTADO == state)

After preparing the station-level data (removing empty stations and defining strata and covariates), the final results can be calculated as follows:

conteo <- conteo %>% 
  filter(TOTAL_VOTOS_CALCULADOS != 0) %>% 
  filter(ID_DISTRITO != 0) %>% 
  mutate(distrito_f = interaction(ID_ESTADO, ID_DISTRITO)) %>% 
  mutate(OTROS = VN + CNR + CAND_IND_01) %>% 
  mutate(no_casilla = row_number()) %>% 
  mutate(ln = LISTA_NOMINAL_CASILLA) %>% 
  mutate(tipo_1 = ifelse(tipo_seccion == 1, 1, 0)) %>% 
  mutate(tipo_2 = ifelse(tipo_seccion == 2, 2, 0)) %>% 
  mutate(casilla_e = ifelse(tipo_casilla == "E", 1, 0))
# final count: proportion of votes for each candidate
final_computos <- conteo %>% 
  select(any_of(c("AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS", "TOTAL_VOTOS_CALCULADOS"))) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>% 
  mutate(across(where(is.numeric), ~ .x / TOTAL_VOTOS_CALCULADOS)) %>% 
  pivot_longer(cols = everything(), names_to = "candidato", values_to = "prop")
final_computos
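# turnout: total votes counted as a fraction of the nominal list (registered voters)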
sum(conteo$TOTAL_VOTOS_CALCULADOS) / sum(conteo$LISTA_NOMINAL_CASILLA)

To select a sample with proportional allocation across strata:

sample <- select_sample_prop(conteo, 
                             stratum = distrito_f, frac = 0.25, seed = 1010)
nrow(sample)
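
As a quick check (a minimal sketch, assuming the output of select_sample_prop keeps the distrito_f column), the sampling fraction should be close to 25% in every stratum:

sample %>% 
  count(distrito_f, name = "n_sample") %>% 
  left_join(count(conteo, distrito_f, name = "n_frame"), by = "distrito_f") %>% 
  mutate(frac = n_sample / n_frame)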

Bayesian estimates follow (the first run takes longer because the model needs to be compiled). In this example chains and num_iter are kept small so it runs quickly; both should be increased for real use:

fit_estimates <- hb_estimation(sample, stratum = distrito_f,
                      id_station = no_casilla,
                      sampling_frame =  conteo,
                      parties = all_of(c("AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS")),
                      covariates = .fittedPC1,
                      #covariates = c(.fittedPC1, .fittedPC2, tipo_1, tipo_2, ln_log_c),
                      model = "mlogit-corr",
                      chains = 2, 
                      num_warmup = 200,
                      num_iter = 200, 
                      adapt_delta = 0.8,
                      return_fit = FALSE, 
                      nominal_max = 1200,
                      # increase chains and num_iter
                      seed = 123)
estimates <- fit_estimates$estimates
estimates %>% mutate(across(where(is.numeric), ~round(.x, 4)))
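
To compare the model estimates with the final count proportions computed above, a join along the lines below can be used. This is only a sketch: the column name party in estimates is an assumption about the output of hb_estimation and may need to be adapted:

# sketch: "party" is an assumed column name in `estimates`; check names(estimates)
estimates %>% 
  left_join(final_computos, by = c("party" = "candidato"))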

We can also work with a time-censored sample. The correction to the interval width is based on arrival modeling for the 2018 sample, so it should be used with this in mind:

# consider observed arrival times to do censoring
#data("arrivals_tbl")
sample_censored <- sample %>% 
  sample_frac(0.5)
# 3 hours later gives around 50% of sample:
prop_obs <- 0.50
fit_estimates <- hb_estimation(sample_censored, stratum = distrito_f,
                      id_station = no_casilla,
                      sampling_frame =  conteo,
                      parties = all_of(c("AMLO", "JAMK", "RAC", "CAND_IND_02", "OTROS")),
                      covariates = c(.fittedPC1, tipo_1, tipo_2, ln_log_c),
                      prop_obs = prop_obs, 
                      model = "mlogit-corr",
                      chains = 2, num_iter = 200, # increase chains and num_iter
                      max_treedepth = 11,
                      seed = 123)
estimates <- fit_estimates$estimates
estimates %>% mutate(across(where(is.numeric), ~round(.x, 4)))
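
Instead of a random 50% subsample, a censored sample can also be built from observed arrival times (the arrivals_tbl dataset commented out above), keeping only the stations reported before a cutoff. The sketch below assumes hypothetical column names no_casilla and time (hours after polls close) in arrivals_tbl; check the actual dataset before using it:

# sketch only: the column names in arrivals_tbl are assumptions
data("arrivals_tbl")
arrived <- arrivals_tbl %>% 
  filter(time <= 3)
sample_censored <- sample %>% 
  semi_join(arrived, by = "no_casilla")
prop_obs <- nrow(sample_censored) / nrow(sample)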

Estimation and testing with 2015 state results

data("nay_2015")
nrow(nay_2015)
conteo <- nay_2015 %>%
  mutate(TOTAL_VOTOS_CALCULADOS = CAND1 + CAND2 + CAND3 + CAND4 + CAND5 + CAND6 + CAND7 + CAND8 + OTROS) %>%
  mutate(ID_DISTRITO = ID_DISTRITO_15) %>%
  filter(TOTAL_VOTOS_CALCULADOS != 0) %>%
  filter(ID_DISTRITO != 0) %>%
  mutate(distrito_f = interaction(ID_ESTADO, ID_DISTRITO)) %>%
  mutate(no_casilla = row_number()) %>%
  # stations with a zero nominal list are imputed with 1200 (the nominal_max value used above)
  mutate(ln = ifelse(LISTA_NOMINAL_15 == 0, 1200, LISTA_NOMINAL_15))

final_computos <- conteo %>%
  select(any_of(c("CAND1", "CAND2", "CAND3", "CAND4", "CAND5", "CAND6", "CAND7", "CAND8",
                  "OTROS", "TOTAL_VOTOS_CALCULADOS"))) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
  mutate(across(where(is.numeric), ~ .x / TOTAL_VOTOS_CALCULADOS)) %>%
  pivot_longer(cols = everything(), names_to = "candidato", values_to = "prop")
final_computos

To select a sample with proportional allocation across strata:

sample <- select_sample_prop(conteo, 
                             stratum = distrito_f, frac = 0.15, seed = 100)
nrow(sample)

Bayesian estimates follow (the first run takes longer because the model needs to be compiled). In this example chains and num_iter are kept small so it runs quickly; both should be increased for real use:

fit_estimates <- hb_estimation(sample, stratum = distrito_f,
                      id_station = no_casilla,
                      sampling_frame =  conteo,
                      parties = c(contains("CAND"), OTROS),
                      covariates = .fittedPC1,
                      model = "mlogit-corr",
                      chains = 2, num_iter = 200, # increase chains and num_iter
                      seed = 123)
estimates <- fit_estimates$estimates
estimates %>% mutate(across(where(is.numeric), ~round(.x, 3)))

