In zdaarau/fokus: Provides an API around the FOKUS Post-voting Surveys

NOTES

This file is not included in the source package because of the .nopurl suffix in its filename.
The chunks below have to be manually executed in order to regenerate the package data.
When used internally, other than internal data, exported data must always be referred to by its explicit namespace.

Setup

library(rlang,
        include.only = "%|%")
library(magrittr,
        include.only = c("%>%", "%<>%", "%T>%", "%!>%", "%$%"))

Define data

`qstnrs`

Questionnaire data as one single long-format tibble. We also export the qstnrs in all relevant formats to this repository's output/questionnaires directory, which is automatically deployed to qstnr.fokus.ag via Netlify.

qstnrs <-
  fokus::all_ballot_dates %>%
  magrittr::set_names(., .) |>
  purrr::map(fokus::cantons) |>
  purrr::imap(\(cantons, ballot_date) {

    cantons |>
      purrr::map(\(canton) {

        qstnr_tibble <-
          fokus:::gen_qstnr_tibble(ballot_date = ballot_date,
                                   canton = canton) |>
          fokus:::validate_qstnr_tibble()

        fokus::export_qstnr(qstnr_tibble = qstnr_tibble,
                            path = "output/questionnaires",
                            upload_to_g_drive = FALSE)
        qstnr_tibble
      }) |>
      purrr::list_rbind()
  }) |>
  purrr::list_rbind() |>
  # expand list cols
  tidyr::unnest(cols = any_of(fokus:::qstnr_item_keys_multival)) |>
  # modify some vars
  dplyr::mutate(
    ## complete `question_full`
    question_full = question_full %|% question,
    ## merge `question_intro_i/j` into `question_intro`
    question_intro = paste(tidyr::replace_na(data = question_intro_i,
                                             replace = ""),
                           tidyr::replace_na(data = question_intro_j,
                                             replace = "")),
    ## complete `*_common` vars with their ballot-date/canton-specific siblings
    question_common = question_common %|% question_full,
    variable_label_common = variable_label_common %|% variable_label) |>
  dplyr::select(-c(question_intro_i,
                   question_intro_j,
                   lvl,
                   i,
                   j)) |>
  dplyr::relocate(question_intro,
                  .before = question) |>
  # strip Markdown formatting from chr cols
  dplyr::mutate(dplyr::across(where(is.character),
                              pal::strip_md))

`proposals`

proposals <-
  fokus::combos_proposals() |>
  purrr::map(\(combo) {

    tibble::tibble(ballot_date = combo$ballot_date,
                   lvl = combo$lvl,
                   canton = combo$canton,
                   nr = combo$proposal_nr,
                   type = fokus::proposal_type(ballot_date = combo$ballot_date,
                                               lvl = combo$lvl,
                                               canton = combo$canton,
                                               proposal_nr = combo$proposal_nr),
                   name.de.short = fokus::proposal_name(ballot_date = combo$ballot_date,
                                                        lvl = combo$lvl,
                                                        canton = combo$canton,
                                                        proposal_nr = combo$proposal_nr,
                                                        type = "short"),
                   name.de.long = fokus::proposal_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       proposal_nr = combo$proposal_nr,
                                                       type = "long"))
  }) |>
  purrr::list_rbind()

`elections`

elections <- 
  fokus::combos_elections() |>
  purrr::map(\(combo) {

    tibble::tibble(ballot_date = combo$ballot_date,
                   lvl = combo$lvl,
                   canton = combo$canton,
                   prcd = combo$prcd,
                   nr = combo$election_nr,
                   name.de.short = fokus::election_name(ballot_date = combo$ballot_date,
                                                        lvl = combo$lvl,
                                                        canton = combo$canton,
                                                        prcd = combo$prcd,
                                                        election_nr = combo$election_nr,
                                                        type = "short"),
                   name.de.long = fokus::election_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       prcd = combo$prcd,
                                                       election_nr = combo$election_nr,
                                                       type = "long"),
                   name.de.body = fokus::election_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       prcd = combo$prcd,
                                                       election_nr = combo$election_nr,
                                                       type = "body"))
  }) |>
  purrr::list_rbind()

`ballots`

NOTES:

For every
- referendum proposal or majoritarian election, a CSV file containing the official ballot numbers is expected to be present at data-raw/ballots/{ballot_date}_{canton}_{lvl}_proposal_{proposal_nr}.csv or data-raw/ballots/{ballot_date}_{canton}_{lvl}_majoritarian_election_{election_nr}.csv, respectively.
- proportional election, a CSV file containing the officially apportioned party voter numbers is expected to be present at data-raw/ballots/{ballot_date}_{canton}_federal_proportional_election_{election_nr}_votes.csv and data-raw/ballots/{ballot_date}_{canton}_cantonal_proportional_election_{election_nr}_votes_apportioned.csv respectively. Additionally, a common CSV file containing the electorate and turnout numbers of all proportional elections in the respective canton on the respective political level is expected to be present at {canton}_{lvl}_proportional_elections_electorate.csv. The latter file must be updated each time a new proportional election is covered by a FOKUS survey in the respective canton on the respective political level.
This convention is based on the data available for the canton of Aargau and might require revision/extension to work for other cantons as well.

The CSV files must be downloaded manually (it's futile trying to automate this since official cantonal websites tend to change more often than we do surveys). The following download locations are currently known:
- Aargau
  - referendums proposals
    - cantonal
    - federal
  - elections
    - cantonal
      - majoritarian (2024 data is missing as of 2024-11-15 and only available from here as file Übersicht Kandidatenstimmen (Pressedaten))
      - proportional
        
        electorate/turnout (common dataset covering all cantonal proportional election dates)
        
        party votes (no common date-independent site exists)
        
        2024 (data is only available from here as file Übersicht Partei- und Kandidatenstimmen (Pressedaten) so far)
        
        2020
        
        2016
        
        Due to the division into constituencies and the biproportional election procedure, we cannot use the raw number of party votes provided here, but instead have to rely on the officially apportionated numbers above. - federal - majoritarian - proportional - electorate/turnout (common dataset covering all federal proportional election dates)
        
        party votes

ballots <- list()

# define common constants
na_vals <- c("", "...", "-")

# add referendum proposal data
ballots$referendum <-
  fokus::combos_proposals() |>
  purrr::map(\(combo) {

    # read in data
    path_file <- glue::glue("data-raw/ballots/{combo$ballot_date}_{combo$canton}_{combo$lvl}_proposal_{combo$proposal_nr}.csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    data <-
      readr::read_delim(file = path_file,
                        delim = ";",
                        locale = readr::locale(date_names = "de",
                                               encoding = "latin1"),
                        na = na_vals,
                        col_types = readr::cols(.default = readr::col_character())) |>
      # tidy cols
      fokus::tidy_cols() |>
      # remove all-NA cols (`AUSL_SCHWEIZER` before e-voting suspension)
      dplyr::select(-where(\(x) all(is.na(x)))) |>
      # add remaining cols
      dplyr::mutate(lvl = combo$lvl,
                    canton = combo$canton,
                    proposal_nr = combo$proposal_nr,
                    .after = ballot_date)

    # check data integrity
    ballot_date_data <- unique(data$ballot_date)

    if (length(ballot_date_data) > 1L) {
      cli::cli_abort(paste0("The raw ballot CSV data file {.file {path_file}} contains data for multiple ballot dates: {.val {ballot_date_data}}. ",
                            "Please fix this and run again."))
    }
    if (ballot_date_data != combo$ballot_date) {
      cli::cli_abort("The raw ballot CSV data file {.file {path_file}} does not contain the data for ballot date {.val {combo$ballot_date}} but ",
                     "{.val {ballot_date_data}}. Please fix this and run again.")
    }

    data
  }) |>
  purrr::compact() |>
  purrr::list_rbind()

# add election data
## per-election datasets
ballots$election <-
  fokus::combos_elections() %>%
  magrittr::set_names(value = purrr::map_chr(.,
                                             \(combo) glue::glue("{combo$ballot_date}_{combo$canton}_{combo$lvl}_{combo$prcd}_{combo$election_nr}"))) |>
  purrr::imap(\(combo, election_str) {

    path_file <- glue::glue("data-raw/ballots/{combo$ballot_date}_{combo$canton}_{combo$lvl}_{combo$prcd}_election_{combo$election_nr}",
                            ifelse(combo$prcd == "majoritarian",
                                   "",
                                   ifelse(combo$lvl == "federal",
                                          "_votes",
                                          "_votes_apportioned")),
                            ".csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    if (combo$prcd == "majoritarian") {

      candidates_phrased <-
        fokus::phrase_election_candidate(ballot_date = combo$ballot_date,
                                         lvl = combo$lvl,
                                         canton = combo$canton,
                                         election_nr = combo$election_nr)
      result <-
        # read in all cols as chr
        readr::read_delim(file = path_file,
                          delim = ";",
                          locale = readr::locale(date_names = "de",
                                                 encoding = "latin1"),
                          na = na_vals,
                          col_types = readr::cols(.default = readr::col_character())) |>
        # remove garbage from raw col names
        dplyr::rename_with(.cols = everything(),
                           .fn = \(x) stringr::str_remove_all(string = x,
                                                              pattern = stringr::fixed("#"))) |>
        # tidy common cols
        fokus::tidy_cols() |>
        # ensure we renamed all non-candidate-specific col names
        pal::when(length(candidates_phrased) != length(stringr::str_subset(colnames(.), "^[[:upper:]_\\s]+$")) ~
                    cli::cli_abort(paste0("There are raw non-candidate-specific column names left. Please debug. Raw column names include: ",
                                          "{.vals {stringr::str_subset(colnames(.), '^[[:upper:]_\\s]+$')}}")),
                  ~ .) |>
        # rename candidate-specific cols
        dplyr::rename_with(.cols = -any_of(fokus:::dicts$colnames$ballots),
                           .fn = \(x) {

                             ix <-
                               candidates_phrased |>
                               stringi::stri_trans_general(id = "de-ASCII",
                                                           forward = TRUE) |>
                               purrr::map_int(\(x2) stringr::str_which(string = x2,
                                                                       pattern = stringr::regex(pattern =
                                                                                                  x |>
                                                                                                  stringi::stri_trans_general(id = "de-ASCII",
                                                                                                                              forward = TRUE) |>
                                                                                                  stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                                                                                                           replacement = "."),
                                                                                                ignore_case = TRUE)))
                             candidates_phrased[ix]
                           }) |>
        # convert remaining col types
        dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "ballot_date")),
                                    .fns = \(x) as.integer(as.numeric(x))))
    } else {

      parties_phrased <-
        fokus::election_parties(ballot_date = combo$ballot_date,
                                lvl = combo$lvl,
                                canton = combo$canton,
                                election_nr = combo$election_nr) %$%
        name.de.qstnr

      if (combo$lvl == "federal") {

        result <-
          readr::read_delim(file = path_file,
                            delim = ";",
                            locale = readr::locale(date_names = "de",
                                                   encoding = "latin1"),
                            na = na_vals,
                            col_types = readr::cols(.default = readr::col_character())) |>
          # tidy cols
          fokus::tidy_cols() |>
          # convert remaining col types
          dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "ballot_date")),
                                      .fns = \(x) as.integer(as.numeric(x))))
      } else {

        result <-
          readr::read_csv(file = path_file,
                          na = na_vals,
                          col_types = readr::cols(.default = readr::col_character())) |>
          # tidy cols
          fokus::tidy_cols() |>
          # convert remaining col types
          dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "electorate_total")),
                                      .fns = \(x) as.numeric(x)))
      }

      # rename party-specific cols
      parties_phrased_normalized <-
        parties_phrased |>
        stringi::stri_trans_general(id = "de-ASCII",
                                    forward = TRUE) |>
        stringr::str_replace_all(pattern = stringr::fixed("."),
                                 replacement = " ") |>
        stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                 replacement = "")

      party_col_names_normalized <-
        colnames(result) |>
        setdiff(y = fokus:::dicts$colnames$ballots) |>
        stringi::stri_trans_general(id = "de-ASCII",
                                    forward = TRUE) |>
        stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                 replacement = ".") |>
        # modify junior party names to match ours
        stringr::str_replace_all(pattern = "^(?i)j(bdp|cvp|evp|glp|gruene|mitte|svp)$",
                                 replacement = "Junge \\1") %>%
        stringr::str_replace_all(pattern = "^(?i)jfdp$",
                                 replacement = "Jungfreisinnige") %>%
        # modify remaining party names to match ours
        dplyr::case_match(.x = .,
                          "T65P"  ~ "TEAM65",
                          "Musik" ~ "Musikpartei",
                          .default = .)

      party_col_names_new <-
        party_col_names_normalized |>
        purrr::map_chr(\(col_name_normalized) {

          parties_phrased_normalized |>
            stringr::str_which(pattern = stringr::regex(pattern = paste0("\\b", col_name_normalized, "\\b"),
                                                        ignore_case = TRUE)) %>%
            magrittr::extract(parties_phrased, .)
        })

      colnames(result)[!(colnames(result) %in% fokus:::dicts$colnames$ballots)] <- party_col_names_new

      # accumulate cols where necessary
      col_names_duplicated <-
        colnames(result) |>
        table(exclude = NULL) %>%
        magrittr::extract(. > 1L) |>
        names()

      for (col_name_duplicated in col_names_duplicated) {
        ix <- which(colnames(result) == col_name_duplicated)
        col_new <- rowSums(result[, ix])
        result %<>% .[, -ix]
        result[[col_name_duplicated]] <- col_new
      }

      # ensure we renamed all cols
      colnames_raw <- setdiff(x = colnames(result),
                              y = c(fokus:::dicts$colnames$ballots,
                                    parties_phrased))

      if (length(colnames_raw) > 0L) {
        cli::cli_abort("There are {.val {length(colnames_raw)}} raw column names left. Please debug. Raw column names include: {.vals {colnames_raw}}")
      }
    }

    result
  }) |>
  purrr::compact()

## per-lvl electorate and turnout dataset for proportional elections
ballots$election$common <-
  fokus::combos_elections(prcds = "proportional",
                          incl_nr = FALSE) |>
  purrr::map_dfr(tibble::as_tibble_row) |>
  dplyr::select(-c(ballot_date, prcd)) |>
  unique() |>
  purrr::pmap(\(lvl, canton) {

    path_file <- glue::glue("data-raw/ballots/{canton}_{lvl}_proportional_elections_electorate.csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    # read in all cols as chr
    readr::read_delim(file = path_file,
                      delim = ";",
                      locale = readr::locale(date_names = "de",
                                             encoding = "latin1"),
                      na = na_vals,
                      col_types = readr::cols(.default = readr::col_character())) |>
      fokus::tidy_cols() |>
      dplyr::mutate(lvl = lvl,
                    canton = canton,
                    # numbers are not apportioned, so we can use int
                    voters_total = as.integer(voters_total))
  }) |>
  purrr::list_rbind() |>
  dplyr::relocate(any_of(c("fso_id",
                           "ballot_date",
                           "lvl",
                           "canton")),
                  everything()) |>
  dplyr::mutate(dplyr::across(.cols = c(any_of("fso_id"),
                                        starts_with("electorate_"),
                                        starts_with("votes_"),
                                        contains("_votes_")),
                              .fns = as.integer))
# clean up
rm(na_vals)

Write data

Save all the bigger data objects to separate files under data/*.rda. Note that when documenting them, they mustn't be explicitly @exported since they're already automatically exported and thus available to package users.

usethis::use_data(qstnrs,
                  proposals,
                  elections,
                  ballots,
                  internal = FALSE,
                  overwrite = TRUE,
                  compress = "xz",
                  version = 3L)