NOTES

Setup

library(rlang,
        include.only = "%|%")
library(magrittr,
        include.only = c("%>%", "%<>%", "%T>%", "%!>%", "%$%"))

Define data

qstnrs

Questionnaire data as one single long-format tibble. We also export the qstnrs in all relevant formats to this repository's output/questionnaires directory, which is automatically deployed to qstnr.fokus.ag via Netlify.

qstnrs <-
  fokus::all_ballot_dates %>%
  magrittr::set_names(., .) |>
  purrr::map(fokus::cantons) |>
  purrr::imap(\(cantons, ballot_date) {

    cantons |>
      purrr::map(\(canton) {

        qstnr_tibble <-
          fokus:::gen_qstnr_tibble(ballot_date = ballot_date,
                                   canton = canton) |>
          fokus:::validate_qstnr_tibble()

        fokus::export_qstnr(qstnr_tibble = qstnr_tibble,
                            path = "output/questionnaires",
                            upload_to_g_drive = FALSE)
        qstnr_tibble
      }) |>
      purrr::list_rbind()
  }) |>
  purrr::list_rbind() |>
  # expand list cols
  tidyr::unnest(cols = any_of(fokus:::qstnr_item_keys_multival)) |>
  # modify some vars
  dplyr::mutate(
    ## complete `question_full`
    question_full = question_full %|% question,
    ## merge `question_intro_i/j` into `question_intro`
    question_intro = paste(tidyr::replace_na(data = question_intro_i,
                                             replace = ""),
                           tidyr::replace_na(data = question_intro_j,
                                             replace = "")),
    ## complete `*_common` vars with their ballot-date/canton-specific siblings
    question_common = question_common %|% question_full,
    variable_label_common = variable_label_common %|% variable_label) |>
  dplyr::select(-c(question_intro_i,
                   question_intro_j,
                   lvl,
                   i,
                   j)) |>
  dplyr::relocate(question_intro,
                  .before = question) |>
  # strip Markdown formatting from chr cols
  dplyr::mutate(dplyr::across(where(is.character),
                              pal::strip_md))

proposals

proposals <-
  fokus::combos_proposals() |>
  purrr::map(\(combo) {

    tibble::tibble(ballot_date = combo$ballot_date,
                   lvl = combo$lvl,
                   canton = combo$canton,
                   nr = combo$proposal_nr,
                   type = fokus::proposal_type(ballot_date = combo$ballot_date,
                                               lvl = combo$lvl,
                                               canton = combo$canton,
                                               proposal_nr = combo$proposal_nr),
                   name.de.short = fokus::proposal_name(ballot_date = combo$ballot_date,
                                                        lvl = combo$lvl,
                                                        canton = combo$canton,
                                                        proposal_nr = combo$proposal_nr,
                                                        type = "short"),
                   name.de.long = fokus::proposal_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       proposal_nr = combo$proposal_nr,
                                                       type = "long"))
  }) |>
  purrr::list_rbind()

elections

elections <- 
  fokus::combos_elections() |>
  purrr::map(\(combo) {

    tibble::tibble(ballot_date = combo$ballot_date,
                   lvl = combo$lvl,
                   canton = combo$canton,
                   prcd = combo$prcd,
                   nr = combo$election_nr,
                   name.de.short = fokus::election_name(ballot_date = combo$ballot_date,
                                                        lvl = combo$lvl,
                                                        canton = combo$canton,
                                                        prcd = combo$prcd,
                                                        election_nr = combo$election_nr,
                                                        type = "short"),
                   name.de.long = fokus::election_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       prcd = combo$prcd,
                                                       election_nr = combo$election_nr,
                                                       type = "long"),
                   name.de.body = fokus::election_name(ballot_date = combo$ballot_date,
                                                       lvl = combo$lvl,
                                                       canton = combo$canton,
                                                       prcd = combo$prcd,
                                                       election_nr = combo$election_nr,
                                                       type = "body"))
  }) |>
  purrr::list_rbind()

ballots

NOTES:

ballots <- list()

# define common constants
na_vals <- c("", "...", "-")

# add referendum proposal data
ballots$referendum <-
  fokus::combos_proposals() |>
  purrr::map(\(combo) {

    # read in data
    path_file <- glue::glue("data-raw/ballots/{combo$ballot_date}_{combo$canton}_{combo$lvl}_proposal_{combo$proposal_nr}.csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    data <-
      readr::read_delim(file = path_file,
                        delim = ";",
                        locale = readr::locale(date_names = "de",
                                               encoding = "latin1"),
                        na = na_vals,
                        col_types = readr::cols(.default = readr::col_character())) |>
      # tidy cols
      fokus::tidy_cols() |>
      # remove all-NA cols (`AUSL_SCHWEIZER` before e-voting suspension)
      dplyr::select(-where(\(x) all(is.na(x)))) |>
      # add remaining cols
      dplyr::mutate(lvl = combo$lvl,
                    canton = combo$canton,
                    proposal_nr = combo$proposal_nr,
                    .after = ballot_date)

    # check data integrity
    ballot_date_data <- unique(data$ballot_date)

    if (length(ballot_date_data) > 1L) {
      cli::cli_abort(paste0("The raw ballot CSV data file {.file {path_file}} contains data for multiple ballot dates: {.val {ballot_date_data}}. ",
                            "Please fix this and run again."))
    }
    if (ballot_date_data != combo$ballot_date) {
      cli::cli_abort("The raw ballot CSV data file {.file {path_file}} does not contain the data for ballot date {.val {combo$ballot_date}} but ",
                     "{.val {ballot_date_data}}. Please fix this and run again.")
    }

    data
  }) |>
  purrr::compact() |>
  purrr::list_rbind()

# add election data
## per-election datasets
ballots$election <-
  fokus::combos_elections() %>%
  magrittr::set_names(value = purrr::map_chr(.,
                                             \(combo) glue::glue("{combo$ballot_date}_{combo$canton}_{combo$lvl}_{combo$prcd}_{combo$election_nr}"))) |>
  purrr::imap(\(combo, election_str) {

    path_file <- glue::glue("data-raw/ballots/{combo$ballot_date}_{combo$canton}_{combo$lvl}_{combo$prcd}_election_{combo$election_nr}",
                            ifelse(combo$prcd == "majoritarian",
                                   "",
                                   ifelse(combo$lvl == "federal",
                                          "_votes",
                                          "_votes_apportioned")),
                            ".csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    if (combo$prcd == "majoritarian") {

      candidates_phrased <-
        fokus::phrase_election_candidate(ballot_date = combo$ballot_date,
                                         lvl = combo$lvl,
                                         canton = combo$canton,
                                         election_nr = combo$election_nr)
      result <-
        # read in all cols as chr
        readr::read_delim(file = path_file,
                          delim = ";",
                          locale = readr::locale(date_names = "de",
                                                 encoding = "latin1"),
                          na = na_vals,
                          col_types = readr::cols(.default = readr::col_character())) |>
        # remove garbage from raw col names
        dplyr::rename_with(.cols = everything(),
                           .fn = \(x) stringr::str_remove_all(string = x,
                                                              pattern = stringr::fixed("#"))) |>
        # tidy common cols
        fokus::tidy_cols() |>
        # ensure we renamed all non-candidate-specific col names
        pal::when(length(candidates_phrased) != length(stringr::str_subset(colnames(.), "^[[:upper:]_\\s]+$")) ~
                    cli::cli_abort(paste0("There are raw non-candidate-specific column names left. Please debug. Raw column names include: ",
                                          "{.vals {stringr::str_subset(colnames(.), '^[[:upper:]_\\s]+$')}}")),
                  ~ .) |>
        # rename candidate-specific cols
        dplyr::rename_with(.cols = -any_of(fokus:::dicts$colnames$ballots),
                           .fn = \(x) {

                             ix <-
                               candidates_phrased |>
                               stringi::stri_trans_general(id = "de-ASCII",
                                                           forward = TRUE) |>
                               purrr::map_int(\(x2) stringr::str_which(string = x2,
                                                                       pattern = stringr::regex(pattern =
                                                                                                  x |>
                                                                                                  stringi::stri_trans_general(id = "de-ASCII",
                                                                                                                              forward = TRUE) |>
                                                                                                  stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                                                                                                           replacement = "."),
                                                                                                ignore_case = TRUE)))
                             candidates_phrased[ix]
                           }) |>
        # convert remaining col types
        dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "ballot_date")),
                                    .fns = \(x) as.integer(as.numeric(x))))
    } else {

      parties_phrased <-
        fokus::election_parties(ballot_date = combo$ballot_date,
                                lvl = combo$lvl,
                                canton = combo$canton,
                                election_nr = combo$election_nr) %$%
        name.de.qstnr

      if (combo$lvl == "federal") {

        result <-
          readr::read_delim(file = path_file,
                            delim = ";",
                            locale = readr::locale(date_names = "de",
                                                   encoding = "latin1"),
                            na = na_vals,
                            col_types = readr::cols(.default = readr::col_character())) |>
          # tidy cols
          fokus::tidy_cols() |>
          # convert remaining col types
          dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "ballot_date")),
                                      .fns = \(x) as.integer(as.numeric(x))))
      } else {

        result <-
          readr::read_csv(file = path_file,
                          na = na_vals,
                          col_types = readr::cols(.default = readr::col_character())) |>
          # tidy cols
          fokus::tidy_cols() |>
          # convert remaining col types
          dplyr::mutate(dplyr::across(.cols = -any_of(c("entity", "electorate_total")),
                                      .fns = \(x) as.numeric(x)))
      }

      # rename party-specific cols
      parties_phrased_normalized <-
        parties_phrased |>
        stringi::stri_trans_general(id = "de-ASCII",
                                    forward = TRUE) |>
        stringr::str_replace_all(pattern = stringr::fixed("."),
                                 replacement = " ") |>
        stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                 replacement = "")

      party_col_names_normalized <-
        colnames(result) |>
        setdiff(y = fokus:::dicts$colnames$ballots) |>
        stringi::stri_trans_general(id = "de-ASCII",
                                    forward = TRUE) |>
        stringr::str_replace_all(pattern = "[^[:alnum:]\\s]",
                                 replacement = ".") |>
        # modify junior party names to match ours
        stringr::str_replace_all(pattern = "^(?i)j(bdp|cvp|evp|glp|gruene|mitte|svp)$",
                                 replacement = "Junge \\1") %>%
        stringr::str_replace_all(pattern = "^(?i)jfdp$",
                                 replacement = "Jungfreisinnige") %>%
        # modify remaining party names to match ours
        dplyr::case_match(.x = .,
                          "T65P"  ~ "TEAM65",
                          "Musik" ~ "Musikpartei",
                          .default = .)

      party_col_names_new <-
        party_col_names_normalized |>
        purrr::map_chr(\(col_name_normalized) {

          parties_phrased_normalized |>
            stringr::str_which(pattern = stringr::regex(pattern = paste0("\\b", col_name_normalized, "\\b"),
                                                        ignore_case = TRUE)) %>%
            magrittr::extract(parties_phrased, .)
        })

      colnames(result)[!(colnames(result) %in% fokus:::dicts$colnames$ballots)] <- party_col_names_new

      # accumulate cols where necessary
      col_names_duplicated <-
        colnames(result) |>
        table(exclude = NULL) %>%
        magrittr::extract(. > 1L) |>
        names()

      for (col_name_duplicated in col_names_duplicated) {
        ix <- which(colnames(result) == col_name_duplicated)
        col_new <- rowSums(result[, ix])
        result %<>% .[, -ix]
        result[[col_name_duplicated]] <- col_new
      }

      # ensure we renamed all cols
      colnames_raw <- setdiff(x = colnames(result),
                              y = c(fokus:::dicts$colnames$ballots,
                                    parties_phrased))

      if (length(colnames_raw) > 0L) {
        cli::cli_abort("There are {.val {length(colnames_raw)}} raw column names left. Please debug. Raw column names include: {.vals {colnames_raw}}")
      }
    }

    result
  }) |>
  purrr::compact()

## per-lvl electorate and turnout dataset for proportional elections
ballots$election$common <-
  fokus::combos_elections(prcds = "proportional",
                          incl_nr = FALSE) |>
  purrr::map_dfr(tibble::as_tibble_row) |>
  dplyr::select(-c(ballot_date, prcd)) |>
  unique() |>
  purrr::pmap(\(lvl, canton) {

    path_file <- glue::glue("data-raw/ballots/{canton}_{lvl}_proportional_elections_electorate.csv")

    if (!fs::file_exists(path_file)) {
      cli::cli_alert_warning(paste0("No raw ballot CSV data file found under {.file {path_file}}. Please add the necessary CSV file and run again in order to ",
                                    "generate the corresponding {.var ballots} results data."))
      return(NULL)
    }

    # read in all cols as chr
    readr::read_delim(file = path_file,
                      delim = ";",
                      locale = readr::locale(date_names = "de",
                                             encoding = "latin1"),
                      na = na_vals,
                      col_types = readr::cols(.default = readr::col_character())) |>
      fokus::tidy_cols() |>
      dplyr::mutate(lvl = lvl,
                    canton = canton,
                    # numbers are not apportioned, so we can use int
                    voters_total = as.integer(voters_total))
  }) |>
  purrr::list_rbind() |>
  dplyr::relocate(any_of(c("fso_id",
                           "ballot_date",
                           "lvl",
                           "canton")),
                  everything()) |>
  dplyr::mutate(dplyr::across(.cols = c(any_of("fso_id"),
                                        starts_with("electorate_"),
                                        starts_with("votes_"),
                                        contains("_votes_")),
                              .fns = as.integer))
# clean up
rm(na_vals)

Write data

Save all the bigger data objects to separate files under data/*.rda. Note that when documenting them, they mustn't be explicitly @exported since they're already automatically exported and thus available to package users.

usethis::use_data(qstnrs,
                  proposals,
                  elections,
                  ballots,
                  internal = FALSE,
                  overwrite = TRUE,
                  compress = "xz",
                  version = 3L)


zdaarau/fokus documentation built on Dec. 24, 2024, 10:47 p.m.