NOTES

The Referendum Database (RDB) currently offers a privately documented API available under services.c2d.ch with the following HTTP GET "endpoints":

Further notes:

INTERNAL

Avoid R CMD check notes

Undefined global objects used in magrittr pipes

Cf. https://github.com/tidyverse/magrittr/issues/29#issuecomment-74313262

utils::globalVariables(names = c(".",
                                 # tidyselect fns
                                 "all_of",
                                 "any_of",
                                 "everything",
                                 "matches",
                                 "starts_with",
                                 "where",
                                 ".x",
                                 # other
                                 "Alpha_2",
                                 "Alpha_3",
                                 "Alpha_4",
                                 "applicability_constraint",
                                 "archive",
                                 "Children",
                                 "children_tier_1",
                                 "children_tier_2",
                                 "children_tier_3",
                                 "Code",
                                 "committee_name",
                                 "Common_name",
                                 "content-disposition",
                                 "count",
                                 "country_code",
                                 "country_code_long",
                                 "country_code_continual",
                                 "country_name",
                                 "country_name_de",
                                 "country_name_long",
                                 "date_last_edited",
                                 "date_time_created",
                                 "date_time_last_active",
                                 "date_time_last_edited",
                                 "Date_withdrawn",
                                 "day",
                                 "electorate_abroad",
                                 "electorate_total",
                                 "files",
                                 "id",
                                 "id_official",
                                 "id_sudd",
                                 "id_sudd_prefix",
                                 "inst_has_precondition",
                                 "inst_has_urgent_legal_basis",
                                 "inst_is_assembly",
                                 "inst_is_binding",
                                 "inst_is_counter_proposal",
                                 "inst_legal_basis_type",
                                 "inst_object_author",
                                 "inst_object_legal_level",
                                 "inst_object_revision_extent",
                                 "inst_object_revision_modes",
                                 "inst_object_type",
                                 "inst_precondition_actor",
                                 "inst_precondition_decision",
                                 "inst_quorum_approval",
                                 "inst_quorum_turnout",
                                 "inst_topics_excluded",
                                 "inst_topics_only",
                                 "inst_trigger_actor",
                                 "inst_trigger_actor_level",
                                 "inst_trigger_threshold",
                                 "inst_trigger_time_limit",
                                 "inst_trigger_type",
                                 "is_draft",
                                 "is_former_country",
                                 "is_multi_valued",
                                 "is_opt",
                                 "is_testing_server",
                                 "ISO_Alpha_3",
                                 "items",
                                 "level",
                                 "lower_house_abstentions",
                                 "lower_house_no",
                                 "lower_house_yes",
                                 "month",
                                 "municipality",
                                 "n",
                                 "Name",
                                 "name_long",
                                 "number",
                                 "Official_name",
                                 "Parent",
                                 "parent_topic",
                                 "position_government",
                                 "ptype",
                                 "question",
                                 "question_en",
                                 "referendum_text_options",
                                 "remarks",
                                 "result",
                                 "rowid",
                                 "sources",
                                 "subnational_entity_code",
                                 "subnational_entity_name",
                                 "subterritories_no",
                                 "subterritories_yes",
                                 "sudd_prefix",
                                 "tags",
                                 "topic",
                                 "topic_tier_1",
                                 "topic_tier_2",
                                 "topic_tier_3",
                                 "topics_tier_1",
                                 "topics_tier_2",
                                 "topics_tier_3",
                                 "territory_name_de",
                                 "territory_name_de_short",
                                 "title_de",
                                 "title_fr",
                                 "title_en",
                                 "turnout",
                                 "type",
                                 "Type",
                                 "un_country_code",
                                 "un_region_tier_1_code",
                                 "un_region_tier_2_name",
                                 "un_region_tier_3_name",
                                 "upper_house_abstentions",
                                 "upper_house_no",
                                 "upper_house_yes",
                                 "url_sudd",
                                 "url_swissvotes",
                                 "value_labels",
                                 "value_scale",
                                 "variable_name",
                                 "variable_name_unnested",
                                 "variable_name_print",
                                 "variable_values",
                                 "value",
                                 "value_total",
                                 "votes",
                                 "votes_empty",
                                 "votes_invalid",
                                 "votes_no",
                                 "votes_per_subterritory",
                                 "votes_yes",
                                 "year"))

Unused imports

Define dummy functions that reference objects from the package's namespaces.

# katex is used in documentation dynamically via `\Sexpr`, so it has to be imported to avoid an error when rendering the corresponding help page
# TODO: since the `R CMD check` warning can be considered a bug in this case, investigate the situation and possibly submit bug report
has_katex <- function() {
  nchar(katex::example_math()) > 0L
}

Package load/unload

.onLoad <- function(libname, pkgname) {

  # clear pkgpins cache
  tryCatch(expr = pkgpins::clear_cache(board = pkgpins::board(pkg = pkgname),
                                       max_age = pal::pkg_config_val(key = "global_max_cache_age",
                                                                     pkg = pkgname)),
           error = function(e) cli::cli_alert_warning(text = "Failed to clear pkgpins cache on load of {.pkg {pkgname}}. Error message: {e$message}"))
}

Create custom CSS file for package vignettes

Assembles a custom CSS file by combining rmarkdown's default lightweight vignette stylesheet with our custom pkgdown CSS.

The R code chunk below is not included in the source package (purl = FALSE) and thus has to be manually executed in order to regenerate the custom CSS file.

rlang::check_installed("brio")

fs::path_package("rmarkdown/templates/html_vignette/resources/vignette.css",
                 package = "rmarkdown") %>%
  brio::read_file() %>%
  paste0("/* BEGIN custom CSS */\n",
         brio::read_file("pkgdown/extra.css"),
         "/* END custom CSS */\n") %>%
  brio::write_file(path = "vignettes/custom.css")

Functions

api_failure

api_failure <- function(parsed,
                        raw = NULL,
                        prefix = "") {

  env <- parent.frame(n = 2L)

  assign(x = "parsed",
         value = parsed,
         pos = env)

  msg_part_val <- ifelse(utils::hasName(parsed$error, "value"),
                         paste0(": ", paste0("{.var ", names(parsed$error$value), "}: {.warn ", parsed$error$value, "}",
                                             collapse = ", ")),
                         "")

  msg_part_error <- ifelse(utils::hasName(parsed, "error"),
                           paste0("error {.err {parsed$error$id}}", msg_part_val),
                           "{.err {.y}}.")

  cli_div_id <- cli::cli_div(theme = cli_theme)
  cli::cli_alert_warning(paste0(prefix, "The API server responded with ", msg_part_error),
                         .envir = env)
  if (!is.null(raw)) {
    cli::cli_alert_info("The following JSON payload was sent: {.content {raw}}")
  }
  cli::cli_end(id = cli_div_id)
}

as_fm_list

Convert to formula-list (mainly to be fed to dplyr::case_match()).

as_fm_list <- function(x) {

  purrr::imap(x,
              ~ rlang::new_formula(lhs = .y,
                                   rhs = .x,
                                   env = emptyenv()))
}

assemble_query_filter

NOTE: date is not stored as an actual MongoDB Date type in the database, so we can't filter on it. It has been requested to change this upstream.

#' Assemble MongoDB query filter document
#'
#' @param country_code The `country_code`(s) to be included. A character vector.
#' @param subnational_entity_name The `subnational_entity_name`(s) to be included. A character vector.
#' @param municipality The `municipality`(s) to be included. A character vector.
#' @param level The `level`(s) to be included. A character vector.
#' @param type The `type`(s) to be included. A character vector.
#' @param date_min The minimum `date` to be included. A [date][Date] or something coercible to.
#' @param date_max The maximum `date` to be included. A [date][Date] or something coercible to.
#' @param is_draft `TRUE` means to include only referendum entries with _draft_ status, `FALSE` to include only normal entries. Set to `NULL` in order to
#'   include both draft and normal entries.
#' @param date_time_created_min The minimum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_created_max The maximum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_min The minimum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_max The maximum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param query_filter A valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents) which allows for
#'   maximum control over what data is included. This takes precedence over all of the above listed parameters, i.e. if `query_filter` is provided, the
#'   parameters `r formals(assemble_query_filter) |> names() |> setdiff(c("query_filter", "base64_encode")) |> pal::enum_str(wrap = "\x60")` are ignored.
#' @param base64_encode Whether or not to [Base64](https://en.wikipedia.org/wiki/Base64)-encode the resulting query filter document. Note that the
#'   `query_filter` argument provided to other functions of this package must be Base64-encoded.
#'
#' @return A character scalar containing a valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents),
#'   [Base64](https://en.wikipedia.org/wiki/Base64)-encoded if `base64_encode = TRUE`.
#' @keywords internal
assemble_query_filter <- function(country_code = NULL,
                                  subnational_entity_name = NULL,
                                  municipality = NULL,
                                  level = NULL,
                                  type = NULL,
                                  date_min = NULL,
                                  date_max = NULL,
                                  is_draft = NULL,
                                  date_time_created_min = NULL,
                                  date_time_created_max = NULL,
                                  date_time_last_edited_min = NULL,
                                  date_time_last_edited_max = NULL,
                                  query_filter = NULL,
                                  base64_encode = TRUE) {

  checkmate::assert_string(query_filter,
                           null.ok = TRUE)
  checkmate::assert_flag(base64_encode)

  # assemble JSON query filter document if `query_filter` is not provided
  if (is.null(query_filter)) {

    purrr::map_chr(.x = country_code,
                   .f = checkmate::assert_choice,
                   choices = val_set$country_code,
                   null.ok = TRUE,
                   .var.name = "country_code")
    checkmate::assert_character(subnational_entity_name,
                                any.missing = FALSE,
                                null.ok = TRUE)
    checkmate::assert_character(municipality,
                                any.missing = FALSE,
                                null.ok = TRUE)
    purrr::map_chr(.x = level,
                   .f = checkmate::assert_choice,
                   choices = var_vals("level"),
                   null.ok = TRUE,
                   .var.name = "level")
    purrr::map_chr(.x = type,
                   .f = checkmate::assert_choice,
                   choices = var_vals("type"),
                   null.ok = TRUE,
                   .var.name = "type")
    checkmate::assert_flag(is_draft,
                           null.ok = TRUE)

    date_min %<>% lubridate::as_date()
    date_max %<>% lubridate::as_date()
    date_time_created_min %<>% lubridate::as_datetime(tz = "UTC")
    date_time_created_max %<>% lubridate::as_datetime(tz = "UTC")
    date_time_last_edited_min %<>% lubridate::as_datetime(tz = "UTC")
    date_time_last_edited_max %<>% lubridate::as_datetime(tz = "UTC")

    query_filter <-
      list(country_code = query_filter_in(country_code),
           canton = query_filter_in(subnational_entity_name),
           municipality = query_filter_in(municipality),
           level = query_filter_in(level),
           institution =
             type %>%
             pal::when(length(.) == 0L ~ .,
                       ~ dplyr::case_match(.x = .,
                                           "citizens' assembly" ~ "citizen assembly",
                                           .default = .) %>%
                         stringr::str_to_sentence()) %>%
             query_filter_in(),
           date = query_filter_date(min = date_min,
                                    max = date_max),
           draft = is_draft,
           created_on = query_filter_datetime(min = date_time_created_min,
                                              max = date_time_created_max),
           date_time_last_edited = query_filter_datetime(min = date_time_last_edited_min,
                                                         max = date_time_last_edited_max)) %>%
      # remove `NULL` elements
      purrr::compact() %>%
      # convert to JSON
      jsonlite::toJSON(POSIXt = "ISO8601",
                       auto_unbox = TRUE,
                       digits = NA,
                       pretty = FALSE)
  }

  if (base64_encode) {
    query_filter %<>% jsonlite::base64_enc()
  }

  query_filter
}

assert_api_success

assert_api_success <- function(x) {

  if (!is.null(x$error$id)) {
    cli_div_id <- cli::cli_div(theme = cli_theme)
    cli::cli_abort("API server responded with error {.err {x$error$id}}")
    cli::cli_end(id = cli_div_id)
  }

  invisible(x)
}

assert_cols_absent

assert_cols_absent <- function(data,
                               type) {

  type <- rlang::arg_match0(arg = type,
                            values = unique(unlist(data_cols_absent$type)))
  cols <-
    data_cols_absent %>%
    dplyr::filter(purrr::map_lgl(type,
                                 ~ !!type %in% .x)) %$%
    col

  col_names <- colnames(data)

  purrr::walk(cols,
              ~ {

                if (.x %in% col_names) {

                  data_cols_absent %>%
                    dplyr::filter(col == !!.x & purrr::map_lgl(type,
                                                               ~ !!type %in% .x)) %$%
                    msg %>%
                    cli::cli_abort()
                }
              })

  invisible(data)
}

assert_cols_valid

assert_cols_valid <- function(data,
                              type = c("validate", "add", "edit"),
                              action = cli::cli_abort,
                              cli_progress_id = NULL) {

  type <- rlang::arg_match(type)

  non_na_col_names <- c("id",
                        "country_code",
                        "date",
                        "level")
  na_col_names <-
    data %>%
    dplyr::select(any_of(non_na_col_names)) %>%
    dplyr::filter(dplyr::if_any(.cols = everything(),
                                .fns = is.na)) %>%
    dplyr::select(where(~ anyNA(.x))) %>%
    colnames()
  n_na_col_names <- length(na_col_names)

  if (n_na_col_names) {
    cli::cli_progress_done(id = cli_progress_id,
                           result = "failed")
    action("Detected {n_na_col_names} column{?s} in {.arg data} that contain forbidden {.val NA}s: {.var {na_col_names}}")
  }

  ## check `id`
  if ("id" %in% colnames(data) && anyDuplicated(data$id)) {
    cli::cli_progress_done(id = cli_progress_id,
                           result = "failed")
    action("Duplicated {.var id}s detected. IDs must be unique.")
  }

  ## check `date`
  if ("date" %in% colnames(data)) {

    check <- checkmate::check_date(data$date,
                                   any.missing = FALSE)
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$date}. {check}")
    }
  }

  ## check `level`
  if ("level" %in% colnames(data)) {

    check <- checkmate::check_subset(as.character(data$level),
                                     choices = var_vals("level"))
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$level}. {check}")
    }
  }

  ## check `country_code`
  if ("country_code" %in% colnames(data)) {

    check <- checkmate::check_subset(as.character(data$country_code),
                                     choices = val_set$country_code)
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$country_code}. {check}")
    }

    ## ensure `position_government` is present for additions when `country_code = "CH" & level = "national"`
    if (type == "add"
        && (data %>%
            dplyr::filter(country_code == "CH" & level == "national") %>%
            nrow() %>%
            magrittr::is_greater_than(0L))
        && !("position_government" %in% colnames(data))) {

      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums with {.code country_code = \"CH\" & level = \"national\"} present in {.arg data} but column {.var ",
                    "position_government} is missing."))
    }
  }

  ## check `subnational_entity_name`
  ## TODO: check `subnational_entity_code` instead once it's available
  if (any(data[["level"]] != "national")) {

    if (!("subnational_entity_name" %in% colnames(data))) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums of {.var level} below {.val national} present in {.arg data} but column {.var subnational_entity_name} is missing."))
    }

    ix_missing_subnational_entities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level != "national" & is.na(subnational_entity_name)) %$%
      rowid

    n_missing_subnational_entities <- length(ix_missing_subnational_entities)

    if (n_missing_subnational_entities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_missing_subnational_entities} row{?s} in {.arg data} {?is/are} missing a {.var subnational_entity_name}. Affected {?is/are} ",
                    "the row{?s} with ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
    }
  }
  if ("subnational_entity_name" %in% colnames(data)) {

    ix_illegal_subnational_entities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level == "national" & !is.na(subnational_entity_name)) %$%
      rowid

    n_illegal_subnational_entities <- length(ix_illegal_subnational_entities)

    if (n_illegal_subnational_entities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_subnational_entities} row{?s} in {.arg data} {?has/have} a {.var subnational_entity_name} set although they are on the ",
                    "national level. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_subnational_entities}}."))
    }
  }

  ## check `municipality`
  if (any(data[["level"]] == "local")) {

    if (!("municipality" %in% colnames(data))) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums of {.var level = \"local\"} present in {.arg data} but column {.var municipality} is missing."))
    }

    ix_missing_municipalities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level == "local" & is.na(municipality)) %$%
      rowid

    n_missing_municipalities <- length(ix_missing_municipalities)

    if (n_missing_municipalities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_missing_municipalities} row{?s} in {.arg data} {?is/are} missing a {.var municipality}. Affected {?is/are} the row{?s} with ",
                    "ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
    }
  }
  if ("municipality" %in% colnames(data)) {

    ix_illegal_municipalities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level != "local" & !is.na(municipality)) %$%
      rowid

    n_illegal_municipalities <- length(ix_illegal_municipalities)

    if (n_illegal_municipalities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_municipalities} row{?s} in {.arg data} {?has/have} a {.var municipality} set although they are not on the local level. ",
                    "Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_municipalities}}."))
    }
  }

  ## check variables that are only meant to be set for Swiss national referendums
  ## TODO: Remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved.
  ### `votes_per_subterritory`
  if (all(c("votes_per_subterritory", "level", "country_code") %in% colnames(data))) {

    ix_illegal_votes_per_subterritory <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter((level != "national" | country_code != "CH") & !purrr::map_lgl(votes_per_subterritory, is.null)) %$%
      rowid

    n_illegal_votes_per_subterritory <- length(ix_illegal_votes_per_subterritory)

    if (n_illegal_votes_per_subterritory) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_votes_per_subterritory} row{?s} in {.arg data} {?has/have} a {.var position_government} set although they are not Swiss ",
                    "national referendums. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_votes_per_subterritory}}."))
    }
  }
  ### non-list vars
  c("lower_house_yes",
    "lower_house_no",
    "lower_house_abstentions",
    "upper_house_yes",
    "upper_house_no",
    "upper_house_abstentions",
    "position_government") %>%
    purrr::walk(function(var_name) {

      if (all(c(var_name, "level", "country_code") %in% colnames(data))) {

        ix_illegal <-
          data %>%
          tibble::rowid_to_column() %>%
          dplyr::filter((level != "national" | country_code != "CH") & !is.na(!!as.symbol(var_name))) %$%
          rowid

        n_illegal <- length(ix_illegal)

        if (n_illegal) {
          cli::cli_progress_done(id = cli_progress_id,
                                 result = "failed")
          action(paste0("{n_illegal} row{?s} in {.arg data} {?has/have} a {.var {var_name}} set although {cli::qty(n_illegal)}{?it is not a/they are not} ",
                        "Swiss national referendum{?s}. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal}}."))
        }
      }
    })

  invisible(data)
}

assert_content

assert_content <- function(x) {

  if (!nchar(x)) {
    cli::cli_abort("Received empty response from RDB API. Please debug.",
                   .internal = TRUE)
  }

  invisible(x)
}

auth_session

TODO:

#' Authenticate a user session for the [RDB API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#1-reflexive-routes)
#'
#' Creates a new user session token if necessary. The token is stored in the R option `rdb.user_session_tokens`, a [tibble][tibble::tbl_df] with the columns
#' `email`, `token` and `date_time_last_active`.
#' 
#' `email` and `password` default to the [package configuration options][pkg_config] `api_username` and `api_password` respectively.
#'
#' User session tokens expire automatically after 15 days of inactivity.
#'
#' @inheritParams url_api
#' @param email The e-mail address of the user for which a session should be created. A character scalar.
#' @param password The password of the user for which a session should be created. A character scalar.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return The user session token as a character scalar, invisibly.
#' @keywords internal
auth_session <- function(email = pal::pkg_config_val(key = "api_username",
                                                     pkg = this_pkg),
                         password = pal::pkg_config_val(key = "api_password",
                                                        pkg = this_pkg),
                         use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg),
                         quiet = FALSE) {

  checkmate::assert_string(email,
                           min.chars = 3L)
  checkmate::assert_string(password,
                           min.chars = 1L)
  checkmate::assert_flag(quiet)

  # get existing tokens or initialize empty tibble
  tokens <-
    getOption("rdb.user_session_tokens") %>%
    pal::when(all(c("email", "token", "date_time_last_active") %in% colnames(.)) ~ .,
              ~ tibble::tibble(email = character(),
                               token = character(),
                               is_testing_server = logical(),
                               date_time_last_active = as.POSIXct(NULL)))
  # extract latest token
  token <- tokens %>% dplyr::filter(email == !!email & is_testing_server == !!use_testing_server)

  if (nrow(token)) {
    token %<>% dplyr::filter(date_time_last_active == max(date_time_last_active))
    token %<>% .[1L, ]
  }

  # ensure token is not expired (checked if older than 14 days), else set to `NULL`
  if (nrow(token) &&
      checkmate::test_string(token$token, min.chars = 1L) &&
      ((token$date_time_last_active > clock::add_days(clock::date_now(zone = "UTC"), -14L)) || !is_session_expired(token = token$token,
                                                                                                                   use_testing_server = use_testing_server))) {
    token <- token$token

  } else {
    token <- NULL
  }

  # create new session if necessary
  if (is.null(token)) {

    if (!quiet) {
      status_msg <- "Authenticating new user session"
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }

    token <-
      httr::RETRY(verb = "POST",
                  url = url_api("users/session",
                                .use_testing_server = use_testing_server),
                  config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                  times = 3L,
                  encode = "json",
                  body = list(email = email,
                              password = password)) %>%
      # ensure we actually got a JSON response
      pal::assert_mime_type(mime_type = "application/json",
                            msg_suffix = mime_error_suffix) %>%
      # parse response
      httr::content(as = "parsed") %$%
      token

    if (!quiet) {
      cli::cli_progress_done()
    }
  }

  # update `rdb.user_session_tokens` option
  options(rdb.user_session_tokens =
            tokens %>%
            dplyr::filter(token != !!token) %>%
            tibble::add_row(email = email,
                            token = token,
                            is_testing_server = use_testing_server,
                            date_time_last_active = clock::date_now(zone = "UTC")))
  # return token
  invisible(token)
}

md_link_codebook

md_link_codebook <- function(var_names) {

  purrr::map_chr(var_names,
                 \(x) paste0("[`", x, "`](", url_codebook(x), ")"))
}

country_code_to_name

country_code_to_name <- function(country_code) {

  purrr::map2_chr(.x = country_code,
                  .y = nchar(as.character(country_code)) > 2L,
                  .f = ~ {

                    if (isTRUE(.y)) {

                      result <-
                        data_iso_3166_3 %>%
                        dplyr::filter(Alpha_4 == !!.x) %$%
                        name_short

                    } else {

                      result <-
                        data_iso_3166_1 %>%
                        dplyr::filter(Alpha_2 == !!.x) %$%
                        name_short
                    }

                    if (length(result) == 0L) {
                      result <- NA_character_
                    }

                    result
                  })
}

field_to_var_name

field_to_var_name <- function(x) {

  x %>% purrr::map_chr(~ var_names[[.x]] %||% .x)
}

derive_country_vars

TODO:

NOTES:

derive_country_vars <- function(country_code,
                                date) {

  country_code %<>% as.character()
  subnational_entity_code <- NA_character_

  # handle subnational entities
  ## Ascension
  if (country_code == "AC") {

    country_code <- "SH"
    subnational_entity_code <- "SH-AC"
  }

  # assign canonical pseudo codes
  ## Kosovo
  country_code %<>% dplyr::case_match(.x = .,
                                      "KS" ~ "XK",
                                      .default = .)
  data_former <-
    data_iso_3166_3 %>%
    dplyr::filter(Alpha_2 == !!country_code & !!date <= (clock::add_years(Date_withdrawn, 50L))) %>%
    dplyr::filter(Date_withdrawn == pal::safe_max(Date_withdrawn))

  is_former <- nrow(data_former) > 0L
  is_current <- !is_former && country_code %in% data_iso_3166_1$Alpha_2

  if (!(is_former || is_current) && !(country_code %in% country_codes_sudd_invalid)) {
    cli::cli_alert_warning("Neither ISO 3166-1 alpha-2 nor ISO 3166-3 alpha-4 {.var country_code} found for {.val {country_code}}.")
  }

  country_code <-
    country_code %>%
    pal::when(is_former ~
                data_former %>%
                dplyr::filter(Date_withdrawn == min(Date_withdrawn)) %>%
                assertr::verify(nrow(.) == 1L) %$%
                Alpha_4,
              is_current ~
                country_code,
              ~
                NA_character_)

  tibble::tibble(country_code = country_code,
                 country_name = country_code_to_name(country_code),
                 is_former_country = is_former,
                 subnational_entity_code = subnational_entity_code)
}

drop_disabled_vars

drop_disabled_vars <- function(data,
                               to_drop) {

  to_drop_present <- intersect(to_drop, colnames(data))
  n_to_drop_present <- length(to_drop_present)

  if (n_to_drop_present) {

    cli::cli_alert_warning(paste0("The {cli::qty(n_to_drop_present)} column{?s} {.var {to_drop_present}} in {.arg data} are ignored because setting/altering ",
                                  "the corresponding values is disabled."))

    data %<>% dplyr::select(-any_of(to_drop))
  }

  data
}

drop_implicit_vars

drop_implicit_vars <- function(data,
                               type = c("add", "edit")) {

  type <- rlang::arg_match(type)

  to_drop <-
    data_cols_absent %>%
    dplyr::filter(purrr::map_lgl(type,
                                 ~ !!type %in% .x)) %$%
    col

  data %>% dplyr::select(-any_of(to_drop))
}

drop_non_applicable_vars

drop_non_applicable_vars <- function(data) {

  if ("level" %in% colnames(data)) {

    if (data$level != "local") {
      data %<>% dplyr::select(-any_of("municipality"))
    }
    if (data$level == "national") {
      data %<>% dplyr::select(-any_of("subnational_entity_name"))
    }
    # TODO: remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved
    if (data$level != "national" || data$country_code != "CH") {
      data %<>% dplyr::select(-any_of(c("votes_per_subterritory",
                                        "lower_house_yes",
                                        "lower_house_no",
                                        "lower_house_abstentions",
                                        "upper_house_yes",
                                        "upper_house_no",
                                        "upper_house_abstentions",
                                        "position_government")))
    }
  }

  data %<>% dplyr::select(-any_of(c(
    "files",
    "is_former_country",
    # TODO: remove this as soon as [issue #81](https://github.com/zdaarau/c2d-app/issues/81) is fixed
    "sources"
  )))

  data
}

fct_flip

fct_flip <- function(x) {

  checkmate::assert_factor(x,
                           n.levels = 2L)
  flip_map <-
    levels(x) %>%
    magrittr::set_names(value = rev(.)) %>%
    as.list()

  x %>% forcats::fct_recode(!!!flip_map)
}

flatten_array_as_is

A helper function to apply to list elements before converting the list to JSON using jsonlite::toJSON(auto_unbox = TRUE).

flatten_array_as_is <- function(x) {

  x %<>% unlist()

  if (!is.null(x)) {
    x %<>% I()
  }

  x
}

!httr_config

Before 2021-05-25, the RDB API server didn't provide the intermediate R3 certificate (issued by Let's Encrypt) and since curl doesn't support Authority Information Access yet to automatically discover it, we had to manually specify it. Thus, the certificate was shipped with this package (found under certs/3479778542.crt). Because this isn't needed anymore, the cert as well as the code chunk below (purl = FALSE) are ignored from package builds but kept in case they are needed again at some point in the future.

httr_config <- function() {

  httr::config(cainfo = fs::path_package(package = this_pkg,
                                         "certs", "3479778542.crt"))
}

is_session_expired

is_session_expired <- function(token,
                               use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                        pkg = this_pkg)) {
  response <-
    httr::RETRY(verb = "GET",
                url = url_api("users/profile",
                              .use_testing_server = use_testing_server),
                config = httr::add_headers(Authorization = paste("Bearer", token),
                                           Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # parse response
    httr::content(as = "parsed")

  is.null(response[["profile"]])
}

lower_non_abbrs

#' Lowercase non-abbreviations
#'
#' @param x A character vector.
#'
#' @return A character vector of the same length as `x`.
#' @keywords internal
lower_non_abbrs <- function(x) {

  x %>%
    stringr::str_split(pattern = "\\b") %>%
    purrr::map_chr(~ .x %>%
                     dplyr::if_else(stringr::str_detect(string = .,
                                                        pattern = "^[^[:lower:]]+$"),
                                    .,
                                    stringr::str_to_lower(.)) %>%
                     paste0(collapse = ""))
}

order_rfrnd_cols

order_rfrnd_cols <- function(data) {

  data %>% dplyr::relocate(any_of(rfrnd_cols_order))
}

parse_datetime

NOTES:

So far, date-times returned by the RDB API were encountered in 3 different formats:

  1. As MongoDB Date type in canonical mode:

    json "created_on": { "$date": { "$numberLong": "-3280780800000" } }

  2. As MongoDB Date type in relaxed mode:

    json "created_on": { "$date": "2022-03-16T13:52:10.907Z" }

  3. In a format similar to 1) whose name/definition I couldn't find:

    json "created_on": { "$date": -3280780800000 }

Format 3) was the first one encountered, formats 1) and 2) were encountered on the API's testing instance.

parse_datetime <- function(x) {

  x %<>% unlist(use.names = FALSE)

  if (is.character(x) && stringr::str_detect(string = x,
                                             pattern = "^-?\\d+$",
                                             negate = TRUE)) {
    result <-
      x %>%
      clock::naive_time_parse(format = "%Y-%m-%dT%H:%M:%SZ",
                              precision = "millisecond") %>%
      clock::as_date_time(zone = "UTC")

  } else {

    result <-
      as.numeric(x) %>%
      magrittr::divide_by(1000L) %>%
      as.POSIXct(origin = "1970-01-01",
                 tz = "UTC")
  }

  result
}

plot_share_per_period

NOTES:

plot_share_per_period <- function(data_freq,
                                  x,
                                  period) {
  rlang::check_installed("plotly",
                         reason = pal::reason_pkg_required())

  grid_step <- switch(EXPR = period,
                      week = 4L,
                      year = 50L,
                      decade = 50L,
                      century = 100L,
                      1L)

  grid_x <- seq(from = ceiling(pal::safe_min(data_freq[[period]])[1L] / grid_step) * grid_step,
                to = floor(pal::safe_max(data_freq[[period]])[1L] / grid_step) * grid_step,
                by = grid_step)

  plotly::plot_ly(data = data_freq,
                  type = "scatter",
                  mode = "none",
                  stackgroup = "one",
                  groupnorm = "percent",
                  x = ~eval(as.symbol(period)),
                  y = ~n,
                  name = ~eval(as.symbol(x))) %>%
    plotly::layout(hovermode = "x",
                   legend = list(orientation = "h"),
                   xaxis = list(dtick = switch(EXPR = period,
                                               week = 1L,
                                               month = 1L,
                                               quarter = 1L,
                                               year = 10L,
                                               decade = 10L,
                                               century = 100L),
                                showgrid = FALSE,
                                ticklabelstep = switch(EXPR = period,
                                                       week = 4L,
                                                       month = 1L,
                                                       quarter = 1L,
                                                       year = 5L,
                                                       decade = 5L,
                                                       century = 1L),
                                range = switch(EXPR = period,
                                               week = c(1L, 53L),
                                               month = c(1L, 12L),
                                               quarter = c(1L, 4L),
                                               NULL),
                                ticks = "outside",
                                title = list(text = NULL)),
                   yaxis = list(fixedrange = TRUE,
                                hoverformat = ".1f",
                                showgrid = FALSE,
                                ticksuffix = "\u2009% ",
                                title = list(text = NULL)),
                   # draw custom grid
                   shapes =
                     grid_x %>%
                     purrr::map(~ list(type = "line",
                                       y0 = 0L,
                                       y1 = 1L,
                                       yref = "paper",
                                       x0 = .x,
                                       x1 = .x,
                                       line = list(color = "#fff",
                                                   width = 0.2))))
}

restore_topics

NOTE that this function doesn't restore redundant information (parent-tier topics). Once issue #41 is resolved, we can deprecate this anyway.

restore_topics <- function(topics_tier_1,
                           topics_tier_2,
                           topics_tier_3) {
  list(topics_tier_1,
       topics_tier_2,
       topics_tier_3) %>%
    purrr::pmap(~ {

      ..1 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_1")
      ..2 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_2")
      ..3 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_3")

      topics_hierarchy <- hierarchize_topics(tibble::tibble(topics_tier_1 = list(..1),
                                                            topics_tier_2 = list(..2),
                                                            topics_tier_3 = list(..3)))

      topics <- topics_hierarchy$topic_tier_3 %>% setdiff(NA_character_)

      if (length(topics) < 3L) {

        topics <-
          topics_hierarchy %>%
          dplyr::filter(is.na(topic_tier_3)) %$%
          topic_tier_2 %>%
          setdiff(NA_character_) %>%
          c(topics)
      }

      if (length(topics) < 3L) {

        topics <-
          topics_hierarchy %>%
          dplyr::filter(is.na(topic_tier_3) & is.na(topic_tier_2)) %$%
          topic_tier_1 %>%
          setdiff(NA_character_) %>%
          c(topics)
      }

      topics
    })
}

topic_frequency

Counts number of topic occurences. Useful to create sunburst charts via Plotly.

topic_frequency <- function(topics,
                            tier) {
  topics %>%
    purrr::list_c(ptype = character()) %>%
    factor(levels = topics(tiers = tier)) %>%
    tibble::tibble(topic = .) %>%
    dplyr::group_by(topic) %>%
    dplyr::summarise(n = dplyr::n())
}

tidy_rfrnds

TODO:

NOTES:

#' Tidy "raw" RDB API referendum data
#'
#' Converts the "raw" MongoDB data from the RDB API to the tidied [rfrnds()] schema.
#'
#' You can reverse this function again using [untidy_rfrnds()].
#'
#' @param data The MongoDB data as a list (converted from the JSON returned by the RDB API using [jsonlite::fromJSON()]).
#' @param tidy Whether or not to tidy the referendum data, i.e. apply various data cleansing tasks and add additional variables. If `FALSE`, the raw MongoDB
#'   referendum data will only be modified just enough to be able to return it as a [tibble][tibble::tbl_df]. Note that untidy data doesn't conform to the 
#'   [codebook][data_codebook] (i.a. different variable names).
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @keywords internal
tidy_rfrnds <- function(data,
                        tidy = TRUE) {

  checkmate::assert_flag(tidy)
  this_env <- rlang::current_env()

  data %<>%
    # unnest columns and ensure list type for multi-value columns
    # NOTE that despite of the [speed-up in v1.1.4](https://github.com/tidyverse/tidyr/releases/tag/v1.1.4), `tidyr::unnest()` is still much slower than our
    # custom function
    purrr::map(.f = function(l,
                             category_names = names(l$categories),
                             context_names = names(l$context),
                             title_langs = names(l$title)) {

      for (name in category_names) {
        l[[paste0("categories.", name)]] <- l$categories[[name]]
      }

      for (name in context_names) {
        l[[paste0("context.", name)]] <- l$context[[name]]
      }

      for (lang in title_langs) {
        l[[paste0("title.", lang)]] <- l$title[[lang]]
      }

      l$categories <- NULL
      l$context <- NULL
      l$title <- NULL

      for (name in c("tags",
                     "categories.action",
                     "categories.special_topics",
                     "categories.excluded_topics")) {
        l[[name]] %<>% purrr::list_c(ptype = character()) %>% list()
      }

      for (name in c("archive",
                     "files",
                     "context.votes_per_canton")) {
        l[[name]] %<>% list()
      }

      l
    }) %>%
    # drop empty fields
    purrr::modify_depth(.depth = 1L,
                        .f = purrr::compact) %>%
    # convert to tibble
    purrr::map(tibble::as_tibble_row) %>%
    purrr::list_rbind()

  # tidy data
  if (nrow(data) > 0L && tidy) {

    data %<>%
      # rename variables (mind that the MongoDB-based API doesn't demand a fixed schema)
      pal::rename_from(dict = var_names) %>%
      # create/recode variables
      dplyr::mutate(
        # ensure all supposed to floating-point numbers are actually of type double (JSON API is not reliable in this respect)
        dplyr::across(any_of(c("subterritories_no",
                               "subterritories_yes",
                               # TODO: remove/adapt next two lines once [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
                               "date_time_created"["date_time_created" %in% colnames(.)
                                                   && any(purrr::map_lgl(.$date_time_created, is.numeric))],
                               "date_time_last_edited"["date_time_last_edited" %in% colnames(.)
                                                       && any(purrr::map_lgl(.$date_time_last_edited, is.numeric))])),
                      ~ purrr::map_dbl(.x, ~ if (is.null(.x)) NA_real_ else as.double(.x))),

        # use explicit NA values
        dplyr::across(where(is.integer),
                      ~ dplyr::if_else(.x %in% c(-1L, -2L),
                                       NA_integer_,
                                       .x)),
        dplyr::across(where(is.character),
                      ~ dplyr::if_else(.x %in% c("", "-1", "-2"),
                                       NA_character_,
                                       .x)),
        dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
                      ~ dplyr::if_else(.x %in% c(-1.0, -2.0),
                                       NA_real_,
                                       .x)),
        dplyr::across(any_of("result"),
                      ~ dplyr::if_else(.x %in% c("Unknown", "Not provided"),
                                       NA_character_,
                                       .x)),
        # convert all values to lowercase
        ## vectors
        dplyr::across(any_of(c("result",
                               "type",
                               "inst_legal_basis_type",
                               "inst_object_type",
                               "inst_object_legal_level",
                               "inst_object_revision_extent",
                               "inst_trigger_type",
                               "inst_trigger_actor_level",
                               "inst_trigger_time_limit",
                               "inst_quorum_approval",
                               "inst_precondition_decision")),
                      stringr::str_to_lower),
        ## lists
        dplyr::across(any_of(c("inst_object_revision_modes",
                               "inst_topics_only",
                               "inst_topics_excluded")),
                      ~ purrr::map(.x = .x,
                                   .f = stringr::str_to_lower)),

        # convert only non-abbreviated values to lowercase
        dplyr::across(any_of(c("inst_object_author",
                               "inst_trigger_actor",
                               "inst_precondition_actor")),
                      ~ purrr::map_chr(.x = .x,
                                       .f = lower_non_abbrs)),

        # specific recodings
        ## binary (dummies)
        dplyr::across(any_of("position_government"),
                      ~ dplyr::case_when(.x == "Acceptance" ~ "yes",
                                         .x == "Rejection" ~ "no",
                                         .default = NA_character_)),
        dplyr::across(any_of("inst_has_urgent_legal_basis"),
                      ~ dplyr::case_when(.x == "Urgent" ~ TRUE,
                                         .x == "Normal" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_binding"),
                      ~ dplyr::case_when(.x == "Binding" ~ TRUE,
                                         .x == "Non-binding" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_counter_proposal"),
                      ~ dplyr::case_when(.x == "Yes" ~ TRUE,
                                         .x == "No" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_assembly"),
                      ~ dplyr::case_when(.x == "Assembly" ~ TRUE,
                                         .x == "Ballot" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_has_precondition"),
                      ~ dplyr::case_when(.x == "Exists" ~ TRUE,
                                         .x == "Does not exist" ~ FALSE,
                                         .default = NA)),
        ## nominal
        ### flatten `id`
        id = purrr::list_c(id,
                           ptype = character()),
        ### split `tags` into separate per-tier vars
        topics_tier_1 = tags %>% purrr::map(infer_topics,
                                            tier = 1L),
        topics_tier_2 = tags %>% purrr::map(infer_topics,
                                            tier = 2L),
        topics_tier_3 = tags %>% purrr::map(~ .x[.x %in% topics_tier_3_]),
        ### various cleanups
        dplyr::across(any_of("type"),
                      ~ dplyr::case_match(.x = .x,
                                          "citizen assembly" ~ "citizens' assembly",
                                          "not provided"     ~ NA_character_,
                                          .default = .x)),
        dplyr::across(any_of(c("inst_trigger_actor",
                               "inst_object_author")),
                      ~ dplyr::case_match(.x = .x,
                                          "institution" ~ "other institution",
                                          .default = .x)),
        dplyr::across(any_of("inst_object_type"),
                      ~ dplyr::case_match(.x = .x,
                                          "legal text (ausformulierter vorschlag)" ~ "legal text (formulated proposal)",
                                          "legal text (allg. anregung)"            ~ "legal text (general proposal)",
                                          .default = .x)),
        dplyr::across(any_of("inst_topics_only"),
                      ~ purrr::map(.x = .x,
                                   .f = \(x) dplyr::case_match(.x = x,
                                                               "infrastructural act"                ~ "infrastructural acts",
                                                               "competence shift"                   ~ "competence shifts",
                                                               "financial act"                      ~ "financial acts",
                                                               "financial act (expenses)"           ~ "financial acts (expenses)",
                                                               "financial act (taxes)"              ~ "financial acts (taxes)",
                                                               "financial act (obligations)"        ~ "financial acts (obligations)",
                                                               "total revision of the constitution" ~ "total revisions of the constitution",
                                                               .default = x))),
        dplyr::across(any_of("inst_topics_excluded"),
                      ~ purrr::map(.x = .x,
                                   .f = \(x) dplyr::case_match(.x = x,
                                                               "budget"                   ~ "budgets",
                                                               "parliamentary competence" ~ "everything within parliamentary competence",
                                                               .default = x))),
        dplyr::across(any_of("inst_quorum_turnout"),
                      ~ stringr::str_replace_all(string = .x,
                                                 pattern = c("^(\\s+)?>(\\s+)?" = ">\u202f",
                                                             "(\\s+)?%(\\s+)?$" = "\u202f%"))),
        ## ordinal
        ## interval
        # TODO: Remove else-clauses once [this](https://github.com/zdaarau/c2d-app/commit/6b72d1928e0182f01b188f3973ba15482fc8c04a) is deployed to
        #       production
        date = if (is.list(date)) {
          clock::as_date(parse_datetime(date))
        } else {
          clock::date_parse(date)
        },
        dplyr::across(any_of(c("date_time_created",
                               "date_time_last_edited")),
                      parse_datetime),
        ## undefined
        files = files %>% purrr::map(~ .x %>% purrr::map(~ .x %>%
                                                           # unnest and restore `date`
                                                           purrr::modify_in(.where = "date",
                                                                            .f = parse_datetime) %>%
                                                           # change subvariable names
                                                           pal::rename_from(dict = sub_var_names$files))))

    # complement `id_official` and `id_sudd` (a two-letter country code plus a 6-digit number) by old `number`
    # TODO: once [issue #?](https://github.com/zdaarau/c2d-app/issues/?) is resolved:
    #       - correct this upstream using `edit_rfrnds()`
    #       - remove corresponding code below
    #       - file issue to completely get rid of field `number`
    if ("number" %in% colnames(data)) {

      data %<>% dplyr::mutate(number = dplyr::if_else(number %in% c("0", ""),
                                                      NA_character_,
                                                      number),
                              dplyr::across(any_of("id_official"),
                                            ~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\d"),
                                                             number,
                                                             .x)),
                              dplyr::across(any_of("id_sudd"),
                                            ~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\D"),
                                                             # everything beyond the 8th char seems to be manually added -> strip!
                                                             stringr::str_sub(string = number,
                                                                              end = 8L),
                                                             .x)))
    }

    # ensure `id_official` and `id_sudd` are present
    if (!("id_official" %in% colnames(data))) data$id_official <- NA_character_
    if (!("id_sudd" %in% colnames(data))) data$id_sudd <- NA_character_

    # TODO: remove this once [issue #]() has been resolved
    # create `inst_is_variable/divisible` if necessary
    if ("categories.referendum_text_options" %in% colnames(data)) {

      if (!("inst_is_variable" %in% colnames(data))) {
        data %<>% dplyr::mutate(inst_is_variable = dplyr::case_when(
          categories.referendum_text_options %in% c("Variants possible", "Variants / splitting up possible") ~ TRUE,
          is.na(categories.referendum_text_options)                                                          ~ NA,
          .default                                                                                           = FALSE
        ))
      }
      if (!("inst_is_divisible" %in% colnames(data))) {
        data %<>% dplyr::mutate(inst_is_divisible = dplyr::case_when(
          categories.referendum_text_options %in% c("Splitting up possible", "Variants / splitting up possible") ~ TRUE,
          is.na(categories.referendum_text_options)                                                              ~ NA,
          .default                                                                                               = FALSE
        ))
      }
    }

    # ensure all country codes are known and assign canonical country name
    data %<>% add_country_name()

    data %<>%
      # remove obsolete vars
      dplyr::select(-any_of(c("categories.referendum_text_options",
                              "country_code_historical",
                              "is_past_jurisdiction",
                              "number",
                              "tags"))) %>%

      # convert to (ordered) factor where appropriate
      ## based on codebook
      dplyr::mutate(dplyr::across(everything(),
                                  ~ {
                                    metadata <- data_codebook %>% dplyr::filter(variable_name == dplyr::cur_column())

                                    if (nrow(metadata) != 1L) {
                                      cli::cli_abort("Missing codebook metadata! Please debug",
                                                     .internal = TRUE)
                                    }

                                    if (is.factor(unlist(metadata$ptype))) {

                                      lvls <- levels(unlist(metadata$ptype))
                                      is_ordered <- metadata$value_scale %in% c("ordinal_ascending", "ordinal_descending")

                                      if (is.list(.x)) {
                                        .x %>% purrr::map(.f = factor,
                                                          levels = lvls,
                                                          ordered = is_ordered)
                                      } else {
                                        factor(x = .x,
                                               levels = lvls,
                                               ordered = is_ordered)
                                      }
                                    } else {
                                      .x
                                    }
                                  })) %>%
      ## fctrs without explicit variable_values set in codebook
      dplyr::mutate(
        ### fctrs where we defined a finite set of values
        country_code = factor(x = country_code,
                              levels = val_set$country_code,
                              ordered = FALSE),

        ### fctrs where we did not define a finite set of values (yet)
        dplyr::across(any_of(c("subnational_entity_name",
                               "municipality")),
                      as.factor)
      ) %>%
      # add vars which aren't always included and coerce to proper types
      vctrs::tib_cast(to =
                        data_codebook %>%
                        dplyr::filter(!is_opt) %$%
                        magrittr::set_names(x = ptype,
                                            value = variable_name) %>%
                        tibble::as_tibble(),
                      call = this_env) %>%
      # harmonize col order
      order_rfrnd_cols()
  }

  # convert nested list cols to tibbles
  data %>%
    dplyr::mutate(dplyr::across(any_of(c("files",
                                         "votes_per_subterritory")),
                                ~ purrr::map(.x,
                                             \(x) if (length(x) > 0L) x %>% purrr::map(tibble::as_tibble) %>% purrr::list_rbind() else NULL)),
                  dplyr::across(any_of("archive"),
                                ~ purrr::map(.x,
                                             \(x) if (length(x) > 0L) tibble::as_tibble(x) else NULL))) %>%
    # add variable labels (must be done at last since mutations above drop attrs)
    labelled::set_variable_labels(.labels = var_lbls,
                                  .strict = FALSE)
}

untidy_date

untidy_date <- function(x) {

  as.numeric(x) %>%
    magrittr::multiply_by(1000.0) %>%
    as.list() %>%
    magrittr::set_names(rep("$date",
                            times = length(.)))
}

untidy_rfrnds

#' Untidy into "raw" RDB API referendum data
#'
#' Converts from the tidied [rfrnds()] to the "raw" MongoDB schema used by the RDB API. Basically reverts [tidy_rfrnds()].
#'
#' @param data The data to untidy as returned by [rfrnds()].
#' @param as_tibble Whether or not to return the result as a [tibble][tibble::tbl_df]. If `FALSE`, a list is returned.
#'
#' @return
#' If `as_tibble = FALSE`, a list with one element per referendum, suitable to be converted [jsonlite::toJSON()] and then fed to the RDB API.
#'
#' Otherwise a [tibble][tibble::tbl_df] of the same format as returned by [`rfrnds(tidy = FALSE)`][rfrnds].
#' @keywords internal
untidy_rfrnds <- function(data,
                          as_tibble = FALSE) {

  checkmate::assert_flag(as_tibble)

  var_names_inverse <-
    names(var_names) %>%
    magrittr::set_names(purrr::list_c(var_names, ptype = character()))

  sub_var_names_files_inverse <-
    names(sub_var_names$files) %>%
    magrittr::set_names(purrr::list_c(sub_var_names$files, ptype = character()))

  # restore `number`
  if (all(c("id_official", "id_sudd") %in% colnames(data))) {
    data %<>% dplyr::mutate(id_sudd = dplyr::if_else(is.na(id_sudd),
                                                     id_official,
                                                     id_sudd))
  }

  data %<>%
    # remove variable labels
    labelled::remove_var_label() %>%
    dplyr::mutate(
      # restore strings
      dplyr::across(c(any_of("date"),
                      where(is.factor)),
                    as.character),
      dplyr::across(where(is.list),
                    ~ {
                      if (is.factor(.x[[1L]])) {
                        .x %>% purrr::map(as.character)
                      } else {
                        .x
                      }
                    }),
      # restore dates
      dplyr::across(any_of(c("date_time_created",
                             "date_time_last_edited")),
                    untidy_date),
      # restore individual variables
      ## `files`
      dplyr::across(any_of("files"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) {

                                   if ("date_time_attached" %in% colnames(x)) {

                                     x$date_time_attached %<>% untidy_date()
                                   }

                                   x %<>% pal::rename_from(dict = sub_var_names_files_inverse)
                                 })),
      ## `inst_topics_excluded`
      dplyr::across(any_of("inst_topics_excluded"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) dplyr::case_match(.x = x,
                                                             "budgets" ~ "budget",
                                                             .default = x))),
      ## `inst_topics_only`
      dplyr::across(any_of("inst_topics_only"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) dplyr::case_match(.x = x,
                                                             "infrastructural acts"                ~ "infrastructural act",
                                                             "competence shifts"                   ~ "competence shift",
                                                             "financial acts"                      ~ "financial act",
                                                             "financial acts (expenses)"           ~ "financial act (expenses)",
                                                             "financial acts (taxes)"              ~ "financial act (taxes)",
                                                             "financial acts (obligations)"        ~ "financial act (obligations)",
                                                             "total revisions of the constitution" ~ "total revision of the constitution",
                                                             .default = x))),
      ## `inst_object_type`
      dplyr::across(any_of("inst_object_type"),
                    ~ dplyr::case_match(.x = .x,
                                        "legal text (formulated proposal)" ~ "legal text (ausformulierter vorschlag)",
                                        "legal text (general proposal)"    ~ "legal text (allg. anregung)",
                                        .default = .x)),
      ## `inst_trigger_actor`, `inst_object_author`
      dplyr::across(any_of(c("inst_trigger_actor",
                             "inst_object_author")),
                    ~ dplyr::case_match(.x = .x,
                                        "other institution" ~ "institution",
                                        .default = .x)),
      ## `inst_precondition_actor`
      dplyr::across(any_of("inst_precondition_actor"),
                    ~ dplyr::case_match(.x = .x,
                                        "parliament and president"  ~ "parliament and President",
                                        "parliament and government" ~ "parliament and Government",
                                        .default = .x)),
      ## `type`
      dplyr::across(any_of("type"),
                    ~ dplyr::case_match(.x = .x,
                                        "citizens' assembly" ~ "citizen assembly",
                                        NA_character_        ~ "not provided",
                                        .default = .x)),
      ## `id`
      dplyr::across(any_of("id"),
                    ~ as.list(.x) %>% magrittr::set_names(rep("$oid", times = length(.)))),
      ## binary (dummies)
      dplyr::across(any_of("position_government"),
                    ~ dplyr::case_match(.x = .x,
                                        "yes" ~ "Acceptance",
                                        "no"  ~ "Rejection",
                                        .default = .x)),
      dplyr::across(any_of("inst_has_urgent_legal_basis"),
                    ~ dplyr::if_else(.x,
                                     "Urgent",
                                     "Normal")),
      dplyr::across(any_of("inst_is_binding"),
                    ~ dplyr::if_else(.x,
                                     "Binding",
                                     "Non-binding")),
      dplyr::across(any_of("inst_is_counter_proposal"),
                    ~ dplyr::if_else(.x,
                                     "Yes",
                                     "No")),
      dplyr::across(any_of("inst_is_assembly"),
                    ~ dplyr::if_else(.x,
                                     "Assembly",
                                     "Ballot")),
      dplyr::across(any_of("inst_has_precondition"),
                    ~ dplyr::if_else(.x,
                                     "Exists",
                                     "Does not exist")),
      # uppercase first letter of various vars
      dplyr::across(any_of(c("result",
                             "type",
                             "inst_legal_basis_type",
                             "inst_object_type",
                             "inst_object_legal_level",
                             "inst_object_revision_extent",
                             "inst_trigger_type",
                             "inst_trigger_actor_level",
                             "inst_trigger_time_limit",
                             "inst_quorum_approval",
                             "inst_precondition_decision",
                             # vars containing uppercase abbreviations
                             "inst_object_author",
                             "inst_trigger_actor",
                             "inst_precondition_actor")),
                    ~ pal::sentenceify(x = .x,
                                       punctuation_mark = "")),
      dplyr::across(any_of(c("inst_object_revision_modes",
                             "inst_topics_only",
                             "inst_topics_excluded")),
                    ~ purrr::map(.x = .x,
                                 .f = pal::sentenceify,
                                 punctuation_mark = "")),
      # restore NA values
      dplyr::across(where(is.character) & !any_of("result"),
                    ~ tidyr::replace_na(data = .x,
                                        replace = "")),
      ## implicit NAs (i.e. 'not provided' (-2))
      dplyr::across(where(is.integer) & !any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -2L)),
      ## explicit NAs (i.e. 'unknown' (-1))
      dplyr::across(any_of("result"),
                    ~ tidyr::replace_na(data = .x,
                                        replace = "Unknown")),
      dplyr::across(where(is.integer) & any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -1L)),
      dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -1.0))
    ) %>%
    # restore variable names
    pal::rename_from(dict = var_names_inverse)

  # restore `referendum_text_options`
  if (all(c("inst_is_divisible", "inst_is_variable") %in% colnames(data))) {
    data %<>% dplyr::mutate(referendum_text_options = dplyr::case_when(!inst_is_divisible & !inst_is_variable ~ "Whole text only",
                                                                       inst_is_divisible & inst_is_variable   ~ "Variants / splitting up possible",
                                                                       inst_is_divisible                      ~ "Splitting up possible",
                                                                       inst_is_variable                       ~ "Variants possible",
                                                                       .default                               = NA_character_))
  }

  # restore `tags`
  topics_var_names <- paste0("topics_tier_", 1:3)
  topics_vars_present <- topics_var_names %in% colnames(data)

  if (any(topics_vars_present)) {

    if (!all(topics_vars_present)) {
      topics_vars_missing <- topics_var_names %>% setdiff(topics_vars_present)
      cli::cli_abort(paste0("{cli::qty(topics_vars_missing)}The following {.var {'topics_tier_#'}} variable{?s} {?is/are} missing from {.arg data}: ",
                            "{.var {topics_vars_missing}}"))
    }

    data %<>%
      dplyr::mutate(tags = restore_topics(topics_tier_1,
                                          topics_tier_2,
                                          topics_tier_3)) %>%
      dplyr::select(-any_of(topics_var_names))
  }

  # remove unknown columns
  data %<>% dplyr::select(any_of(rfrnd_fields$all_flat))

  if (!as_tibble) {

    # remove nested field prefixes
    data %<>% dplyr::rename_with(.cols = matches("^(categories|context|title)\\."),
                                 .fn = ~ stringr::str_remove(string = .x,
                                                             pattern = "^\\w+?\\."))

    # restore nested structure
    categories_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^categories\\.") %>%
      stringr::str_remove(pattern = "^categories\\.") %>%
      intersect(colnames(data))

    context_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^context\\.") %>%
      stringr::str_remove(pattern = "^context\\.") %>%
      intersect(colnames(data))

    title_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^title\\.") %>%
      stringr::str_remove(pattern = "^title\\.") %>%
      intersect(colnames(data))

    if (length(categories_fields_present)) {
      data %<>% tidyr::nest(categories = all_of(categories_fields_present))
    }
    if (length(context_fields_present)) {
      data %<>% tidyr::nest(context = all_of(context_fields_present))
    }
    if (length(title_fields_present)) {
      data %<>% tidyr::nest(title = all_of(title_fields_present))
    }

    data %<>%
      # convert to list
      dplyr::group_split(dplyr::row_number(),
                         .keep = FALSE) %>%
      purrr::map(as.list) %>%
      # tweak list structure
      purrr::modify_depth(.depth = 1L,
                          .f = ~
                            .x %>%
                            # flatten unnecessarily nested elements
                            purrr::modify_at(.at = "tags",
                                             .f = flatten_array_as_is) %>%
                            # convert nested tibbles to lists
                            purrr::modify_at(.at = "files",
                                             .f = ~
                                               .x[[1L]] %>%
                                               pal::when(is.null(.) ~ list(),
                                                         ~ dplyr::group_split(.tbl = .,
                                                                              dplyr::row_number(),
                                                                              .keep = FALSE) %>%
                                                           purrr::map(as.list))) %>%
                            purrr::modify_at(.at = c("archive",
                                                     "categories",
                                                     "context",
                                                     "title"),
                                             .f = ~
                                               .x %>%
                                               purrr::map(as.list) %>%
                                               unlist(recursive = FALSE)) %>%
                            # reduce nesting of nested tibble
                            pal::when(is.null(purrr::pluck(., "context", "votes_per_canton")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("context", "votes_per_canton"),
                                                         .f = dplyr::first)) %>%
                            # reduce nesting of array fields
                            pal::when(is.null(purrr::pluck(., "categories", "action")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "action"),
                                                         .f = flatten_array_as_is)) %>%
                            pal::when(is.null(purrr::pluck(., "categories", "excluded_topics")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "excluded_topics"),
                                                         .f = flatten_array_as_is)) %>%
                            pal::when(is.null(purrr::pluck(., "categories", "special_topics")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "special_topics"),
                                                         .f = flatten_array_as_is)))
  }

  data
}

url_api

#' Assemble RDB Services API URL
#'
#' @param ... Optional path components added to the base URL.
#' @param .use_testing_server `r pkg_config$description[pkg_config$key == "use_testing_server"]`
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_api("health")
url_api <- function(...,
                    .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                              pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)

  ifelse(.use_testing_server,
         "stagservices.c2d.ch",
         "services.c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

url_admin_portal

#' Assemble RDB admin portal URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_admin_portal("referendum/5bbbfd7b92a21351232e46b5")
url_admin_portal <- function(...,
                             .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                       pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)

  ifelse(.use_testing_server,
         "c2d-admin.netlify.app",
         "admin.c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

url_codebook

#' Assemble codebook URL
#'
#' @param var Optional variable name to add as the [fragment identifier](https://en.wikipedia.org/wiki/URI_fragment) of the returned URL, which leads to a
#' direct link to the relevant codebook section. Must be either one of the column names of [`data_codebook`], or a valid fragment identifier of a codebook
#' section above the individual variables (`r pal::enum_str(codebook_fragments, last = " or ")`).
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_codebook("level")
#' rdb:::url_codebook("topics")
url_codebook <- function(var = NULL) {

  checkmate::assert_string(var,
                           null.ok = TRUE)

  if (!is.null(var)) {

    var %<>%
      rlang::arg_match0(values = c(data_codebook$variable_name,
                                   # additional HTML anchors
                                   codebook_fragments)) %>%
      stringr::str_replace_all(pattern = stringr::fixed("_"),
                               replacement = "-")
  }

  paste0("https://rdb.rpkg.dev/articles/codebook.html", "#"[!is.null(var)], var)
}

url_website

#' Assemble website URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_website("referendum/CH/5bbc04f692a21351232e5a01")
url_website <- function(...,
                        .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)

  ifelse(.use_testing_server,
         "c2d-site.netlify.app",
         "c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

MongoDB query filter operator helpers

$date

NOTES:

query_filter_date <- function(min,
                              max) {
  list(`$gte` = min,
       `$lte` = max) %>%
    purrr::compact()
}

query_filter_datetime <- function(min,
                                  max) {

  list(`$gte` = purrr::compact(list(`$date` = min)),
       `$lte` = purrr::compact(list(`$date` = max))) %>%
    purrr::compact()
}

$in

MongoDB documentation: https://docs.mongodb.com/manual/reference/operator/query/in/#mongodb-query-op.-in

query_filter_in <- function(x) {

  x %>% pal::when(length(.) == 0L ~ NULL,
                  length(.) == 1L ~ .,
                  ~ list(`$in` = .))
}

sudd.ch-related

parse_sudd_date

Parses pseudo ISO 8601 strings like "1999-03-17" (valid) and "2000-09-00" or "1992-05" (missing day).

parse_sudd_date <- function(x) {

  x_parts <- stringr::str_split_1(string = x,
                                  pattern = "-")
  to_int <- function(x) {
    x %<>% as.integer()
    x[x == 0L] <- NA_integer_
    x
  }

  tibble::tibble(year = to_int(x_parts[1L]),
                 month = to_int(x_parts[2L]),
                 day = to_int(x_parts[3L]))
}

parse_sudd_date_de

Parses German date strings like "17. März 1999" and "??. September 2000".

parse_sudd_date_de <- function(x) {

  components <- stringr::str_split_fixed(string = x,
                                         pattern = "\\s+",
                                         n = 3L)

  list(year = components[, 3L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer(),
       month = components[, 2L] %>% dplyr::case_match(!!!months_de_fms),
       day = components[, 1L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer())
}

parse_sudd_id

Derives country_code and country_name from an id_sudd.

An id_sudd is composed of

parse_sudd_id <- function(id_sudd) {

  sudd_year <-
    id_sudd %>%
    stringr::str_extract(pattern = "\\d{4}$") %>%
    as.integer()

  sudd_country_code <-
    id_sudd %>%
    stringr::str_sub(end = 2L) %>%
    stringr::str_to_upper()

  derive_country_vars(country_code = sudd_country_code,
                      date = clock::date_build(year = sudd_year,
                                               month = 1L,
                                               day = 1L))
}

!sudd_rfrnd_field_names

Determines the field_names of a sudd.ch referendum, whether it has_field_names_duplicated, and if so, the actual field_names_duplicated (incl. number of duplications).

NOTES:

sudd_rfrnd_field_names <- function(id_sudd) {

  checkmate::assert_string(id_sudd)

  html <-
    httr::RETRY(verb = "GET",
                url = url_sudd("event.php"),
                query = list(id = id_sudd),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_element(css = "main table") %>%
    rvest::html_children()

  field_names <-
    html %>%
    rvest::html_elements(css = "td.feld") %>%
    rvest::html_text()

  field_names_duplicated <-
    field_names[field_names %in% field_names[duplicated(field_names)]] %>%
    table(exclude = NULL) %>%
    tibble::enframe(name = "field_name",
                    value = "n")

  tibble::tibble(id_sudd = id_sudd,
                 field_names = list(field_names),
                 has_field_names_duplicated = nrow(field_names_duplicated) > 0L,
                 field_names_duplicated = list(field_names_duplicated))
}

sudd_rfrnd

Retrieves a single referendum's data from sudd.ch.

sudd_rfrnd <- function(id_sudd) {

  checkmate::assert_string(id_sudd)

  html <-
    httr::RETRY(verb = "GET",
                url = url_sudd("event.php"),
                query = list(id = id_sudd),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_element(css = "main table") %>%
    rvest::html_children()

  field_names <-
    html %>%
    rvest::html_elements(css = "td.feld") %>%
    rvest::html_text()

  # handle fields with duplicated/ambiguous names
  if (anyDuplicated(field_names)) {

    ## simple duplicates (probably data errors)
    if (id_sudd == "gr011862") {

      ix_to_drop <- c(which(field_names == "\u2517\u2501 Republik")[2L],
                      which(field_names == "\u2517\u2501 Russischer Prinz")[2L])
      html %<>% .[-ix_to_drop]
      field_names %<>% .[-ix_to_drop]

      ## competing / mutually exclusive proposals, e.g. proposals with direct counter proposal and (optionally) tie-breaker question (CH and LI)
    } else {

      option_names <-
        html %>%
        rvest::html_elements(css = "td.feld strong") %>%
        rvest::html_text()

      if (length(option_names) < 2L) {
        cli::cli_abort("Unknown table layout detected for referendum with {.arg id_sudd = {id_sudd}}. Please debug.",
                       .internal = TRUE)
      }

      ix_option_names <- which(field_names %in% option_names)
      option_names_counter <- c("Gegenentwurf", "Gegenvorschlag")
      option_names_tie_breaker <- "Stichfrage"
      option_names_special <- c(option_names_counter, option_names_tie_breaker)
      has_counter_proposal <- any(option_names_counter %in% option_names)
      n_proposals_original <-
        option_names %>%
        setdiff(option_names_special) %>%
        length()

      ix_option_field_names <-
        ix_option_names[-length(ix_option_names)] %>%
        purrr::imap(~ (.x + 1L):(ix_option_names[.y + 1L] - 1L)) %>%
        c(list((dplyr::last(ix_option_names) + 1L):(min(length(field_names), which(field_names %in% c("Medien",
                                                                                                      "Bemerkungen",
                                                                                                      "Gleichzeitig mit",
                                                                                                      "Quellen",
                                                                                                      "Vollst\u00e4ndigkeit",
                                                                                                      "Letzte \u00c4nderung"))) - 1L)))
      # rename field names
      option_suffixes <-
        option_names %>%
        purrr::imap_chr(~ .x %>% pal::when(. %in% option_names_counter                        ~ "counter_proposal",
                                           . %in% option_names_tie_breaker                    ~ "tie_breaker",
                                           has_counter_proposal && n_proposals_original == 1L ~ "proposal",
                                           ~ glue::glue("option_{.y}")))
      renamings <-
        purrr::map2(.x = setdiff(option_names,
                                 option_names_tie_breaker),
                    .y = setdiff(option_suffixes,
                                 "tie_breaker"),
                    .f = ~ rlang::list2(!!paste("\u2517\u2501", .x) := glue::glue("votes_tie_breaker_{.y}"),
                                        !!paste("\u2517\u2501 St\u00e4nde", .x) := glue::glue("subterritories_{.y}_tie_breaker"))) %>%
        purrr::list_flatten() %>%
        as_fm_list()

      for (i in seq_along(option_names)) {

        field_names[ix_option_field_names[[i]]] %<>%
          dplyr::case_match(.x = .,
                            !!!c(renamings,
                                 list("Abgegebene Stimmen"                   ~ glue::glue("votes_{option_suffixes[i]}_total"),
                                      "Stimmen ausser Betracht"              ~ glue::glue("votes_{option_suffixes[i]}_invalid"),
                                      "Ohne Antwort"                         ~ glue::glue("votes_{option_suffixes[i]}_empty"),
                                      "G\u00fcltige (= massgebende) Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_valid"),
                                      "\u2517\u2501 Ja-Stimmen"              ~ glue::glue("votes_{option_suffixes[i]}_yes"),
                                      "\u2517\u2501 Nein-Stimmen"            ~ glue::glue("votes_{option_suffixes[i]}_no"),
                                      "Ja-Stimmen"                           ~ glue::glue("votes_{option_suffixes[i]}_yes"),
                                      "Nein-Stimmen"                         ~ glue::glue("votes_{option_suffixes[i]}_no"),
                                      "St\u00e4nde (Kantone)"                ~ glue::glue("subterritories_{option_suffixes[i]}"),
                                      "\u2517\u2501 Annehmende St\u00e4nde"  ~ glue::glue("subterritories_{option_suffixes[i]}_yes"),
                                      "\u2517\u2501 Verwerfende St\u00e4nde" ~ glue::glue("subterritories_{option_suffixes[i]}_no"))),
                            .default = .)
      }

      # drop obsolete fields
      html %<>% .[-ix_option_names]
      field_names %<>% .[-ix_option_names]
    }
  }

  # handle other special cases
  if (id_sudd %in% c("li011954",
                     "li031985")) {

    field_names %<>% dplyr::case_match(.x = .,
                                       "\u2517\u2501 Initiative"     ~ "votes_proposal",
                                       "\u2517\u2501 Gegenvorschlag" ~ "votes_counter_proposal",
                                       "\u2517\u2501 Nein-Stimmen"   ~ "votes_option_none",
                                       .default = .)
  }

  ## move content of exotic fields to `remarks`
  remarks_field <-
    html[field_names == "Bemerkungen"] %>%
    pal::when(length(.) > 0L ~ rvest::html_elements(x = ., css = "td")[[2L]],
              ~ .)

  remarks_list_col <- list(list(text = rvest::html_text2(remarks_field),
                                urls =
                                  remarks_field %>%
                                  rvest::html_elements(css = "a") %>%
                                  purrr::map_chr(~ .x %>%
                                                   rvest::html_attr(name = "href") %>%
                                                   url_sudd()),
                                html =
                                  remarks_field %>%
                                  xml2::xml_contents() %>%
                                  as.character() %>%
                                  paste0(collapse = "")))
  ix_fields_to_remarks <-
    field_names %>%
    stringr::str_detect(pattern = paste0("^",
                                         pal::fuse_regex("\u2517\u2501\u2501\u2501 .+Stimmen( .+)?",
                                                         "Unklare Stimmen",
                                                         "Unstimmigkeiten",
                                                         "G\u00fcltig stimmende Personen"),
                                         "$")) %>%
    which()

  if (length(ix_fields_to_remarks) > 0L) {

    addendum <- field_names[ix_fields_to_remarks]

    if (length(addendum) > 0L) {

      addendum %<>%
        stringr::str_extract("\\w.*") %>%
        paste0(": ",
               html[ix_fields_to_remarks] %>%
                 rvest::html_elements(css = "td") %>%
                 magrittr::extract2(2L) %>%
                 rvest::html_elements(css = "data") %>%
                 rvest::html_attr("value"),
               collapse = "\n\n")
    }

    remarks_list_col[[1L]]$text %<>% paste0(addendum, "\n\n"[length(addendum) > 0L], .)
    remarks_list_col[[1L]]$html %<>% paste0("<p>\n", addendum, "\n</p>", .)
    html %<>% .[-ix_fields_to_remarks]
    field_names %<>% .[-ix_fields_to_remarks]
  }

  # remove unnecessary fields
  ix_to_drop <- which(field_names %in% c("Nicht eingelegte Stimmzettel",
                                         "Nicht eingelegte Stimmenzettel",
                                         "Nicht teilgenommen"))

  if (length(ix_to_drop)) {
    html %<>% .[-ix_to_drop]
    field_names %<>% .[-ix_to_drop]
  }

  field_names %<>%
    dplyr::case_match(.x = .,
                      "Gebiet"                                          ~ "territory_name_de",
                      "\u2517\u2501 Stellung"                           ~ "territory_type_de",
                      "Datum"                                           ~ "date",
                      "Titel"                                           ~ "title_de",
                      "Vorlage"                                         ~ "title_de",
                      "\u2517\u2501 Fragemuster"                        ~ "question_type_de",
                      "\u2517\u2501 Abstimmungstyp"                     ~ "types",
                      "Ergebnis"                                        ~ "result_de",
                      "Vollst\u00e4ndigkeit"                            ~ "result_status_de",
                      "\u2517\u2501 Mehrheiten"                         ~ "adoption_requirements_de",
                      "Stimmberechtigte"                                ~ "electorate_total",
                      "\u2517\u2501 Davon im Ausland"                   ~ "electorate_abroad",
                      "Stimmausweise"                                   ~ "polling_cards",
                      "Ausgegebene Stimmzettel"                         ~ "polling_cards",
                      "Stimmbeteiligung"                                ~ "votes_total",
                      "Stimmen ausser Betracht"                         ~ "votes_invalid",
                      "Stimmzettel ausser Betracht"                     ~ "votes_invalid",
                      "Leere Stimmen"                                   ~ "votes_empty",
                      "\u2517\u2501 Leere Stimmen"                      ~ "votes_empty",
                      "\u2517\u2501 Leere Stimmzettel"                  ~ "votes_empty",
                      "Ung\u00fcltige Stimmen"                          ~ "votes_void",
                      "\u2517\u2501 Ung\u00fcltige Stimmen"             ~ "votes_void",
                      "\u2517\u2501 Ung\u00fcltige Stimmzettel"         ~ "votes_void",
                      "Ung\u00fcltig eingelegte Stimmzettel"            ~ "votes_void",
                      "Ganz ung\u00fcltige Stimmzettel"                 ~ "votes_void",
                      "G\u00fcltige (= massgebende) Stimmen"            ~ "votes_valid",
                      "\u2517\u2501 Ja-Stimmen"                         ~ "votes_yes",
                      "\u2517\u2501 Nein-Stimmen"                       ~ "votes_no",
                      "\u2517\u2501 Nein zu beiden Vorschl\u00e4gen"    ~ "votes_option_none",
                      "\u2517\u2501 Stimmen ausser Betracht"            ~ "votes_invalid",
                      "Staaten"                                         ~ "subterritories",
                      "\u2517\u2501 Annehmende Staaten"                 ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Staaten"                ~ "subterritories_no",
                      "Gebiete"                                         ~ "subterritories",
                      "\u2517\u2501 Annehmende Gebiete"                 ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Gebiete"                ~ "subterritories_no",
                      "Provinzen"                                       ~ "subterritories",
                      "\u2517\u2501 Annehmende Provinzen"               ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Provinzen"              ~ "subterritories_no",
                      "Inseln"                                          ~ "subterritories",
                      "\u2517\u2501 Annehmende Inseln"                  ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Inseln"                 ~ "subterritories_no",
                      "St\u00e4nde (Kantone)"                           ~ "subterritories",
                      "\u2517\u2501 Annehmende St\u00e4nde"             ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende St\u00e4nde"            ~ "subterritories_no",
                      "Regionen / St\u00e4dte"                          ~ "subterritories",
                      "\u2517\u2501 Annehmende Regionen / St\u00e4dte"  ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Regionen / St\u00e4dte" ~ "subterritories_no",
                      "Wahlkreise"                                      ~ "subterritories",
                      "\u2517\u2501 Annehmende Wahlkreise"              ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Wahlkreise"             ~ "subterritories_no",
                      "Senatswahlkreise"                                ~ "subterritories",
                      "\u2517\u2501 Annehmende Senatswahlkreise"        ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Senatswahlkreise"       ~ "subterritories_no",
                      "Medien"                                          ~ "files",
                      "Bemerkungen"                                     ~ "remarks",
                      "Gleichzeitig mit"                                ~ "ids_sudd_simultaneous",
                      "Quellen"                                         ~ "sources",
                      "Letzte \u00c4nderung"                            ~ "date_last_edited",
                      .default = .) %>%
    # assert field names are unique
    checkmate::assert_character(any.missing = FALSE,
                                unique = TRUE,
                                .var.name = "field_names") %>%
    # referendum-option-specific recodings (sequentially numbered `votes_option_#` columns)
    # TODO: adapt this once we can properly capture more than yes/no answer options, cf. https://gitlab.com/zdaarau/rpkgs/rdb/-/issues/5
    purrr::map_at(.at = which(startsWith(., "\u2517\u2501 ")),
                  .f = function(old_name, old_names) paste0("votes_option_", which(old_names == old_name)),
                  old_names = stringr::str_subset(string = .,
                                                  pattern = "^\u2517\u2501 ")) %>%
    purrr::list_c(ptype = character())

  # assert no original uppercase field names are left over
  ix_field_names_unknown <-
    field_names %>%
    stringr::str_detect(pattern = "[:upper:]") %>%
    which()

  if (length(ix_field_names_unknown)) {
    cli::cli_abort(paste0("Unknown {cli::qty(length(ix_field_names_unknown))} field{?s} {.field {field_names[ix_field_names_unknown]}} present in data for ",
                          "referendum with {.arg {paste0('id_sudd = ', id_sudd)}}."),
                   .internal = TRUE)
  }

  purrr::map2_dfc(.x = html,
                  .y = field_names,
                  .f = function(html, col_name) {

                    cells <- html %>% rvest::html_elements(css = "td")
                    col_text <- rvest::html_text2(cells[[2L]])

                    # extract hyperlinks if necessary
                    if (col_name %in% c("remarks",
                                        "ids_sudd_simultaneous",
                                        "sources")) {
                      urls <-
                        cells[[2L]] %>%
                        rvest::html_elements(css = "a") %>%
                        purrr::map_chr(~ .x %>%
                                         rvest::html_attr(name = "href") %>%
                                         url_sudd())
                    }

                    tibble::tibble(!!col_name :=
                                     col_name %>%
                                     pal::when(
                                       # character scalars
                                       . %in% c("territory_name_de",
                                                "territory_type_de",
                                                "title_de",
                                                "question_type_de",
                                                "result_de",
                                                "result_status_de") ~
                                         col_text,

                                       # integer scalars
                                       stringr::str_detect(
                                         string = .,
                                         pattern = paste0(
                                           "^",
                                           pal::fuse_regex(
                                             "electorate_total",
                                             "electorate_abroad",
                                             "polling_cards",
                                             "votes_total",
                                             "votes_invalid",
                                             "votes_empty",
                                             "votes_void",
                                             "votes_valid",
                                             "votes_yes",
                                             "votes_no",
                                             "votes_proposal",
                                             "votes_counter_proposal",
                                             paste0("votes_",
                                                    pal::fuse_regex("option_(\\d+|none)",
                                                                    "(counter_)?proposal",
                                                                    "tie_breaker(_(option_\\d+|(counter_)?proposal))?"),
                                                    paste0("(_",
                                                           pal::fuse_regex("total",
                                                                           "empty",
                                                                           "void",
                                                                           "invalid",
                                                                           "valid",
                                                                           "yes",
                                                                           "no"),
                                                           ")?"))),
                                           "$")) ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "data") %>%
                                         rvest::html_attr("value") %>%
                                         # fall back to parsing text if no semantic data could be extracted
                                         pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
                                                   ~ .) %>%
                                         as.integer(),

                                       startsWith(., "subterritories") ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "data") %>%
                                         rvest::html_attr("value") %>%
                                         # fall back to parsing text if no semantic data could be extracted
                                         pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
                                                   ~ .) %>%
                                         as.numeric(),

                                       # date scalars
                                       . == "date" ~
                                         cells[[2L]] %>%
                                         rvest::html_element(css = "time") %>%
                                         rvest::html_attr(name = "datetime"),

                                       . == "date_last_edited" ~
                                         cells[[2L]] %>%
                                         rvest::html_element(css = "time") %>%
                                         rvest::html_attr(name = "datetime") %>%
                                         clock::date_parse(),

                                       # lists (multi-value cols)
                                       . == "remarks" ~
                                         remarks_list_col,

                                       . == "sources" ~
                                         list(list(text = col_text,
                                                   urls = urls,
                                                   html =
                                                     cells[[2L]] %>%
                                                     xml2::xml_contents() %>%
                                                     as.character() %>%
                                                     paste0(collapse = ""))),
                                       . == "types" ~
                                         col_text %>% stringr::str_split(pattern = "\\s*\u2192\\s*"),
                                       . == "adoption_requirements_de" ~
                                         col_text %>% stringr::str_split(pattern = ",\\s*"),
                                       . == "files" ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "a") %>%
                                         purrr::map(~ .x %>%
                                                      rvest::html_attr(name = "href") %>%
                                                      url_sudd() %>%
                                                      tibble::tibble(description = rvest::html_text(.x),
                                                                     url = .)) %>%
                                         purrr::list_rbind() %>%
                                         list(),
                                       . == "ids_sudd_simultaneous" ~
                                         urls %>%
                                         stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+") %>%
                                         list(),

                                       ~ "PARSING ERROR; PLEASE DEBUG"
                                     ))
                  })
}

url_sudd

Completes relative URLs.

url_sudd <- function(x = "") {

  purrr::map_chr(x,
                 \(x) {

                   if (!is.na(x) && stringr::str_detect(x, "https?:")) {
                     x
                   } else {
                     httr::modify_url(url = x %|% "",
                                      scheme = "https",
                                      hostname = "sudd.ch")
                   }
                 })
}

Constants

this_pkg

this_pkg <- utils::packageName()

cli_theme

cli_theme <-
  cli::builtin_theme() %>%
  purrr::list_modify(span.err = list(color = "red",
                                     `font-weight` = "bold"),
                     span.warn = list(color = "orange",
                                     `font-weight` = "bold"),
                     span.content = list(color = "mediumorchid"))

date_backup_rdb

date_backup_rdb <- pal::path_mod_time("data-raw/backups/rdb.rds") |> clock::as_date()

codebook_fragments

codebook_fragments <- c("institution-level-variables",
                        "referendum-level-variables",
                        "topics")

data_cols_absent

data_cols_absent <-
  tibble::tibble(col = character(),
                 type = list(),
                 msg = character()) %>%
  tibble::add_row(col = "id",
                  type = list("add"),
                  msg = "an {.var id} column. It is automatically set by the RDB API back-end. Did you mean to {.fun edit_rfrnds} instead?") %>%
  tibble::add_row(col = "country_name",
                  type = list(c("add", "edit")),
                  msg = "a {.var country_name} column. It is automatically set by the RDB API back-end based on {.var country_code}.") %>%
  tibble::add_row(col = "date_time_created",
                  type = list(c("add", "edit")),
                  msg = "a {.var date_time_created} column. This date is automatically set by the RDB API back-end and not supposed to be changed.") %>%
  tibble::add_row(col = "date_time_last_edited",
                  type = list(c("add", "edit")),
                  msg = paste0("a {.var date_time_last_edited} column. This date is automatically set by the RDB API back-end and not supposed to be changed ",
                               "manually.")) %>%
  dplyr::mutate(msg = paste0("{.arg data} mustn't contain ", msg))

ballot_date_colnames

Mainly used in as_ballot_dates().

ballot_date_colnames <- c("country_code",
                          "country_code_long",
                          "country_code_continual",
                          "country_name",
                          "country_name_long",
                          "subnational_entity_code",
                          "subnational_entity_name",
                          "municipality",
                          "level",
                          "date",
                          "week",
                          "month",
                          "quarter",
                          "year",
                          "decade",
                          "century",
                          "era",
                          "wave_of_democracy",
                          "is_former_country",
                          "un_country_code",
                          "un_region_tier_1_code",
                          "un_region_tier_1_name",
                          "un_region_tier_2_code",
                          "un_region_tier_2_name",
                          "un_region_tier_3_code",
                          "un_region_tier_3_name",
                          "un_subregion")

rfrnd_fields

RDB referendum database field names as returned by the /referendums API endpoint.

rfrnd_fields <- list()

$all*

rfrnd_fields$all <- c("_id",
                      "archive",
                      "canton",
                      "categories",
                      "citizens_abroad",
                      "committee_name",
                      "context",
                      "country_code",
                      "country_code_historical",
                      "country_name",
                      "created_on",
                      "date",
                      "date_time_last_edited",
                      "draft",
                      "files",
                      "id_official",
                      "id_sudd",
                      "institution",
                      "is_past_jurisdiction",
                      "level",
                      "question",
                      "question_en",
                      "municipality",
                      "number",
                      "remarks",
                      "result",
                      "sources",
                      "tags",
                      "title",
                      "total_electorate",
                      "votes_empty",
                      "votes_invalid",
                      "votes_no",
                      "votes_yes")

rfrnd_fields$all_flat <-
  rfrnd_fields$all %>%
  setdiff(c("categories", "context", "title")) %>%
  union(c("categories.action",
          "categories.author_of_the_vote_object",
          "categories.counter_proposal",
          "categories.decision_quorum",
          "categories.degree_of_revision",
          "categories.excluded_topics",
          "categories.hierarchy_of_the_legal_norm",
          "categories.institutional_precondition",
          "categories.institutional_precondition_decision",
          "categories.institutional_precondition_decision_actor",
          "categories.legal_act_type",
          "categories.official_status",
          "categories.referendum_text_options",
          "categories.special_topics",
          "categories.turnout_quorum",
          "categories.vote_object",
          "categories.vote_result_status",
          "categories.vote_trigger",
          "categories.vote_trigger_actor",
          "categories.vote_trigger_number",
          "categories.vote_trigger_state_level",
          "categories.vote_trigger_time",
          "categories.vote_venue",
          "context.national_council_abstentions",
          "context.national_council_no",
          "context.national_council_yes",
          "context.recommendation",
          "context.states_council_abstentions",
          "context.states_council_no",
          "context.states_council_yes",
          "context.states_no",
          "context.states_yes",
          "context.votes_per_canton",
          "title.de",
          "title.en",
          "title.fr"))

$required_for_*

The following fields are mandatory when adding/editing referendums via the admin portal, but not strictly mandatory schema-wise:

| field | edits | additions | remarks | |--------------------------|-------|-----------|-----------------------------------------------------------------------------------------------| | _id | yes | no | must be provided in the URL, not the payload | | country_code | no | yes | must be a valid choice; the back-end automatically derives country_name from country_code | | canton | no | yes | only mandatory if "level":"subnational" | | municipality | no | yes | only mandatory if "level":"local" | | level | no | yes | must be a valid choice | | date | no | yes | must be a valid date (ISO 8601 date string) | | title.en | no | yes | contains NAs from before relaunch | | result | no | yes | must be a valid choice ("Yes", "No" or "Unknown") | | context.recommendation | no | yes | only mandatory if "country_code":"CH","level":"national"; must be a valid choice | | total_electorate | no | yes | must be a valid integer | | citizens_abroad | no | no | must be a valid integer | | votes_yes | no | no | must be a valid integer | | votes_no | no | no | must be a valid integer | | votes_empty | no | no | must be a valid integer | | votes_invalid | no | no | must be a valid integer | | draft | yes | no | defaults to true if not provided for additions | | institution | no | yes | must be a valid choice |

Based on the above, we define the fields that must always be present in the JSON payload when adding/editing referendums as follows:

rfrnd_fields$required_for_edits <- c("_id",
                                     "draft",
                                     "total_electorate",
                                     "citizens_abroad",
                                     "votes_yes",
                                     "votes_no",
                                     "votes_empty",
                                     "votes_invalid")

rfrnd_fields$required_for_additions <- c("country_code",
                                         "level",
                                         "date",
                                         "title.en",
                                         "result",
                                         "total_electorate",
                                         "citizens_abroad",
                                         "votes_yes",
                                         "votes_no",
                                         "votes_empty",
                                         "votes_invalid",
                                         "draft",
                                         "institution")

$never_empty

The following fields are expected to never be empty when returned by the API.

rfrnd_fields$never_empty <- c("_id",
                              "country_code",
                              "country_name",
                              "created_on",
                              "level",
                              "total_electorate",
                              "citizens_abroad",
                              "votes_yes",
                              "votes_no",
                              "votes_empty",
                              "votes_invalid",
                              "draft")

*var_names*

NOTES:

                 # old name                                                 new name
var_names <- list(`_id`                                                = "id",
                  canton                                               = "subnational_entity_name",
                  title.de                                             = "title_de",
                  title.en                                             = "title_en",
                  title.fr                                             = "title_fr",
                  context.states_no                                    = "subterritories_no",
                  context.states_yes                                   = "subterritories_yes",
                  total_electorate                                     = "electorate_total",
                  citizens_abroad                                      = "electorate_abroad",
                  context.votes_per_canton                             = "votes_per_subterritory",
                  context.national_council_yes                         = "lower_house_yes",
                  context.national_council_no                          = "lower_house_no",
                  context.national_council_abstentions                 = "lower_house_abstentions",
                  context.states_council_yes                           = "upper_house_yes",
                  context.states_council_no                            = "upper_house_no",
                  context.states_council_abstentions                   = "upper_house_abstentions",
                  context.recommendation                               = "position_government",
                  draft                                                = "is_draft",
                  created_on                                           = "date_time_created",
                  institution                                          = "type",
                  categories.official_status                           = "inst_legal_basis_type",
                  categories.legal_act_type                            = "inst_has_urgent_legal_basis",
                  categories.vote_result_status                        = "inst_is_binding",
                  categories.counter_proposal                          = "inst_is_counter_proposal",
                  categories.vote_venue                                = "inst_is_assembly",
                  categories.vote_trigger                              = "inst_trigger_type",
                  categories.vote_trigger_actor                        = "inst_trigger_actor",
                  categories.vote_trigger_state_level                  = "inst_trigger_actor_level",
                  categories.vote_trigger_number                       = "inst_trigger_threshold",
                  categories.vote_trigger_time                         = "inst_trigger_time_limit",
                  categories.vote_object                               = "inst_object_type",
                  categories.author_of_the_vote_object                 = "inst_object_author",
                  categories.hierarchy_of_the_legal_norm               = "inst_object_legal_level",
                  categories.degree_of_revision                        = "inst_object_revision_extent",
                  categories.action                                    = "inst_object_revision_modes",
                  categories.turnout_quorum                            = "inst_quorum_turnout",
                  categories.decision_quorum                           = "inst_quorum_approval",
                  categories.institutional_precondition                = "inst_has_precondition",
                  categories.institutional_precondition_decision_actor = "inst_precondition_actor",
                  categories.institutional_precondition_decision       = "inst_precondition_decision",
                  categories.special_topics                            = "inst_topics_only",
                  categories.excluded_topics                           = "inst_topics_excluded")

                                  # old name       new name
sub_var_names <- list(files = list(date       = "date_time_attached",
                                   object_key = "s3_object_key",
                                   size       = "file_size",
                                   deleted    = "is_deleted"))

# create additional formula-lists (mainly to be fed to `dplyr::case_match()`)
var_names_fms <- as_fm_list(var_names)
sub_var_names_fms <- purrr::imap(sub_var_names,
                                 ~ as_fm_list(.x))

Messages

mime_error_suffix <- "This indicates either some network issue or a change in the RDB API."

sudd.ch years

sudd_years <-
  url_sudd("index.php") %>%
  xml2::read_html() %>%
  rvest::html_element(css = "select[id='first']") %>%
  rvest::html_elements("option") %>%
  rvest::html_attr("value") %>%
  as.integer()

sudd_max_year <- pal::safe_max(sudd_years)
sudd_min_year <- pal::safe_min(sudd_years)
rm(sudd_years)

EXPORTED

Referendum data

rfrnds

#' Get referendum data (old version)
#'
#' Downloads the referendum data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams assemble_query_filter
#' @inheritParams tidy_rfrnds
#' @inheritParams url_api
#' @param incl_archive Whether or not to include an `archive` column containing data from an earlier, obsolete state of the Referendum Database (RDB).
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds()
#' 
#' # get only referendums in Austria and Australia on subnational level
#' rdb::rfrnds(country_code = c("AT", "AU"),
#'             level = "subnational",
#'             quiet = TRUE)
#'
#' # get referendums in 2020
#' rdb::rfrnds(date_min = "2020-01-01",
#'             date_max = "2020-12-31",
#'             quiet = TRUE)
#'
#' # get referendums added to the database during the last 30 days
#' rdb::rfrnds(date_time_created_min = clock::date_today(zone = "UTC") |> clock::add_days(-30L),
#'             date_time_created_max = clock::date_today(zone = "UTC"),
#'             quiet = TRUE)
#' 
#' # provide custom `query_filter` for more complex queries like regex matches
#' # cf. https://docs.mongodb.com/manual/reference/operator/query/regex/
#' rdb::rfrnds(query_filter = '{"country_code":{"$regex":"A."}}',
#'             quiet = TRUE)
rfrnds <- function(country_code = NULL,
                   subnational_entity_name = NULL,
                   municipality = NULL,
                   level = NULL,
                   type = NULL,
                   date_min = NULL,
                   date_max = NULL,
                   is_draft = FALSE,
                   date_time_created_min = NULL,
                   date_time_created_max = NULL,
                   date_time_last_edited_min = NULL,
                   date_time_last_edited_max = NULL,
                   query_filter = NULL,
                   incl_archive = FALSE,
                   tidy = TRUE,
                   use_cache = TRUE,
                   max_cache_age = "1 week",
                   use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                            pkg = this_pkg),
                   quiet = FALSE) {

  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(quiet)

  # TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
  if (isTRUE(use_testing_server)) cli::cli_abort("{.code mode=stream} is not yet supported on the testing servers.")

  result <- pkgpins::with_cache(expr = {

    if (!quiet) {
      status_msg <- "Fetching JSON data from RDB API..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }

    data <-
      httr::RETRY(verb = "GET",
                  url = url_api("referendums",
                                .use_testing_server = use_testing_server),
                  query = list(mode = "stream",
                               format = "json",
                               filter = assemble_query_filter(country_code = country_code,
                                                              subnational_entity_name = subnational_entity_name,
                                                              municipality = municipality,
                                                              level = level,
                                                              type = type,
                                                              date_min = date_min,
                                                              date_max = date_max,
                                                              is_draft = is_draft,
                                                              date_time_created_min = date_time_created_min,
                                                              date_time_created_max = date_time_created_max,
                                                              date_time_last_edited_min = date_time_last_edited_min,
                                                              date_time_last_edited_max = date_time_last_edited_max,
                                                              query_filter = query_filter)),
                  if (!quiet) httr::progress(type = "down"),
                  times = 3L) %>%
      # ensure we actually got a JSON response
      pal::assert_mime_type(mime_type = "application/json",
                            msg_suffix = mime_error_suffix) %>%
      # extract JSON
      httr::content(as = "text",
                    encoding = "UTF-8") %>%
      # ensure body is not empty
      assert_content()

    if (!quiet) {
      status_msg <- "Converting JSON to list data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    # NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result (depending on `query`)
    data %<>%
      jsonlite::fromJSON(simplifyVector = FALSE,
                         simplifyDataFrame = FALSE,
                         simplifyMatrix = FALSE,
                         flatten = FALSE) %$%
      items

    if (!quiet) {
      status_msg <- "Tidying data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }

    data %>% tidy_rfrnds(tidy = tidy)
  },
  pkg = this_pkg,
  from_fn = "rfrnds",
  country_code,
  subnational_entity_name,
  municipality,
  level,
  type,
  date_min,
  date_max,
  is_draft,
  date_time_created_min,
  date_time_created_max,
  date_time_last_edited_min,
  date_time_last_edited_max,
  query_filter,
  tidy,
  use_testing_server,
  use_cache = use_cache,
  max_cache_age = max_cache_age)

  # exclude `archive` if requested
  if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))

  result
}

rfrnds_bkp

#' Get referendum data from backup
#'
#' Downloads the referendum data from the Referendum Database (RDB) backup [in the `zdaarau/rpkgs/rdb`
#' repository](https://gitlab.com/zdaarau/rpkgs/rdb/-/blob/master/data-raw/backups/rdb.rds?ref_type=heads). See the [`codebook`][codebook] for a detailed
#' description of all variables.
#'
#' @inheritParams rfrnds
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds_bkp()
rfrnds_bkp <- function(is_draft = FALSE,
                       incl_archive = FALSE,
                       use_cache = TRUE,
                       max_cache_age = "1 week",
                       quiet = FALSE) {

  checkmate::assert_flag(is_draft,
                         null.ok = TRUE)
  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(quiet)

  result <- pkgpins::with_cache(
    expr = {

      if (!quiet) {
        pal::cli_progress_step_quick(msg = "Fetching latest RDB backup")
      }

      path_temp <- fs::file_temp(pattern = "rdb-", ext = "rds")
      utils::download.file(url = "https://gitlab.com/zdaarau/rpkgs/rdb/-/raw/master/data-raw/backups/rdb.rds?ref_type=heads&inline=false",
                           destfile = path_temp,
                           quiet = TRUE,
                           mode = "wb")

      readRDS(file = path_temp)
    },
    pkg = this_pkg,
    from_fn = "rfrnds",
    use_cache = use_cache,
    max_cache_age = max_cache_age
  )

  # exclude `archive` if requested
  if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))

  # respect `is_draft`
  if (!is.null(is_draft)) {
    result %<>% dplyr::filter(is_draft == !!is_draft)
  }

  result
}

rfrnd

NOTES:

#' Get a single referendum's data
#'
#' Downloads a single referendum's data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams rfrnds
#' @param id Referendum's unique [identifier](`r url_codebook("id")`).
#'
#' @inherit rfrnds return
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f")
rfrnd <- function(id,
                  incl_archive = FALSE,
                  tidy = TRUE,
                  use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                           pkg = this_pkg)) {
  checkmate::assert_string(id,
                           min.chars = 1L)
  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(tidy)

  # retrieve data
  data <-
    httr::RETRY(verb = "GET",
                url = url_api("referendums", id,
                              .use_testing_server = use_testing_server),
                config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # extract JSON
    httr::content(as = "text",
                  encoding = "UTF-8") %>%
    # ensure body is not empty
    assert_content() %>%
    # convert JSON to list
    # NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result
    jsonlite::fromJSON(simplifyVector = FALSE,
                       simplifyDataFrame = FALSE,
                       simplifyMatrix = FALSE,
                       flatten = FALSE) %>%
    # ensure no error occured
    assert_api_success() %>%
    # tidy data
    tidy_rfrnds(tidy = tidy)

  # exclude `archive` if requested
  if (!incl_archive) data %<>% dplyr::select(-any_of("archive"))

  # return data
  data
}

download_file_attachment

#' Download file attachment
#'
#' Downloads a file attachment from the Referendum Database (RDB). The necessary `s3_object_key`s identifying individual files are found in the `files` list
#' column returned by [rfrnds()].
#'
#' @inheritParams url_api
#' @param s3_object_key Key uniquely identifying the file in the RDB [Amazon S3 bucket](https://en.wikipedia.org/wiki/Amazon_S3#Design). A character scalar.
#' @param path Path where the downloaded file is written to.
#' @param use_original_filename Whether to save the file attachment using its original filename as uploaded. Note that original filenames are **not unique**,
#'   i.e. there are multiple file attachments with the same original filename (but differing content, of course). If `FALSE`, `s3_object_key` is used as
#'   filename. Only relevant if `path` is a directory.
#'
#' @return A [response object][httr::response], invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' # get object keys
#' obj_keys <-
#'   rdb::rfrnds()$files |>
#'   purrr::list_rbind() |>
#'   dplyr::filter(!is_deleted) |>
#'   _$s3_object_key[1:3]
#'
#' # download them to the current working dir
#' purrr::walk(obj_keys,
#'             rdb::download_file_attachment)
#' 
#' # and delete them again
#' fs::file_delete(obj_keys)
download_file_attachment <- function(s3_object_key,
                                     path = ".",
                                     use_original_filename = FALSE,
                                     use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                              pkg = this_pkg)) {
  checkmate::assert_string(s3_object_key)
  checkmate::assert_atomic(path)
  checkmate::assert_flag(use_original_filename)

  # TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
  if (isTRUE(use_testing_server)) cli::cli_abort("Accessing file attachments is not yet supported on the testing servers.")

  is_dir <- fs::is_dir(path)

  if (is_dir) {
    checkmate::assert_directory_exists(path,
                                       access = "rw")
  } else {
    checkmate::assert_path_for_output(path,
                                      overwrite = TRUE)
  }

  temp_path <- fs::file_temp()

  response <- httr::RETRY(verb = "GET",
                          url = url_api("s3_objects", s3_object_key,
                                        .use_testing_server = use_testing_server),
                          httr::write_disk(path = temp_path),
                          times = 3L)

  if (is_dir) {

    if (use_original_filename) {

      final_path <-
        response %>%
        httr::headers() %$%
        `content-disposition` %>%
        stringr::str_extract(pattern = "(?<=filename=\").+?(?=\")") %>%
        fs::path(path, .)

    } else {
      final_path <- fs::path(path, s3_object_key)
    }

  } else {
    final_path <- path
  }

  fs::file_move(path = temp_path,
                new_path = final_path)

  invisible(response)
}

add_rfrnds

NOTES:

#' Add new referendums to the RDB
#'
#' Adds new referendum entries to the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @details
#' Note that adding/editing the column `files` is not supported, i.e. it is simply dropped from `data`.
#'
#' @inheritParams url_api
#' @param data The new referendum data. A [tibble][tibble::tbl_df] that in any case must contain the columns
#' `r rfrnd_fields$required_for_additions %>% dplyr::case_match(.x = ., !!!var_names_fms, .default = .) %>% md_link_codebook() %>% pal::as_md_list()`
#'   
#' plus the column [`subnational_entity_name`](`r url_codebook("subnational_entity_name")`) for referendums of
#' [`level`](`r url_codebook("subnational_entity_name")`) below `"national"`, and the column [`municipality`](`r url_codebook("municipality")`) for referendums
#' of `level = "local"`,
#'   
#' plus any additional [valid][codebook] columns containing the values for the corresponding database fields.
#' @param email The e-mail address of the RDB API user account to be used for authentication. A character scalar.
#' @param password The password of the RDB API user account to be used for authentication. A character scalar.
#' @param quiet Whether or not to print the newly created referendum IDs to console.
#'
#' @return A character vector of newly created referendum IDs.
#' @family rfrnd
#' @export
add_rfrnds <- function(data,
                       email = pal::pkg_config_val(key = "api_username",
                                                   pkg = this_pkg),
                       password = pal::pkg_config_val(key = "api_password",
                                                      pkg = this_pkg),
                       use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                pkg = this_pkg),
                       quiet = FALSE) {

  checkmate::assert_data_frame(data,
                               min.rows = 1L)
  checkmate::assert_flag(quiet)

  ## ensure forbidden columns are absent
  assert_cols_absent(data = data,
                     type = "add")

  ## ensure mandatory columns are present
  rfrnd_fields$required_for_additions %>%
    dplyr::case_match(.x = .,
                      !!!var_names_fms,
                      .default = .) %>%
    purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))

  # drop non-applicable columns (they're supposed to be absent in MongoDB)
  data %<>% drop_non_applicable_vars()

  ## ensure remaining columns are valid
  assert_cols_valid(data = data,
                    type = "add")

  # convert data to MongoDB schema
  json_items <-
    data %>%
    # restore MongoDB fields
    untidy_rfrnds() %>%
    # convert to JSON
    purrr::map(jsonlite::toJSON,
               auto_unbox = TRUE,
               digits = NA)

  # add data to the MongoDB via HTTP POST on `/referendums` API endpoint
  responses <-
    json_items %>%
    purrr::map(\(x) {

      httr::RETRY(verb = "POST",
                  url = url_api("referendums",
                                .use_testing_server = use_testing_server),
                  config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
                                             Authorization = paste("Bearer", auth_session(email = email,
                                                                                          password = password,
                                                                                          use_testing_server = use_testing_server))),
                  body = x,
                  times = 3L,
                  httr::content_type_json()) %>%
        # ensure we actually got a JSON response
        pal::assert_mime_type(mime_type = "application/json",
                              msg_suffix = mime_error_suffix) %>%
        # extract JSON string
        httr::content(as = "text",
                      encoding = "UTF-8") %>%
        # ensure body is not empty
        assert_content() %>%
        # convert to list
        jsonlite::fromJSON(simplifyDataFrame = FALSE,
                           simplifyMatrix = FALSE)
    }) %>%
    # ensure no error occured
    assert_api_success()

  # throw warnings for unsuccessful API calls
  purrr::walk2(.x = responses,
               .y = seq_along(responses),
               .f = ~ if (!is.list(.x) || !isTRUE(nchar(.x$`_id`$`$oid`) > 0L)) {

                 api_failure(.x,
                             raw = json_items[[.y]],
                             prefix = "Failed to add the {.y}. referendum. ")
               })

  ids_new <- unlist(responses,
                    use.names = FALSE)

  if (!quiet) {
    cli::cli_alert_info("New referendum entries created with {.var id}s:")
    cli::cli_li(ids_new)
  }

  invisible(ids_new)
}

edit_rfrnds

NOTES:

#' Edit existing referendums in the RDB
#'
#' Edits existing referendum entries in the  API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inherit add_rfrnds details
#' 
#' @inheritParams add_rfrnds
#' @param data Updated referendum data. A [tibble][tibble::tbl_df] that must contain an [`id`](`r url_codebook("id")`) column
#'   identifying the referendums to be edited plus any additional columns containing the new values to update the corresponding database fields with. Note that
#'   due to [current API requirements](https://github.com/zdaarau/c2d-app/issues/50#issuecomment-1222660683), the following columns must always be supplied:
#'   
#'   ```r
#'   rfrnd_fields$required_for_edits %>%
#'     dplyr::case_match(.x = .,
#'                       !!!var_names_fms,
#'                       .default = .) |>
#'     setdiff("id") |>
#'     md_link_codebook() |>
#'     pal::as_md_list() |>
#'     cat()
#'   ```
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
edit_rfrnds <- function(data,
                        email = pal::pkg_config_val(key = "api_username",
                                                    pkg = this_pkg),
                        password = pal::pkg_config_val(key = "api_password",
                                                       pkg = this_pkg),
                        use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                 pkg = this_pkg)) {
  # ensure `data` is valid
  checkmate::assert_data_frame(data,
                               min.rows = 1L)

  ## ensure forbidden columns are absent
  assert_cols_absent(data = data,
                     type = "edit")

  ## ensure mandatory columns are present
  rfrnd_fields$required_for_edits %>%
    dplyr::case_match(.x = .,
                      !!!var_names_fms,
                      .default = .) %>%
    c("id") %>%
    purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))

  # drop non-applicable columns (they're absent in MongoDB)
  data %<>% drop_non_applicable_vars()

  ## ensure remaining columns are valid
  assert_cols_valid(data,
                    type = "edit")

  # convert data to MongoDB schema
  ids <- data$id

  json_items <-
    data %>%
    # drop `id`
    dplyr::select(-id) %>%
    # restore MongoDB fields
    untidy_rfrnds() %>%
    # convert to JSON
    purrr::map(jsonlite::toJSON,
               auto_unbox = TRUE,
               digits = NA)

  # edit data in the MongoDB via HTTP PUT on `/referendums/{id}` API endpoint
  responses <- purrr::map2(.x = ids,
                           .y = json_items,
                           .f = ~
                             httr::RETRY(verb = "PUT",
                                         url = url_api("referendums", .x,
                                                       .use_testing_server = use_testing_server),
                                         config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
                                                                    Authorization = paste("Bearer", auth_session(email = email,
                                                                                                                 password = password,
                                                                                                                 use_testing_server = use_testing_server))),
                                         body = .y,
                                         times = 3L,
                                         httr::content_type_json()) %>%
                             # ensure we actually got a JSON response
                             pal::assert_mime_type(mime_type = "application/json",
                                                   msg_suffix = mime_error_suffix) %>%
                             # extract JSON string
                             httr::content(as = "text",
                                           encoding = "UTF-8") %>%
                             # ensure body is not empty
                             assert_content())

  # throw warnings for unsuccessful API calls
  purrr::walk(.x = seq_along(ids),
              .f = ~ {

                parsed <- jsonlite::fromJSON(responses[[.x]])

                if (!isTRUE(parsed$ok)) {
                  api_failure(parsed,
                              raw = json_items[[.x]],
                              prefix = paste0("Failed to edit referendum with {.var id} {.val ", ids[.x], "}. "))
                }
              })

  invisible(data)
}

delete_rfrnds

#' Delete referendums in the RDB
#'
#' Deletes existing referendum entries in the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inheritParams add_rfrnds
#' @param ids IDs of the referendums to be deleted. A character vector.
#'
#' @return `ids`, invisibly.
#' @family rfrnd
#' @export
delete_rfrnds <- function(ids,
                          email = pal::pkg_config_val(key = "api_username",
                                                      pkg = this_pkg),
                          password = pal::pkg_config_val(key = "api_password",
                                                         pkg = this_pkg),
                          use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                   pkg = this_pkg)) {
  checkmate::assert_character(ids,
                              min.chars = 1L,
                              any.missing = FALSE,
                              unique = TRUE)

  # TODO: remove this as soon as https://github.com/zdaarau/c2d-app/issues/45 is deployed to master
  if (!use_testing_server) {
    cli::cli_abort("Referendum deletions are not yet supported on the production servers.")
  }

  responses <- purrr::map(.x = ids,
                          .f = ~
                            httr::RETRY(verb = "DELETE",
                                        url = url_api("referendums", .x,
                                                      .use_testing_server = use_testing_server),
                                        config = httr::add_headers(Authorization = paste("Bearer", auth_session(email = email,
                                                                                                                password = password,
                                                                                                                use_testing_server = use_testing_server))),
                                        times = 3L) %>%
                            # ensure we actually got a JSON response
                            pal::assert_mime_type(mime_type = "application/json",
                                                  msg_suffix = mime_error_suffix) %>%
                            # extract JSON string
                            httr::content(as = "text",
                                          encoding = "UTF-8") %>%
                            # ensure body is not empty
                            assert_content())

  # throw warnings for unsuccessful API calls
  purrr::walk2(.x = ids,
               .y = responses,
               .f = ~ {

                 parsed <- jsonlite::fromJSON(.y)

                 if (!isTRUE(parsed$ok)) {
                   api_failure(parsed,
                               prefix = "Failed to delete referendum with {.var id} {.val {.x}}. ")
                 }
               })

  invisible(ids)
}

validate_rfrnds

#' Validate referendum data
#'
#' Performs various data validation steps to ensure there are no errors in the supplied `data`.
#'
#' @param data Referendum data to validate, as returned by [rfrnds()].
#' @param check_applicability_constraint Whether or not to check that no applicability constraints as defined in the [codebook][data_codebook] are violated.
#' @param check_id_sudd_prefix Whether or not to check that all [`id_sudd`](`r url_codebook("id_sudd")`) prefixes are valid.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
validate_rfrnds <- function(data,
                            check_applicability_constraint = TRUE,
                            check_id_sudd_prefix = TRUE) {

  checkmate::assert_data_frame(data,
                               min.rows = 1L)
  checkmate::assert_subset(colnames(data),
                           choices = rfrnd_cols_order)
  checkmate::assert_flag(check_applicability_constraint)
  checkmate::assert_flag(check_id_sudd_prefix)

  # check columns
  status_msg <- "Checking basic column validity..."
  cli_progress_id <- cli::cli_progress_step(msg = status_msg,
                                            msg_done = paste(status_msg, "done"),
                                            msg_failed = paste(status_msg, "failed"))

  assert_cols_valid(data = data,
                    type = "validate",
                    action = cli::cli_alert_warning,
                    cli_progress_id = cli_progress_id)

  # check applicability constraints
  if (check_applicability_constraint) {

    status_msg <- "Asserting applicability constraints..."
    cli::cli_progress_step(msg = status_msg,
                           msg_done = paste(status_msg, "done"),
                           msg_failed = paste(status_msg, "failed"))

    var_names_violated <-
      data_codebook %>%
      dplyr::filter(variable_name %in% colnames(data)
                    & !is.na(applicability_constraint)) %$%
      purrr::map2_lgl(.x = magrittr::set_names(x = variable_name,
                                               value = variable_name),
                      .y = applicability_constraint,
                      .f = ~ {

                        data %>%
                          dplyr::filter(!eval(parse(text = .y))) %$%
                          eval(as.symbol(.x)) %>%
                          { is.na(.) | purrr::map_lgl(., is.null) } %>%
                          all()
                      }) %>%
    magrittr::extract(!.) %>%
    names()

    n_var_names_violated <- length(var_names_violated)

    if (n_var_names_violated) {

      cli::cli_progress_done(result = "failed")
      cli::cli_alert_warning("Applicability constraints are violated for {n_var_names_violated} variable{?s}:")

      paste0("{.var ", var_names_violated, "}") %>%
        magrittr::set_names(rep("x",
                                times = length(.))) %>%
        cli::cli_bullets()

      first_var_name_violated <- var_names_violated[1L]

      cli::cli({
        cli::cli_text("\nTo get the applicability constraint of e.g. {.var {first_var_name_violated}}, run:")
        cli::cli_text("")
        cli::cli_code(c("rdb::data_codebook %>%",
                        glue::glue("  dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%"),
                        "  applicability_constraint"))
        cli::cli_text("")
        cli::cli_text("To inspect the entries in violation of the above applicability constraint, run:")
        cli::cli_text("")
        cli::cli_code(c("data %>%",
                        glue::glue("  dplyr::filter(rdb::data_codebook %>%\n",
                                   "                  dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%\n",
                                   "                  applicability_constraint %>%\n",
                                   "                  parse(text = .) %>%\n",
                                   "                  eval() %>%\n",
                                   "                  magrittr::not()) %>%\n",
                                   "  dplyr::select(id, {first_var_name_violated})",
                                   .trim = FALSE)))
      })
    }
  }

  # check `id_sudd` prefix if requested
  if (check_id_sudd_prefix) {

    status_msg <- "Validating `id_sudd` prefixes..."
    cli::cli_progress_step(msg = status_msg,
                           msg_done = paste(status_msg, "done"),
                           msg_failed = paste(status_msg, "failed"))

    if (!all(c("country_code", "id_sudd") %in% colnames(data))) {
      cli::cli_progress_done(result = "failed")
      cli::cli_abort("Columns {.var country_code} and {.var id_sudd} must be present in {.arg data}.")
    }

    # define allowed exceptions
    allowed_exceptions <- tibble::tribble(
      ~country_code, ~id_sudd_prefix,
      # Curacao
      "CW", "an",
      # Szeklerland, cf. https://sudd.ch/event.php?id=hu042008
      "RO", "hu"
    )

    # assemble target country codes
    country_codes <-
      data$country_code %>%
      as.character() %>%
      as.list()

    for (country_code in allowed_exceptions$country_code) {

      additional_country_codes <-
        allowed_exceptions %>%
        dplyr::filter(country_code == !!country_code) %$%
        id_sudd_prefix %>%
        stringr::str_to_upper()

      ix_country_codes <-
        country_codes %>%
        purrr::map_lgl(~ country_code %in% .x) %>%
        which()

      for (i in ix_country_codes) {
        country_codes[[i]] <- unique(c(country_codes[[i]], additional_country_codes))
      }
    }

    # add dummy indicating if target country codes match
    # TODO: instead of modifying input data, print cli msg with all relevant info!
    data$matches_id_sudd_prefix <-
      data$id_sudd %>%
      stringr::str_extract(pattern = "^..") %>%
      stringr::str_to_upper() %>%
      purrr::map2_lgl(.y = country_codes,
                      .f = ~ .x %in% .y)

    data$matches_id_sudd_prefix[is.na(data$id_sudd)] <- NA
  }

  invisible(data)
}

count_rfrnds

#' Count number of referendums
#'
#' Counts the number of referendums per [`level`](`r url_codebook("level")`) in the Referendum Database (RDB).
#'
#' @inheritParams assemble_query_filter
#' @inheritParams url_api
#'
#' @return A named list with `level` as names and referendum counts as values.
#' @family rfrnd
#' @export
#'
#' @examples
#' # the whole database (excl. drafts)
#' rdb::count_rfrnds()
#' 
#' # only Swiss and Austrian referendums
#' rdb::count_rfrnds(country_code = c("CH", "AT"))
#' 
#' # only Swiss referendums created between 2020 and 2021
#' rdb::count_rfrnds(country_code = "CH",
#'                   date_time_created_min = "2020-01-01",
#'                   date_time_created_max = "2021-01-01")
count_rfrnds <- function(is_draft = FALSE,
                         country_code = NULL,
                         subnational_entity_name = NULL,
                         municipality = NULL,
                         level = NULL,
                         type = NULL,
                         date_min = NULL,
                         date_max = NULL,
                         date_time_created_min = NULL,
                         date_time_created_max = NULL,
                         date_time_last_edited_min = NULL,
                         date_time_last_edited_max = NULL,
                         query_filter = NULL,
                         use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg)) {
  httr::RETRY(verb = "GET",
              url = url_api("referendums/stats",
                            .use_testing_server = use_testing_server),
              query = list(filter = assemble_query_filter(country_code = country_code,
                                                          subnational_entity_name = subnational_entity_name,
                                                          municipality = municipality,
                                                          level = level,
                                                          type = type,
                                                          date_min = date_min,
                                                          date_max = date_max,
                                                          is_draft = is_draft,
                                                          date_time_created_min = date_time_created_min,
                                                          date_time_created_max = date_time_created_max,
                                                          date_time_last_edited_min = date_time_last_edited_min,
                                                          date_time_last_edited_max = date_time_last_edited_max,
                                                          query_filter = query_filter)),
              config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
              times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # parse response
    httr::content(as = "parsed") %$%
    votes %>%
    magrittr::set_names(names(.) %>% dplyr::case_match(.x = ., "sub_national" ~ "subnational", .default = .))
}

rfrnd_exists

#' Test if referendum ID exists
#'
#' Tests whether the referendum with the supplied `id` exists or not.
#'
#' @inheritParams rfrnd
#' @inheritParams url_api
#'
#' @return A logical scalar.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd_exists("6303a4cba52c3995043a8c24")
rfrnd_exists <- function(id,
                         .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                   pkg = this_pkg)) {
  checkmate::assert_string(id,
                           min.chars = 1L)

  httr::GET(url = url_api("referendums", id,
                          .use_testing_server = .use_testing_server),
            config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = .use_testing_server))) %>%
    httr::http_error() %>%
    magrittr::not()
}

assert_vars

#' Assert referendum variables are present
#'
#' Asserts the specified `vars` are present in the supplied referendum `data`. Depending on `vars`, additional integrity checks are performed.
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#' @param vars Names of the variables to check. A character vector.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |> rdb::assert_vars(vars = "country_code")
#' 
#' try(
#'   tibble::tibble(country_code = "AN") |> rdb::assert_vars(vars = "country_code")
#' )
assert_vars <- function(data,
                        vars) {

  vars %>% purrr::walk(~ {

    msg_suffix <- switch(EXPR         = .x,
                         country_code = " with ISO 3166-1 alpha-2 or ISO 3166-3 alpha-4 codes.",
                         "")

    if (!(.x %in% colnames(data))) {
      cli::cli_abort(paste0("{.arg data} must contain a column {.var {.x}}", msg_suffix))
    }

    # run additional content check
    assert_content <- switch(EXPR         = .x,
                             country_code = \(x) {

                               checkmate::assert_vector(x = x,
                                                        .var.name = "data$country_code")
                               check <- checkmate::check_subset(x = as.character(x),
                                                                choices = val_set$country_code)
                               if (!isTRUE(check)) {

                                 expired_codes <- intersect(as.character(x),
                                                            data_iso_3166_3$Alpha_2)
                                 cli::cli_abort(paste0(
                                   "Assertion on {.var data$country_code} failed: ",
                                                       ifelse(length(expired_codes),
                                                              paste0("The following country codes have been deleted from ISO 3166-1 and were moved to ISO ",
                                                                     "3166-3 (former countries) instead: {.val {expired_codes}}"),
                                                              # escape curly braces from checkmate msg
                                                              stringr::str_replace_all(string = check,
                                                                                       pattern = "([\\{\\}])",
                                                                                       replacement = "\\1\\1"))))
                               }
                             },
                             \(x) TRUE)

    assert_content(data[[.x]])
  })

  invisible(data)
}

Referendum metadata

data_codebook

#' RDB Codebook
#'
#' A tibble containing the complete metadata of all [rfrnds()] variables. The codebook below is also available [online](`r url_codebook()`).
#'
#' # Codebook
#'
#' ```r
#' ```
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @aliases codebook
#' @family metadata
#' @export
#'
#' @examples
#' rdb::data_codebook
"data_codebook"

val_lbls

#' Get set of possible *value labels* of referendum data variable
#'
#' Returns a character vector of value labels of a specific [rfrnds()] column, in the same order as [var_vals()], or of length `0` if `var_name`'s values are
#' not restricted to a predefined set or no value labels are defined in the [codebook][data_codebook].
#'
#' @param var_name Variable name present in [`data_codebook`] for which the labels are to be returned. A character scalar.
#' @param incl_affixes Whether or not to add the corresponding `value_label_prefix` and `value_label_suffix` to the returned labels.
#'
#' @return A character vector. Of length `0` if `var_name`'s values are not restricted to a predefined set or no value labels are defined in the
#'   [codebook][data_codebook].
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_lbls("result",
#'               incl_affixes = FALSE)
#' rdb::val_lbls("result")
#' 
#' # Convert the labels to sentence case with trailing dot
#' rdb::val_lbls("result") |> pal::sentenceify()
val_lbls <- function(var_name,
                     incl_affixes = TRUE) {

  var_name <- rlang::arg_match0(arg = var_name,
                                values = rfrnd_cols_order)

  metadata <- data_codebook |> dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name)
  result <- metadata$value_labels |> purrr::list_c(ptype = character())

  if (incl_affixes) {
    if (!is.na(metadata$value_label_prefix)) result <- paste(metadata$value_label_prefix, result)
    if (!is.na(metadata$value_label_suffix)) result <- paste(metadata$value_label_suffix, result)
  }

  result
}

val_scale

#' Get *value scale* of referendum data variables
#'
#' Returns the value scale of the specified [rfrnds()] columns.
#'
#' @param var_names Variable name(s) present in [`data_codebook`] for which the value scale is to be returned. A character vector.
#'
#' @return A character scalar.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_scale("level")
#' paste0("topics_tier_", 1:3) |> rdb::val_scale()
val_scale <- function(var_names) {

  var_name <- rlang::arg_match(arg = var_names,
                               values = rfrnd_cols_order,
                               multiple = TRUE)

  c(data_codebook$value_scale,
    data_codebook$value_scale)[match(x = var_names,
                                     table = c(data_codebook$variable_name,
                                               data_codebook$variable_name_unnested))]
}

var_vals

#' Get set of possible *values* of referendum data variable
#'
#' Returns a vector of the possible predefined values a specific column in [rfrnds()] can hold. If the variable values aren't restricted to a predefined
#' set, `NULL` is returned.
#'
#' @param var_name Variable name present in [`data_codebook`]. A character scalar.
#'
#' @return
#' If `var_name`'s values are restricted to a predefined set and
#' - `var_name` is *not* of type list, a vector of the same type as `var_name`.
#' - `var_name` is of type list, a vector of the same type as the elements of `var_name`.
#'
#' Else `NULL`.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::var_vals("result")
#' rdb::var_vals("id")
var_vals <- function(var_name) {

  var_name <- rlang::arg_match0(arg = var_name,
                                values = rfrnd_cols_order)
  data_codebook |>
    dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name) %$%
    variable_values |>
    unlist()
}

var_name_unnested

#' Get unnested variable names
#'
#' Returns the unnested analogue(s) of the specified variable name(s), which result from [unnesting][unnest_var]. For variable names that do *not* refer to
#' nested list columns, `var_names` is simply returned as-is.
#'
#' @inheritParams prettify_var_names
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family unnest
#' @export
#'
#' @examples
#' rdb::var_name_unnested("inst_object_revision_modes")
#' rdb::var_name_unnested(paste0("topics_tier_", 1:3))
var_name_unnested <- function(var_names) {

  var_names <- rlang::arg_match(arg = var_names,
                                values = data_codebook$variable_name,
                                multiple = TRUE)
  data_codebook |>
    dplyr::filter(variable_name %in% !!var_names) %$%
    variable_name_unnested
}

prettify_var_names

#' Prettify referendum data variable names
#'
#' Converts referendum data variable names to their ready-for-publication version. Variable names that are unknown, i.e. not present in [`data_codebook`]), are
#' left untouched.
#'
#' @param var_names Variable name(s). Those not present in [`data_codebook`] remain untouched. A character vector.
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family prettify
#' @export
#'
#' @examples
#' rdb::prettify_var_names("topics_tier_1")
#' 
#' # also supports unnested var names
#' rdb::prettify_var_names("topic_tier_1")
#'
#' # unknown var names are left untouched
#' rdb::prettify_var_names(var_names = c("topic_tier_1", "topic_tier_99"))
prettify_var_names <- function(var_names) {

  checkmate::assert_character(var_names)

  c(data_codebook$variable_name_print,
    data_codebook$variable_name_unnested_print)[match(x = var_names,
                                                      table = c(data_codebook$variable_name,
                                                                data_codebook$variable_name_unnested))] %|% var_names
}

Referendum topics

The classification of political topics assigned to referendums was developped together with Swissvotes, the Institute of Federalism of the University of Fribourg and the Section Politics of the Federal Statistical Office. Any modifications should be coordinated with these parties.

DESCRIPTION

Functions to work with referendum topics.

data_topics

#' Topic hierarchy
#'
#' A tibble reflecting the complete [referendum topics hierarchy](`r url_codebook("topics")`).
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @family topics
#' @export
#'
#' @examples
#' rdb::data_topics
"data_topics"

topics

#' List available topics
#'
#' Lists the set of available [referendum topics](`r url_codebook("topics")`) on the specified `tiers`.
#'
#' @param tiers Tiers to include topics from. An integerish vector.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::topics(tiers = 1:2)
topics <- function(tiers = 1:3) {

  checkmate::assert_integerish(tiers,
                               lower = 1L,
                               upper = 3L,
                               any.missing = FALSE,
                               unique = TRUE)
  topic_set <- character()

  if (1L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_1)
  }

  if (2L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_2)
  }

  if (3L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_3)
  }

  topic_set %>%
    setdiff(NA_character_) %>%
    unique()
}

hierarchize_topics

NOTES:

#' Hierarchize topics
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Can also be used to simply
#' determine the parent topic(s) of any topic.
#'
#' @param x The topics to hierarchize. Either a character vector of topics or a single-row data frame containing at least the columns `topics_tier_1`,
#'   `topics_tier_2` and `topics_tier_3`.
#'
#' @return A [tibble][tibble::tbl_df] with the columns `topic_tier_1`, `topic_tier_2` and `topic_tier_3`.
#' @family topics
#' @export
#'
#' @examples
#' rdb::hierarchize_topics("territorial questions")
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   dplyr::group_split(id) |>
#'   purrr::map(rdb::hierarchize_topics)
hierarchize_topics <- function(x) {

  test_char <- checkmate::test_character(x, any.missing = FALSE)

  if (!test_char) {

    topic_var_names <- paste0("topics_tier_", 1:3)
    test_df <- checkmate::test_data_frame(x,
                                          min.rows = 1L,
                                          max.rows = 1L)
    has_topic_vars <- all(topic_var_names %in% colnames(x))

    if (!test_df || !has_topic_vars) {
      cli::cli_abort(paste0("{.arg x} must be either a character vector of topics or a single-row data frame containing at least the columns ",
                            "{.field topics_tier_1}, {.field topics_tier_2} and {.field topics_tier_3}."))
    }

    x <- unlist(x[, topic_var_names],
                use.names = FALSE)
  }

  checkmate::assert_subset(x,
                           choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_),
                           empty.ok = TRUE)

  topics_tier_1 <- x[x %in% topics_tier_1_]
  topics_tier_2 <- x[x %in% topics_tier_2_]
  topics_tier_3 <- x[x %in% topics_tier_3_]
  inferred_topics_tier_1 <- infer_topics(topics = c(topics_tier_2, topics_tier_3),
                                         tier = 1L)
  inferred_topics_tier_2 <- infer_topics(topics = topics_tier_3,
                                         tier = 2L)
  non_parent_topics_tier_1 <- setdiff(topics_tier_1, inferred_topics_tier_1)
  non_parent_topics_tier_2 <- setdiff(topics_tier_2, inferred_topics_tier_2)

  # 0. initialize empty tibble
  result <- tibble::tibble(topic_tier_1 = character(),
                           topic_tier_2 = character(),
                           topic_tier_3 = character())

  # 1. add third-tier topics
  result <-
    topics_tier_3 %>%
    purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
                                                            tier = 1L),
                                topic_tier_2 = infer_topics(topics = .x,
                                                            tier = 2L),
                                topic_tier_3 = .x)) %>%
    purrr::list_rbind() %>%
    dplyr::bind_rows(result)

  # 2. add remaining second-tier topics
  result <-
    non_parent_topics_tier_2 %>%
    purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
                                                            tier = 1L),
                                topic_tier_2 = .x,
                                topic_tier_3 = NA_character_)) %>%
    purrr::list_rbind() %>%
    dplyr::bind_rows(result)

  # 3. add remaining first-tier topics
  result %>%
    dplyr::bind_rows(tibble::tibble(topic_tier_1 = non_parent_topics_tier_1,
                                    topic_tier_2 = NA_character_,
                                    topic_tier_3 = NA_character_)) %>%
    # sort result
    dplyr::arrange(topic_tier_1, topic_tier_2, topic_tier_3)
}

hierarchize_topics_fast

#' Hierarchize topics (fast)
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Other than
#' [hierarchize_topics()], this function assumes that the three topic variables are always *complete*, i.e. that no (grand)parent topics of lower-tier topics
#' are missing. This assumption is met by [rfrnds()] and [rfrnd()].
#'
#' @param topics_tier_1 First-tier topics. A character vector.
#' @param topics_tier_2 Second-tier topics. A character vector.
#' @param topics_tier_3 Third-tier topics. A character vector.
#'
#' @inherit hierarchize_topics return
#' @family topics
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") %$%
#'   rdb::hierarchize_topics_fast(unlist(topics_tier_1),
#'                                unlist(topics_tier_2),
#'                                unlist(topics_tier_3))
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   dplyr::group_split(id) |>
#'   purrr::map(~ rdb::hierarchize_topics_fast(unlist(.x$topics_tier_1),
#'                                             unlist(.x$topics_tier_2),
#'                                             unlist(.x$topics_tier_3)))
hierarchize_topics_fast <- function(topics_tier_1 = character(),
                                  topics_tier_2 = character(),
                                  topics_tier_3 = character()) {

  checkmate::assert_subset(topics_tier_1,
                           choices = topics_tier_1_)
  checkmate::assert_subset(topics_tier_2,
                           choices = topics_tier_2_)
  checkmate::assert_subset(topics_tier_3,
                           choices = topics_tier_3_)
  # add tier-3 hierarchy
  result <- data_topics[data_topics$topic_tier_3 %in% topics_tier_3, ]

  # add non-parent tier-2 hierarchy
  topics_tier_2 %<>% setdiff(result$topic_tier_2)
  result %<>% dplyr::bind_rows(unique(data_topics[data_topics$topic_tier_2 %in% topics_tier_2, 1:2]))

  # add non-parent tier-1 topics
  topics_tier_1 %<>% setdiff(result$topic_tier_1)
  result %>% dplyr::bind_rows(tibble::tibble(topic_tier_1 = topics_tier_1))
}

infer_topics

NOTES:

#' Infer higher-tier topics
#'
#' Determines the top-tier (`tier = 1L`) or second-tier (`tier = 2L`) topics corresponding to `topics` in the
#' [hierarchy][data_topics], i.e. either `topics` themselves or their (grand)parent topics.
#'
#' @param topics Topics from which the corresponding (grand)parent topics are to be determined. A factor or character vector.
#' @param tier Tier of the inferred topics. Either `1L` or `2L`.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#'                   tier = 1L)
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#'                   tier = 2L)
#' 
#' # topics of different tiers can mixed in `topics`
#' rdb::infer_topics(topics = c("EU", "environment"),
#'                   tier = 2L)
#' 
#' # but `topics` of a higher tier than `tier` will be ignored
#' rdb::infer_topics(topics = "foreign policy",
#'                   tier = 2L)
infer_topics <- function(topics,
                         tier = 1L) {

  if (is.factor(topics)) topics <- as.character(topics)

  checkmate::assert_subset(topics,
                           choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_))
  checkmate::assert_int(tier,
                        lower = 1L,
                        upper = 2L)

  # inferred from lower-tier topics
  result <- data_topics[data_topics$topic_tier_2 %in% topics | data_topics$topic_tier_3 %in% topics, ]
  result %<>% .[[paste0("topic_tier_", tier)]]

  # plus top-tier topics
  if (tier == 1L) result %<>% c(topics[topics %in% topics_tier_1_])

  unique(result)
}

Augmentation

DESCRIPTION

Functions to augment the RDB referendum data by additional information (columns).

add_former_country_flag

#' Add `is_former_country` flag to referendum data
#'
#' Augments `data` with an additional column `is_former_country` indicating whether or not the column `country_code` holds an [ISO 3166-3
#' alpha-4 code](https://en.wikipedia.org/wiki/ISO_3166-3) referring to a historical country which ceased to exist. `is_former_country` being `FALSE` means
#' `country_code` holds an [ISO 3166-1 alpha-2 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) instead.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds() |>
#'   rdb:::add_former_country_flag() |>
#'   dplyr::select(id,
#'                 starts_with("country_"),
#'                 is_former_country)
add_former_country_flag <- function(data) {

  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    dplyr::mutate(is_former_country = nchar(as.character(country_code)) > 2L) %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["is_former_country"])
}

add_country_code_continual

TODO:

#' Add continual country code to referendum data
#'
#' Augments `data` with an additional column `country_code_continual` holding the current or future [ISO 3166-1
#' alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code of the country where the referendum took place. If the country still exists,
#' `country_code_continual` is identical to `country_code`, otherwise it is the `country_code` of the successor country. If the country was succeeded by
#' multiple countries, the code of the largest one in terms of population is taken.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_country_code_continual() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_code_continual <- function(data) {

  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")

  data %>%
    dplyr::mutate(country_code_continual = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                      .y = add_former_country_flag(data)$is_former_country,
                                                                      .f = ~ {
                                                                        if (.y) {
                                                                          data_iso_3166_3$Alpha_2_new_main[data_iso_3166_3$Alpha_4 == .x]

                                                                        } else {
                                                                          .x
                                                                        }
                                                                      }),
                                                  levels = val_set$country_code_continual,
                                                  ordered = FALSE)) %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_code_continual"])
}

add_country_code_long

#' Add long country code to referendum data
#'
#' Augments `data` with an additional column holding the current or former three-letter [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)
#' code of the country in which the referendum took place (see [ISO 3166-3](https://en.wikipedia.org/wiki/ISO_3166-3_alpha-3) for former country codes).
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_code_long() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_code_long <- function(data) {

  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing long country code
    dplyr::select(-any_of("country_code_long")) %>%
    # add long country code
    dplyr::mutate(country_code_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                 .y = add_former_country_flag(data)$is_former_country,
                                                                 .f = ~ if (.y) {
                                                                   data_iso_3166_3$Alpha_3[data_iso_3166_3$Alpha_4 == .x]
                                                                 } else {
                                                                   data_iso_3166_1$Alpha_3[data_iso_3166_1$Alpha_2 == .x]
                                                                 }),
                                             levels = val_set$country_code_long,
                                             ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_code_long) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_code_long"])
}

add_country_name

#' Add short country name to referendum data
#'
#' Augments `data` with an additional column holding the common English name of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_name() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_name <- function(data) {

  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing country name
    dplyr::select(-any_of("country_name")) %>%
    # add country name
    dplyr::mutate(country_name = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                            .y = add_former_country_flag(data)$is_former_country,
                                                            .f = ~ if (.y) {
                                                              data_iso_3166_3$name_short[data_iso_3166_3$Alpha_4 == .x]
                                                            } else {
                                                              data_iso_3166_1$name_short[data_iso_3166_1$Alpha_2 == .x]
                                                            }),
                                        levels = val_set$country_name,
                                        ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_name) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_name"])
}

add_country_name_long

#' Add long country name to referendum data
#'
#' Augments `data` with an additional column holding the official full English name(s) of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_name_long() |>
#'   dplyr::select(id,
#'                 starts_with("country_name"))
add_country_name_long <- function(data) {

  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing long country name
    dplyr::select(-any_of("country_name_long")) %>%
    # add long country name
    dplyr::mutate(country_name_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                 .y = add_former_country_flag(data)$is_former_country,
                                                                 .f = ~ if (.y) {
                                                                   data_iso_3166_3$name_long[data_iso_3166_3$Alpha_4 == .x]
                                                                 } else {
                                                                   data_iso_3166_1$name_long[data_iso_3166_1$Alpha_2 == .x]
                                                                 }),
                                             levels = val_set$country_name_long,
                                             ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_name_long) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_name_long"])
}

add_period

#' Add period to referendum data
#'
#' Augments `data` with an additional column holding the specified period in which the referendum took place. The new column is named after `period` and its
#' values are always of type integer.
#'
#' ```r
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param period Type of period to add. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_period() |>
#'   dplyr::select(id, date, week)
#'
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_period("year") |>
#'   dplyr::select(id, date, year)
add_period <- function(data,
                       period = c("week", "month", "quarter", "year", "decade", "century")) {

  checkmate::assert_data_frame(data)
  period <- rlang::arg_match(period)
  assert_vars(data = data,
              vars = "date")

  # define necessary date transformations
  get_period <- switch(EXPR    = period,
                       week    = function(x) clock::as_iso_year_week_day(x) %>% clock::get_week(),
                       month   = function(x) clock::get_month(x),
                       quarter = function(x) clock::as_year_quarter_day(x) %>% clock::get_quarter(),
                       year    = function(x) clock::get_year(x),
                       decade  = function(x) (clock::get_year(x) %/% 10L) * 10L,
                       century = function(x) (clock::get_year(x) %/% 100L) * 100L)

  # define lbl parts
  period_lbl <- switch(EXPR    = period,
                       week    = glue::glue("{period} (1\u201353)"),
                       month   = glue::glue("{period} (1\u201312)"),
                       quarter = glue::glue("{period} (1\u20134)"),
                       period)
  data %>%
    # add period
    dplyr::mutate(!!as.symbol(period) := get_period(date)) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls[period])
}

add_turnout

TODO:

#' Add turnout to referendum data
#'
#' @description
#' Augments `data` with an additional column `turnout` containing the voter turnout calculated as:
#'
#' \Sexpr[results=rd, stage=build]{
#'   katex::math_to_rd(tex = "\\\\frac{votes\\\\_yes+votes\\\\_no+votes\\\\_empty+votes\\\\_invalid}{electorate\\\\_total}",
#'                     ascii = "(votes_yes + votes_no + votes_empty + votes_invalid) / electorate_total",
#'                     displayMode = TRUE)
#' }
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `electorate_total`, `votes_yes`, `votes_no`,
#'   `votes_empty` and `votes_invalid`.
#' @param rough Whether to fall back on a "rough" calculation of the turnout in case any of the variables `votes_empty` or `votes_invalid` is unknown (`NA`), or
#'   to be strict and return `NA` in such a case.
#' @param excl_dubious Whether or not to exclude obviously dubious turnout numbers (those > 1.0) by setting them to `NA`. Such numbers stem either from
#'   data errors or (officially) tampered numbers.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' # rough turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_turnout() |>
#'   dplyr::select(id,
#'                 electorate_total,
#'                 starts_with("votes_"),
#'                 turnout)
#'
#' # strict turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_turnout(rough = FALSE) |>
#'   dplyr::select(id,
#'                 electorate_total,
#'                 starts_with("votes_"),
#'                 turnout)
add_turnout <- function(data,
                        rough = TRUE,
                        excl_dubious = TRUE) {

  checkmate::assert_data_frame(data)
  checkmate::assert_flag(rough)
  checkmate::assert_flag(excl_dubious)
  assert_vars(data = data,
              vars = c("electorate_total",
                       "votes_yes",
                       "votes_no",
                       "votes_empty",
                       "votes_invalid"))
  data %>%
    dplyr::rowwise() %>%
    dplyr::mutate(turnout = sum(votes_yes, votes_no, votes_empty, votes_invalid, na.rm = rough) / electorate_total) %>%
    dplyr::ungroup() %>%
    # set dubious turnout numbers to NA if requested
    dplyr::mutate(turnout = dplyr::if_else(excl_dubious & turnout > 1.0,
                                           NA_real_,
                                           turnout)) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(turnout = var_lbls[["turnout"]] %>% ifelse(test = rough,
                                                                             yes = stringr::str_replace(string = .,
                                                                                                        pattern = stringr::fixed("turnout"),
                                                                                                        replacement = "turnout (rough)"),
                                                                             no = .))
}

add_world_regions

#' Add UN world regions to referendum data
#'
#' @description
#' Augments `data` with information about the [United Nations (UN) geoscheme](https://en.wikipedia.org/wiki/United_Nations_geoscheme) on three different
#' grouping tiers based on the [UN M49 area code hierarchy](https://en.wikipedia.org/wiki/UN_M49#Code_lists).
#' 
#' In total, eight different columns are added:
#' - `un_country_code`: UN M49 country code
#' - `un_region_tier_1_code`: UN tier-1 region's M49 area code
#' - `un_region_tier_1_name`: UN tier-1 region's English name
#' - `un_region_tier_2_code`: UN tier-2 region's M49 area code
#' - `un_region_tier_2_name`: UN tier-2 region's English name
#' - `un_region_tier_3_code`: UN tier-3 region's M49 area code
#' - `un_region_tier_3_name`: UN tier-3 region's English name
#' - `un_subregion`: Combinatiorial English UN subregion name which, except for Northern Europe, corresponds to the lowest `un_region_tier_*_name`.
#'
#' Tier-1 regions are the highest, i.e. most aggregated UN regions, commonly referred to as continents. Tier-2 regions are also known as "subregions" and tier-3
#' regions as "sub-subregions".
#'
#' Only part of all UN tier-2 regions are further divided into UN tier-3 regions, meaning that not all countries are part of a UN tier-3 region. If a country
#' doesn't belong to any UN tier-3 region, the corresponding `un_region_tier_3_*` values will simply be `NA`. The `un_subregion` column specifically addresses
#' this issue by providing a uniform combination of `un_region_tier_2_name` and `un_region_tier_3_name`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `country_code` (with [ISO 3166-1
#'   alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) or [ISO 3166-3 alpha-4](https://en.wikipedia.org/wiki/ISO_3166-3) codes).
#' @param add_un_country_code Whether or not to also add a column `un_country_code` holding the UN M49 code of the country in which the referendum took place.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |>
#'   rdb::add_world_regions() |>
#'   dplyr::select(id,
#'                 starts_with("country_"),
#'                 starts_with("un_"))
add_world_regions <- function(data,
                              add_un_country_code = TRUE) {
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  checkmate::assert_flag(add_un_country_code)

  has_country_code_continual <- "country_code_continual" %in% colnames(data)

  # add UN regions to input data
  data %<>%
    # temporarily add required base var `country_code_continual` if necessary
    add_country_code_continual() %>%
    # remove possibly existing UN region vars
    dplyr::select(-any_of(setdiff(colnames(un_regions),
                                  "country_code"))) %>%
    # add UN regions
    dplyr::left_join(y = un_regions,
                     by = c(country_code_continual = "country_code")) %>%
    # ensure every row got at least a UN tier-1 region assigned
    assertr::assert(predicate = assertr::not_na,
                    un_region_tier_1_code) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = purrr::keep_at(x = var_lbls,
                                                           at = c("un_country_code",
                                                                  "un_region_tier_1_code",
                                                                  "un_region_tier_1_name",
                                                                  "un_region_tier_2_code",
                                                                  "un_region_tier_2_name",
                                                                  "un_region_tier_3_code",
                                                                  "un_region_tier_3_name",
                                                                  "un_subregion")))
  # drop vars if necessary/requested
  if (!has_country_code_continual) {
    data %<>% dplyr::select(-country_code_continual)
  }
  if (!add_un_country_code) {
    data %<>% dplyr::select(-un_country_code)
  }

  data
}

add_urls

#' Add various URLs to referendum data
#'
#' Augments `data` with additional columns holding URLs of the specified `types`. The new columns will be named after `types`, prefixed with `url_`, so
#' `types = "sudd"` will add the column `url_sudd` etc.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `id_sudd` for `types = "sudd"` and the columns
#'   `country_code`, `level` and `id_official` for `types = "swissvotes"`.
#' @param types Type(s) of URLs to add. One or more of
#'   `r pal::fn_param_defaults(fn = add_urls, param = "types") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "CH" & level == "national") |>
#'   rdb::add_urls() |>
#'   dplyr::select(id,
#'                 country_code,
#'                 level,
#'                 starts_with("id_"),
#'                 starts_with("url_"))
add_urls <- function(data,
                     types = c("sudd", "swissvotes")) {

  checkmate::assert_data_frame(data)
  types <- rlang::arg_match(arg = types,
                            multiple = TRUE)

  if ("sudd" %in% types) {

    assert_vars(data = data,
              vars = "id_sudd")

    data %<>% dplyr::mutate(url_sudd = dplyr::if_else(is.na(id_sudd),
                                                      NA_character_,
                                                      url_sudd(glue::glue("event.php?id={id_sudd}"))))
  }

  if ("swissvotes" %in% types) {

    assert_vars(data = data,
                vars = c("country_code",
                         "level",
                         "id_official"))

    data %<>% dplyr::mutate(url_swissvotes = dplyr::if_else(country_code == "CH" & level == "national" & !is.na(id_official),
                                                            paste0("https://swissvotes.ch/vote/", id_official), # nolint: paste_linter
                                                            NA_character_))
  }

  data
}

Transformation

DESCRIPTION

Functions to transform the RDB referendum data into other shapes, each with a specific purpose.

as_ballot_dates

#' Transform to ballot-date-level observations
#'
#' Transforms referendum-level observations to ones on the level of ballot date and jurisdiction via [nesting][tidyr::nest] of referendum-level columns. The
#' individual values of all the referendums on a specific ballot date in a specific jurisdiction are preserved in a list column named `rfrnd_data`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param cols_to_retain Additional non-standard columns to be preserved as top-level columns instead of being nested in the list column `rfrnd_data`. They
#'   mustn't vary within ballot-date-level observations. `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' # standard RDB columns are retained as far as possible
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates()
#' 
#' # non-standard columns must be explicitly specified in order to be retained
#' data_rdb <-
#'   rdb::rfrnds(quiet = TRUE) |>
#'     rdb::add_world_regions() |>
#'     dplyr::mutate(region_custom =
#'                     factor(x = dplyr::if_else(country_code == "CH",
#'                                               "Switzerland & Liechtenstein",
#'                                               un_region_tier_1_name),
#'                            levels = c("Switzerland & Liechtenstein",
#'                                       levels(un_region_tier_1_name))) |>
#'                     forcats::fct_relevel("Switzerland & Liechtenstein",
#'                                          after = 3L) |>
#'                     forcats::fct_recode("rest of Europe" = "Europe"))
#'
#' data_rdb |> rdb::as_ballot_dates() |> colnames()
#' data_rdb |> rdb::as_ballot_dates(cols_to_retain = region_custom) |> colnames()
#'
#' # non-standard columns to retain must actually be retainable
#' try(
#'   data_rdb |> rdb::as_ballot_dates(cols_to_retain = title_en)
#' )
as_ballot_dates <- function(data,
                            cols_to_retain = NULL) {

  checkmate::assert_data_frame(data)
  defused_cols_to_retain <- rlang::enquo(cols_to_retain)
  ix_cols_to_retain <- tidyselect::eval_select(expr = defused_cols_to_retain,
                                               data = data)
  names_cols_to_retain <- names(ix_cols_to_retain)

  # ensure date col is present
  if (!("date" %in% colnames(data))) {
    cli::cli_abort("Unable to transform to ballot-date-level data since no {.var {date}} column is present in {.arg data}.")
  }

  # nest data
  cols_to_nest <-
    data |>
    colnames() |>
    setdiff(c(ballot_date_colnames,
              names_cols_to_retain))

  result <- data |> tidyr::nest(rfrnd_data = any_of(cols_to_nest))

  # ensure `cols_to_retain` don't vary within ballot dates
  n_rows_nested <-
    data |>
    dplyr::summarise(n = dplyr::n(),
                     .by = any_of(ballot_date_colnames)) %$%
    n

  if (!identical(purrr::map_int(result$rfrnd_data,
                                nrow),
                 n_rows_nested)) {

    cli::cli_abort(paste0("Retaining the additional non-standard {cli::qty(length(ix_cols_to_retain))} column{?s} {.var {names_cols_to_retain}} while ",
                          "converting to ballot-date-level observations is impossible because {?(some of)} {?this/these} column{?s} var{?ies/y} within ballot ",
                          "dates."))
  }

  result
}

unnest_var

#' Unnest multi-value variable
#'
#' Unnests a multi-value variable of type list to long format. Multi-value variables can contain more than one value per observation and thus break with the
#' [tidy-data convention](https://tidyr.tidyverse.org/articles/tidy-data.html). This function allows to conveniently expand `data` to contain a single `var`
#' value per observation only, thereby increasing the number of observations (i.e. rows).
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `var`.
#' @param var `data` column to unnest. One of the multi-value variables:
#' `r data_codebook |> dplyr::filter(is_multi_valued) %$% variable_name |> pal::wrap_chr(wrap = "\x60") |> pal::as_md_list()`
#'   
#' `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family unnest
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::unnest_var(topics_tier_2)
unnest_var <- function(data,
                       var) {

  # tidy selection and arg check
  checkmate::assert_data_frame(data)
  defused_var <- rlang::enquo(var)
  i_var <- tidyselect::eval_select(expr = defused_var,
                                   data = data)
  name_var <- names(i_var)
  n_var <- length(i_var)

  if (n_var > 1L) {
    cli::cli_abort("Only {.emph one} {.arg var} can be unnested at a time, but {.val {n_var}} were provided.")
  }

  name_var <- rlang::arg_match0(arg = name_var,
                                arg_nm = "var",
                                values =
                                  data_codebook |>
                                  dplyr::filter(is_multi_valued) %$%
                                  variable_name)

  name_var_unnested <- var_name_unnested(name_var)

  data |>
    tidyr::unnest_longer(col = all_of(name_var),
                         values_to = name_var_unnested,
                         keep_empty = TRUE,
                         ptype = character()) |>
    dplyr::mutate(!!as.symbol(name_var_unnested) := factor(x = !!as.symbol(name_var_unnested),
                                                           levels = var_vals(name_var),
                                                           ordered = val_scale(name_var) %in% c("ordinal_ascending", "ordinal_descending")))
}

n_rfrnds

#' Count number of referendums
#'
#' Counts the number of RDB referendums, optionally by additional columns specified via `by`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns specified in `by` (if any).
#' @param by Optional `data` column(s) to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param complete_fcts Whether or not to complete the result with implicitly missing combinations of those columns specified in `by` which are of type factor.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds(by = level)
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::n_rfrnds(by = level)
n_rfrnds <- function(data,
                     by = NULL,
                     complete_fcts = TRUE) {
  # arg checks
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(complete_fcts)

  # tidy selection
  defused_by <- rlang::enquo(by)
  ix_by <- tidyselect::eval_select(expr = defused_by,
                                   data = data)
  names_by <- names(ix_by)

  result <-
    data |>
    dplyr::group_by(!!!rlang::syms(names_by)) |>
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop")

  if (complete_fcts) {
    result %<>% tidyr::complete(!!!rlang::syms(names_by),
                                fill = list(n = 0L))
  }

  result
}

n_rfrnds_per_period

#' Count number of referendums per period
#'
#' Counts the number of RDB referendums per desired period, optionally by additional columns specified via `by`.
#'
#' ```r
#' ```
#'
#' @inheritParams n_rfrnds
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#'   compute the [period column][add_period]), plus the one(s) specified via `by` (if any).
#' @param period Type of period to count referendums by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param fill_gaps Whether or not to add zero-value rows to the result for `period` gaps in `data`.
#' @param period_floor Lower `period` limit up to which gaps are filled. If `NULL`, the lower limit is set to the minimum of `period` present in `data`. Only
#'   relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param period_ceiling Upper `period` limit up to which gaps are filled. If `NULL`, the upper limit is set to the maximum of `period` present in `data`. Only
#'   relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param descending Whether to sort the resulting table by `period` in descending or in ascending order.
#'
#' @inherit add_period details
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period()
#'
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = level)
#' 
#' # without filling gaps
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = level,
#'                            fill_gaps = FALSE)
#'
#' # per decade and by multiple columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = c(level, type),
#'                            period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::n_rfrnds_per_period()
n_rfrnds_per_period <- function(data,
                                by = NULL,
                                period = c("week", "month", "quarter", "year", "decade", "century"),
                                fill_gaps = TRUE,
                                period_floor = NULL,
                                period_ceiling = NULL,
                                descending = FALSE) {
  # arg checks
  checkmate::assert_data_frame(data)
  period <- rlang::arg_match(period)
  checkmate::assert_flag(fill_gaps)
  checkmate::assert_int(period_floor,
                        null.ok = TRUE)
  checkmate::assert_int(period_ceiling,
                        null.ok = TRUE)
  checkmate::assert_flag(descending)

  # tidy selection
  defused_by <- rlang::enquo(by)
  ix_by <- tidyselect::eval_select(expr = defused_by,
                                   data = data)
  names_by <- names(ix_by)

  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }

  result <-
    data |>
    dplyr::group_by(!!!rlang::syms(names_by), !!as.symbol(period)) |>
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop")
  # fill gaps
  # (only if input data (and thus result) is non-empty since otherwise we can't infer a sensible period range for year/decade/century)
  if (fill_gaps && nrow(result)) {

    # define sensible min/max period vals
    is_recurring_period <- period %in% c("week", "month", "quarter")
    period_step <- switch(EXPR = period,
                          century = 100L,
                          decade = 10L,
                          1L)
    period_min <- period |> pal::when(is.null(period_floor) && !is_recurring_period ~ pal::safe_min(data[[.]]),
                                      !is_recurring_period ~ period_floor,
                                      ~ 1L)
    period_max <- period |> pal::when(is.null(period_ceiling) && !is_recurring_period ~ pal::safe_max(data[[.]]),
                                      !is_recurring_period ~ period_ceiling,
                                      . == "week" ~ 53L,
                                      . == "month" ~ 12L,
                                      . == "quarter" ~ 4L)
    period_seq <- seq(from = (period_min %/% period_step) * period_step,
                      to = period_max,
                      by = period_step)
    result %<>%
      # reduce to results `>= period_floor` and `<= period_ceiling`
      dplyr::filter(!!as.symbol(period) %in% period_seq) %>%
      # convert period col to fct, so `tidyr::complete()` knows the missing vals
      dplyr::mutate(!!as.symbol(period) := factor(x = !!as.symbol(period),
                                                  levels = period_seq,
                                                  ordered = TRUE)) %>%
      tidyr::complete(!!!rlang::syms(names_by), !!as.symbol(period),
                      fill = list(n = 0L)) %>%
      # convert period col back to int
      dplyr::mutate(!!as.symbol(period) := as.integer(as.character(!!as.symbol(period))))
  }

  result |> dplyr::arrange(if (descending) dplyr::desc(!!as.symbol(period)) else !!as.symbol(period))
}

prettify_col_names

#' Prettify referendum data column names
#'
#' Renames referendum data column names to be ready for publication. Useful e.g. to create tables or visualizations.
#'
#' Note that
#' - column names unknown to this function are not changed.
#' - column *labels* are [removed][labelled::remove_var_label] so they aren't inadvertently used instead of the column *names* (i.a. relevant for [gt][gt::gt]
#'   [>= 0.9.0](https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0)).
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family prettify
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::prettify_col_names()
prettify_col_names <- function(data) {

  data |>
    dplyr::rename_with(.cols = everything(),
                       .fn = prettify_var_names) |>
    # we remove the var lbls so gt doesn't automatically pick them up instead of the column names
    # cf. https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0
    labelled::remove_var_label()
}

Visualization

DESCRIPTION

Functions to visualize the RDB referendum data (using plotly).

plot_rfrnd_share_per_period

#' Referendum share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendums per period, grouped by another column.
#'
#' ```r
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#'   compute the [period column][add_period]), plus the column specified in `by`.
#' @param by `data` column to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param period Type of period to count referendums by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE,
#'             max_cache_age = "1 year") |>
#'   rdb::plot_rfrnd_share_per_period(by = "level")
plot_rfrnd_share_per_period <- function(data,
                                        by,
                                        period = c("week", "month", "quarter", "year", "decade", "century")) {
  period <- rlang::arg_match(period)

  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }

  # tidy selection
  defused_by <- rlang::enquo(by)
  i_by <- tidyselect::eval_select(expr = defused_by,
                                  data = data)
  n_by <- length(i_by)
  name_by <- names(i_by)

  # ensure `x` is < 2
  if (n_by > 1L) {
    cli::cli_abort("Only {.emph one} column can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }

  data %>%
    # calculate freqs
    dplyr::group_by(!!as.symbol(name_by), !!as.symbol(period)) %>%
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop") %>%
    # plot
    plot_share_per_period(x = name_by,
                          period = period)
}

plot_topic_segmentation

NOTES:

TODO:

#' Topic segmentation sunburst chart
#'
#' Creates a [Plotly sunburst chart](https://plotly.com/r/sunburst-charts/) that visualizes the hierarchical segmentation of referendum topic occurences.
#'
#' A *topic lineage* is the hierarchical compound of a `topic_tier_1` and optionally a grandchild `topic_tier_3` and/or a child `topic_tier_2`.
#'
#' Note that topics can be assigned on any tier to referendums (i.e. in one case, a `topic_tier_1` plus a child `topic_tier_2` is assigned, and in another case
#' only a `topic_tier_1` without any further child topic).
#'
#' Furthermore, it should be noted that not every `topic_tier_2` has potential child `topic_tier_3`s. See the [full topic hierarchy](`r url_codebook("topics")`)
#' for details.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `topics_tier_1`, `topics_tier_2` and
#'   `topics_tier_3`.
#' @param method Applied method to count the number of topic occurences. One of
#'   - **`"per_rfrnd"`**: All *referendums* have the same weight. For a referendum with n different topics of the same tier, every topic is counted 1/n.
#'   - **`"per_topic_lineage"`**: All *topic lineages* have the same weight. For a referendum with n different topics of the same tier, every topic is fully
#'     counted, meaning that e.g. a referendum with three different tier-3 topics has a tripled impact on the result compared to a referendum that only has a
#'     single one. Noticeably faster than `"per_rfrnd"`.
#'   - **`"naive"`**: Naive procedure which doesn't properly reflect topic proportions on tier 2 and 3. Based on the (wrong) assumptions that a) all referendums
#'     have the same number of topic lineages assigned and b) topics are not deduplicated per tier. By far the fastest method, though.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' # count each referendum equally
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "per_rfrnd")
#'
#' # count each topic lineage equally
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "per_topic_lineage")
#'
#' # naive count (way faster, but with misleading proportions on tier 2 and 3)
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "naive")
plot_topic_segmentation <- function(data,
                                    method = c("per_rfrnd", "per_topic_lineage", "naive")) {

  method <- rlang::arg_match(method)
  rlang::check_installed("plotly",
                         reason = pal::reason_pkg_required())
  is_naive <- method == "naive"

  # assemble necessary data structure
  if (is_naive) {

    ## naively
    data_plot <-
      dplyr::bind_rows(
        data$topics_tier_1 %>%
          topic_frequency(tier = 1L) %>%
          dplyr::mutate(parent_topic = ""),
        data$topics_tier_2 %>%
          topic_frequency(tier = 2L) %>%
          dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
                                                      .f = infer_topics,
                                                      tier = 1L)),
        data$topics_tier_3 %>%
          topic_frequency(tier = 3L) %>%
          dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
                                                      .f = infer_topics,
                                                      tier = 2L))
      ) %>%
      dplyr::rename(value = n)

  } else {

    is_per_rfrnd <- method == "per_rfrnd"
    data_plot <- data %>% dplyr::select(starts_with("topics_tier_"))

    ### per rfrnd, i.e. in fractional numbers
    if (is_per_rfrnd) {

      data_plot %<>%
        purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
                                              unlist(..2),
                                              unlist(..3)) %>%
                      dplyr::mutate(value = 1.0 / nrow(.))) %>%
        purrr::list_rbind()

      ### per topic lineage
    } else {

      data_plot %<>%
        purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
                                              unlist(..2),
                                              unlist(..3))) %>%
        purrr::list_rbind() %>%
        dplyr::mutate(value = 1.0)
    }

    data_plot <-
      dplyr::bind_rows(
        data_plot %>%
          dplyr::group_by(topic_tier_1) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_1,
                        parent_topic = "",
                        value,
                        .keep = "none"),
        data_plot %>%
          dplyr::group_by(topic_tier_2) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_2,
                        parent_topic =
                          topic %>%
                          purrr::map_chr(~ {
                            if (is.na(.x)) {
                              NA_character_
                            } else {
                              infer_topics(topics = .x,
                                           tier = 1L)
                            }}),
                        value,
                        .keep = "none"),
        data_plot %>%
          dplyr::group_by(topic_tier_3) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_3,
                        parent_topic =
                          topic %>%
                          purrr::map_chr(~ {
                            if (is.na(.x)) {
                              NA_character_
                            } else {
                              infer_topics(topics = .x,
                                           tier = 2L)
                            }}),
                        value,
                        .keep = "none")
      ) %>%
      dplyr::filter(!is.na(topic))

    ### add NA rows filling the gaps
    data_plot %<>%
      dplyr::filter(parent_topic != "") %>%
      dplyr::group_by(parent_topic) %>%
      dplyr::summarise(value_total = sum(value),
                       .groups = "drop") %>%
      dplyr::mutate(topic = "<i>not defined</i>",
                    value = purrr::map2_dbl(.x = value_total,
                                            .y = parent_topic,
                                            .f = ~
                                              data_plot %>%
                                              dplyr::filter(topic == .y) %$%
                                              value %>%
                                              checkmate::assert_number() %>%
                                              magrittr::subtract(.x)),
                    parent_topic,
                    .keep = "none") %>%
      dplyr::bind_rows(data_plot, .) %>%
      dplyr::mutate(id = ifelse(topic == "<i>not defined</i>",
                                paste0("NA_", parent_topic),
                                topic))
  }

  # create plot
  plotly::plot_ly(data = data_plot,
                  type = "sunburst",
                  labels = ~topic,
                  parents = ~parent_topic,
                  ids = if (is_naive) ~topic else ~id,
                  values = ~value,
                  branchvalues = ifelse(is_naive,
                                        "remainder",
                                        "total"),
                  insidetextorientation = "radial")
}

plot_topic_share_per_period

#' Topic share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendum topic occurences per period.
#'
#' ```r
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `topics_tier_#` of the specified `tier`.
#' @param tier Tier of the topics variable to plot. `1L`, `2L` or `3L`.
#' @param period Type of period to count topics by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param weight_by_n_rfrnds Whether or not to weight topic occurences by number of referendums. If `TRUE`, for a referendum with n different topics of the same
#'   `tier`, every topic is counted 1/n.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_share_per_period(period = "decade")
plot_topic_share_per_period <- function(data,
                                        tier = 1L,
                                        period = c("week", "month", "quarter", "year", "decade", "century"),
                                        weight_by_n_rfrnds = TRUE) {
  checkmate::assert_int(tier,
                        lower = 1L,
                        upper = 3L)
  period <- rlang::arg_match(period)
  checkmate::assert_flag(weight_by_n_rfrnds)

  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }

  # ensure topics var is present
  var_name_topics <- glue::glue("topics_tier_{tier}")
  var_name_topic <- var_name_unnested(var_name_topics)

  if (!(var_name_topics %in% colnames(data))) {
    cli::cli_abort("Required column {.var {var_name_topics}} is missing from {.arg data}.")
  }

  data |>
    # add proper count var
    dplyr::mutate(count = if (weight_by_n_rfrnds) 1.0 / lengths(!!as.symbol(var_name_topics)) else 1.0) |>
    # unnest topics var
    unnest_var(var = var_name_topics) |>
    # calculate freqs
    dplyr::group_by(!!as.symbol(period), !!as.symbol(var_name_topic)) |>
    dplyr::summarise(n = sum(count),
                     .groups = "drop") |>
    # plot
    plot_share_per_period(x = var_name_topic,
                          period = period)
}

ggplot_streamgraph

TODO:

#' Period streamgraph
#'
#' Creates a ggplot2 [streamgraph](https://en.wikipedia.org/wiki/Streamgraph) based on [ggstream::geom_stream()] using the specified `period` as time
#' resolution.
#'
#' @inheritParams n_rfrnds_per_period
#' @param by `data` column to group by before counting number of referendums.
#' @param stacking Stacking type. One of
#'   - `"mirror"` to stack absolute values symmetrically around the zero line on the x-axis.
#'   - `"ridge"` to stack absolute values from the zero line on the x-axis upwards.
#'   - `"proportional"` to stack relative values that add up to 100 %.
#' @param bandwidth Kernel density estimation bandwidth. A numeric scalar.
#' @param y_lim Optional Y axis range limit. Only relevant if `stacking` is one of `"mirror"` or `"ridge"`. The limit applies to the upper side if
#'   `stacking = "ridge"` and to both sides if `stacking = "mirror"`. A numeric scalar equal or greater than zero.
#' @param color_palette Color palette function that when called with a single integer argument returns that many color codes.
#' @param prune_legend Whether or not to drop `by` factor levels which don't occur in `data` from the legend. Only has an effect if `by` is of type factor.
#'
#' @return `r pkgsnip::return_lbl("ggplot2_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' data_rdb <- rdb::rfrnds(quiet = TRUE)
#'
#' rdb::ggplot_streamgraph(data = data_rdb,
#'                         by = topics_tier_1,
#'                         period = "year")
#'
#' # you can specify a different color palette
#' rdb::ggplot_streamgraph(data = data_rdb,
#'                         by = topics_tier_1,
#'                         period = "year",
#'                         color_palette = viridisLite::viridis)
#'
#' # by default, only factor levels which occur in data are included in the legend
#' data_rdb |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::ggplot_streamgraph(by = topics_tier_1,
#'                           period = "decade")
#'
#' # but you can include *all* factor levels in the legend if you want to
#' data_rdb |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::ggplot_streamgraph(by = topics_tier_1,
#'                           period = "decade",
#'                           prune_legend = FALSE)
ggplot_streamgraph <- function(data,
                               by,
                               period = c("week", "month", "quarter", "year", "decade", "century"),
                               stacking = c("mirror", "ridge", "proportional"),
                               bandwidth = 0.75,
                               y_lim = NULL,
                               color_palette = viridisLite::turbo,
                               prune_legend = TRUE) {

  stacking <- rlang::arg_match(stacking)
  checkmate::assert_number(bandwidth)
  checkmate::assert_number(y_lim,
                           lower = 0.0,
                           finite = TRUE,
                           null.ok = TRUE)
  checkmate::assert_function(color_palette)
  checkmate::assert_flag(prune_legend)
  rlang::check_installed("ggplot2",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("ggstream",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("scales",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("viridisLite",
                         reason = pal::reason_pkg_required())
  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  names_by <- names(ix_by)
  name_by <- names_by
  n_by <- length(ix_by)

  if (n_by > 1L) {
    cli::cli_abort("Only {.emph one} data column can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }

  # unnest list col if necessary
  if (is.list(data[[ix_by]])) {
    data %<>% unnest_var(var = tidyselect::all_of(names_by))
    name_by <- var_name_unnested(names_by)
  }

  result <- n_rfrnds_per_period(data = data,
                                period = period,
                                by = !!as.symbol(name_by))

  # create stable color mapping based on lvls of `by`
  # TODO: this probably could be removed and we could instead pass on a *discrete fill scale* fn (e.g. `\(...) viridis::scale_fill_viridis(..., discrete = T)`)
  #       to `ggplot2::scale_fill_discrete(type = )` once the below mentioned issue #23 is resolved
  if (is.factor(data[[name_by]])) {
    vals_by <- levels(data[[name_by]])
  } else {
    vals_by <- sort(unique(data[[name_by]]))
  }
  colors_by <- color_palette(length(vals_by))
  names(colors_by) <- vals_by

  # we need to remove zero-n rows (plus the corresponding colors) since ggstream doesn't handle them properly
  # TODO: remove this workaround once [issue #23](https://github.com/davidsjoberg/ggstream/issues/23) is fixed
  ## if we prune the legend, we need to prune the fill colors, too (otherwise, colors aren't matched properly)
  if (prune_legend) {
    colors_by %<>% magrittr::extract(names(.) %in% unique(result[[name_by]][result$n > 0L]))
  }
  result %<>%
    dplyr::group_by(!!as.symbol(name_by)) %>%
    dplyr::group_modify(\(d, k) if (sum(d$n) > 0L) d else d[0L, ]) %>%
    dplyr::ungroup()

  result <-
    ggplot2::ggplot(data = result,
                    mapping = ggplot2::aes(x = !!as.symbol(period),
                                           y = n,
                                           fill = !!as.symbol(name_by))) +
    ggstream::geom_stream(type = stacking,
                          n_grid = 10000L,
                          show.legend = TRUE,
                          bw = bandwidth) +
    ggplot2::scale_fill_discrete(type = colors_by,
                                 name = prettify_var_names(name_by),
                                 drop = prune_legend) +
    ggplot2::xlab(ggplot2::element_blank()) +
    ggplot2::ylab(ggplot2::element_blank())

  if (stacking == "ridge" && !is.null(y_lim)) {
    result <- result + ggplot2::coord_cartesian(ylim = c(0.0, y_lim),
                                                default = TRUE)

  } else if (stacking == "mirror") {

    if (!is.null(y_lim)) {
      result <- result + ggplot2::coord_cartesian(ylim = c(-y_lim, y_lim),
                                                  default = TRUE)
    }

    # make y scale absolute in both directions
    result <- result + ggplot2::scale_y_continuous(labels = \(x) abs(x))

  } else if (stacking == "proportional") {
    result <- result + ggplot2::scale_y_continuous(labels = scales::label_percent(suffix = "\u2009%"))
  }

  result
}

Tabulation

DESCRIPTION

Functions to create analyses in tabular form (using gt tables).

tbl_n_rfrnds

#' Tabulate number of referendums
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums, optionally counted `by` up to three additional variables.
#'
#' The first variable specified in `by` will be reflected in additional rows in the resulting table, i.e. expand it vertically. The second and third variables
#' will be reflected in additional columns, i.e. expand it horizontally.
#'
#' @inheritParams n_rfrnds
#' @param by Up to three additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param n_rows Maximum number of rows to be included in the resulting table. All the rows exceeding that limit are replaced by a single row of ellipses. An
#'   integer scalar or `Inf` for an unlimited number of rows.
#' @param order How to order the rows of the resulting table. One of
#'   - `"ascending"` to sort in ascending order by the number of referendums,
#'   - `"descending"` to sort in descending order by the number of referendums, or
#'   - `NULL` to leave the sorting unchanged.
#' @param incl_row_head Whether or not to include a row heading with the [prettified][prettify_var_names] name of the first `by` variable.
#' @param incl_col_head Whether or not to include column headings (in the table's [stub][gt::tab_stubhead]) with the [prettified][prettify_var_names] names of
#'   the second and third `by` variables.
#' @param add_total_row Whether or not to add a summary row at the very end of the table containing column totals. If `NULL`, a total row is added only if
#'   at least one column is provided in `by`.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#'   multiple columns are provided in `by`.
#' @param lbl_total_row Label of the summary row containing column totals. Only relevant if `add_total_row = TRUE`. A character scalar. [gt::md()] or
#'   [gt::html()] can be used to format the label text.
#' @param lbl_total_col Label of the summary column containing row totals. Only relevant if `add_total_col = TRUE`. A character scalar. [gt::md()] or
#'   [gt::html()] can be used to format the label text.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds()
#'
#' # grouped by a single column
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = level)
#'
#' # grouped by two columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = c(type, level))
#'
#' # grouped by three columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = c(country_name, level, type),
#'                     n_rows = 10L,
#'                     order = "descending")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::tbl_n_rfrnds(by = c(country_name, level),
#'                     n_rows = 10L,
#'                     order = "descending")
tbl_n_rfrnds <- function(data,
                         by = NULL,
                         complete_fcts = TRUE,
                         n_rows = Inf,
                         order = NULL,
                         incl_row_head = TRUE,
                         incl_col_head = TRUE,
                         add_total_row = NULL,
                         add_total_col = NULL,
                         lbl_total_row = gt::md("**Total**"),
                         lbl_total_col = gt::md("**Total**")) {

  if (!isTRUE(is.infinite(n_rows))) {
    checkmate::assert_int(n_rows,
                          lower = 1L)
  }
  if (!is.null(order)) {
    rlang::arg_match0(arg = order,
                      values = c("ascending", "descending"))
  }
  checkmate::assert_flag(incl_row_head)
  checkmate::assert_flag(incl_col_head)
  checkmate::assert_flag(add_total_row,
                         null.ok = TRUE)
  checkmate::assert_flag(add_total_col,
                         null.ok = TRUE)
  checkmate::assert_string(lbl_total_row)
  checkmate::assert_string(lbl_total_col)
  rlang::check_installed("gt",
                         version = "0.9.0",
                         reason = pal::reason_pkg_required())

  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  n_by <- length(ix_by)
  has_by <- n_by > 0L
  has_by_rest <- n_by > 1L

  if (n_by > 3L) {
    cli::cli_abort("At most {.emph three} data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }

  if (is.null(add_total_row)) {
    add_total_row <- has_by
  }

  if (is.null(add_total_col)) {
    add_total_col <- has_by_rest
  }

  by_colname_1st <- names(ix_by[1L]) %|% ":no_by"
  by_colnames_rest <- names(ix_by[-1L])

  result <-
    data |>
    n_rfrnds(by = {{ by }},
             complete_fcts = complete_fcts) |>
    dplyr::mutate(dplyr::across(where(is.factor),
                                ~ forcats::fct_na_value_to_level(f = .x,
                                                                 level = "N/A"))) |>
    pal::when(has_by_rest ~ tidyr::pivot_wider(data = .,
                                               names_from = by_colnames_rest,
                                               names_sort = TRUE,
                                               values_from = n),
              ~ .) |>
    dplyr::mutate(`:total` = rowSums(x = dplyr::pick(-any_of(by_colname_1st)),
                                     na.rm = TRUE),
                  dplyr::across(everything(),
                                ~ tidyr::replace_na(data = .x,
                                                    replace = 0L)),
                  # TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
                  dplyr::across(any_of(by_colname_1st),
                                as.character)) |>
    pal::when(isTRUE(order == "descending") ~ dplyr::arrange(.data = .,
                                                             -`:total`),
              isTRUE(order == "ascending") ~ dplyr::arrange(.data = .,
                                                            `:total`),
              ~ .) |>
    pal::when(!add_total_col ~ dplyr::select(.data = .,
                                             -`:total`),
              ~ .)
  total_n <-
    result |>
    dplyr::select(-any_of(by_colname_1st)) |>
    purrr::map_int(\(x) sum(x, na.rm = TRUE))

  chop_rows <- n_rows < nrow(result)

  if (chop_rows) {

    result %<>%
      utils::head(n = n_rows) %>%
      # add placeholder/ellipsis row
      dplyr::mutate(dplyr::across(everything(),
                                  as.character)) %>%
      rbind("\u2026")
  }

  # NOTE: if we chop rows (and have multiple n cols), it's impossible to create our total row using `gt::grand_summary_rows()` since its `fns` arg only
  #       receives column content, no metadata; thus we create the total row manually
  if (add_total_row && chop_rows) {
    result %<>% rbind(c(lbl_total_row, total_n))
  }

  result %<>% gt::gt(rowname_col = ifelse(has_by,
                                          by_colname_1st,
                                          "rowname"),
                     process_md = TRUE)

  if (incl_row_head && has_by) {
    result %<>% gt::tab_row_group(label =
                                    by_colname_1st |>
                                    prettify_var_names() |>
                                    pal::wrap_chr(wrap = "*") |>
                                    gt::md(),
                                  rows = tidyselect::everything(),
                                  id = by_colname_1st)
  }

  if (add_total_col) {
    result %<>% gt::cols_label(`:total` = lbl_total_col)
  }

  if (add_total_row) {
    if (chop_rows) {
      result %<>% gt::tab_style(style = gt::cell_borders(sides = "top",
                                                         color = "#D3D3D3",
                                                         style = "double",
                                                         weight = gt::px(6L)),
                                locations = list(gt::cells_body(rows = n_rows + 2L),
                                                 gt::cells_stub(rows = n_rows + 2L)))

    } else {
      result %<>% gt::grand_summary_rows(fns = list(id = "total", label = "DUMMY") ~ sum(., na.rm = TRUE),
                                         fmt = ~ gt::fmt_integer(., sep_mark = ""))

      # TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295)
      #       is fixed.
      result$`_summary`[[1L]]$fns$total$label <- lbl_total_row
    }
  }

  if (incl_col_head && has_by_rest) {
    result %<>%
      gt::tab_stubhead(label =
                         by_colnames_rest |>
                         prettify_var_names() |>
                         pal::wrap_chr(wrap = "*") |>
                         paste0(collapse = "<br><br>") |>
                         gt::md()) %>%
      gt::tab_style(style = gt::cell_text(align = "right",
                                          v_align = "middle"),
                    locations = gt::cells_stubhead())
  }

  result |>
    gt::tab_spanner_delim(delim = "_",
                          split = "last") |>
    # right-align cols; required since they're of type chr if we chopped rows
    gt::cols_align(align = "right",
                   columns = -tidyselect::any_of(by_colname_1st)) |>
    # hide table header if there are less than two `by` cols
    pal::when(!has_by_rest ~ gt::tab_options(data = .,
                                             column_labels.hidden = TRUE),
              ~ .)
}

tbl_n_rfrnds_per_period

#' Tabulate number of referendums per period
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums per period, optionally counted `by` up to two additional columns.
#'
#' ```r
#' ```
#'
#' @inheritParams n_rfrnds_per_period
#' @inheritParams tbl_n_rfrnds
#' @param by Up to two additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param squeeze_zero_rows Whether or not to compress consecutive zero-sum rows into single period span rows.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#'   `by` is non-empty.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(period = "decade")
#'
#' # grouped by a single additional column
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(by = level,
#'                                period = "decade")
#'
#' # grouped by two addtional columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(by = c(level, type),
#'                                period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::tbl_n_rfrnds_per_period(period = "decade")
tbl_n_rfrnds_per_period <- function(data,
                                    by = NULL,
                                    period = c("week", "month", "quarter", "year", "decade", "century"),
                                    fill_gaps = TRUE,
                                    period_floor = NULL,
                                    period_ceiling = NULL,
                                    squeeze_zero_rows = TRUE,
                                    descending = TRUE,
                                    add_total_row = TRUE,
                                    add_total_col = NULL,
                                    lbl_total_row = gt::md("**Total**"),
                                    lbl_total_col = lbl_total_row) {

  period <- rlang::arg_match(period)
  checkmate::assert_flag(squeeze_zero_rows)
  checkmate::assert_flag(add_total_row)
  checkmate::assert_flag(add_total_col,
                         null.ok = TRUE)
  checkmate::assert_string(lbl_total_row)
  checkmate::assert_string(lbl_total_col)
  rlang::check_installed("gt",
                         version = "0.9.0",
                         reason = pal::reason_pkg_required())

  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  n_by <- length(ix_by)
  has_by <- n_by > 0L

  if (n_by > 2L) {
    cli::cli_abort("At most {.emph two} additional data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }

  if (is.null(add_total_col)) {
    add_total_col <- has_by
  }

  by_names_print <- ifelse(has_by,
                           names(ix_by) %>%
                             prettify_var_names() %>%
                             pal::wrap_chr(wrap = "*") %>%
                             paste0(collapse = "<br><br>"),
                           "")
  data_to_plot <-
    data %>%
    n_rfrnds_per_period(period = period,
                        by = {{ by }},
                        fill_gaps = fill_gaps,
                        period_floor = period_floor,
                        period_ceiling = period_ceiling,
                        descending = descending) %>%
    dplyr::mutate(dplyr::across(where(is.factor),
                                ~ forcats::fct_na_value_to_level(f = .x,
                                                                 level = "N/A"))) %>%
    pal::when(has_by ~ tidyr::pivot_wider(data = .,
                                          names_from = {{ by }},
                                          names_sort = TRUE,
                                          values_from = n),
              ~ .) %>%
    pal::when(add_total_col ~ dplyr::mutate(.data = .,
                                            `:total` = rowSums(x = dplyr::pick(-!!as.symbol(period)),
                                                               na.rm = TRUE)),
              ~ .) %>%
    dplyr::mutate(dplyr::across(everything(),
                                ~ tidyr::replace_na(data = .x,
                                                    replace = 0L)),
                  # TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
                  dplyr::across(all_of(period),
                                as.character))

  # squeeze consecutive all-0 rows into single row if requested
  ix <- integer()
  ix_rm <- integer()

  if (squeeze_zero_rows) {

    for (i in pal::safe_seq_len(nrow(data_to_plot))) {

      if (data_to_plot %>%
          dplyr::select(-any_of(c(period, ":total"))) %>%
          magrittr::extract(i, ) %>%
          sum() %>%
          magrittr::equals(0L)) {

        ix %<>% c(i)

      } else {
        if (length(ix) > 1L) {

          data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
          ix_rm %<>% c(ix[-1L])
        }
        ix <- integer()
      }
    }

    if (length(ix) > 1L) {
      data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
      ix_rm %<>% c(ix[-1L])
    }
  }

  # add "s" to decade/century period values
  if (period %in% c("decade", "century")) {
    data_to_plot[[period]] %<>% stringr::str_replace_all(pattern = "(\\d+)",
                                                         replacement = "\\1s")
  }

  data_to_plot %>%
    dplyr::filter(!(dplyr::row_number() %in% ix_rm)) %>%
    gt::gt(rowname_col = period) %>%
    pal::when(add_total_col ~ gt::cols_label(.data = .,
                                             `:total` = lbl_total_col),
              ~ .) %>%
    pal::when(add_total_row ~ {

      result <- gt::grand_summary_rows(data = .,
                                       fns = list(label = "DUMMY", id = "total") ~ sum(., na.rm = TRUE),
                                       fmt = ~ gt::fmt_integer(., sep_mark = ""))

      # TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295) is fixed.
      result$`_summary`[[1L]]$fns$total$label <- lbl_total_row

      result
    },
              ~ .) %>%
    gt::tab_spanner_delim(delim = "_",
                          split = "last") %>%
    gt::tab_stubhead(label = gt::md(by_names_print)) %>%
    gt::tab_style(style = gt::cell_text(align = "right",
                                        v_align = "middle"),
                  locations = gt::cells_stubhead())
}

sudd.ch scraping

The list.php endpoint supports

DESCRIPTION

Functions to scrape the data from the sudd.ch database.

list_sudd_territories

NOTES:

#' List referendum territories from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum territories from sudd.ch](https://sudd.ch/list.php?mode=allareas), which means each `country_name_de` together with all the associated
#' `territory_name_de`, their search URL and their number of occurrences.
#'
#' Note that the values in the `territory_name_de` column returned by this function can differ from those in the `territory_name_de` column of
#' [sudd_rfrnds()] and [list_sudd_rfrnds()]. The latter is often more extensive and usually includes the `country_name_de` (in parentheses) for
#' subnational referendums.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_territories()
list_sudd_territories <- function() {

  rows <-
    httr::RETRY(verb = "GET",
                url = url_sudd("list.php"),
                query = list(mode = "allareas"),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_elements(css = "main table tr") %>%
    purrr::map(rvest::html_elements,
               css = "td")

  col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
  col_2 <- rows %>% purrr::map(\(x) if (length(x) > 1L) x[[2L]] else xml2::as_xml_document(list()))
  col_3 <- rows %>% purrr::map(\(x) if (length(x) > 2L) x[[3L]] else xml2::as_xml_document(list()))

  tibble::tibble(country_name_de = purrr::map_chr(col_1, rvest::html_text),
                 territory_name_de = purrr::map_chr(col_2, rvest::html_text),
                 filter_url = purrr::map_chr(col_2,
                                             ~ {
                                               if (length(.x) > 0L) {
                                                 .x %>%
                                                   rvest::html_element(css = "a") %>%
                                                   rvest::html_attr(name = "href") %>%
                                                   url_sudd()
                                               } else {
                                                 NA_character_
                                               }
                                             }),
                 n = purrr::map_chr(col_3, rvest::html_text)) %>%
    dplyr::mutate(country_name_de = dplyr::if_else(stringr::str_detect(string = country_name_de,
                                                                       pattern = "^\\s*$"),
                                                   NA_character_,
                                                   country_name_de)) %>%
    tidyr::fill(country_name_de,
                .direction = "down") %>%
    dplyr::filter(!dplyr::if_all(c(territory_name_de, n),
                                 is.na))
}

list_sudd_titles

#' List referendum titles from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum titles from sudd.ch](https://sudd.ch/list.php?mode=alltopics), together with their search URLs and number of occurrences.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_titles() |> dplyr::filter(n > 1)
list_sudd_titles <- function() {

  rows <-
    httr::RETRY(verb = "GET",
                url = url_sudd("list.php"),
                query = list(mode = "alltopics"),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_elements(css = "main table tr") %>%
    purrr::map(rvest::html_elements,
               css = "td")

  col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
  col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)

  tibble::tibble(title_de = col_1 %>% purrr::map_chr(rvest::html_text),
                 filter_url = col_1 %>% purrr::map_chr(~ .x %>%
                                                         rvest::html_element(css = "a") %>%
                                                         rvest::html_attr(name = "href") %>%
                                                         url_sudd()),
                 n = col_2 %>% purrr::map_chr(rvest::html_text))
}

list_sudd_rfrnds

#' List referendum data from [sudd.ch](https://sudd.ch/)
#'
#' @description
#' Lists the referendum data from [sudd.ch](https://sudd.ch/) in various ways its [`list.php`](https://sudd.ch/list.php) endpoint allows. The output of this
#' function can be directly fed to [sudd_rfrnds()].
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @param mode The listing mode. One of
#' - `"by_date"`: Lists [all referendums in the sudd.ch database **by `date`**](https://sudd.ch/list.php?mode=allrefs), together with their `id_sudd`,
#'   `country_code`, `territory_name_de` and `title_de`. Specifying the sorting order of the results via the `order` parameter is supported.
#' - `"by_mod_date"`: Lists [all referendums in the sudd.ch database **by `date_last_edited`**](https://sudd.ch/list.php?mode=moddate), together with their
#'   `id_sudd`, `country_code`, `territory_name_de`, `date` and `title_de`.
#' - `"filter"`: Allows to provide additional arguments (via the `filter` parameter) that limit the results accordingly. Specifying the sorting order of the
#'   results via the `order` parameter is supported.
#' - `"random"`: Lists the `id_sudd` of five randomly selected referendums, together with their `country_code`.
#' @param order The sorting order of the results. Only relevant if `mode` is either `"by_date"` or `"filter"`.
#' @param filter A list with valid filtering arguments. Only relevant if `mode = "filter"`. The supported filtering arguments include
#' - `territory_name_de`: A string that must be (partially) matched by the referendums' `territory_name_de`. Matching is case-insensitive and no [fuzzy
#'   matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `title_de`: A string that must be (partially) matched by the referendums' `title_de`. Matching is case-insensitive and no [fuzzy
#'   matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `year_min`: The lower year limit of the referendums' `date`. A positive integer.
#' - `year_max`: The upper year limit of the referendums' `date`. A positive integer.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A [tibble][tibble::tbl_df] containing at least an `id_sudd` column.
#' @family sudd
#' @export
#'
#' @examples
#' # list all referendums by modification date (takes a while)
#' \dontrun{
#' rdb::list_sudd_rfrnds(mode = "by_mod_date")}
#' 
#' # list all referendums whose title matches "AHV"
#' rdb::list_sudd_rfrnds(mode = "filter",
#'                       filter = list(title_de = "AHV"),
#'                       quiet = TRUE)
#' 
#' # get sudd.ch referendum data from all referendums from 2020 onwards
#' rdb::list_sudd_rfrnds(mode = "filter",
#'                       filter = list(year_min = 2020),
#'                       quiet = TRUE) |>
#'   rdb::sudd_rfrnds(quiet = TRUE)
#' 
#' # get sudd.ch referendum data from five randomly picked referendums
#' rdb::list_sudd_rfrnds(mode = "random",
#'                       quiet = TRUE) |>
#'   rdb::sudd_rfrnds(quiet = TRUE)
list_sudd_rfrnds <- function(mode = c("by_date",
                                      "by_mod_date",
                                      "filter",
                                      "random"),
                             order = c("ascending",
                                       "descending"),
                             filter = list(territory_name_de = NULL,
                                           title_de = NULL,
                                           year_min = NULL,
                                           year_max = NULL),
                             use_cache = TRUE,
                             max_cache_age = "1 week",
                             quiet = FALSE) {
  # check args
  mode <- rlang::arg_match(mode)
  order <-
    rlang::arg_match(order) %>%
    dplyr::case_match(.x = .,
                      "ascending"  ~ "asc",
                      "descending" ~ "desc",
                      .default = .)
  checkmate::assert_flag(quiet)

  # do not cache `mode = "random"`
  if (mode == "random") use_cache <- FALSE

  pkgpins::with_cache(expr = {

    checkmate::assert_list(filter,
                           names = "unique")

    checkmate::assert_subset(names(filter),
                             choices = c("territory_name_de",
                                         "title_de",
                                         "year_min",
                                         "year_max"))

    checkmate::assert_string(filter$territory_name_de,
                             null.ok = TRUE)
    checkmate::assert_string(filter$title_de,
                             null.ok = TRUE)
    has_filter <-
      filter %>%
      purrr::map_lgl(is.null) %>%
      all() %>%
      magrittr::not()

    if (has_filter && mode != "filter") {
      cli::cli_alert_warning("{.arg filter} is ignored because {.arg mode} is set to {.val {mode}}.")
    }

    filter$year_min <- checkmate::assert_int(filter$year_min,
                                             lower = sudd_min_year,
                                             upper = sudd_max_year,
                                             null.ok = TRUE,
                                             coerce = TRUE)
    filter$year_max <- checkmate::assert_int(filter$year_max,
                                             lower = filter$year_min %||% sudd_min_year,
                                             upper = sudd_max_year,
                                             null.ok = TRUE,
                                             coerce = TRUE)

    is_year_missing <- purrr::map_lgl(c(filter$year_min, filter$year_max), is.null) %>% { any(.) && !all(.) }

    if (is_year_missing) {
      filter$year_min <- filter$year_min %||% sudd_min_year
      filter$year_max <- filter$year_max %||% sudd_max_year
    }

    if (mode == "filter" && all(purrr::map_lgl(filter, is.null))) {
      cli::cli_abort("At least one filtering argument must be provided in {.arg filter} when {.arg mode = \"filter\"}.")
    }

    names(filter) %<>% dplyr::case_match(.x = .,
                                         "territory_name_de" ~ "area",
                                         "title_de"          ~ "topic",
                                         "year_min"          ~ "first",
                                         "year_max"          ~ "last",
                                         .default = .)
    # assemble query params
    query <- c(list(mode = mode %>% dplyr::case_match(.x = .,
                                                      "by_date"     ~ "allrefs",
                                                      "by_mod_date" ~ "moddate",
                                                      .default = .))[mode != "filter"],
               list(sense = order)[mode %in% c("by_date", "filter")],
               filter[mode == "filter"])

    # retrieve and parse data
    if (!quiet) {
      status_msg <- "Fetching raw HTML data from {.url sudd.ch}..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }

    html <-
      httr::RETRY(verb = "GET",
                  url = url_sudd("list.php"),
                  query = query,
                  times = 3L) %>%
      xml2::read_html()

    if (!quiet) {
      status_msg <- "Parsing and tidying raw HTML data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }

    if (mode == "random") {

      result <-
        html %>%
        rvest::html_elements(css = "main ul li a") %>%
        rvest::html_attr(name = "href") %>%
        stringr::str_extract(pattern = "(?<=id=)[\\w\\d]+") %>%
        tibble::tibble(id_sudd = .)

    } else {

      rows <-
        html %>%
        rvest::html_element(css = "main table") %>%
        rvest::html_children() %>%
        purrr::map(rvest::html_elements,
                   css = "td")

      col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
      col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)
      col_3 <- rows %>% purrr::map(magrittr::extract2, 3L)
      col_4 <- rows %>% purrr::map(magrittr::extract2, 4L)

      result <-
        tibble::tibble(id_sudd = purrr::map_chr(col_4,
                                                \(x) {
                                                  x %>%
                                                    rvest::html_element(css = "a") %>%
                                                    rvest::html_attr(name = "href") %>%
                                                    stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+")
                                                }),
                       territory_name_de = purrr::map_chr(col_2, rvest::html_text),
                       !!!(col_3 %>% purrr::map_chr(rvest::html_text) %>% parse_sudd_date_de()),
                       title_de = purrr::map_chr(col_4, rvest::html_text)) %>%
        # add `date`
        dplyr::mutate(date = clock::date_build(year = year,
                                               month = month,
                                               day = day,
                                               invalid = "NA")) %>%
        dplyr::relocate(date,
                        .before = year)

      if (mode == "by_mod_date") {

        result %<>%
          tibble::add_column(date_last_edited =
                               col_1 %>%
                               purrr::map_chr(~ .x %>%
                                                rvest::html_element(css = "time") %>%
                                                rvest::html_attr(name = "datetime")) %>%
                               clock::date_parse()) %>%
          tidyr::fill(date_last_edited,
                      .direction = "down")
      }
    }

    result %>%
      # derive vars from `id_sudd`
      dplyr::bind_cols(.$id_sudd |>
                         purrr::map(parse_sudd_id) |>
                         purrr::list_rbind()) %>%
      dplyr::select(id_sudd,
                    starts_with("country_"),
                    is_former_country,
                    starts_with("subnational_entity_"),
                    everything())
  },
  pkg = this_pkg,
  from_fn = "list_sudd_rfrnds",
  mode,
  order,
  filter,
  use_cache = use_cache,
  max_cache_age = max_cache_age)
}

sudd_rfrnds

NOTES:

#' Get referendum data from [sudd.ch](https://sudd.ch/)
#'
#' Downloads referendum data from [sudd.ch](https://sudd.ch/).
#'
#' # About [sudd.ch](https://sudd.ch/)
#'
#' **sudd** stands for _**Su**chmaschine für **d**irekte **D**emokratie_ (German) and is operated by [Beat Müller](mailto:beat@sudd.ch). Its database content is
#' licensed under [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA
#' 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).
#'
#' @param ids_sudd The referendum identifiers assigned by [sudd.ch](https://sudd.ch/). Either as a character vector or a data frame containing a column
#'   `id_sudd`. `NA`s are ignored.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")` The column names are aligned with those of [rfrnds()] as closely as possible.
#' @family sudd
#' @importFrom rlang :=
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbc045192a21351232e596f")$id_sudd |> rdb::sudd_rfrnds()
#' 
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::sudd_rfrnds()
sudd_rfrnds <- function(ids_sudd,
                        use_cache = TRUE,
                        max_cache_age = "1 week",
                        quiet = FALSE) {

  checkmate::assert_flag(quiet)

  if (purrr::pluck_depth(ids_sudd) > 1L) {

    if (!("id_sudd" %in% colnames(ids_sudd))) {
      cli::cli_abort(paste0("{.arg ids_sudd} must be either a character vector of valid sudd.ch referendum identifiers or a data frame with a column of such",
                            " named {.var id_sudd}."))
    }

    ids_sudd <- ids_sudd$id_sudd
  }

  ids_sudd <-
    checkmate::assert_character(ids_sudd,
                                all.missing = FALSE) %>%
    magrittr::extract(!is.na(.))

  pkgpins::with_cache(expr = {

    ids_sudd %>%
      purrr::map(.f = \(x) sudd_rfrnd(x),
                 .progress = if (quiet) FALSE else "Scraping referendum data from sudd.ch") %>%
      purrr::list_rbind() %>%
      # properly parse `date`
      dplyr::bind_cols(.$date |>
                         purrr::map(parse_sudd_date) |>
                         purrr::list_rbind()) %>%
      dplyr::mutate(date = clock::date_build(year = year,
                                             month = month,
                                             day = day,
                                             invalid = "NA")) %>%
      # add `id_sudd`
      tibble::add_column(id_sudd = ids_sudd,
                         .before = 1L) %>%
      # derive vars from `id_sudd`
      dplyr::bind_cols(ids_sudd |>
                         purrr::map(parse_sudd_id) |>
                         purrr::list_rbind()) %>%
      # reorder columns
      dplyr::relocate(id_sudd,
                      country_code,
                      country_name,
                      is_former_country,
                      subnational_entity_code,
                      territory_name_de,
                      any_of(c("territory_type_de",
                               "date",
                               "year",
                               "month",
                               "day",
                               "title_de",
                               "question_type_de",
                               "types",
                               "result_de",
                               "result_status_de",
                               "adoption_requirements_de",
                               "electorate_total",
                               "electorate_abroad",
                               "polling_cards",
                               "votes_total",
                               "votes_empty",
                               "votes_void",
                               "votes_invalid",
                               "votes_valid",
                               "votes_yes",
                               "votes_no")),
                      matches("^votes_(option_\\d+|(counter_)?proposal)(_(total|empty|void|invalid|valid|yes|no))?$"),
                      any_of("votes_option_none"),
                      matches("^$"),
                      any_of(c("subterritories",
                               "subterritories_yes",
                               "subterritories_no",
                               "files",
                               "remarks",
                               "sources",
                               "ids_sudd_simultaneous",
                               "date_last_edited")))
  },
  pkg = this_pkg,
  from_fn = "sudd_rfrnds",
  ids_sudd,
  use_cache = use_cache,
  max_cache_age = max_cache_age)
}

Miscellaneous

is_online

#' Test RDB API availability
#'
#' Checks if the RDB API server is online and operational.
#'
#' @inheritParams url_api
#' @param quiet Whether or not to suppress printing a warning in case the API is unavailable.
#'
#' @return A logical scalar.
#' @family api_status
#' @export
is_online <- function(use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                               pkg = this_pkg),
                      quiet = FALSE) {

  checkmate::assert_flag(quiet)

  result <- FALSE
  response <- tryCatch(expr = httr::RETRY(verb = "GET",
                                          url = url_api("health",
                                                        .use_testing_server = use_testing_server),
                                          times = 3L),
                       error = function(e) e$message)

  if (inherits(response, "response")) {

    response %<>%
      # ensure we actually got a plaintext response
      pal::assert_mime_type(mime_type = "text/plain",
                            msg_suffix = mime_error_suffix) %>%
      # parse response
      httr::content(as = "text",
                    encoding = "UTF-8")

    if (response == "OK") {
      result <- TRUE

    } else if (!quiet) {
      cli::cli_alert_warning("RDB API server responded with: {.val {response}}")
    }

  } else {
    cli::cli_alert_warning(response)
  }

  result
}

pkg_config

#' `r this_pkg` package configuration metadata
#'
#' A [tibble][tibble::tbl_df] with metadata of all possible `r this_pkg` package configuration options. See [pal::pkg_config_val()] for more information.
#'
#' @format `r pkgsnip::return_lbl("tibble_cols", cols = colnames(pkg_config))`
#' @export
#'
#' @examples
#' rdb::pkg_config
"pkg_config"

TEMP

data_rdb_aargau <-
  readxl::read_excel("/home/salim/Arbeit/ZDA/Git/zdaarau/rpkgs/rdb/19-AG-Abstimmungen-1888-1971.xlsx") %>%
  dplyr::mutate(date = clock::date_parse(x = Datum,
                                         format = "%d.%m.%Y"),
                title_en = deeplr::translate2(text = Vorlage,
                                              auth_key = Sys.getenv("DEEPL_TOKEN"),
                                              target_lang = "EN",
                                              source_lang = "DE",
                                              preserve_formatting = TRUE)) %>%
  dplyr::select(date,
                title_de = Vorlage,
                title_en,
                electorate_total = Stimmberechtigte,
                votes_total = "Eingegangene Stimmzettel",
                votes_yes = "Anzahl JA-Stimmen",
                votes_no = "Anzahl NEIN-Stimmen",
                votes_empty = Leer)


zdaarau/c2d documentation built on Dec. 18, 2024, 1:24 p.m.