c2d: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

Documented in assemble_query_filter

# DO NOT EDIT THIS FILE BY HAND! Instead edit the R Markdown source file `Rmd/rdb.Rmd` and run `pkgpurl::purl_rmd()`.
# See `README.md#r-markdown-format` for more information on the literate programming approach used applying the R Markdown format.

# rdb: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide
# Copyright (C) 2024 Centre for Democracy Studies Aarau (ZDA)
# 
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
# 
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.

utils::globalVariables(names = c(".",
                                 # tidyselect fns
                                 "all_of",
                                 "any_of",
                                 "everything",
                                 "matches",
                                 "starts_with",
                                 "where",
                                 ".x",
                                 # other
                                 "Alpha_2",
                                 "Alpha_3",
                                 "Alpha_4",
                                 "applicability_constraint",
                                 "archive",
                                 "Children",
                                 "children_tier_1",
                                 "children_tier_2",
                                 "children_tier_3",
                                 "Code",
                                 "committee_name",
                                 "Common_name",
                                 "content-disposition",
                                 "count",
                                 "country_code",
                                 "country_code_long",
                                 "country_code_continual",
                                 "country_name",
                                 "country_name_de",
                                 "country_name_long",
                                 "date_last_edited",
                                 "date_time_created",
                                 "date_time_last_active",
                                 "date_time_last_edited",
                                 "Date_withdrawn",
                                 "day",
                                 "electorate_abroad",
                                 "electorate_total",
                                 "files",
                                 "id",
                                 "id_official",
                                 "id_sudd",
                                 "id_sudd_prefix",
                                 "inst_has_precondition",
                                 "inst_has_urgent_legal_basis",
                                 "inst_is_assembly",
                                 "inst_is_binding",
                                 "inst_is_counter_proposal",
                                 "inst_legal_basis_type",
                                 "inst_object_author",
                                 "inst_object_legal_level",
                                 "inst_object_revision_extent",
                                 "inst_object_revision_modes",
                                 "inst_object_type",
                                 "inst_precondition_actor",
                                 "inst_precondition_decision",
                                 "inst_quorum_approval",
                                 "inst_quorum_turnout",
                                 "inst_topics_excluded",
                                 "inst_topics_only",
                                 "inst_trigger_actor",
                                 "inst_trigger_actor_level",
                                 "inst_trigger_threshold",
                                 "inst_trigger_time_limit",
                                 "inst_trigger_type",
                                 "is_draft",
                                 "is_former_country",
                                 "is_multi_valued",
                                 "is_opt",
                                 "is_testing_server",
                                 "ISO_Alpha_3",
                                 "items",
                                 "level",
                                 "lower_house_abstentions",
                                 "lower_house_no",
                                 "lower_house_yes",
                                 "month",
                                 "municipality",
                                 "n",
                                 "Name",
                                 "name_long",
                                 "number",
                                 "Official_name",
                                 "Parent",
                                 "parent_topic",
                                 "position_government",
                                 "ptype",
                                 "question",
                                 "question_en",
                                 "referendum_text_options",
                                 "remarks",
                                 "result",
                                 "rowid",
                                 "sources",
                                 "subnational_entity_code",
                                 "subnational_entity_name",
                                 "subterritories_no",
                                 "subterritories_yes",
                                 "sudd_prefix",
                                 "tags",
                                 "topic",
                                 "topic_tier_1",
                                 "topic_tier_2",
                                 "topic_tier_3",
                                 "topics_tier_1",
                                 "topics_tier_2",
                                 "topics_tier_3",
                                 "territory_name_de",
                                 "territory_name_de_short",
                                 "title_de",
                                 "title_fr",
                                 "title_en",
                                 "turnout",
                                 "type",
                                 "Type",
                                 "un_country_code",
                                 "un_region_tier_1_code",
                                 "un_region_tier_2_name",
                                 "un_region_tier_3_name",
                                 "upper_house_abstentions",
                                 "upper_house_no",
                                 "upper_house_yes",
                                 "url_sudd",
                                 "url_swissvotes",
                                 "value_labels",
                                 "value_scale",
                                 "variable_name",
                                 "variable_name_unnested",
                                 "variable_name_print",
                                 "variable_values",
                                 "value",
                                 "value_total",
                                 "votes",
                                 "votes_empty",
                                 "votes_invalid",
                                 "votes_no",
                                 "votes_per_subterritory",
                                 "votes_yes",
                                 "year"))

# katex is used in documentation dynamically via `\Sexpr`, so it has to be imported to avoid an error when rendering the corresponding help page
# TODO: since the `R CMD check` warning can be considered a bug in this case, investigate the situation and possibly submit bug report
has_katex <- function() {
  nchar(katex::example_math()) > 0L
}

.onLoad <- function(libname, pkgname) {
  
  # clear pkgpins cache
  tryCatch(expr = pkgpins::clear_cache(board = pkgpins::board(pkg = pkgname),
                                       max_age = pal::pkg_config_val(key = "global_max_cache_age",
                                                                     pkg = pkgname)),
           error = function(e) cli::cli_alert_warning(text = "Failed to clear pkgpins cache on load of {.pkg {pkgname}}. Error message: {e$message}"))
}



api_failure <- function(parsed,
                        raw = NULL,
                        prefix = "") {
  
  env <- parent.frame(n = 2L)
  
  assign(x = "parsed",
         value = parsed,
         pos = env)
  
  msg_part_val <- ifelse(utils::hasName(parsed$error, "value"),
                         paste0(": ", paste0("{.var ", names(parsed$error$value), "}: {.warn ", parsed$error$value, "}",
                                             collapse = ", ")),
                         "")
  
  msg_part_error <- ifelse(utils::hasName(parsed, "error"),
                           paste0("error {.err {parsed$error$id}}", msg_part_val),
                           "{.err {.y}}.")
  
  cli_div_id <- cli::cli_div(theme = cli_theme)
  cli::cli_alert_warning(paste0(prefix, "The API server responded with ", msg_part_error),
                         .envir = env)
  if (!is.null(raw)) {
    cli::cli_alert_info("The following JSON payload was sent: {.content {raw}}")
  }
  cli::cli_end(id = cli_div_id)
}

as_fm_list <- function(x) {
  
  purrr::imap(x,
              ~ rlang::new_formula(lhs = .y,
                                   rhs = .x,
                                   env = emptyenv()))
}

#' Assemble MongoDB query filter document
#'
#' @param country_code The `country_code`(s) to be included. A character vector.
#' @param subnational_entity_name The `subnational_entity_name`(s) to be included. A character vector.
#' @param municipality The `municipality`(s) to be included. A character vector.
#' @param level The `level`(s) to be included. A character vector.
#' @param type The `type`(s) to be included. A character vector.
#' @param date_min The minimum `date` to be included. A [date][Date] or something coercible to.
#' @param date_max The maximum `date` to be included. A [date][Date] or something coercible to.
#' @param is_draft `TRUE` means to include only referendum entries with _draft_ status, `FALSE` to include only normal entries. Set to `NULL` in order to
#'   include both draft and normal entries.
#' @param date_time_created_min The minimum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_created_max The maximum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_min The minimum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_max The maximum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#'   `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param query_filter A valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents) which allows for
#'   maximum control over what data is included. This takes precedence over all of the above listed parameters, i.e. if `query_filter` is provided, the
#'   parameters `r formals(assemble_query_filter) |> names() |> setdiff(c("query_filter", "base64_encode")) |> pal::enum_str(wrap = "\x60")` are ignored.
#' @param base64_encode Whether or not to [Base64](https://en.wikipedia.org/wiki/Base64)-encode the resulting query filter document. Note that the
#'   `query_filter` argument provided to other functions of this package must be Base64-encoded.
#'
#' @return A character scalar containing a valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents),
#'   [Base64](https://en.wikipedia.org/wiki/Base64)-encoded if `base64_encode = TRUE`.
#' @keywords internal
assemble_query_filter <- function(country_code = NULL,
                                  subnational_entity_name = NULL,
                                  municipality = NULL,
                                  level = NULL,
                                  type = NULL,
                                  date_min = NULL,
                                  date_max = NULL,
                                  is_draft = NULL,
                                  date_time_created_min = NULL,
                                  date_time_created_max = NULL,
                                  date_time_last_edited_min = NULL,
                                  date_time_last_edited_max = NULL,
                                  query_filter = NULL,
                                  base64_encode = TRUE) {
  
  checkmate::assert_string(query_filter,
                           null.ok = TRUE)
  checkmate::assert_flag(base64_encode)
  
  # assemble JSON query filter document if `query_filter` is not provided
  if (is.null(query_filter)) {
    
    purrr::map_chr(.x = country_code,
                   .f = checkmate::assert_choice,
                   choices = val_set$country_code,
                   null.ok = TRUE,
                   .var.name = "country_code")
    checkmate::assert_character(subnational_entity_name,
                                any.missing = FALSE,
                                null.ok = TRUE)
    checkmate::assert_character(municipality,
                                any.missing = FALSE,
                                null.ok = TRUE)
    purrr::map_chr(.x = level,
                   .f = checkmate::assert_choice,
                   choices = var_vals("level"),
                   null.ok = TRUE,
                   .var.name = "level")
    purrr::map_chr(.x = type,
                   .f = checkmate::assert_choice,
                   choices = var_vals("type"),
                   null.ok = TRUE,
                   .var.name = "type")
    checkmate::assert_flag(is_draft,
                           null.ok = TRUE)
    
    date_min %<>% lubridate::as_date()
    date_max %<>% lubridate::as_date()
    date_time_created_min %<>% lubridate::as_datetime(tz = "UTC")
    date_time_created_max %<>% lubridate::as_datetime(tz = "UTC")
    date_time_last_edited_min %<>% lubridate::as_datetime(tz = "UTC")
    date_time_last_edited_max %<>% lubridate::as_datetime(tz = "UTC")
    
    query_filter <-
      list(country_code = query_filter_in(country_code),
           canton = query_filter_in(subnational_entity_name),
           municipality = query_filter_in(municipality),
           level = query_filter_in(level),
           institution =
             type %>%
             pal::when(length(.) == 0L ~ .,
                       ~ dplyr::case_match(.x = .,
                                           "citizens' assembly" ~ "citizen assembly",
                                           .default = .) %>%
                         stringr::str_to_sentence()) %>%
             query_filter_in(),
           date = query_filter_date(min = date_min,
                                    max = date_max),
           draft = is_draft,
           created_on = query_filter_datetime(min = date_time_created_min,
                                              max = date_time_created_max),
           date_time_last_edited = query_filter_datetime(min = date_time_last_edited_min,
                                                         max = date_time_last_edited_max)) %>%
      # remove `NULL` elements
      purrr::compact() %>%
      # convert to JSON
      jsonlite::toJSON(POSIXt = "ISO8601",
                       auto_unbox = TRUE,
                       digits = NA,
                       pretty = FALSE)
  }
  
  if (base64_encode) {
    query_filter %<>% jsonlite::base64_enc()
  }
  
  query_filter
}

assert_api_success <- function(x) {
  
  if (!is.null(x$error$id)) {
    cli_div_id <- cli::cli_div(theme = cli_theme)
    cli::cli_abort("API server responded with error {.err {x$error$id}}")
    cli::cli_end(id = cli_div_id)
  }
  
  invisible(x)
}

assert_cols_absent <- function(data,
                               type) {
  
  type <- rlang::arg_match0(arg = type,
                            values = unique(unlist(data_cols_absent$type)))
  cols <-
    data_cols_absent %>%
    dplyr::filter(purrr::map_lgl(type,
                                 ~ !!type %in% .x)) %$%
    col
  
  col_names <- colnames(data)
  
  purrr::walk(cols,
              ~ {
                
                if (.x %in% col_names) {
                  
                  data_cols_absent %>%
                    dplyr::filter(col == !!.x & purrr::map_lgl(type,
                                                               ~ !!type %in% .x)) %$%
                    msg %>%
                    cli::cli_abort()
                }
              })
  
  invisible(data)
}

assert_cols_valid <- function(data,
                              type = c("validate", "add", "edit"),
                              action = cli::cli_abort,
                              cli_progress_id = NULL) {
  
  type <- rlang::arg_match(type)
  
  non_na_col_names <- c("id",
                        "country_code",
                        "date",
                        "level")
  na_col_names <-
    data %>%
    dplyr::select(any_of(non_na_col_names)) %>%
    dplyr::filter(dplyr::if_any(.cols = everything(),
                                .fns = is.na)) %>%
    dplyr::select(where(~ anyNA(.x))) %>%
    colnames()
  n_na_col_names <- length(na_col_names)
  
  if (n_na_col_names) {
    cli::cli_progress_done(id = cli_progress_id,
                           result = "failed")
    action("Detected {n_na_col_names} column{?s} in {.arg data} that contain forbidden {.val NA}s: {.var {na_col_names}}")
  }
  
  ## check `id`
  if ("id" %in% colnames(data) && anyDuplicated(data$id)) {
    cli::cli_progress_done(id = cli_progress_id,
                           result = "failed")
    action("Duplicated {.var id}s detected. IDs must be unique.")
  }
  
  ## check `date`
  if ("date" %in% colnames(data)) {
    
    check <- checkmate::check_date(data$date,
                                   any.missing = FALSE)
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$date}. {check}")
    }
  }
  
  ## check `level`
  if ("level" %in% colnames(data)) {
    
    check <- checkmate::check_subset(as.character(data$level),
                                     choices = var_vals("level"))
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$level}. {check}")
    }
  }

  ## check `country_code`
  if ("country_code" %in% colnames(data)) {
    
    check <- checkmate::check_subset(as.character(data$country_code),
                                     choices = val_set$country_code)
    if (!isTRUE(check)) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action("Failed to validate {.var data$country_code}. {check}")
    }
    
    ## ensure `position_government` is present for additions when `country_code = "CH" & level = "national"`
    if (type == "add"
        && (data %>%
            dplyr::filter(country_code == "CH" & level == "national") %>%
            nrow() %>%
            magrittr::is_greater_than(0L))
        && !("position_government" %in% colnames(data))) {
      
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums with {.code country_code = \"CH\" & level = \"national\"} present in {.arg data} but column {.var ",
                    "position_government} is missing."))
    }
  }
  
  ## check `subnational_entity_name`
  ## TODO: check `subnational_entity_code` instead once it's available
  if (any(data[["level"]] != "national")) {
    
    if (!("subnational_entity_name" %in% colnames(data))) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums of {.var level} below {.val national} present in {.arg data} but column {.var subnational_entity_name} is missing."))
    }
    
    ix_missing_subnational_entities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level != "national" & is.na(subnational_entity_name)) %$%
      rowid
    
    n_missing_subnational_entities <- length(ix_missing_subnational_entities)
    
    if (n_missing_subnational_entities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_missing_subnational_entities} row{?s} in {.arg data} {?is/are} missing a {.var subnational_entity_name}. Affected {?is/are} ",
                    "the row{?s} with ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
    }
  }
  if ("subnational_entity_name" %in% colnames(data)) {
    
    ix_illegal_subnational_entities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level == "national" & !is.na(subnational_entity_name)) %$%
      rowid
    
    n_illegal_subnational_entities <- length(ix_illegal_subnational_entities)
    
    if (n_illegal_subnational_entities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_subnational_entities} row{?s} in {.arg data} {?has/have} a {.var subnational_entity_name} set although they are on the ",
                    "national level. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_subnational_entities}}."))
    }
  }
  
  ## check `municipality`
  if (any(data[["level"]] == "local")) {
    
    if (!("municipality" %in% colnames(data))) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("Referendums of {.var level = \"local\"} present in {.arg data} but column {.var municipality} is missing."))
    }
    
    ix_missing_municipalities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level == "local" & is.na(municipality)) %$%
      rowid
    
    n_missing_municipalities <- length(ix_missing_municipalities)
    
    if (n_missing_municipalities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_missing_municipalities} row{?s} in {.arg data} {?is/are} missing a {.var municipality}. Affected {?is/are} the row{?s} with ",
                    "ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
    }
  }
  if ("municipality" %in% colnames(data)) {
    
    ix_illegal_municipalities <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter(level != "local" & !is.na(municipality)) %$%
      rowid
    
    n_illegal_municipalities <- length(ix_illegal_municipalities)
    
    if (n_illegal_municipalities) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_municipalities} row{?s} in {.arg data} {?has/have} a {.var municipality} set although they are not on the local level. ",
                    "Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_municipalities}}."))
    }
  }
  
  ## check variables that are only meant to be set for Swiss national referendums
  ## TODO: Remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved.
  ### `votes_per_subterritory`
  if (all(c("votes_per_subterritory", "level", "country_code") %in% colnames(data))) {
    
    ix_illegal_votes_per_subterritory <-
      data %>%
      tibble::rowid_to_column() %>%
      dplyr::filter((level != "national" | country_code != "CH") & !purrr::map_lgl(votes_per_subterritory, is.null)) %$%
      rowid
    
    n_illegal_votes_per_subterritory <- length(ix_illegal_votes_per_subterritory)
    
    if (n_illegal_votes_per_subterritory) {
      cli::cli_progress_done(id = cli_progress_id,
                             result = "failed")
      action(paste0("{n_illegal_votes_per_subterritory} row{?s} in {.arg data} {?has/have} a {.var position_government} set although they are not Swiss ",
                    "national referendums. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_votes_per_subterritory}}."))
    }
  }
  ### non-list vars
  c("lower_house_yes",
    "lower_house_no",
    "lower_house_abstentions",
    "upper_house_yes",
    "upper_house_no",
    "upper_house_abstentions",
    "position_government") %>%
    purrr::walk(function(var_name) {
      
      if (all(c(var_name, "level", "country_code") %in% colnames(data))) {
        
        ix_illegal <-
          data %>%
          tibble::rowid_to_column() %>%
          dplyr::filter((level != "national" | country_code != "CH") & !is.na(!!as.symbol(var_name))) %$%
          rowid
        
        n_illegal <- length(ix_illegal)
        
        if (n_illegal) {
          cli::cli_progress_done(id = cli_progress_id,
                                 result = "failed")
          action(paste0("{n_illegal} row{?s} in {.arg data} {?has/have} a {.var {var_name}} set although {cli::qty(n_illegal)}{?it is not a/they are not} ",
                        "Swiss national referendum{?s}. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal}}."))
        }
      }
    })
  
  invisible(data)
}

assert_content <- function(x) {
  
  if (!nchar(x)) {
    cli::cli_abort("Received empty response from RDB API. Please debug.",
                   .internal = TRUE)
  }
  
  invisible(x)
}

#' Authenticate a user session for the [RDB API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#1-reflexive-routes)
#'
#' Creates a new user session token if necessary. The token is stored in the R option `rdb.user_session_tokens`, a [tibble][tibble::tbl_df] with the columns
#' `email`, `token` and `date_time_last_active`.
#' 
#' `email` and `password` default to the [package configuration options][pkg_config] `api_username` and `api_password` respectively.
#'
#' User session tokens expire automatically after 15 days of inactivity.
#'
#' @inheritParams url_api
#' @param email The e-mail address of the user for which a session should be created. A character scalar.
#' @param password The password of the user for which a session should be created. A character scalar.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return The user session token as a character scalar, invisibly.
#' @keywords internal
auth_session <- function(email = pal::pkg_config_val(key = "api_username",
                                                     pkg = this_pkg),
                         password = pal::pkg_config_val(key = "api_password",
                                                        pkg = this_pkg),
                         use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg),
                         quiet = FALSE) {
  
  checkmate::assert_string(email,
                           min.chars = 3L)
  checkmate::assert_string(password,
                           min.chars = 1L)
  checkmate::assert_flag(quiet)
  
  # get existing tokens or initialize empty tibble
  tokens <-
    getOption("rdb.user_session_tokens") %>%
    pal::when(all(c("email", "token", "date_time_last_active") %in% colnames(.)) ~ .,
              ~ tibble::tibble(email = character(),
                               token = character(),
                               is_testing_server = logical(),
                               date_time_last_active = as.POSIXct(NULL)))
  # extract latest token
  token <- tokens %>% dplyr::filter(email == !!email & is_testing_server == !!use_testing_server)
  
  if (nrow(token)) {
    token %<>% dplyr::filter(date_time_last_active == max(date_time_last_active))
    token %<>% .[1L, ]
  }
  
  # ensure token is not expired (checked if older than 14 days), else set to `NULL`
  if (nrow(token) &&
      checkmate::test_string(token$token, min.chars = 1L) &&
      ((token$date_time_last_active > clock::add_days(clock::date_now(zone = "UTC"), -14L)) || !is_session_expired(token = token$token,
                                                                                                                   use_testing_server = use_testing_server))) {
    token <- token$token
    
  } else {
    token <- NULL
  }
  
  # create new session if necessary
  if (is.null(token)) {
    
    if (!quiet) {
      status_msg <- "Authenticating new user session"
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    
    token <-
      httr::RETRY(verb = "POST",
                  url = url_api("users/session",
                                .use_testing_server = use_testing_server),
                  config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                  times = 3L,
                  encode = "json",
                  body = list(email = email,
                              password = password)) %>%
      # ensure we actually got a JSON response
      pal::assert_mime_type(mime_type = "application/json",
                            msg_suffix = mime_error_suffix) %>%
      # parse response
      httr::content(as = "parsed") %$%
      token
    
    if (!quiet) {
      cli::cli_progress_done()
    }
  }
  
  # update `rdb.user_session_tokens` option
  options(rdb.user_session_tokens =
            tokens %>%
            dplyr::filter(token != !!token) %>%
            tibble::add_row(email = email,
                            token = token,
                            is_testing_server = use_testing_server,
                            date_time_last_active = clock::date_now(zone = "UTC")))
  # return token
  invisible(token)
}

md_link_codebook <- function(var_names) {
  
  purrr::map_chr(var_names,
                 \(x) paste0("[`", x, "`](", url_codebook(x), ")"))
}

country_code_to_name <- function(country_code) {
  
  purrr::map2_chr(.x = country_code,
                  .y = nchar(as.character(country_code)) > 2L,
                  .f = ~ {
                    
                    if (isTRUE(.y)) {
                      
                      result <-
                        data_iso_3166_3 %>%
                        dplyr::filter(Alpha_4 == !!.x) %$%
                        name_short
                      
                    } else {
                      
                      result <-
                        data_iso_3166_1 %>%
                        dplyr::filter(Alpha_2 == !!.x) %$%
                        name_short
                    }
                    
                    if (length(result) == 0L) {
                      result <- NA_character_
                    }
                    
                    result
                  })
}

field_to_var_name <- function(x) {
  
  x %>% purrr::map_chr(~ var_names[[.x]] %||% .x)
}

derive_country_vars <- function(country_code,
                                date) {
  
  country_code %<>% as.character()
  subnational_entity_code <- NA_character_
  
  # handle subnational entities
  ## Ascension
  if (country_code == "AC") {
    
    country_code <- "SH"
    subnational_entity_code <- "SH-AC"
  }
  
  # assign canonical pseudo codes
  ## Kosovo
  country_code %<>% dplyr::case_match(.x = .,
                                      "KS" ~ "XK",
                                      .default = .)
  data_former <-
    data_iso_3166_3 %>%
    dplyr::filter(Alpha_2 == !!country_code & !!date <= (clock::add_years(Date_withdrawn, 50L))) %>%
    dplyr::filter(Date_withdrawn == pal::safe_max(Date_withdrawn))
  
  is_former <- nrow(data_former) > 0L
  is_current <- !is_former && country_code %in% data_iso_3166_1$Alpha_2
  
  if (!(is_former || is_current) && !(country_code %in% country_codes_sudd_invalid)) {
    cli::cli_alert_warning("Neither ISO 3166-1 alpha-2 nor ISO 3166-3 alpha-4 {.var country_code} found for {.val {country_code}}.")
  }
  
  country_code <-
    country_code %>%
    pal::when(is_former ~
                data_former %>%
                dplyr::filter(Date_withdrawn == min(Date_withdrawn)) %>%
                assertr::verify(nrow(.) == 1L) %$%
                Alpha_4,
              is_current ~
                country_code,
              ~
                NA_character_)
  
  tibble::tibble(country_code = country_code,
                 country_name = country_code_to_name(country_code),
                 is_former_country = is_former,
                 subnational_entity_code = subnational_entity_code)
}

drop_disabled_vars <- function(data,
                               to_drop) {
  
  to_drop_present <- intersect(to_drop, colnames(data))
  n_to_drop_present <- length(to_drop_present)
  
  if (n_to_drop_present) {
    
    cli::cli_alert_warning(paste0("The {cli::qty(n_to_drop_present)} column{?s} {.var {to_drop_present}} in {.arg data} are ignored because setting/altering ",
                                  "the corresponding values is disabled."))
    
    data %<>% dplyr::select(-any_of(to_drop))
  }
  
  data
}

drop_implicit_vars <- function(data,
                               type = c("add", "edit")) {
  
  type <- rlang::arg_match(type)
  
  to_drop <-
    data_cols_absent %>%
    dplyr::filter(purrr::map_lgl(type,
                                 ~ !!type %in% .x)) %$%
    col
  
  data %>% dplyr::select(-any_of(to_drop))
}

drop_non_applicable_vars <- function(data) {
  
  if ("level" %in% colnames(data)) {
    
    if (data$level != "local") {
      data %<>% dplyr::select(-any_of("municipality"))
    }
    if (data$level == "national") {
      data %<>% dplyr::select(-any_of("subnational_entity_name"))
    }
    # TODO: remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved
    if (data$level != "national" || data$country_code != "CH") {
      data %<>% dplyr::select(-any_of(c("votes_per_subterritory",
                                        "lower_house_yes",
                                        "lower_house_no",
                                        "lower_house_abstentions",
                                        "upper_house_yes",
                                        "upper_house_no",
                                        "upper_house_abstentions",
                                        "position_government")))
    }
  }
  
  data %<>% dplyr::select(-any_of(c(
    "files",
    "is_former_country",
    # TODO: remove this as soon as [issue #81](https://github.com/zdaarau/c2d-app/issues/81) is fixed
    "sources"
  )))
  
  data
}

fct_flip <- function(x) {
  
  checkmate::assert_factor(x,
                           n.levels = 2L)
  flip_map <-
    levels(x) %>%
    magrittr::set_names(value = rev(.)) %>%
    as.list()
  
  x %>% forcats::fct_recode(!!!flip_map)
}

flatten_array_as_is <- function(x) {
  
  x %<>% unlist()
  
  if (!is.null(x)) {
    x %<>% I()
  }
  
  x
}



is_session_expired <- function(token,
                               use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                        pkg = this_pkg)) {
  response <-
    httr::RETRY(verb = "GET",
                url = url_api("users/profile",
                              .use_testing_server = use_testing_server),
                config = httr::add_headers(Authorization = paste("Bearer", token),
                                           Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # parse response
    httr::content(as = "parsed")
  
  is.null(response[["profile"]])
}

#' Lowercase non-abbreviations
#'
#' @param x A character vector.
#'
#' @return A character vector of the same length as `x`.
#' @keywords internal
lower_non_abbrs <- function(x) {
  
  x %>%
    stringr::str_split(pattern = "\\b") %>%
    purrr::map_chr(~ .x %>%
                     dplyr::if_else(stringr::str_detect(string = .,
                                                        pattern = "^[^[:lower:]]+$"),
                                    .,
                                    stringr::str_to_lower(.)) %>%
                     paste0(collapse = ""))
}

order_rfrnd_cols <- function(data) {
  
  data %>% dplyr::relocate(any_of(rfrnd_cols_order))
}

parse_datetime <- function(x) {
  
  x %<>% unlist(use.names = FALSE)
  
  if (is.character(x) && stringr::str_detect(string = x,
                                             pattern = "^-?\\d+$",
                                             negate = TRUE)) {
    result <-
      x %>%
      clock::naive_time_parse(format = "%Y-%m-%dT%H:%M:%SZ",
                              precision = "millisecond") %>%
      clock::as_date_time(zone = "UTC")
    
  } else {
    
    result <-
      as.numeric(x) %>%
      magrittr::divide_by(1000L) %>%
      as.POSIXct(origin = "1970-01-01",
                 tz = "UTC")
  }
  
  result
}

plot_share_per_period <- function(data_freq,
                                  x,
                                  period) {
  rlang::check_installed("plotly",
                         reason = pal::reason_pkg_required())
  
  grid_step <- switch(EXPR = period,
                      week = 4L,
                      year = 50L,
                      decade = 50L,
                      century = 100L,
                      1L)
  
  grid_x <- seq(from = ceiling(pal::safe_min(data_freq[[period]])[1L] / grid_step) * grid_step,
                to = floor(pal::safe_max(data_freq[[period]])[1L] / grid_step) * grid_step,
                by = grid_step)
  
  plotly::plot_ly(data = data_freq,
                  type = "scatter",
                  mode = "none",
                  stackgroup = "one",
                  groupnorm = "percent",
                  x = ~eval(as.symbol(period)),
                  y = ~n,
                  name = ~eval(as.symbol(x))) %>%
    plotly::layout(hovermode = "x",
                   legend = list(orientation = "h"),
                   xaxis = list(dtick = switch(EXPR = period,
                                               week = 1L,
                                               month = 1L,
                                               quarter = 1L,
                                               year = 10L,
                                               decade = 10L,
                                               century = 100L),
                                showgrid = FALSE,
                                ticklabelstep = switch(EXPR = period,
                                                       week = 4L,
                                                       month = 1L,
                                                       quarter = 1L,
                                                       year = 5L,
                                                       decade = 5L,
                                                       century = 1L),
                                range = switch(EXPR = period,
                                               week = c(1L, 53L),
                                               month = c(1L, 12L),
                                               quarter = c(1L, 4L),
                                               NULL),
                                ticks = "outside",
                                title = list(text = NULL)),
                   yaxis = list(fixedrange = TRUE,
                                hoverformat = ".1f",
                                showgrid = FALSE,
                                ticksuffix = "\u2009% ",
                                title = list(text = NULL)),
                   # draw custom grid
                   shapes =
                     grid_x %>%
                     purrr::map(~ list(type = "line",
                                       y0 = 0L,
                                       y1 = 1L,
                                       yref = "paper",
                                       x0 = .x,
                                       x1 = .x,
                                       line = list(color = "#fff",
                                                   width = 0.2))))
}

restore_topics <- function(topics_tier_1,
                           topics_tier_2,
                           topics_tier_3) {
  list(topics_tier_1,
       topics_tier_2,
       topics_tier_3) %>%
    purrr::pmap(~ {
      
      ..1 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_1")
      ..2 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_2")
      ..3 %>%
        unlist() %>%
        as.character() %>%
        checkmate::assert_character(any.missing = FALSE,
                                    max.len = 3L,
                                    .var.name = "topics_tier_3")
      
      topics_hierarchy <- hierarchize_topics(tibble::tibble(topics_tier_1 = list(..1),
                                                            topics_tier_2 = list(..2),
                                                            topics_tier_3 = list(..3)))
      
      topics <- topics_hierarchy$topic_tier_3 %>% setdiff(NA_character_)
      
      if (length(topics) < 3L) {
        
        topics <-
          topics_hierarchy %>%
          dplyr::filter(is.na(topic_tier_3)) %$%
          topic_tier_2 %>%
          setdiff(NA_character_) %>%
          c(topics)
      }
      
      if (length(topics) < 3L) {
        
        topics <-
          topics_hierarchy %>%
          dplyr::filter(is.na(topic_tier_3) & is.na(topic_tier_2)) %$%
          topic_tier_1 %>%
          setdiff(NA_character_) %>%
          c(topics)
      }
      
      topics
    })
}

topic_frequency <- function(topics,
                            tier) {
  topics %>%
    purrr::list_c(ptype = character()) %>%
    factor(levels = topics(tiers = tier)) %>%
    tibble::tibble(topic = .) %>%
    dplyr::group_by(topic) %>%
    dplyr::summarise(n = dplyr::n())
}

#' Tidy "raw" RDB API referendum data
#'
#' Converts the "raw" MongoDB data from the RDB API to the tidied [rfrnds()] schema.
#'
#' You can reverse this function again using [untidy_rfrnds()].
#'
#' @param data The MongoDB data as a list (converted from the JSON returned by the RDB API using [jsonlite::fromJSON()]).
#' @param tidy Whether or not to tidy the referendum data, i.e. apply various data cleansing tasks and add additional variables. If `FALSE`, the raw MongoDB
#'   referendum data will only be modified just enough to be able to return it as a [tibble][tibble::tbl_df]. Note that untidy data doesn't conform to the 
#'   [codebook][data_codebook] (i.a. different variable names).
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @keywords internal
tidy_rfrnds <- function(data,
                        tidy = TRUE) {
  
  checkmate::assert_flag(tidy)
  this_env <- rlang::current_env()
  
  data %<>%
    # unnest columns and ensure list type for multi-value columns
    # NOTE that despite of the [speed-up in v1.1.4](https://github.com/tidyverse/tidyr/releases/tag/v1.1.4), `tidyr::unnest()` is still much slower than our
    # custom function
    purrr::map(.f = function(l,
                             category_names = names(l$categories),
                             context_names = names(l$context),
                             title_langs = names(l$title)) {
      
      for (name in category_names) {
        l[[paste0("categories.", name)]] <- l$categories[[name]]
      }
      
      for (name in context_names) {
        l[[paste0("context.", name)]] <- l$context[[name]]
      }
      
      for (lang in title_langs) {
        l[[paste0("title.", lang)]] <- l$title[[lang]]
      }
      
      l$categories <- NULL
      l$context <- NULL
      l$title <- NULL
      
      for (name in c("tags",
                     "categories.action",
                     "categories.special_topics",
                     "categories.excluded_topics")) {
        l[[name]] %<>% purrr::list_c(ptype = character()) %>% list()
      }
      
      for (name in c("archive",
                     "files",
                     "context.votes_per_canton")) {
        l[[name]] %<>% list()
      }
      
      l
    }) %>%
    # drop empty fields
    purrr::modify_depth(.depth = 1L,
                        .f = purrr::compact) %>%
    # convert to tibble
    purrr::map(tibble::as_tibble_row) %>%
    purrr::list_rbind()
  
  # tidy data
  if (nrow(data) > 0L && tidy) {
    
    data %<>%
      # rename variables (mind that the MongoDB-based API doesn't demand a fixed schema)
      pal::rename_from(dict = var_names) %>%
      # create/recode variables
      dplyr::mutate(
        # ensure all supposed to floating-point numbers are actually of type double (JSON API is not reliable in this respect)
        dplyr::across(any_of(c("subterritories_no",
                               "subterritories_yes",
                               # TODO: remove/adapt next two lines once [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
                               "date_time_created"["date_time_created" %in% colnames(.)
                                                   && any(purrr::map_lgl(.$date_time_created, is.numeric))],
                               "date_time_last_edited"["date_time_last_edited" %in% colnames(.)
                                                       && any(purrr::map_lgl(.$date_time_last_edited, is.numeric))])),
                      ~ purrr::map_dbl(.x, ~ if (is.null(.x)) NA_real_ else as.double(.x))),
        
        # use explicit NA values
        dplyr::across(where(is.integer),
                      ~ dplyr::if_else(.x %in% c(-1L, -2L),
                                       NA_integer_,
                                       .x)),
        dplyr::across(where(is.character),
                      ~ dplyr::if_else(.x %in% c("", "-1", "-2"),
                                       NA_character_,
                                       .x)),
        dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
                      ~ dplyr::if_else(.x %in% c(-1.0, -2.0),
                                       NA_real_,
                                       .x)),
        dplyr::across(any_of("result"),
                      ~ dplyr::if_else(.x %in% c("Unknown", "Not provided"),
                                       NA_character_,
                                       .x)),
        # convert all values to lowercase
        ## vectors
        dplyr::across(any_of(c("result",
                               "type",
                               "inst_legal_basis_type",
                               "inst_object_type",
                               "inst_object_legal_level",
                               "inst_object_revision_extent",
                               "inst_trigger_type",
                               "inst_trigger_actor_level",
                               "inst_trigger_time_limit",
                               "inst_quorum_approval",
                               "inst_precondition_decision")),
                      stringr::str_to_lower),
        ## lists
        dplyr::across(any_of(c("inst_object_revision_modes",
                               "inst_topics_only",
                               "inst_topics_excluded")),
                      ~ purrr::map(.x = .x,
                                   .f = stringr::str_to_lower)),
        
        # convert only non-abbreviated values to lowercase
        dplyr::across(any_of(c("inst_object_author",
                               "inst_trigger_actor",
                               "inst_precondition_actor")),
                      ~ purrr::map_chr(.x = .x,
                                       .f = lower_non_abbrs)),
        
        # specific recodings
        ## binary (dummies)
        dplyr::across(any_of("position_government"),
                      ~ dplyr::case_when(.x == "Acceptance" ~ "yes",
                                         .x == "Rejection" ~ "no",
                                         .default = NA_character_)),
        dplyr::across(any_of("inst_has_urgent_legal_basis"),
                      ~ dplyr::case_when(.x == "Urgent" ~ TRUE,
                                         .x == "Normal" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_binding"),
                      ~ dplyr::case_when(.x == "Binding" ~ TRUE,
                                         .x == "Non-binding" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_counter_proposal"),
                      ~ dplyr::case_when(.x == "Yes" ~ TRUE,
                                         .x == "No" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_is_assembly"),
                      ~ dplyr::case_when(.x == "Assembly" ~ TRUE,
                                         .x == "Ballot" ~ FALSE,
                                         .default = NA)),
        dplyr::across(any_of("inst_has_precondition"),
                      ~ dplyr::case_when(.x == "Exists" ~ TRUE,
                                         .x == "Does not exist" ~ FALSE,
                                         .default = NA)),
        ## nominal
        ### flatten `id`
        id = purrr::list_c(id,
                           ptype = character()),
        ### split `tags` into separate per-tier vars
        topics_tier_1 = tags %>% purrr::map(infer_topics,
                                            tier = 1L),
        topics_tier_2 = tags %>% purrr::map(infer_topics,
                                            tier = 2L),
        topics_tier_3 = tags %>% purrr::map(~ .x[.x %in% topics_tier_3_]),
        ### various cleanups
        dplyr::across(any_of("type"),
                      ~ dplyr::case_match(.x = .x,
                                          "citizen assembly" ~ "citizens' assembly",
                                          "not provided"     ~ NA_character_,
                                          .default = .x)),
        dplyr::across(any_of(c("inst_trigger_actor",
                               "inst_object_author")),
                      ~ dplyr::case_match(.x = .x,
                                          "institution" ~ "other institution",
                                          .default = .x)),
        dplyr::across(any_of("inst_object_type"),
                      ~ dplyr::case_match(.x = .x,
                                          "legal text (ausformulierter vorschlag)" ~ "legal text (formulated proposal)",
                                          "legal text (allg. anregung)"            ~ "legal text (general proposal)",
                                          .default = .x)),
        dplyr::across(any_of("inst_topics_only"),
                      ~ purrr::map(.x = .x,
                                   .f = \(x) dplyr::case_match(.x = x,
                                                               "infrastructural act"                ~ "infrastructural acts",
                                                               "competence shift"                   ~ "competence shifts",
                                                               "financial act"                      ~ "financial acts",
                                                               "financial act (expenses)"           ~ "financial acts (expenses)",
                                                               "financial act (taxes)"              ~ "financial acts (taxes)",
                                                               "financial act (obligations)"        ~ "financial acts (obligations)",
                                                               "total revision of the constitution" ~ "total revisions of the constitution",
                                                               .default = x))),
        dplyr::across(any_of("inst_topics_excluded"),
                      ~ purrr::map(.x = .x,
                                   .f = \(x) dplyr::case_match(.x = x,
                                                               "budget"                   ~ "budgets",
                                                               "parliamentary competence" ~ "everything within parliamentary competence",
                                                               .default = x))),
        dplyr::across(any_of("inst_quorum_turnout"),
                      ~ stringr::str_replace_all(string = .x,
                                                 pattern = c("^(\\s+)?>(\\s+)?" = ">\u202f",
                                                             "(\\s+)?%(\\s+)?$" = "\u202f%"))),
        ## ordinal
        ## interval
        # TODO: Remove else-clauses once [this](https://github.com/zdaarau/c2d-app/commit/6b72d1928e0182f01b188f3973ba15482fc8c04a) is deployed to
        #       production
        date = if (is.list(date)) {
          clock::as_date(parse_datetime(date))
        } else {
          clock::date_parse(date)
        },
        dplyr::across(any_of(c("date_time_created",
                               "date_time_last_edited")),
                      parse_datetime),
        ## undefined
        files = files %>% purrr::map(~ .x %>% purrr::map(~ .x %>%
                                                           # unnest and restore `date`
                                                           purrr::modify_in(.where = "date",
                                                                            .f = parse_datetime) %>%
                                                           # change subvariable names
                                                           pal::rename_from(dict = sub_var_names$files))))
    
    # complement `id_official` and `id_sudd` (a two-letter country code plus a 6-digit number) by old `number`
    # TODO: once [issue #?](https://github.com/zdaarau/c2d-app/issues/?) is resolved:
    #       - correct this upstream using `edit_rfrnds()`
    #       - remove corresponding code below
    #       - file issue to completely get rid of field `number`
    if ("number" %in% colnames(data)) {
      
      data %<>% dplyr::mutate(number = dplyr::if_else(number %in% c("0", ""),
                                                      NA_character_,
                                                      number),
                              dplyr::across(any_of("id_official"),
                                            ~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\d"),
                                                             number,
                                                             .x)),
                              dplyr::across(any_of("id_sudd"),
                                            ~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\D"),
                                                             # everything beyond the 8th char seems to be manually added -> strip!
                                                             stringr::str_sub(string = number,
                                                                              end = 8L),
                                                             .x)))
    }
    
    # ensure `id_official` and `id_sudd` are present
    if (!("id_official" %in% colnames(data))) data$id_official <- NA_character_
    if (!("id_sudd" %in% colnames(data))) data$id_sudd <- NA_character_
    
    # TODO: remove this once [issue #]() has been resolved
    # create `inst_is_variable/divisible` if necessary
    if ("categories.referendum_text_options" %in% colnames(data)) {
      
      if (!("inst_is_variable" %in% colnames(data))) {
        data %<>% dplyr::mutate(inst_is_variable = dplyr::case_when(
          categories.referendum_text_options %in% c("Variants possible", "Variants / splitting up possible") ~ TRUE,
          is.na(categories.referendum_text_options)                                                          ~ NA,
          .default                                                                                           = FALSE
        ))
      }
      if (!("inst_is_divisible" %in% colnames(data))) {
        data %<>% dplyr::mutate(inst_is_divisible = dplyr::case_when(
          categories.referendum_text_options %in% c("Splitting up possible", "Variants / splitting up possible") ~ TRUE,
          is.na(categories.referendum_text_options)                                                              ~ NA,
          .default                                                                                               = FALSE
        ))
      }
    }
    
    # ensure all country codes are known and assign canonical country name
    data %<>% add_country_name()
    
    data %<>%
      # remove obsolete vars
      dplyr::select(-any_of(c("categories.referendum_text_options",
                              "country_code_historical",
                              "is_past_jurisdiction",
                              "number",
                              "tags"))) %>%
      
      # convert to (ordered) factor where appropriate
      ## based on codebook
      dplyr::mutate(dplyr::across(everything(),
                                  ~ {
                                    metadata <- data_codebook %>% dplyr::filter(variable_name == dplyr::cur_column())
                                    
                                    if (nrow(metadata) != 1L) {
                                      cli::cli_abort("Missing codebook metadata! Please debug",
                                                     .internal = TRUE)
                                    }
                                    
                                    if (is.factor(unlist(metadata$ptype))) {
                                      
                                      lvls <- levels(unlist(metadata$ptype))
                                      is_ordered <- metadata$value_scale %in% c("ordinal_ascending", "ordinal_descending")
                                      
                                      if (is.list(.x)) {
                                        .x %>% purrr::map(.f = factor,
                                                          levels = lvls,
                                                          ordered = is_ordered)
                                      } else {
                                        factor(x = .x,
                                               levels = lvls,
                                               ordered = is_ordered)
                                      }
                                    } else {
                                      .x
                                    }
                                  })) %>%
      ## fctrs without explicit variable_values set in codebook
      dplyr::mutate(
        ### fctrs where we defined a finite set of values
        country_code = factor(x = country_code,
                              levels = val_set$country_code,
                              ordered = FALSE),
        
        ### fctrs where we did not define a finite set of values (yet)
        dplyr::across(any_of(c("subnational_entity_name",
                               "municipality")),
                      as.factor)
      ) %>%
      # add vars which aren't always included and coerce to proper types
      vctrs::tib_cast(to =
                        data_codebook %>%
                        dplyr::filter(!is_opt) %$%
                        magrittr::set_names(x = ptype,
                                            value = variable_name) %>%
                        tibble::as_tibble(),
                      call = this_env) %>%
      # harmonize col order
      order_rfrnd_cols()
  }
  
  # convert nested list cols to tibbles
  data %>%
    dplyr::mutate(dplyr::across(any_of(c("files",
                                         "votes_per_subterritory")),
                                ~ purrr::map(.x,
                                             \(x) if (length(x) > 0L) x %>% purrr::map(tibble::as_tibble) %>% purrr::list_rbind() else NULL)),
                  dplyr::across(any_of("archive"),
                                ~ purrr::map(.x,
                                             \(x) if (length(x) > 0L) tibble::as_tibble(x) else NULL))) %>%
    # add variable labels (must be done at last since mutations above drop attrs)
    labelled::set_variable_labels(.labels = var_lbls,
                                  .strict = FALSE)
}

untidy_date <- function(x) {
  
  as.numeric(x) %>%
    magrittr::multiply_by(1000.0) %>%
    as.list() %>%
    magrittr::set_names(rep("$date",
                            times = length(.)))
}

#' Untidy into "raw" RDB API referendum data
#'
#' Converts from the tidied [rfrnds()] to the "raw" MongoDB schema used by the RDB API. Basically reverts [tidy_rfrnds()].
#'
#' @param data The data to untidy as returned by [rfrnds()].
#' @param as_tibble Whether or not to return the result as a [tibble][tibble::tbl_df]. If `FALSE`, a list is returned.
#'
#' @return
#' If `as_tibble = FALSE`, a list with one element per referendum, suitable to be converted [jsonlite::toJSON()] and then fed to the RDB API.
#'
#' Otherwise a [tibble][tibble::tbl_df] of the same format as returned by [`rfrnds(tidy = FALSE)`][rfrnds].
#' @keywords internal
untidy_rfrnds <- function(data,
                          as_tibble = FALSE) {
  
  checkmate::assert_flag(as_tibble)
  
  var_names_inverse <-
    names(var_names) %>%
    magrittr::set_names(purrr::list_c(var_names, ptype = character()))
  
  sub_var_names_files_inverse <-
    names(sub_var_names$files) %>%
    magrittr::set_names(purrr::list_c(sub_var_names$files, ptype = character()))
  
  # restore `number`
  if (all(c("id_official", "id_sudd") %in% colnames(data))) {
    data %<>% dplyr::mutate(id_sudd = dplyr::if_else(is.na(id_sudd),
                                                     id_official,
                                                     id_sudd))
  }
  
  data %<>%
    # remove variable labels
    labelled::remove_var_label() %>%
    dplyr::mutate(
      # restore strings
      dplyr::across(c(any_of("date"),
                      where(is.factor)),
                    as.character),
      dplyr::across(where(is.list),
                    ~ {
                      if (is.factor(.x[[1L]])) {
                        .x %>% purrr::map(as.character)
                      } else {
                        .x
                      }
                    }),
      # restore dates
      dplyr::across(any_of(c("date_time_created",
                             "date_time_last_edited")),
                    untidy_date),
      # restore individual variables
      ## `files`
      dplyr::across(any_of("files"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) {
                                   
                                   if ("date_time_attached" %in% colnames(x)) {
                                     
                                     x$date_time_attached %<>% untidy_date()
                                   }
                                   
                                   x %<>% pal::rename_from(dict = sub_var_names_files_inverse)
                                 })),
      ## `inst_topics_excluded`
      dplyr::across(any_of("inst_topics_excluded"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) dplyr::case_match(.x = x,
                                                             "budgets" ~ "budget",
                                                             .default = x))),
      ## `inst_topics_only`
      dplyr::across(any_of("inst_topics_only"),
                    ~ purrr::map(.x = .x,
                                 .f = \(x) dplyr::case_match(.x = x,
                                                             "infrastructural acts"                ~ "infrastructural act",
                                                             "competence shifts"                   ~ "competence shift",
                                                             "financial acts"                      ~ "financial act",
                                                             "financial acts (expenses)"           ~ "financial act (expenses)",
                                                             "financial acts (taxes)"              ~ "financial act (taxes)",
                                                             "financial acts (obligations)"        ~ "financial act (obligations)",
                                                             "total revisions of the constitution" ~ "total revision of the constitution",
                                                             .default = x))),
      ## `inst_object_type`
      dplyr::across(any_of("inst_object_type"),
                    ~ dplyr::case_match(.x = .x,
                                        "legal text (formulated proposal)" ~ "legal text (ausformulierter vorschlag)",
                                        "legal text (general proposal)"    ~ "legal text (allg. anregung)",
                                        .default = .x)),
      ## `inst_trigger_actor`, `inst_object_author`
      dplyr::across(any_of(c("inst_trigger_actor",
                             "inst_object_author")),
                    ~ dplyr::case_match(.x = .x,
                                        "other institution" ~ "institution",
                                        .default = .x)),
      ## `inst_precondition_actor`
      dplyr::across(any_of("inst_precondition_actor"),
                    ~ dplyr::case_match(.x = .x,
                                        "parliament and president"  ~ "parliament and President",
                                        "parliament and government" ~ "parliament and Government",
                                        .default = .x)),
      ## `type`
      dplyr::across(any_of("type"),
                    ~ dplyr::case_match(.x = .x,
                                        "citizens' assembly" ~ "citizen assembly",
                                        NA_character_        ~ "not provided",
                                        .default = .x)),
      ## `id`
      dplyr::across(any_of("id"),
                    ~ as.list(.x) %>% magrittr::set_names(rep("$oid", times = length(.)))),
      ## binary (dummies)
      dplyr::across(any_of("position_government"),
                    ~ dplyr::case_match(.x = .x,
                                        "yes" ~ "Acceptance",
                                        "no"  ~ "Rejection",
                                        .default = .x)),
      dplyr::across(any_of("inst_has_urgent_legal_basis"),
                    ~ dplyr::if_else(.x,
                                     "Urgent",
                                     "Normal")),
      dplyr::across(any_of("inst_is_binding"),
                    ~ dplyr::if_else(.x,
                                     "Binding",
                                     "Non-binding")),
      dplyr::across(any_of("inst_is_counter_proposal"),
                    ~ dplyr::if_else(.x,
                                     "Yes",
                                     "No")),
      dplyr::across(any_of("inst_is_assembly"),
                    ~ dplyr::if_else(.x,
                                     "Assembly",
                                     "Ballot")),
      dplyr::across(any_of("inst_has_precondition"),
                    ~ dplyr::if_else(.x,
                                     "Exists",
                                     "Does not exist")),
      # uppercase first letter of various vars
      dplyr::across(any_of(c("result",
                             "type",
                             "inst_legal_basis_type",
                             "inst_object_type",
                             "inst_object_legal_level",
                             "inst_object_revision_extent",
                             "inst_trigger_type",
                             "inst_trigger_actor_level",
                             "inst_trigger_time_limit",
                             "inst_quorum_approval",
                             "inst_precondition_decision",
                             # vars containing uppercase abbreviations
                             "inst_object_author",
                             "inst_trigger_actor",
                             "inst_precondition_actor")),
                    ~ pal::sentenceify(x = .x,
                                       punctuation_mark = "")),
      dplyr::across(any_of(c("inst_object_revision_modes",
                             "inst_topics_only",
                             "inst_topics_excluded")),
                    ~ purrr::map(.x = .x,
                                 .f = pal::sentenceify,
                                 punctuation_mark = "")),
      # restore NA values
      dplyr::across(where(is.character) & !any_of("result"),
                    ~ tidyr::replace_na(data = .x,
                                        replace = "")),
      ## implicit NAs (i.e. 'not provided' (-2))
      dplyr::across(where(is.integer) & !any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -2L)),
      ## explicit NAs (i.e. 'unknown' (-1))
      dplyr::across(any_of("result"),
                    ~ tidyr::replace_na(data = .x,
                                        replace = "Unknown")),
      dplyr::across(where(is.integer) & any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -1L)),
      dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
                    ~ tidyr::replace_na(data = .x,
                                        replace = -1.0))
    ) %>%
    # restore variable names
    pal::rename_from(dict = var_names_inverse)
  
  # restore `referendum_text_options`
  if (all(c("inst_is_divisible", "inst_is_variable") %in% colnames(data))) {
    data %<>% dplyr::mutate(referendum_text_options = dplyr::case_when(!inst_is_divisible & !inst_is_variable ~ "Whole text only",
                                                                       inst_is_divisible & inst_is_variable   ~ "Variants / splitting up possible",
                                                                       inst_is_divisible                      ~ "Splitting up possible",
                                                                       inst_is_variable                       ~ "Variants possible",
                                                                       .default                               = NA_character_))
  }
  
  # restore `tags`
  topics_var_names <- paste0("topics_tier_", 1:3)
  topics_vars_present <- topics_var_names %in% colnames(data)
  
  if (any(topics_vars_present)) {
    
    if (!all(topics_vars_present)) {
      topics_vars_missing <- topics_var_names %>% setdiff(topics_vars_present)
      cli::cli_abort(paste0("{cli::qty(topics_vars_missing)}The following {.var {'topics_tier_#'}} variable{?s} {?is/are} missing from {.arg data}: ",
                            "{.var {topics_vars_missing}}"))
    }
    
    data %<>%
      dplyr::mutate(tags = restore_topics(topics_tier_1,
                                          topics_tier_2,
                                          topics_tier_3)) %>%
      dplyr::select(-any_of(topics_var_names))
  }
  
  # remove unknown columns
  data %<>% dplyr::select(any_of(rfrnd_fields$all_flat))
  
  if (!as_tibble) {
    
    # remove nested field prefixes
    data %<>% dplyr::rename_with(.cols = matches("^(categories|context|title)\\."),
                                 .fn = ~ stringr::str_remove(string = .x,
                                                             pattern = "^\\w+?\\."))
    
    # restore nested structure
    categories_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^categories\\.") %>%
      stringr::str_remove(pattern = "^categories\\.") %>%
      intersect(colnames(data))
    
    context_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^context\\.") %>%
      stringr::str_remove(pattern = "^context\\.") %>%
      intersect(colnames(data))
    
    title_fields_present <-
      names(var_names) %>%
      stringr::str_subset(pattern = "^title\\.") %>%
      stringr::str_remove(pattern = "^title\\.") %>%
      intersect(colnames(data))
    
    if (length(categories_fields_present)) {
      data %<>% tidyr::nest(categories = all_of(categories_fields_present))
    }
    if (length(context_fields_present)) {
      data %<>% tidyr::nest(context = all_of(context_fields_present))
    }
    if (length(title_fields_present)) {
      data %<>% tidyr::nest(title = all_of(title_fields_present))
    }
    
    data %<>%
      # convert to list
      dplyr::group_split(dplyr::row_number(),
                         .keep = FALSE) %>%
      purrr::map(as.list) %>%
      # tweak list structure
      purrr::modify_depth(.depth = 1L,
                          .f = ~
                            .x %>%
                            # flatten unnecessarily nested elements
                            purrr::modify_at(.at = "tags",
                                             .f = flatten_array_as_is) %>%
                            # convert nested tibbles to lists
                            purrr::modify_at(.at = "files",
                                             .f = ~
                                               .x[[1L]] %>%
                                               pal::when(is.null(.) ~ list(),
                                                         ~ dplyr::group_split(.tbl = .,
                                                                              dplyr::row_number(),
                                                                              .keep = FALSE) %>%
                                                           purrr::map(as.list))) %>%
                            purrr::modify_at(.at = c("archive",
                                                     "categories",
                                                     "context",
                                                     "title"),
                                             .f = ~
                                               .x %>%
                                               purrr::map(as.list) %>%
                                               unlist(recursive = FALSE)) %>%
                            # reduce nesting of nested tibble
                            pal::when(is.null(purrr::pluck(., "context", "votes_per_canton")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("context", "votes_per_canton"),
                                                         .f = dplyr::first)) %>%
                            # reduce nesting of array fields
                            pal::when(is.null(purrr::pluck(., "categories", "action")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "action"),
                                                         .f = flatten_array_as_is)) %>%
                            pal::when(is.null(purrr::pluck(., "categories", "excluded_topics")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "excluded_topics"),
                                                         .f = flatten_array_as_is)) %>%
                            pal::when(is.null(purrr::pluck(., "categories", "special_topics")) ~ .,
                                      ~ purrr::modify_in(.x = .,
                                                         .where = c("categories", "special_topics"),
                                                         .f = flatten_array_as_is)))
  }
  
  data
}

#' Assemble RDB Services API URL
#'
#' @param ... Optional path components added to the base URL.
#' @param .use_testing_server `r pkg_config$description[pkg_config$key == "use_testing_server"]`
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_api("health")
url_api <- function(...,
                    .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                              pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)
  
  ifelse(.use_testing_server,
         "stagservices.c2d.ch",
         "services.c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

#' Assemble RDB admin portal URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_admin_portal("referendum/5bbbfd7b92a21351232e46b5")
url_admin_portal <- function(...,
                             .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                       pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)
  
  ifelse(.use_testing_server,
         "c2d-admin.netlify.app",
         "admin.c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

#' Assemble codebook URL
#'
#' @param var Optional variable name to add as the [fragment identifier](https://en.wikipedia.org/wiki/URI_fragment) of the returned URL, which leads to a
#' direct link to the relevant codebook section. Must be either one of the column names of [`data_codebook`], or a valid fragment identifier of a codebook
#' section above the individual variables (`r pal::enum_str(codebook_fragments, last = " or ")`).
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_codebook("level")
#' rdb:::url_codebook("topics")
url_codebook <- function(var = NULL) {
  
  checkmate::assert_string(var,
                           null.ok = TRUE)
  
  if (!is.null(var)) {
    
    var %<>%
      rlang::arg_match0(values = c(data_codebook$variable_name,
                                   # additional HTML anchors
                                   codebook_fragments)) %>%
      stringr::str_replace_all(pattern = stringr::fixed("_"),
                               replacement = "-")
  }
  
  paste0("https://rdb.rpkg.dev/articles/codebook.html", "#"[!is.null(var)], var)
}

#' Assemble website URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_website("referendum/CH/5bbc04f692a21351232e5a01")
url_website <- function(...,
                        .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg)) {
  checkmate::assert_flag(.use_testing_server)
  
  ifelse(.use_testing_server,
         "c2d-site.netlify.app",
         "c2d.ch") %>%
    fs::path(...) %>%
    paste0("https://", .)
}

query_filter_date <- function(min,
                              max) {
  list(`$gte` = min,
       `$lte` = max) %>%
    purrr::compact()
}

query_filter_datetime <- function(min,
                                  max) {
  
  list(`$gte` = purrr::compact(list(`$date` = min)),
       `$lte` = purrr::compact(list(`$date` = max))) %>%
    purrr::compact()
}

query_filter_in <- function(x) {
  
  x %>% pal::when(length(.) == 0L ~ NULL,
                  length(.) == 1L ~ .,
                  ~ list(`$in` = .))
}

parse_sudd_date <- function(x) {
  
  x_parts <- stringr::str_split_1(string = x,
                                  pattern = "-")
  to_int <- function(x) {
    x %<>% as.integer()
    x[x == 0L] <- NA_integer_
    x
  }
  
  tibble::tibble(year = to_int(x_parts[1L]),
                 month = to_int(x_parts[2L]),
                 day = to_int(x_parts[3L]))
}

parse_sudd_date_de <- function(x) {
  
  components <- stringr::str_split_fixed(string = x,
                                         pattern = "\\s+",
                                         n = 3L)
  
  list(year = components[, 3L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer(),
       month = components[, 2L] %>% dplyr::case_match(!!!months_de_fms),
       day = components[, 1L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer())
}

parse_sudd_id <- function(id_sudd) {
  
  sudd_year <-
    id_sudd %>%
    stringr::str_extract(pattern = "\\d{4}$") %>%
    as.integer()
  
  sudd_country_code <-
    id_sudd %>%
    stringr::str_sub(end = 2L) %>%
    stringr::str_to_upper()
  
  derive_country_vars(country_code = sudd_country_code,
                      date = clock::date_build(year = sudd_year,
                                               month = 1L,
                                               day = 1L))
}



sudd_rfrnd <- function(id_sudd) {
  
  checkmate::assert_string(id_sudd)
  
  html <-
    httr::RETRY(verb = "GET",
                url = url_sudd("event.php"),
                query = list(id = id_sudd),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_element(css = "main table") %>%
    rvest::html_children()
  
  field_names <-
    html %>%
    rvest::html_elements(css = "td.feld") %>%
    rvest::html_text()
  
  # handle fields with duplicated/ambiguous names
  if (anyDuplicated(field_names)) {
    
    ## simple duplicates (probably data errors)
    if (id_sudd == "gr011862") {
      
      ix_to_drop <- c(which(field_names == "\u2517\u2501 Republik")[2L],
                      which(field_names == "\u2517\u2501 Russischer Prinz")[2L])
      html %<>% .[-ix_to_drop]
      field_names %<>% .[-ix_to_drop]
      
      ## competing / mutually exclusive proposals, e.g. proposals with direct counter proposal and (optionally) tie-breaker question (CH and LI)
    } else {
      
      option_names <-
        html %>%
        rvest::html_elements(css = "td.feld strong") %>%
        rvest::html_text()
      
      if (length(option_names) < 2L) {
        cli::cli_abort("Unknown table layout detected for referendum with {.arg id_sudd = {id_sudd}}. Please debug.",
                       .internal = TRUE)
      }
      
      ix_option_names <- which(field_names %in% option_names)
      option_names_counter <- c("Gegenentwurf", "Gegenvorschlag")
      option_names_tie_breaker <- "Stichfrage"
      option_names_special <- c(option_names_counter, option_names_tie_breaker)
      has_counter_proposal <- any(option_names_counter %in% option_names)
      n_proposals_original <-
        option_names %>%
        setdiff(option_names_special) %>%
        length()
      
      ix_option_field_names <-
        ix_option_names[-length(ix_option_names)] %>%
        purrr::imap(~ (.x + 1L):(ix_option_names[.y + 1L] - 1L)) %>%
        c(list((dplyr::last(ix_option_names) + 1L):(min(length(field_names), which(field_names %in% c("Medien",
                                                                                                      "Bemerkungen",
                                                                                                      "Gleichzeitig mit",
                                                                                                      "Quellen",
                                                                                                      "Vollst\u00e4ndigkeit",
                                                                                                      "Letzte \u00c4nderung"))) - 1L)))
      # rename field names
      option_suffixes <-
        option_names %>%
        purrr::imap_chr(~ .x %>% pal::when(. %in% option_names_counter                        ~ "counter_proposal",
                                           . %in% option_names_tie_breaker                    ~ "tie_breaker",
                                           has_counter_proposal && n_proposals_original == 1L ~ "proposal",
                                           ~ glue::glue("option_{.y}")))
      renamings <-
        purrr::map2(.x = setdiff(option_names,
                                 option_names_tie_breaker),
                    .y = setdiff(option_suffixes,
                                 "tie_breaker"),
                    .f = ~ rlang::list2(!!paste("\u2517\u2501", .x) := glue::glue("votes_tie_breaker_{.y}"),
                                        !!paste("\u2517\u2501 St\u00e4nde", .x) := glue::glue("subterritories_{.y}_tie_breaker"))) %>%
        purrr::list_flatten() %>%
        as_fm_list()
      
      for (i in seq_along(option_names)) {
        
        field_names[ix_option_field_names[[i]]] %<>%
          dplyr::case_match(.x = .,
                            !!!c(renamings,
                                 list("Abgegebene Stimmen"                   ~ glue::glue("votes_{option_suffixes[i]}_total"),
                                      "Stimmen ausser Betracht"              ~ glue::glue("votes_{option_suffixes[i]}_invalid"),
                                      "Ohne Antwort"                         ~ glue::glue("votes_{option_suffixes[i]}_empty"),
                                      "G\u00fcltige (= massgebende) Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_valid"),
                                      "\u2517\u2501 Ja-Stimmen"              ~ glue::glue("votes_{option_suffixes[i]}_yes"),
                                      "\u2517\u2501 Nein-Stimmen"            ~ glue::glue("votes_{option_suffixes[i]}_no"),
                                      "Ja-Stimmen"                           ~ glue::glue("votes_{option_suffixes[i]}_yes"),
                                      "Nein-Stimmen"                         ~ glue::glue("votes_{option_suffixes[i]}_no"),
                                      "St\u00e4nde (Kantone)"                ~ glue::glue("subterritories_{option_suffixes[i]}"),
                                      "\u2517\u2501 Annehmende St\u00e4nde"  ~ glue::glue("subterritories_{option_suffixes[i]}_yes"),
                                      "\u2517\u2501 Verwerfende St\u00e4nde" ~ glue::glue("subterritories_{option_suffixes[i]}_no"))),
                            .default = .)
      }
      
      # drop obsolete fields
      html %<>% .[-ix_option_names]
      field_names %<>% .[-ix_option_names]
    }
  }
  
  # handle other special cases
  if (id_sudd %in% c("li011954",
                     "li031985")) {
    
    field_names %<>% dplyr::case_match(.x = .,
                                       "\u2517\u2501 Initiative"     ~ "votes_proposal",
                                       "\u2517\u2501 Gegenvorschlag" ~ "votes_counter_proposal",
                                       "\u2517\u2501 Nein-Stimmen"   ~ "votes_option_none",
                                       .default = .)
  }
  
  ## move content of exotic fields to `remarks`
  remarks_field <-
    html[field_names == "Bemerkungen"] %>%
    pal::when(length(.) > 0L ~ rvest::html_elements(x = ., css = "td")[[2L]],
              ~ .)
  
  remarks_list_col <- list(list(text = rvest::html_text2(remarks_field),
                                urls =
                                  remarks_field %>%
                                  rvest::html_elements(css = "a") %>%
                                  purrr::map_chr(~ .x %>%
                                                   rvest::html_attr(name = "href") %>%
                                                   url_sudd()),
                                html =
                                  remarks_field %>%
                                  xml2::xml_contents() %>%
                                  as.character() %>%
                                  paste0(collapse = "")))
  ix_fields_to_remarks <-
    field_names %>%
    stringr::str_detect(pattern = paste0("^",
                                         pal::fuse_regex("\u2517\u2501\u2501\u2501 .+Stimmen( .+)?",
                                                         "Unklare Stimmen",
                                                         "Unstimmigkeiten",
                                                         "G\u00fcltig stimmende Personen"),
                                         "$")) %>%
    which()
  
  if (length(ix_fields_to_remarks) > 0L) {
    
    addendum <- field_names[ix_fields_to_remarks]
    
    if (length(addendum) > 0L) {
      
      addendum %<>%
        stringr::str_extract("\\w.*") %>%
        paste0(": ",
               html[ix_fields_to_remarks] %>%
                 rvest::html_elements(css = "td") %>%
                 magrittr::extract2(2L) %>%
                 rvest::html_elements(css = "data") %>%
                 rvest::html_attr("value"),
               collapse = "\n\n")
    }
    
    remarks_list_col[[1L]]$text %<>% paste0(addendum, "\n\n"[length(addendum) > 0L], .)
    remarks_list_col[[1L]]$html %<>% paste0("<p>\n", addendum, "\n</p>", .)
    html %<>% .[-ix_fields_to_remarks]
    field_names %<>% .[-ix_fields_to_remarks]
  }
  
  # remove unnecessary fields
  ix_to_drop <- which(field_names %in% c("Nicht eingelegte Stimmzettel",
                                         "Nicht eingelegte Stimmenzettel",
                                         "Nicht teilgenommen"))
  
  if (length(ix_to_drop)) {
    html %<>% .[-ix_to_drop]
    field_names %<>% .[-ix_to_drop]
  }
  
  field_names %<>%
    dplyr::case_match(.x = .,
                      "Gebiet"                                          ~ "territory_name_de",
                      "\u2517\u2501 Stellung"                           ~ "territory_type_de",
                      "Datum"                                           ~ "date",
                      "Titel"                                           ~ "title_de",
                      "Vorlage"                                         ~ "title_de",
                      "\u2517\u2501 Fragemuster"                        ~ "question_type_de",
                      "\u2517\u2501 Abstimmungstyp"                     ~ "types",
                      "Ergebnis"                                        ~ "result_de",
                      "Vollst\u00e4ndigkeit"                            ~ "result_status_de",
                      "\u2517\u2501 Mehrheiten"                         ~ "adoption_requirements_de",
                      "Stimmberechtigte"                                ~ "electorate_total",
                      "\u2517\u2501 Davon im Ausland"                   ~ "electorate_abroad",
                      "Stimmausweise"                                   ~ "polling_cards",
                      "Ausgegebene Stimmzettel"                         ~ "polling_cards",
                      "Stimmbeteiligung"                                ~ "votes_total",
                      "Stimmen ausser Betracht"                         ~ "votes_invalid",
                      "Stimmzettel ausser Betracht"                     ~ "votes_invalid",
                      "Leere Stimmen"                                   ~ "votes_empty",
                      "\u2517\u2501 Leere Stimmen"                      ~ "votes_empty",
                      "\u2517\u2501 Leere Stimmzettel"                  ~ "votes_empty",
                      "Ung\u00fcltige Stimmen"                          ~ "votes_void",
                      "\u2517\u2501 Ung\u00fcltige Stimmen"             ~ "votes_void",
                      "\u2517\u2501 Ung\u00fcltige Stimmzettel"         ~ "votes_void",
                      "Ung\u00fcltig eingelegte Stimmzettel"            ~ "votes_void",
                      "Ganz ung\u00fcltige Stimmzettel"                 ~ "votes_void",
                      "G\u00fcltige (= massgebende) Stimmen"            ~ "votes_valid",
                      "\u2517\u2501 Ja-Stimmen"                         ~ "votes_yes",
                      "\u2517\u2501 Nein-Stimmen"                       ~ "votes_no",
                      "\u2517\u2501 Nein zu beiden Vorschl\u00e4gen"    ~ "votes_option_none",
                      "\u2517\u2501 Stimmen ausser Betracht"            ~ "votes_invalid",
                      "Staaten"                                         ~ "subterritories",
                      "\u2517\u2501 Annehmende Staaten"                 ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Staaten"                ~ "subterritories_no",
                      "Gebiete"                                         ~ "subterritories",
                      "\u2517\u2501 Annehmende Gebiete"                 ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Gebiete"                ~ "subterritories_no",
                      "Provinzen"                                       ~ "subterritories",
                      "\u2517\u2501 Annehmende Provinzen"               ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Provinzen"              ~ "subterritories_no",
                      "Inseln"                                          ~ "subterritories",
                      "\u2517\u2501 Annehmende Inseln"                  ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Inseln"                 ~ "subterritories_no",
                      "St\u00e4nde (Kantone)"                           ~ "subterritories",
                      "\u2517\u2501 Annehmende St\u00e4nde"             ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende St\u00e4nde"            ~ "subterritories_no",
                      "Regionen / St\u00e4dte"                          ~ "subterritories",
                      "\u2517\u2501 Annehmende Regionen / St\u00e4dte"  ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Regionen / St\u00e4dte" ~ "subterritories_no",
                      "Wahlkreise"                                      ~ "subterritories",
                      "\u2517\u2501 Annehmende Wahlkreise"              ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Wahlkreise"             ~ "subterritories_no",
                      "Senatswahlkreise"                                ~ "subterritories",
                      "\u2517\u2501 Annehmende Senatswahlkreise"        ~ "subterritories_yes",
                      "\u2517\u2501 Verwerfende Senatswahlkreise"       ~ "subterritories_no",
                      "Medien"                                          ~ "files",
                      "Bemerkungen"                                     ~ "remarks",
                      "Gleichzeitig mit"                                ~ "ids_sudd_simultaneous",
                      "Quellen"                                         ~ "sources",
                      "Letzte \u00c4nderung"                            ~ "date_last_edited",
                      .default = .) %>%
    # assert field names are unique
    checkmate::assert_character(any.missing = FALSE,
                                unique = TRUE,
                                .var.name = "field_names") %>%
    # referendum-option-specific recodings (sequentially numbered `votes_option_#` columns)
    # TODO: adapt this once we can properly capture more than yes/no answer options, cf. https://gitlab.com/zdaarau/rpkgs/rdb/-/issues/5
    purrr::map_at(.at = which(startsWith(., "\u2517\u2501 ")),
                  .f = function(old_name, old_names) paste0("votes_option_", which(old_names == old_name)),
                  old_names = stringr::str_subset(string = .,
                                                  pattern = "^\u2517\u2501 ")) %>%
    purrr::list_c(ptype = character())

  # assert no original uppercase field names are left over
  ix_field_names_unknown <-
    field_names %>%
    stringr::str_detect(pattern = "[:upper:]") %>%
    which()
  
  if (length(ix_field_names_unknown)) {
    cli::cli_abort(paste0("Unknown {cli::qty(length(ix_field_names_unknown))} field{?s} {.field {field_names[ix_field_names_unknown]}} present in data for ",
                          "referendum with {.arg {paste0('id_sudd = ', id_sudd)}}."),
                   .internal = TRUE)
  }
  
  purrr::map2_dfc(.x = html,
                  .y = field_names,
                  .f = function(html, col_name) {
                    
                    cells <- html %>% rvest::html_elements(css = "td")
                    col_text <- rvest::html_text2(cells[[2L]])
                    
                    # extract hyperlinks if necessary
                    if (col_name %in% c("remarks",
                                        "ids_sudd_simultaneous",
                                        "sources")) {
                      urls <-
                        cells[[2L]] %>%
                        rvest::html_elements(css = "a") %>%
                        purrr::map_chr(~ .x %>%
                                         rvest::html_attr(name = "href") %>%
                                         url_sudd())
                    }
                    
                    tibble::tibble(!!col_name :=
                                     col_name %>%
                                     pal::when(
                                       # character scalars
                                       . %in% c("territory_name_de",
                                                "territory_type_de",
                                                "title_de",
                                                "question_type_de",
                                                "result_de",
                                                "result_status_de") ~
                                         col_text,
                                       
                                       # integer scalars
                                       stringr::str_detect(
                                         string = .,
                                         pattern = paste0(
                                           "^",
                                           pal::fuse_regex(
                                             "electorate_total",
                                             "electorate_abroad",
                                             "polling_cards",
                                             "votes_total",
                                             "votes_invalid",
                                             "votes_empty",
                                             "votes_void",
                                             "votes_valid",
                                             "votes_yes",
                                             "votes_no",
                                             "votes_proposal",
                                             "votes_counter_proposal",
                                             paste0("votes_",
                                                    pal::fuse_regex("option_(\\d+|none)",
                                                                    "(counter_)?proposal",
                                                                    "tie_breaker(_(option_\\d+|(counter_)?proposal))?"),
                                                    paste0("(_",
                                                           pal::fuse_regex("total",
                                                                           "empty",
                                                                           "void",
                                                                           "invalid",
                                                                           "valid",
                                                                           "yes",
                                                                           "no"),
                                                           ")?"))),
                                           "$")) ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "data") %>%
                                         rvest::html_attr("value") %>%
                                         # fall back to parsing text if no semantic data could be extracted
                                         pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
                                                   ~ .) %>%
                                         as.integer(),
                                       
                                       startsWith(., "subterritories") ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "data") %>%
                                         rvest::html_attr("value") %>%
                                         # fall back to parsing text if no semantic data could be extracted
                                         pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
                                                   ~ .) %>%
                                         as.numeric(),
                                       
                                       # date scalars
                                       . == "date" ~
                                         cells[[2L]] %>%
                                         rvest::html_element(css = "time") %>%
                                         rvest::html_attr(name = "datetime"),
                                       
                                       . == "date_last_edited" ~
                                         cells[[2L]] %>%
                                         rvest::html_element(css = "time") %>%
                                         rvest::html_attr(name = "datetime") %>%
                                         clock::date_parse(),
                                       
                                       # lists (multi-value cols)
                                       . == "remarks" ~
                                         remarks_list_col,
                                       
                                       . == "sources" ~
                                         list(list(text = col_text,
                                                   urls = urls,
                                                   html =
                                                     cells[[2L]] %>%
                                                     xml2::xml_contents() %>%
                                                     as.character() %>%
                                                     paste0(collapse = ""))),
                                       . == "types" ~
                                         col_text %>% stringr::str_split(pattern = "\\s*\u2192\\s*"),
                                       . == "adoption_requirements_de" ~
                                         col_text %>% stringr::str_split(pattern = ",\\s*"),
                                       . == "files" ~
                                         cells[[2L]] %>%
                                         rvest::html_elements(css = "a") %>%
                                         purrr::map(~ .x %>%
                                                      rvest::html_attr(name = "href") %>%
                                                      url_sudd() %>%
                                                      tibble::tibble(description = rvest::html_text(.x),
                                                                     url = .)) %>%
                                         purrr::list_rbind() %>%
                                         list(),
                                       . == "ids_sudd_simultaneous" ~
                                         urls %>%
                                         stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+") %>%
                                         list(),
                                       
                                       ~ "PARSING ERROR; PLEASE DEBUG"
                                     ))
                  })
}

url_sudd <- function(x = "") {

  purrr::map_chr(x,
                 \(x) {
                   
                   if (!is.na(x) && stringr::str_detect(x, "https?:")) {
                     x
                   } else {
                     httr::modify_url(url = x %|% "",
                                      scheme = "https",
                                      hostname = "sudd.ch")
                   }
                 })
}

this_pkg <- utils::packageName()

cli_theme <-
  cli::builtin_theme() %>%
  purrr::list_modify(span.err = list(color = "red",
                                     `font-weight` = "bold"),
                     span.warn = list(color = "orange",
                                     `font-weight` = "bold"),
                     span.content = list(color = "mediumorchid"))

date_backup_rdb <- pal::path_mod_time("data-raw/backups/rdb.rds") |> clock::as_date()

codebook_fragments <- c("institution-level-variables",
                        "referendum-level-variables",
                        "topics")

data_cols_absent <-
  tibble::tibble(col = character(),
                 type = list(),
                 msg = character()) %>%
  tibble::add_row(col = "id",
                  type = list("add"),
                  msg = "an {.var id} column. It is automatically set by the RDB API back-end. Did you mean to {.fun edit_rfrnds} instead?") %>%
  tibble::add_row(col = "country_name",
                  type = list(c("add", "edit")),
                  msg = "a {.var country_name} column. It is automatically set by the RDB API back-end based on {.var country_code}.") %>%
  tibble::add_row(col = "date_time_created",
                  type = list(c("add", "edit")),
                  msg = "a {.var date_time_created} column. This date is automatically set by the RDB API back-end and not supposed to be changed.") %>%
  tibble::add_row(col = "date_time_last_edited",
                  type = list(c("add", "edit")),
                  msg = paste0("a {.var date_time_last_edited} column. This date is automatically set by the RDB API back-end and not supposed to be changed ",
                               "manually.")) %>%
  dplyr::mutate(msg = paste0("{.arg data} mustn't contain ", msg))

ballot_date_colnames <- c("country_code",
                          "country_code_long",
                          "country_code_continual",
                          "country_name",
                          "country_name_long",
                          "subnational_entity_code",
                          "subnational_entity_name",
                          "municipality",
                          "level",
                          "date",
                          "week",
                          "month",
                          "quarter",
                          "year",
                          "decade",
                          "century",
                          "era",
                          "wave_of_democracy",
                          "is_former_country",
                          "un_country_code",
                          "un_region_tier_1_code",
                          "un_region_tier_1_name",
                          "un_region_tier_2_code",
                          "un_region_tier_2_name",
                          "un_region_tier_3_code",
                          "un_region_tier_3_name",
                          "un_subregion")

rfrnd_fields <- list()

rfrnd_fields$all <- c("_id",
                      "archive",
                      "canton",
                      "categories",
                      "citizens_abroad",
                      "committee_name",
                      "context",
                      "country_code",
                      "country_code_historical",
                      "country_name",
                      "created_on",
                      "date",
                      "date_time_last_edited",
                      "draft",
                      "files",
                      "id_official",
                      "id_sudd",
                      "institution",
                      "is_past_jurisdiction",
                      "level",
                      "question",
                      "question_en",
                      "municipality",
                      "number",
                      "remarks",
                      "result",
                      "sources",
                      "tags",
                      "title",
                      "total_electorate",
                      "votes_empty",
                      "votes_invalid",
                      "votes_no",
                      "votes_yes")

rfrnd_fields$all_flat <-
  rfrnd_fields$all %>%
  setdiff(c("categories", "context", "title")) %>%
  union(c("categories.action",
          "categories.author_of_the_vote_object",
          "categories.counter_proposal",
          "categories.decision_quorum",
          "categories.degree_of_revision",
          "categories.excluded_topics",
          "categories.hierarchy_of_the_legal_norm",
          "categories.institutional_precondition",
          "categories.institutional_precondition_decision",
          "categories.institutional_precondition_decision_actor",
          "categories.legal_act_type",
          "categories.official_status",
          "categories.referendum_text_options",
          "categories.special_topics",
          "categories.turnout_quorum",
          "categories.vote_object",
          "categories.vote_result_status",
          "categories.vote_trigger",
          "categories.vote_trigger_actor",
          "categories.vote_trigger_number",
          "categories.vote_trigger_state_level",
          "categories.vote_trigger_time",
          "categories.vote_venue",
          "context.national_council_abstentions",
          "context.national_council_no",
          "context.national_council_yes",
          "context.recommendation",
          "context.states_council_abstentions",
          "context.states_council_no",
          "context.states_council_yes",
          "context.states_no",
          "context.states_yes",
          "context.votes_per_canton",
          "title.de",
          "title.en",
          "title.fr"))

rfrnd_fields$required_for_edits <- c("_id",
                                     "draft",
                                     "total_electorate",
                                     "citizens_abroad",
                                     "votes_yes",
                                     "votes_no",
                                     "votes_empty",
                                     "votes_invalid")

rfrnd_fields$required_for_additions <- c("country_code",
                                         "level",
                                         "date",
                                         "title.en",
                                         "result",
                                         "total_electorate",
                                         "citizens_abroad",
                                         "votes_yes",
                                         "votes_no",
                                         "votes_empty",
                                         "votes_invalid",
                                         "draft",
                                         "institution")

rfrnd_fields$never_empty <- c("_id",
                              "country_code",
                              "country_name",
                              "created_on",
                              "level",
                              "total_electorate",
                              "citizens_abroad",
                              "votes_yes",
                              "votes_no",
                              "votes_empty",
                              "votes_invalid",
                              "draft")

                 # old name                                                 new name
var_names <- list(`_id`                                                = "id",
                  canton                                               = "subnational_entity_name",
                  title.de                                             = "title_de",
                  title.en                                             = "title_en",
                  title.fr                                             = "title_fr",
                  context.states_no                                    = "subterritories_no",
                  context.states_yes                                   = "subterritories_yes",
                  total_electorate                                     = "electorate_total",
                  citizens_abroad                                      = "electorate_abroad",
                  context.votes_per_canton                             = "votes_per_subterritory",
                  context.national_council_yes                         = "lower_house_yes",
                  context.national_council_no                          = "lower_house_no",
                  context.national_council_abstentions                 = "lower_house_abstentions",
                  context.states_council_yes                           = "upper_house_yes",
                  context.states_council_no                            = "upper_house_no",
                  context.states_council_abstentions                   = "upper_house_abstentions",
                  context.recommendation                               = "position_government",
                  draft                                                = "is_draft",
                  created_on                                           = "date_time_created",
                  institution                                          = "type",
                  categories.official_status                           = "inst_legal_basis_type",
                  categories.legal_act_type                            = "inst_has_urgent_legal_basis",
                  categories.vote_result_status                        = "inst_is_binding",
                  categories.counter_proposal                          = "inst_is_counter_proposal",
                  categories.vote_venue                                = "inst_is_assembly",
                  categories.vote_trigger                              = "inst_trigger_type",
                  categories.vote_trigger_actor                        = "inst_trigger_actor",
                  categories.vote_trigger_state_level                  = "inst_trigger_actor_level",
                  categories.vote_trigger_number                       = "inst_trigger_threshold",
                  categories.vote_trigger_time                         = "inst_trigger_time_limit",
                  categories.vote_object                               = "inst_object_type",
                  categories.author_of_the_vote_object                 = "inst_object_author",
                  categories.hierarchy_of_the_legal_norm               = "inst_object_legal_level",
                  categories.degree_of_revision                        = "inst_object_revision_extent",
                  categories.action                                    = "inst_object_revision_modes",
                  categories.turnout_quorum                            = "inst_quorum_turnout",
                  categories.decision_quorum                           = "inst_quorum_approval",
                  categories.institutional_precondition                = "inst_has_precondition",
                  categories.institutional_precondition_decision_actor = "inst_precondition_actor",
                  categories.institutional_precondition_decision       = "inst_precondition_decision",
                  categories.special_topics                            = "inst_topics_only",
                  categories.excluded_topics                           = "inst_topics_excluded")

                                  # old name       new name
sub_var_names <- list(files = list(date       = "date_time_attached",
                                   object_key = "s3_object_key",
                                   size       = "file_size",
                                   deleted    = "is_deleted"))

# create additional formula-lists (mainly to be fed to `dplyr::case_match()`)
var_names_fms <- as_fm_list(var_names)
sub_var_names_fms <- purrr::imap(sub_var_names,
                                 ~ as_fm_list(.x))

mime_error_suffix <- "This indicates either some network issue or a change in the RDB API."

sudd_years <-
  url_sudd("index.php") %>%
  xml2::read_html() %>%
  rvest::html_element(css = "select[id='first']") %>%
  rvest::html_elements("option") %>%
  rvest::html_attr("value") %>%
  as.integer()

sudd_max_year <- pal::safe_max(sudd_years)
sudd_min_year <- pal::safe_min(sudd_years)
rm(sudd_years)

#' Get referendum data (old version)
#'
#' Downloads the referendum data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams assemble_query_filter
#' @inheritParams tidy_rfrnds
#' @inheritParams url_api
#' @param incl_archive Whether or not to include an `archive` column containing data from an earlier, obsolete state of the Referendum Database (RDB).
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds()
#' 
#' # get only referendums in Austria and Australia on subnational level
#' rdb::rfrnds(country_code = c("AT", "AU"),
#'             level = "subnational",
#'             quiet = TRUE)
#'
#' # get referendums in 2020
#' rdb::rfrnds(date_min = "2020-01-01",
#'             date_max = "2020-12-31",
#'             quiet = TRUE)
#'
#' # get referendums added to the database during the last 30 days
#' rdb::rfrnds(date_time_created_min = clock::date_today(zone = "UTC") |> clock::add_days(-30L),
#'             date_time_created_max = clock::date_today(zone = "UTC"),
#'             quiet = TRUE)
#' 
#' # provide custom `query_filter` for more complex queries like regex matches
#' # cf. https://docs.mongodb.com/manual/reference/operator/query/regex/
#' rdb::rfrnds(query_filter = '{"country_code":{"$regex":"A."}}',
#'             quiet = TRUE)
rfrnds <- function(country_code = NULL,
                   subnational_entity_name = NULL,
                   municipality = NULL,
                   level = NULL,
                   type = NULL,
                   date_min = NULL,
                   date_max = NULL,
                   is_draft = FALSE,
                   date_time_created_min = NULL,
                   date_time_created_max = NULL,
                   date_time_last_edited_min = NULL,
                   date_time_last_edited_max = NULL,
                   query_filter = NULL,
                   incl_archive = FALSE,
                   tidy = TRUE,
                   use_cache = TRUE,
                   max_cache_age = "1 week",
                   use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                            pkg = this_pkg),
                   quiet = FALSE) {
  
  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(quiet)
  
  # TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
  if (isTRUE(use_testing_server)) cli::cli_abort("{.code mode=stream} is not yet supported on the testing servers.")
  
  result <- pkgpins::with_cache(expr = {
    
    if (!quiet) {
      status_msg <- "Fetching JSON data from RDB API..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    
    data <-
      httr::RETRY(verb = "GET",
                  url = url_api("referendums",
                                .use_testing_server = use_testing_server),
                  query = list(mode = "stream",
                               format = "json",
                               filter = assemble_query_filter(country_code = country_code,
                                                              subnational_entity_name = subnational_entity_name,
                                                              municipality = municipality,
                                                              level = level,
                                                              type = type,
                                                              date_min = date_min,
                                                              date_max = date_max,
                                                              is_draft = is_draft,
                                                              date_time_created_min = date_time_created_min,
                                                              date_time_created_max = date_time_created_max,
                                                              date_time_last_edited_min = date_time_last_edited_min,
                                                              date_time_last_edited_max = date_time_last_edited_max,
                                                              query_filter = query_filter)),
                  if (!quiet) httr::progress(type = "down"),
                  times = 3L) %>%
      # ensure we actually got a JSON response
      pal::assert_mime_type(mime_type = "application/json",
                            msg_suffix = mime_error_suffix) %>%
      # extract JSON
      httr::content(as = "text",
                    encoding = "UTF-8") %>%
      # ensure body is not empty
      assert_content()
    
    if (!quiet) {
      status_msg <- "Converting JSON to list data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    # NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result (depending on `query`)
    data %<>%
      jsonlite::fromJSON(simplifyVector = FALSE,
                         simplifyDataFrame = FALSE,
                         simplifyMatrix = FALSE,
                         flatten = FALSE) %$%
      items
    
    if (!quiet) {
      status_msg <- "Tidying data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    
    data %>% tidy_rfrnds(tidy = tidy)
  },
  pkg = this_pkg,
  from_fn = "rfrnds",
  country_code,
  subnational_entity_name,
  municipality,
  level,
  type,
  date_min,
  date_max,
  is_draft,
  date_time_created_min,
  date_time_created_max,
  date_time_last_edited_min,
  date_time_last_edited_max,
  query_filter,
  tidy,
  use_testing_server,
  use_cache = use_cache,
  max_cache_age = max_cache_age)
  
  # exclude `archive` if requested
  if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))
  
  result
}

#' Get referendum data from backup
#'
#' Downloads the referendum data from the Referendum Database (RDB) backup [in the `zdaarau/rpkgs/rdb`
#' repository](https://gitlab.com/zdaarau/rpkgs/rdb/-/blob/master/data-raw/backups/rdb.rds?ref_type=heads). See the [`codebook`][codebook] for a detailed
#' description of all variables.
#'
#' @inheritParams rfrnds
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds_bkp()
rfrnds_bkp <- function(is_draft = FALSE,
                       incl_archive = FALSE,
                       use_cache = TRUE,
                       max_cache_age = "1 week",
                       quiet = FALSE) {
  
  checkmate::assert_flag(is_draft,
                         null.ok = TRUE)
  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(quiet)
  
  result <- pkgpins::with_cache(
    expr = {
      
      if (!quiet) {
        pal::cli_progress_step_quick(msg = "Fetching latest RDB backup")
      }
      
      path_temp <- fs::file_temp(pattern = "rdb-", ext = "rds")
      utils::download.file(url = "https://gitlab.com/zdaarau/rpkgs/rdb/-/raw/master/data-raw/backups/rdb.rds?ref_type=heads&inline=false",
                           destfile = path_temp,
                           quiet = TRUE,
                           mode = "wb")
      
      readRDS(file = path_temp)
    },
    pkg = this_pkg,
    from_fn = "rfrnds",
    use_cache = use_cache,
    max_cache_age = max_cache_age
  )
  
  # exclude `archive` if requested
  if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))
  
  # respect `is_draft`
  if (!is.null(is_draft)) {
    result %<>% dplyr::filter(is_draft == !!is_draft)
  }
  
  result
}

#' Get a single referendum's data
#'
#' Downloads a single referendum's data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams rfrnds
#' @param id Referendum's unique [identifier](`r url_codebook("id")`).
#'
#' @inherit rfrnds return
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f")
rfrnd <- function(id,
                  incl_archive = FALSE,
                  tidy = TRUE,
                  use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                           pkg = this_pkg)) {
  checkmate::assert_string(id,
                           min.chars = 1L)
  checkmate::assert_flag(incl_archive)
  checkmate::assert_flag(tidy)
  
  # retrieve data
  data <-
    httr::RETRY(verb = "GET",
                url = url_api("referendums", id,
                              .use_testing_server = use_testing_server),
                config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
                times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # extract JSON
    httr::content(as = "text",
                  encoding = "UTF-8") %>%
    # ensure body is not empty
    assert_content() %>%
    # convert JSON to list
    # NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result
    jsonlite::fromJSON(simplifyVector = FALSE,
                       simplifyDataFrame = FALSE,
                       simplifyMatrix = FALSE,
                       flatten = FALSE) %>%
    # ensure no error occured
    assert_api_success() %>%
    # tidy data
    tidy_rfrnds(tidy = tidy)
  
  # exclude `archive` if requested
  if (!incl_archive) data %<>% dplyr::select(-any_of("archive"))
  
  # return data
  data
}

#' Download file attachment
#'
#' Downloads a file attachment from the Referendum Database (RDB). The necessary `s3_object_key`s identifying individual files are found in the `files` list
#' column returned by [rfrnds()].
#'
#' @inheritParams url_api
#' @param s3_object_key Key uniquely identifying the file in the RDB [Amazon S3 bucket](https://en.wikipedia.org/wiki/Amazon_S3#Design). A character scalar.
#' @param path Path where the downloaded file is written to.
#' @param use_original_filename Whether to save the file attachment using its original filename as uploaded. Note that original filenames are **not unique**,
#'   i.e. there are multiple file attachments with the same original filename (but differing content, of course). If `FALSE`, `s3_object_key` is used as
#'   filename. Only relevant if `path` is a directory.
#'
#' @return A [response object][httr::response], invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' # get object keys
#' obj_keys <-
#'   rdb::rfrnds()$files |>
#'   purrr::list_rbind() |>
#'   dplyr::filter(!is_deleted) |>
#'   _$s3_object_key[1:3]
#'
#' # download them to the current working dir
#' purrr::walk(obj_keys,
#'             rdb::download_file_attachment)
#' 
#' # and delete them again
#' fs::file_delete(obj_keys)
download_file_attachment <- function(s3_object_key,
                                     path = ".",
                                     use_original_filename = FALSE,
                                     use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                              pkg = this_pkg)) {
  checkmate::assert_string(s3_object_key)
  checkmate::assert_atomic(path)
  checkmate::assert_flag(use_original_filename)
  
  # TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
  if (isTRUE(use_testing_server)) cli::cli_abort("Accessing file attachments is not yet supported on the testing servers.")
  
  is_dir <- fs::is_dir(path)
  
  if (is_dir) {
    checkmate::assert_directory_exists(path,
                                       access = "rw")
  } else {
    checkmate::assert_path_for_output(path,
                                      overwrite = TRUE)
  }
  
  temp_path <- fs::file_temp()
  
  response <- httr::RETRY(verb = "GET",
                          url = url_api("s3_objects", s3_object_key,
                                        .use_testing_server = use_testing_server),
                          httr::write_disk(path = temp_path),
                          times = 3L)
  
  if (is_dir) {
    
    if (use_original_filename) {
      
      final_path <-
        response %>%
        httr::headers() %$%
        `content-disposition` %>%
        stringr::str_extract(pattern = "(?<=filename=\").+?(?=\")") %>%
        fs::path(path, .)
      
    } else {
      final_path <- fs::path(path, s3_object_key)
    }
    
  } else {
    final_path <- path
  }
  
  fs::file_move(path = temp_path,
                new_path = final_path)
  
  invisible(response)
}

#' Add new referendums to the RDB
#'
#' Adds new referendum entries to the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @details
#' Note that adding/editing the column `files` is not supported, i.e. it is simply dropped from `data`.
#'
#' @inheritParams url_api
#' @param data The new referendum data. A [tibble][tibble::tbl_df] that in any case must contain the columns
#' `r rfrnd_fields$required_for_additions %>% dplyr::case_match(.x = ., !!!var_names_fms, .default = .) %>% md_link_codebook() %>% pal::as_md_list()`
#'   
#' plus the column [`subnational_entity_name`](`r url_codebook("subnational_entity_name")`) for referendums of
#' [`level`](`r url_codebook("subnational_entity_name")`) below `"national"`, and the column [`municipality`](`r url_codebook("municipality")`) for referendums
#' of `level = "local"`,
#'   
#' plus any additional [valid][codebook] columns containing the values for the corresponding database fields.
#' @param email The e-mail address of the RDB API user account to be used for authentication. A character scalar.
#' @param password The password of the RDB API user account to be used for authentication. A character scalar.
#' @param quiet Whether or not to print the newly created referendum IDs to console.
#'
#' @return A character vector of newly created referendum IDs.
#' @family rfrnd
#' @export
add_rfrnds <- function(data,
                       email = pal::pkg_config_val(key = "api_username",
                                                   pkg = this_pkg),
                       password = pal::pkg_config_val(key = "api_password",
                                                      pkg = this_pkg),
                       use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                pkg = this_pkg),
                       quiet = FALSE) {
  
  checkmate::assert_data_frame(data,
                               min.rows = 1L)
  checkmate::assert_flag(quiet)
  
  ## ensure forbidden columns are absent
  assert_cols_absent(data = data,
                     type = "add")
  
  ## ensure mandatory columns are present
  rfrnd_fields$required_for_additions %>%
    dplyr::case_match(.x = .,
                      !!!var_names_fms,
                      .default = .) %>%
    purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))
  
  # drop non-applicable columns (they're supposed to be absent in MongoDB)
  data %<>% drop_non_applicable_vars()
  
  ## ensure remaining columns are valid
  assert_cols_valid(data = data,
                    type = "add")
  
  # convert data to MongoDB schema
  json_items <-
    data %>%
    # restore MongoDB fields
    untidy_rfrnds() %>%
    # convert to JSON
    purrr::map(jsonlite::toJSON,
               auto_unbox = TRUE,
               digits = NA)
  
  # add data to the MongoDB via HTTP POST on `/referendums` API endpoint
  responses <-
    json_items %>%
    purrr::map(\(x) {
      
      httr::RETRY(verb = "POST",
                  url = url_api("referendums",
                                .use_testing_server = use_testing_server),
                  config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
                                             Authorization = paste("Bearer", auth_session(email = email,
                                                                                          password = password,
                                                                                          use_testing_server = use_testing_server))),
                  body = x,
                  times = 3L,
                  httr::content_type_json()) %>%
        # ensure we actually got a JSON response
        pal::assert_mime_type(mime_type = "application/json",
                              msg_suffix = mime_error_suffix) %>%
        # extract JSON string
        httr::content(as = "text",
                      encoding = "UTF-8") %>%
        # ensure body is not empty
        assert_content() %>%
        # convert to list
        jsonlite::fromJSON(simplifyDataFrame = FALSE,
                           simplifyMatrix = FALSE)
    }) %>%
    # ensure no error occured
    assert_api_success()
  
  # throw warnings for unsuccessful API calls
  purrr::walk2(.x = responses,
               .y = seq_along(responses),
               .f = ~ if (!is.list(.x) || !isTRUE(nchar(.x$`_id`$`$oid`) > 0L)) {
                 
                 api_failure(.x,
                             raw = json_items[[.y]],
                             prefix = "Failed to add the {.y}. referendum. ")
               })
  
  ids_new <- unlist(responses,
                    use.names = FALSE)
  
  if (!quiet) {
    cli::cli_alert_info("New referendum entries created with {.var id}s:")
    cli::cli_li(ids_new)
  }
  
  invisible(ids_new)
}

#' Edit existing referendums in the RDB
#'
#' Edits existing referendum entries in the  API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inherit add_rfrnds details
#' 
#' @inheritParams add_rfrnds
#' @param data Updated referendum data. A [tibble][tibble::tbl_df] that must contain an [`id`](`r url_codebook("id")`) column
#'   identifying the referendums to be edited plus any additional columns containing the new values to update the corresponding database fields with. Note that
#'   due to [current API requirements](https://github.com/zdaarau/c2d-app/issues/50#issuecomment-1222660683), the following columns must always be supplied:
#'   
#'   ```{r, echo = FALSE, results = "asis"}
#'   rfrnd_fields$required_for_edits %>%
#'     dplyr::case_match(.x = .,
#'                       !!!var_names_fms,
#'                       .default = .) |>
#'     setdiff("id") |>
#'     md_link_codebook() |>
#'     pal::as_md_list() |>
#'     cat()
#'   ```
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
edit_rfrnds <- function(data,
                        email = pal::pkg_config_val(key = "api_username",
                                                    pkg = this_pkg),
                        password = pal::pkg_config_val(key = "api_password",
                                                       pkg = this_pkg),
                        use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                 pkg = this_pkg)) {
  # ensure `data` is valid
  checkmate::assert_data_frame(data,
                               min.rows = 1L)
  
  ## ensure forbidden columns are absent
  assert_cols_absent(data = data,
                     type = "edit")
  
  ## ensure mandatory columns are present
  rfrnd_fields$required_for_edits %>%
    dplyr::case_match(.x = .,
                      !!!var_names_fms,
                      .default = .) %>%
    c("id") %>%
    purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))
  
  # drop non-applicable columns (they're absent in MongoDB)
  data %<>% drop_non_applicable_vars()
  
  ## ensure remaining columns are valid
  assert_cols_valid(data,
                    type = "edit")
  
  # convert data to MongoDB schema
  ids <- data$id
  
  json_items <-
    data %>%
    # drop `id`
    dplyr::select(-id) %>%
    # restore MongoDB fields
    untidy_rfrnds() %>%
    # convert to JSON
    purrr::map(jsonlite::toJSON,
               auto_unbox = TRUE,
               digits = NA)
  
  # edit data in the MongoDB via HTTP PUT on `/referendums/{id}` API endpoint
  responses <- purrr::map2(.x = ids,
                           .y = json_items,
                           .f = ~
                             httr::RETRY(verb = "PUT",
                                         url = url_api("referendums", .x,
                                                       .use_testing_server = use_testing_server),
                                         config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
                                                                    Authorization = paste("Bearer", auth_session(email = email,
                                                                                                                 password = password,
                                                                                                                 use_testing_server = use_testing_server))),
                                         body = .y,
                                         times = 3L,
                                         httr::content_type_json()) %>%
                             # ensure we actually got a JSON response
                             pal::assert_mime_type(mime_type = "application/json",
                                                   msg_suffix = mime_error_suffix) %>%
                             # extract JSON string
                             httr::content(as = "text",
                                           encoding = "UTF-8") %>%
                             # ensure body is not empty
                             assert_content())
  
  # throw warnings for unsuccessful API calls
  purrr::walk(.x = seq_along(ids),
              .f = ~ {
                
                parsed <- jsonlite::fromJSON(responses[[.x]])
                
                if (!isTRUE(parsed$ok)) {
                  api_failure(parsed,
                              raw = json_items[[.x]],
                              prefix = paste0("Failed to edit referendum with {.var id} {.val ", ids[.x], "}. "))
                }
              })
  
  invisible(data)
}

#' Delete referendums in the RDB
#'
#' Deletes existing referendum entries in the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inheritParams add_rfrnds
#' @param ids IDs of the referendums to be deleted. A character vector.
#'
#' @return `ids`, invisibly.
#' @family rfrnd
#' @export
delete_rfrnds <- function(ids,
                          email = pal::pkg_config_val(key = "api_username",
                                                      pkg = this_pkg),
                          password = pal::pkg_config_val(key = "api_password",
                                                         pkg = this_pkg),
                          use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                   pkg = this_pkg)) {
  checkmate::assert_character(ids,
                              min.chars = 1L,
                              any.missing = FALSE,
                              unique = TRUE)
  
  # TODO: remove this as soon as https://github.com/zdaarau/c2d-app/issues/45 is deployed to master
  if (!use_testing_server) {
    cli::cli_abort("Referendum deletions are not yet supported on the production servers.")
  }
  
  responses <- purrr::map(.x = ids,
                          .f = ~
                            httr::RETRY(verb = "DELETE",
                                        url = url_api("referendums", .x,
                                                      .use_testing_server = use_testing_server),
                                        config = httr::add_headers(Authorization = paste("Bearer", auth_session(email = email,
                                                                                                                password = password,
                                                                                                                use_testing_server = use_testing_server))),
                                        times = 3L) %>%
                            # ensure we actually got a JSON response
                            pal::assert_mime_type(mime_type = "application/json",
                                                  msg_suffix = mime_error_suffix) %>%
                            # extract JSON string
                            httr::content(as = "text",
                                          encoding = "UTF-8") %>%
                            # ensure body is not empty
                            assert_content())
  
  # throw warnings for unsuccessful API calls
  purrr::walk2(.x = ids,
               .y = responses,
               .f = ~ {
                 
                 parsed <- jsonlite::fromJSON(.y)
                 
                 if (!isTRUE(parsed$ok)) {
                   api_failure(parsed,
                               prefix = "Failed to delete referendum with {.var id} {.val {.x}}. ")
                 }
               })
  
  invisible(ids)
}

#' Validate referendum data
#'
#' Performs various data validation steps to ensure there are no errors in the supplied `data`.
#'
#' @param data Referendum data to validate, as returned by [rfrnds()].
#' @param check_applicability_constraint Whether or not to check that no applicability constraints as defined in the [codebook][data_codebook] are violated.
#' @param check_id_sudd_prefix Whether or not to check that all [`id_sudd`](`r url_codebook("id_sudd")`) prefixes are valid.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
validate_rfrnds <- function(data,
                            check_applicability_constraint = TRUE,
                            check_id_sudd_prefix = TRUE) {
  
  checkmate::assert_data_frame(data,
                               min.rows = 1L)
  checkmate::assert_subset(colnames(data),
                           choices = rfrnd_cols_order)
  checkmate::assert_flag(check_applicability_constraint)
  checkmate::assert_flag(check_id_sudd_prefix)
  
  # check columns
  status_msg <- "Checking basic column validity..."
  cli_progress_id <- cli::cli_progress_step(msg = status_msg,
                                            msg_done = paste(status_msg, "done"),
                                            msg_failed = paste(status_msg, "failed"))
  
  assert_cols_valid(data = data,
                    type = "validate",
                    action = cli::cli_alert_warning,
                    cli_progress_id = cli_progress_id)
  
  # check applicability constraints
  if (check_applicability_constraint) {
    
    status_msg <- "Asserting applicability constraints..."
    cli::cli_progress_step(msg = status_msg,
                           msg_done = paste(status_msg, "done"),
                           msg_failed = paste(status_msg, "failed"))
    
    var_names_violated <-
      data_codebook %>%
      dplyr::filter(variable_name %in% colnames(data)
                    & !is.na(applicability_constraint)) %$%
      purrr::map2_lgl(.x = magrittr::set_names(x = variable_name,
                                               value = variable_name),
                      .y = applicability_constraint,
                      .f = ~ {
                        
                        data %>%
                          dplyr::filter(!eval(parse(text = .y))) %$%
                          eval(as.symbol(.x)) %>%
                          { is.na(.) | purrr::map_lgl(., is.null) } %>%
                          all()
                      }) %>%
    magrittr::extract(!.) %>%
    names()
    
    n_var_names_violated <- length(var_names_violated)
    
    if (n_var_names_violated) {
      
      cli::cli_progress_done(result = "failed")
      cli::cli_alert_warning("Applicability constraints are violated for {n_var_names_violated} variable{?s}:")
      
      paste0("{.var ", var_names_violated, "}") %>%
        magrittr::set_names(rep("x",
                                times = length(.))) %>%
        cli::cli_bullets()
      
      first_var_name_violated <- var_names_violated[1L]
      
      cli::cli({
        cli::cli_text("\nTo get the applicability constraint of e.g. {.var {first_var_name_violated}}, run:")
        cli::cli_text("")
        cli::cli_code(c("rdb::data_codebook %>%",
                        glue::glue("  dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%"),
                        "  applicability_constraint"))
        cli::cli_text("")
        cli::cli_text("To inspect the entries in violation of the above applicability constraint, run:")
        cli::cli_text("")
        cli::cli_code(c("data %>%",
                        glue::glue("  dplyr::filter(rdb::data_codebook %>%\n",
                                   "                  dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%\n",
                                   "                  applicability_constraint %>%\n",
                                   "                  parse(text = .) %>%\n",
                                   "                  eval() %>%\n",
                                   "                  magrittr::not()) %>%\n",
                                   "  dplyr::select(id, {first_var_name_violated})",
                                   .trim = FALSE)))
      })
    }
  }
  
  # check `id_sudd` prefix if requested
  if (check_id_sudd_prefix) {
    
    status_msg <- "Validating `id_sudd` prefixes..."
    cli::cli_progress_step(msg = status_msg,
                           msg_done = paste(status_msg, "done"),
                           msg_failed = paste(status_msg, "failed"))
    
    if (!all(c("country_code", "id_sudd") %in% colnames(data))) {
      cli::cli_progress_done(result = "failed")
      cli::cli_abort("Columns {.var country_code} and {.var id_sudd} must be present in {.arg data}.")
    }
    
    # define allowed exceptions
    allowed_exceptions <- tibble::tribble(
      ~country_code, ~id_sudd_prefix,
      # Curacao
      "CW", "an",
      # Szeklerland, cf. https://sudd.ch/event.php?id=hu042008
      "RO", "hu"
    )
    
    # assemble target country codes
    country_codes <-
      data$country_code %>%
      as.character() %>%
      as.list()
    
    for (country_code in allowed_exceptions$country_code) {
      
      additional_country_codes <-
        allowed_exceptions %>%
        dplyr::filter(country_code == !!country_code) %$%
        id_sudd_prefix %>%
        stringr::str_to_upper()
      
      ix_country_codes <-
        country_codes %>%
        purrr::map_lgl(~ country_code %in% .x) %>%
        which()
      
      for (i in ix_country_codes) {
        country_codes[[i]] <- unique(c(country_codes[[i]], additional_country_codes))
      }
    }
    
    # add dummy indicating if target country codes match
    # TODO: instead of modifying input data, print cli msg with all relevant info!
    data$matches_id_sudd_prefix <-
      data$id_sudd %>%
      stringr::str_extract(pattern = "^..") %>%
      stringr::str_to_upper() %>%
      purrr::map2_lgl(.y = country_codes,
                      .f = ~ .x %in% .y)
    
    data$matches_id_sudd_prefix[is.na(data$id_sudd)] <- NA
  }
  
  invisible(data)
}

#' Count number of referendums
#'
#' Counts the number of referendums per [`level`](`r url_codebook("level")`) in the Referendum Database (RDB).
#'
#' @inheritParams assemble_query_filter
#' @inheritParams url_api
#'
#' @return A named list with `level` as names and referendum counts as values.
#' @family rfrnd
#' @export
#'
#' @examples
#' # the whole database (excl. drafts)
#' rdb::count_rfrnds()
#' 
#' # only Swiss and Austrian referendums
#' rdb::count_rfrnds(country_code = c("CH", "AT"))
#' 
#' # only Swiss referendums created between 2020 and 2021
#' rdb::count_rfrnds(country_code = "CH",
#'                   date_time_created_min = "2020-01-01",
#'                   date_time_created_max = "2021-01-01")
count_rfrnds <- function(is_draft = FALSE,
                         country_code = NULL,
                         subnational_entity_name = NULL,
                         municipality = NULL,
                         level = NULL,
                         type = NULL,
                         date_min = NULL,
                         date_max = NULL,
                         date_time_created_min = NULL,
                         date_time_created_max = NULL,
                         date_time_last_edited_min = NULL,
                         date_time_last_edited_max = NULL,
                         query_filter = NULL,
                         use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                  pkg = this_pkg)) {
  httr::RETRY(verb = "GET",
              url = url_api("referendums/stats",
                            .use_testing_server = use_testing_server),
              query = list(filter = assemble_query_filter(country_code = country_code,
                                                          subnational_entity_name = subnational_entity_name,
                                                          municipality = municipality,
                                                          level = level,
                                                          type = type,
                                                          date_min = date_min,
                                                          date_max = date_max,
                                                          is_draft = is_draft,
                                                          date_time_created_min = date_time_created_min,
                                                          date_time_created_max = date_time_created_max,
                                                          date_time_last_edited_min = date_time_last_edited_min,
                                                          date_time_last_edited_max = date_time_last_edited_max,
                                                          query_filter = query_filter)),
              config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
              times = 3L) %>%
    # ensure we actually got a JSON response
    pal::assert_mime_type(mime_type = "application/json",
                          msg_suffix = mime_error_suffix) %>%
    # parse response
    httr::content(as = "parsed") %$%
    votes %>%
    magrittr::set_names(names(.) %>% dplyr::case_match(.x = ., "sub_national" ~ "subnational", .default = .))
}

#' Test if referendum ID exists
#'
#' Tests whether the referendum with the supplied `id` exists or not.
#'
#' @inheritParams rfrnd
#' @inheritParams url_api
#'
#' @return A logical scalar.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd_exists("6303a4cba52c3995043a8c24")
rfrnd_exists <- function(id,
                         .use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                                   pkg = this_pkg)) {
  checkmate::assert_string(id,
                           min.chars = 1L)
  
  httr::GET(url = url_api("referendums", id,
                          .use_testing_server = .use_testing_server),
            config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = .use_testing_server))) %>%
    httr::http_error() %>%
    magrittr::not()
}

#' Assert referendum variables are present
#'
#' Asserts the specified `vars` are present in the supplied referendum `data`. Depending on `vars`, additional integrity checks are performed.
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#' @param vars Names of the variables to check. A character vector.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |> rdb::assert_vars(vars = "country_code")
#' 
#' try(
#'   tibble::tibble(country_code = "AN") |> rdb::assert_vars(vars = "country_code")
#' )
assert_vars <- function(data,
                        vars) {
  
  vars %>% purrr::walk(~ {
    
    msg_suffix <- switch(EXPR         = .x,
                         country_code = " with ISO 3166-1 alpha-2 or ISO 3166-3 alpha-4 codes.",
                         "")
    
    if (!(.x %in% colnames(data))) {
      cli::cli_abort(paste0("{.arg data} must contain a column {.var {.x}}", msg_suffix))
    }
    
    # run additional content check
    assert_content <- switch(EXPR         = .x,
                             country_code = \(x) {
                               
                               checkmate::assert_vector(x = x,
                                                        .var.name = "data$country_code")
                               check <- checkmate::check_subset(x = as.character(x),
                                                                choices = val_set$country_code)
                               if (!isTRUE(check)) {
                                 
                                 expired_codes <- intersect(as.character(x),
                                                            data_iso_3166_3$Alpha_2)
                                 cli::cli_abort(paste0(
                                   "Assertion on {.var data$country_code} failed: ",
                                                       ifelse(length(expired_codes),
                                                              paste0("The following country codes have been deleted from ISO 3166-1 and were moved to ISO ",
                                                                     "3166-3 (former countries) instead: {.val {expired_codes}}"),
                                                              # escape curly braces from checkmate msg
                                                              stringr::str_replace_all(string = check,
                                                                                       pattern = "([\\{\\}])",
                                                                                       replacement = "\\1\\1"))))
                               }
                             },
                             \(x) TRUE)
    
    assert_content(data[[.x]])
  })
  
  invisible(data)
}

#' RDB Codebook
#'
#' A tibble containing the complete metadata of all [rfrnds()] variables. The codebook below is also available [online](`r url_codebook()`).
#'
#' # Codebook
#'
#' ```{r, child = "vignettes/codebook.Rmd"}
#' ```
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @aliases codebook
#' @family metadata
#' @export
#'
#' @examples
#' rdb::data_codebook
"data_codebook"

#' Get set of possible *value labels* of referendum data variable
#'
#' Returns a character vector of value labels of a specific [rfrnds()] column, in the same order as [var_vals()], or of length `0` if `var_name`'s values are
#' not restricted to a predefined set or no value labels are defined in the [codebook][data_codebook].
#'
#' @param var_name Variable name present in [`data_codebook`] for which the labels are to be returned. A character scalar.
#' @param incl_affixes Whether or not to add the corresponding `value_label_prefix` and `value_label_suffix` to the returned labels.
#'
#' @return A character vector. Of length `0` if `var_name`'s values are not restricted to a predefined set or no value labels are defined in the
#'   [codebook][data_codebook].
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_lbls("result",
#'               incl_affixes = FALSE)
#' rdb::val_lbls("result")
#' 
#' # Convert the labels to sentence case with trailing dot
#' rdb::val_lbls("result") |> pal::sentenceify()
val_lbls <- function(var_name,
                     incl_affixes = TRUE) {
  
  var_name <- rlang::arg_match0(arg = var_name,
                                values = rfrnd_cols_order)
  
  metadata <- data_codebook |> dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name)
  result <- metadata$value_labels |> purrr::list_c(ptype = character())
  
  if (incl_affixes) {
    if (!is.na(metadata$value_label_prefix)) result <- paste(metadata$value_label_prefix, result)
    if (!is.na(metadata$value_label_suffix)) result <- paste(metadata$value_label_suffix, result)
  }
  
  result
}

#' Get *value scale* of referendum data variables
#'
#' Returns the value scale of the specified [rfrnds()] columns.
#'
#' @param var_names Variable name(s) present in [`data_codebook`] for which the value scale is to be returned. A character vector.
#'
#' @return A character scalar.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_scale("level")
#' paste0("topics_tier_", 1:3) |> rdb::val_scale()
val_scale <- function(var_names) {
  
  var_name <- rlang::arg_match(arg = var_names,
                               values = rfrnd_cols_order,
                               multiple = TRUE)
  
  c(data_codebook$value_scale,
    data_codebook$value_scale)[match(x = var_names,
                                     table = c(data_codebook$variable_name,
                                               data_codebook$variable_name_unnested))]
}

#' Get set of possible *values* of referendum data variable
#'
#' Returns a vector of the possible predefined values a specific column in [rfrnds()] can hold. If the variable values aren't restricted to a predefined
#' set, `NULL` is returned.
#'
#' @param var_name Variable name present in [`data_codebook`]. A character scalar.
#'
#' @return
#' If `var_name`'s values are restricted to a predefined set and
#' - `var_name` is *not* of type list, a vector of the same type as `var_name`.
#' - `var_name` is of type list, a vector of the same type as the elements of `var_name`.
#'
#' Else `NULL`.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::var_vals("result")
#' rdb::var_vals("id")
var_vals <- function(var_name) {
  
  var_name <- rlang::arg_match0(arg = var_name,
                                values = rfrnd_cols_order)
  data_codebook |>
    dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name) %$%
    variable_values |>
    unlist()
}

#' Get unnested variable names
#'
#' Returns the unnested analogue(s) of the specified variable name(s), which result from [unnesting][unnest_var]. For variable names that do *not* refer to
#' nested list columns, `var_names` is simply returned as-is.
#'
#' @inheritParams prettify_var_names
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family unnest
#' @export
#'
#' @examples
#' rdb::var_name_unnested("inst_object_revision_modes")
#' rdb::var_name_unnested(paste0("topics_tier_", 1:3))
var_name_unnested <- function(var_names) {
  
  var_names <- rlang::arg_match(arg = var_names,
                                values = data_codebook$variable_name,
                                multiple = TRUE)
  data_codebook |>
    dplyr::filter(variable_name %in% !!var_names) %$%
    variable_name_unnested
}

#' Prettify referendum data variable names
#'
#' Converts referendum data variable names to their ready-for-publication version. Variable names that are unknown, i.e. not present in [`data_codebook`]), are
#' left untouched.
#'
#' @param var_names Variable name(s). Those not present in [`data_codebook`] remain untouched. A character vector.
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family prettify
#' @export
#'
#' @examples
#' rdb::prettify_var_names("topics_tier_1")
#' 
#' # also supports unnested var names
#' rdb::prettify_var_names("topic_tier_1")
#'
#' # unknown var names are left untouched
#' rdb::prettify_var_names(var_names = c("topic_tier_1", "topic_tier_99"))
prettify_var_names <- function(var_names) {
  
  checkmate::assert_character(var_names)
  
  c(data_codebook$variable_name_print,
    data_codebook$variable_name_unnested_print)[match(x = var_names,
                                                      table = c(data_codebook$variable_name,
                                                                data_codebook$variable_name_unnested))] %|% var_names
}

#' Topic hierarchy
#'
#' A tibble reflecting the complete [referendum topics hierarchy](`r url_codebook("topics")`).
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @family topics
#' @export
#'
#' @examples
#' rdb::data_topics
"data_topics"

#' List available topics
#'
#' Lists the set of available [referendum topics](`r url_codebook("topics")`) on the specified `tiers`.
#'
#' @param tiers Tiers to include topics from. An integerish vector.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::topics(tiers = 1:2)
topics <- function(tiers = 1:3) {
  
  checkmate::assert_integerish(tiers,
                               lower = 1L,
                               upper = 3L,
                               any.missing = FALSE,
                               unique = TRUE)
  topic_set <- character()
  
  if (1L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_1)
  }
  
  if (2L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_2)
  }
  
  if (3L %in% tiers) {
    topic_set %<>% c(data_topics$topic_tier_3)
  }
  
  topic_set %>%
    setdiff(NA_character_) %>%
    unique()
}

#' Hierarchize topics
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Can also be used to simply
#' determine the parent topic(s) of any topic.
#'
#' @param x The topics to hierarchize. Either a character vector of topics or a single-row data frame containing at least the columns `topics_tier_1`,
#'   `topics_tier_2` and `topics_tier_3`.
#'
#' @return A [tibble][tibble::tbl_df] with the columns `topic_tier_1`, `topic_tier_2` and `topic_tier_3`.
#' @family topics
#' @export
#'
#' @examples
#' rdb::hierarchize_topics("territorial questions")
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   dplyr::group_split(id) |>
#'   purrr::map(rdb::hierarchize_topics)
hierarchize_topics <- function(x) {
  
  test_char <- checkmate::test_character(x, any.missing = FALSE)
  
  if (!test_char) {
    
    topic_var_names <- paste0("topics_tier_", 1:3)
    test_df <- checkmate::test_data_frame(x,
                                          min.rows = 1L,
                                          max.rows = 1L)
    has_topic_vars <- all(topic_var_names %in% colnames(x))
    
    if (!test_df || !has_topic_vars) {
      cli::cli_abort(paste0("{.arg x} must be either a character vector of topics or a single-row data frame containing at least the columns ",
                            "{.field topics_tier_1}, {.field topics_tier_2} and {.field topics_tier_3}."))
    }
    
    x <- unlist(x[, topic_var_names],
                use.names = FALSE)
  }
  
  checkmate::assert_subset(x,
                           choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_),
                           empty.ok = TRUE)
  
  topics_tier_1 <- x[x %in% topics_tier_1_]
  topics_tier_2 <- x[x %in% topics_tier_2_]
  topics_tier_3 <- x[x %in% topics_tier_3_]
  inferred_topics_tier_1 <- infer_topics(topics = c(topics_tier_2, topics_tier_3),
                                         tier = 1L)
  inferred_topics_tier_2 <- infer_topics(topics = topics_tier_3,
                                         tier = 2L)
  non_parent_topics_tier_1 <- setdiff(topics_tier_1, inferred_topics_tier_1)
  non_parent_topics_tier_2 <- setdiff(topics_tier_2, inferred_topics_tier_2)
  
  # 0. initialize empty tibble
  result <- tibble::tibble(topic_tier_1 = character(),
                           topic_tier_2 = character(),
                           topic_tier_3 = character())
  
  # 1. add third-tier topics
  result <-
    topics_tier_3 %>%
    purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
                                                            tier = 1L),
                                topic_tier_2 = infer_topics(topics = .x,
                                                            tier = 2L),
                                topic_tier_3 = .x)) %>%
    purrr::list_rbind() %>%
    dplyr::bind_rows(result)
  
  # 2. add remaining second-tier topics
  result <-
    non_parent_topics_tier_2 %>%
    purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
                                                            tier = 1L),
                                topic_tier_2 = .x,
                                topic_tier_3 = NA_character_)) %>%
    purrr::list_rbind() %>%
    dplyr::bind_rows(result)
  
  # 3. add remaining first-tier topics
  result %>%
    dplyr::bind_rows(tibble::tibble(topic_tier_1 = non_parent_topics_tier_1,
                                    topic_tier_2 = NA_character_,
                                    topic_tier_3 = NA_character_)) %>%
    # sort result
    dplyr::arrange(topic_tier_1, topic_tier_2, topic_tier_3)
}

#' Hierarchize topics (fast)
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Other than
#' [hierarchize_topics()], this function assumes that the three topic variables are always *complete*, i.e. that no (grand)parent topics of lower-tier topics
#' are missing. This assumption is met by [rfrnds()] and [rfrnd()].
#'
#' @param topics_tier_1 First-tier topics. A character vector.
#' @param topics_tier_2 Second-tier topics. A character vector.
#' @param topics_tier_3 Third-tier topics. A character vector.
#'
#' @inherit hierarchize_topics return
#' @family topics
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") %$%
#'   rdb::hierarchize_topics_fast(unlist(topics_tier_1),
#'                                unlist(topics_tier_2),
#'                                unlist(topics_tier_3))
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   dplyr::group_split(id) |>
#'   purrr::map(~ rdb::hierarchize_topics_fast(unlist(.x$topics_tier_1),
#'                                             unlist(.x$topics_tier_2),
#'                                             unlist(.x$topics_tier_3)))
hierarchize_topics_fast <- function(topics_tier_1 = character(),
                                  topics_tier_2 = character(),
                                  topics_tier_3 = character()) {
  
  checkmate::assert_subset(topics_tier_1,
                           choices = topics_tier_1_)
  checkmate::assert_subset(topics_tier_2,
                           choices = topics_tier_2_)
  checkmate::assert_subset(topics_tier_3,
                           choices = topics_tier_3_)
  # add tier-3 hierarchy
  result <- data_topics[data_topics$topic_tier_3 %in% topics_tier_3, ]
  
  # add non-parent tier-2 hierarchy
  topics_tier_2 %<>% setdiff(result$topic_tier_2)
  result %<>% dplyr::bind_rows(unique(data_topics[data_topics$topic_tier_2 %in% topics_tier_2, 1:2]))
  
  # add non-parent tier-1 topics
  topics_tier_1 %<>% setdiff(result$topic_tier_1)
  result %>% dplyr::bind_rows(tibble::tibble(topic_tier_1 = topics_tier_1))
}

#' Infer higher-tier topics
#'
#' Determines the top-tier (`tier = 1L`) or second-tier (`tier = 2L`) topics corresponding to `topics` in the
#' [hierarchy][data_topics], i.e. either `topics` themselves or their (grand)parent topics.
#'
#' @param topics Topics from which the corresponding (grand)parent topics are to be determined. A factor or character vector.
#' @param tier Tier of the inferred topics. Either `1L` or `2L`.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#'                   tier = 1L)
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#'                   tier = 2L)
#' 
#' # topics of different tiers can mixed in `topics`
#' rdb::infer_topics(topics = c("EU", "environment"),
#'                   tier = 2L)
#' 
#' # but `topics` of a higher tier than `tier` will be ignored
#' rdb::infer_topics(topics = "foreign policy",
#'                   tier = 2L)
infer_topics <- function(topics,
                         tier = 1L) {
  
  if (is.factor(topics)) topics <- as.character(topics)
  
  checkmate::assert_subset(topics,
                           choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_))
  checkmate::assert_int(tier,
                        lower = 1L,
                        upper = 2L)
  
  # inferred from lower-tier topics
  result <- data_topics[data_topics$topic_tier_2 %in% topics | data_topics$topic_tier_3 %in% topics, ]
  result %<>% .[[paste0("topic_tier_", tier)]]
  
  # plus top-tier topics
  if (tier == 1L) result %<>% c(topics[topics %in% topics_tier_1_])
  
  unique(result)
}

#' Add `is_former_country` flag to referendum data
#'
#' Augments `data` with an additional column `is_former_country` indicating whether or not the column `country_code` holds an [ISO 3166-3
#' alpha-4 code](https://en.wikipedia.org/wiki/ISO_3166-3) referring to a historical country which ceased to exist. `is_former_country` being `FALSE` means
#' `country_code` holds an [ISO 3166-1 alpha-2 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) instead.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds() |>
#'   rdb:::add_former_country_flag() |>
#'   dplyr::select(id,
#'                 starts_with("country_"),
#'                 is_former_country)
add_former_country_flag <- function(data) {
  
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    dplyr::mutate(is_former_country = nchar(as.character(country_code)) > 2L) %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["is_former_country"])
}

#' Add continual country code to referendum data
#'
#' Augments `data` with an additional column `country_code_continual` holding the current or future [ISO 3166-1
#' alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code of the country where the referendum took place. If the country still exists,
#' `country_code_continual` is identical to `country_code`, otherwise it is the `country_code` of the successor country. If the country was succeeded by
#' multiple countries, the code of the largest one in terms of population is taken.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_country_code_continual() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_code_continual <- function(data) {
  
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  
  data %>%
    dplyr::mutate(country_code_continual = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                      .y = add_former_country_flag(data)$is_former_country,
                                                                      .f = ~ {
                                                                        if (.y) {
                                                                          data_iso_3166_3$Alpha_2_new_main[data_iso_3166_3$Alpha_4 == .x]
                                                                          
                                                                        } else {
                                                                          .x
                                                                        }
                                                                      }),
                                                  levels = val_set$country_code_continual,
                                                  ordered = FALSE)) %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_code_continual"])
}

#' Add long country code to referendum data
#'
#' Augments `data` with an additional column holding the current or former three-letter [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)
#' code of the country in which the referendum took place (see [ISO 3166-3](https://en.wikipedia.org/wiki/ISO_3166-3_alpha-3) for former country codes).
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_code_long() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_code_long <- function(data) {
  
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing long country code
    dplyr::select(-any_of("country_code_long")) %>%
    # add long country code
    dplyr::mutate(country_code_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                 .y = add_former_country_flag(data)$is_former_country,
                                                                 .f = ~ if (.y) {
                                                                   data_iso_3166_3$Alpha_3[data_iso_3166_3$Alpha_4 == .x]
                                                                 } else {
                                                                   data_iso_3166_1$Alpha_3[data_iso_3166_1$Alpha_2 == .x]
                                                                 }),
                                             levels = val_set$country_code_long,
                                             ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_code_long) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_code_long"])
}

#' Add short country name to referendum data
#'
#' Augments `data` with an additional column holding the common English name of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_name() |>
#'   dplyr::select(id,
#'                 starts_with("country_"))
add_country_name <- function(data) {
  
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing country name
    dplyr::select(-any_of("country_name")) %>%
    # add country name
    dplyr::mutate(country_name = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                            .y = add_former_country_flag(data)$is_former_country,
                                                            .f = ~ if (.y) {
                                                              data_iso_3166_3$name_short[data_iso_3166_3$Alpha_4 == .x]
                                                            } else {
                                                              data_iso_3166_1$name_short[data_iso_3166_1$Alpha_2 == .x]
                                                            }),
                                        levels = val_set$country_name,
                                        ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_name) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_name"])
}

#' Add long country name to referendum data
#'
#' Augments `data` with an additional column holding the official full English name(s) of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb:::add_country_name_long() |>
#'   dplyr::select(id,
#'                 starts_with("country_name"))
add_country_name_long <- function(data) {
  
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  data %>%
    # remove possibly existing long country name
    dplyr::select(-any_of("country_name_long")) %>%
    # add long country name
    dplyr::mutate(country_name_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
                                                                 .y = add_former_country_flag(data)$is_former_country,
                                                                 .f = ~ if (.y) {
                                                                   data_iso_3166_3$name_long[data_iso_3166_3$Alpha_4 == .x]
                                                                 } else {
                                                                   data_iso_3166_1$name_long[data_iso_3166_1$Alpha_2 == .x]
                                                                 }),
                                             levels = val_set$country_name_long,
                                             ordered = FALSE)) %>%
    # ensure no NAs
    assertr::assert(predicate = assertr::not_na,
                    country_name_long) %>%
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls["country_name_long"])
}

#' Add period to referendum data
#'
#' Augments `data` with an additional column holding the specified period in which the referendum took place. The new column is named after `period` and its
#' values are always of type integer.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param period Type of period to add. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_period() |>
#'   dplyr::select(id, date, week)
#'
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_period("year") |>
#'   dplyr::select(id, date, year)
add_period <- function(data,
                       period = c("week", "month", "quarter", "year", "decade", "century")) {
  
  checkmate::assert_data_frame(data)
  period <- rlang::arg_match(period)
  assert_vars(data = data,
              vars = "date")
  
  # define necessary date transformations
  get_period <- switch(EXPR    = period,
                       week    = function(x) clock::as_iso_year_week_day(x) %>% clock::get_week(),
                       month   = function(x) clock::get_month(x),
                       quarter = function(x) clock::as_year_quarter_day(x) %>% clock::get_quarter(),
                       year    = function(x) clock::get_year(x),
                       decade  = function(x) (clock::get_year(x) %/% 10L) * 10L,
                       century = function(x) (clock::get_year(x) %/% 100L) * 100L)
  
  # define lbl parts
  period_lbl <- switch(EXPR    = period,
                       week    = glue::glue("{period} (1\u201353)"),
                       month   = glue::glue("{period} (1\u201312)"),
                       quarter = glue::glue("{period} (1\u20134)"),
                       period)
  data %>%
    # add period
    dplyr::mutate(!!as.symbol(period) := get_period(date)) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = var_lbls[period])
}

#' Add turnout to referendum data
#'
#' @description
#' Augments `data` with an additional column `turnout` containing the voter turnout calculated as:
#'
#' \Sexpr[results=rd, stage=build]{
#'   katex::math_to_rd(tex = "\\\\frac{votes\\\\_yes+votes\\\\_no+votes\\\\_empty+votes\\\\_invalid}{electorate\\\\_total}",
#'                     ascii = "(votes_yes + votes_no + votes_empty + votes_invalid) / electorate_total",
#'                     displayMode = TRUE)
#' }
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `electorate_total`, `votes_yes`, `votes_no`,
#'   `votes_empty` and `votes_invalid`.
#' @param rough Whether to fall back on a "rough" calculation of the turnout in case any of the variables `votes_empty` or `votes_invalid` is unknown (`NA`), or
#'   to be strict and return `NA` in such a case.
#' @param excl_dubious Whether or not to exclude obviously dubious turnout numbers (those > 1.0) by setting them to `NA`. Such numbers stem either from
#'   data errors or (officially) tampered numbers.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' # rough turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_turnout() |>
#'   dplyr::select(id,
#'                 electorate_total,
#'                 starts_with("votes_"),
#'                 turnout)
#'
#' # strict turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::add_turnout(rough = FALSE) |>
#'   dplyr::select(id,
#'                 electorate_total,
#'                 starts_with("votes_"),
#'                 turnout)
add_turnout <- function(data,
                        rough = TRUE,
                        excl_dubious = TRUE) {
  
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(rough)
  checkmate::assert_flag(excl_dubious)
  assert_vars(data = data,
              vars = c("electorate_total",
                       "votes_yes",
                       "votes_no",
                       "votes_empty",
                       "votes_invalid"))
  data %>%
    dplyr::rowwise() %>%
    dplyr::mutate(turnout = sum(votes_yes, votes_no, votes_empty, votes_invalid, na.rm = rough) / electorate_total) %>%
    dplyr::ungroup() %>%
    # set dubious turnout numbers to NA if requested
    dplyr::mutate(turnout = dplyr::if_else(excl_dubious & turnout > 1.0,
                                           NA_real_,
                                           turnout)) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(turnout = var_lbls[["turnout"]] %>% ifelse(test = rough,
                                                                             yes = stringr::str_replace(string = .,
                                                                                                        pattern = stringr::fixed("turnout"),
                                                                                                        replacement = "turnout (rough)"),
                                                                             no = .))
}

#' Add UN world regions to referendum data
#'
#' @description
#' Augments `data` with information about the [United Nations (UN) geoscheme](https://en.wikipedia.org/wiki/United_Nations_geoscheme) on three different
#' grouping tiers based on the [UN M49 area code hierarchy](https://en.wikipedia.org/wiki/UN_M49#Code_lists).
#' 
#' In total, eight different columns are added:
#' - `un_country_code`: UN M49 country code
#' - `un_region_tier_1_code`: UN tier-1 region's M49 area code
#' - `un_region_tier_1_name`: UN tier-1 region's English name
#' - `un_region_tier_2_code`: UN tier-2 region's M49 area code
#' - `un_region_tier_2_name`: UN tier-2 region's English name
#' - `un_region_tier_3_code`: UN tier-3 region's M49 area code
#' - `un_region_tier_3_name`: UN tier-3 region's English name
#' - `un_subregion`: Combinatiorial English UN subregion name which, except for Northern Europe, corresponds to the lowest `un_region_tier_*_name`.
#'
#' Tier-1 regions are the highest, i.e. most aggregated UN regions, commonly referred to as continents. Tier-2 regions are also known as "subregions" and tier-3
#' regions as "sub-subregions".
#'
#' Only part of all UN tier-2 regions are further divided into UN tier-3 regions, meaning that not all countries are part of a UN tier-3 region. If a country
#' doesn't belong to any UN tier-3 region, the corresponding `un_region_tier_3_*` values will simply be `NA`. The `un_subregion` column specifically addresses
#' this issue by providing a uniform combination of `un_region_tier_2_name` and `un_region_tier_3_name`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `country_code` (with [ISO 3166-1
#'   alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) or [ISO 3166-3 alpha-4](https://en.wikipedia.org/wiki/ISO_3166-3) codes).
#' @param add_un_country_code Whether or not to also add a column `un_country_code` holding the UN M49 code of the country in which the referendum took place.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |>
#'   rdb::add_world_regions() |>
#'   dplyr::select(id,
#'                 starts_with("country_"),
#'                 starts_with("un_"))
add_world_regions <- function(data,
                              add_un_country_code = TRUE) {
  # ensure minimal validity
  checkmate::assert_data_frame(data)
  assert_vars(data = data,
              vars = "country_code")
  checkmate::assert_flag(add_un_country_code)
  
  has_country_code_continual <- "country_code_continual" %in% colnames(data)
  
  # add UN regions to input data
  data %<>%
    # temporarily add required base var `country_code_continual` if necessary
    add_country_code_continual() %>%
    # remove possibly existing UN region vars
    dplyr::select(-any_of(setdiff(colnames(un_regions),
                                  "country_code"))) %>%
    # add UN regions
    dplyr::left_join(y = un_regions,
                     by = c(country_code_continual = "country_code")) %>%
    # ensure every row got at least a UN tier-1 region assigned
    assertr::assert(predicate = assertr::not_na,
                    un_region_tier_1_code) %>%
    # harmonize col order
    order_rfrnd_cols() %>%
    # add var lbl
    labelled::set_variable_labels(.labels = purrr::keep_at(x = var_lbls,
                                                           at = c("un_country_code",
                                                                  "un_region_tier_1_code",
                                                                  "un_region_tier_1_name",
                                                                  "un_region_tier_2_code",
                                                                  "un_region_tier_2_name",
                                                                  "un_region_tier_3_code",
                                                                  "un_region_tier_3_name",
                                                                  "un_subregion")))
  # drop vars if necessary/requested
  if (!has_country_code_continual) {
    data %<>% dplyr::select(-country_code_continual)
  }
  if (!add_un_country_code) {
    data %<>% dplyr::select(-un_country_code)
  }
  
  data
}

#' Add various URLs to referendum data
#'
#' Augments `data` with additional columns holding URLs of the specified `types`. The new columns will be named after `types`, prefixed with `url_`, so
#' `types = "sudd"` will add the column `url_sudd` etc.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `id_sudd` for `types = "sudd"` and the columns
#'   `country_code`, `level` and `id_official` for `types = "swissvotes"`.
#' @param types Type(s) of URLs to add. One or more of
#'   `r pal::fn_param_defaults(fn = add_urls, param = "types") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "CH" & level == "national") |>
#'   rdb::add_urls() |>
#'   dplyr::select(id,
#'                 country_code,
#'                 level,
#'                 starts_with("id_"),
#'                 starts_with("url_"))
add_urls <- function(data,
                     types = c("sudd", "swissvotes")) {
  
  checkmate::assert_data_frame(data)
  types <- rlang::arg_match(arg = types,
                            multiple = TRUE)
  
  if ("sudd" %in% types) {
    
    assert_vars(data = data,
              vars = "id_sudd")
    
    data %<>% dplyr::mutate(url_sudd = dplyr::if_else(is.na(id_sudd),
                                                      NA_character_,
                                                      url_sudd(glue::glue("event.php?id={id_sudd}"))))
  }
  
  if ("swissvotes" %in% types) {
    
    assert_vars(data = data,
                vars = c("country_code",
                         "level",
                         "id_official"))
    
    data %<>% dplyr::mutate(url_swissvotes = dplyr::if_else(country_code == "CH" & level == "national" & !is.na(id_official),
                                                            paste0("https://swissvotes.ch/vote/", id_official), # nolint: paste_linter
                                                            NA_character_))
  }
  
  data
}

#' Transform to ballot-date-level observations
#'
#' Transforms referendum-level observations to ones on the level of ballot date and jurisdiction via [nesting][tidyr::nest] of referendum-level columns. The
#' individual values of all the referendums on a specific ballot date in a specific jurisdiction are preserved in a list column named `rfrnd_data`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param cols_to_retain Additional non-standard columns to be preserved as top-level columns instead of being nested in the list column `rfrnd_data`. They
#'   mustn't vary within ballot-date-level observations. `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' # standard RDB columns are retained as far as possible
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates()
#' 
#' # non-standard columns must be explicitly specified in order to be retained
#' data_rdb <-
#'   rdb::rfrnds(quiet = TRUE) |>
#'     rdb::add_world_regions() |>
#'     dplyr::mutate(region_custom =
#'                     factor(x = dplyr::if_else(country_code == "CH",
#'                                               "Switzerland & Liechtenstein",
#'                                               un_region_tier_1_name),
#'                            levels = c("Switzerland & Liechtenstein",
#'                                       levels(un_region_tier_1_name))) |>
#'                     forcats::fct_relevel("Switzerland & Liechtenstein",
#'                                          after = 3L) |>
#'                     forcats::fct_recode("rest of Europe" = "Europe"))
#'
#' data_rdb |> rdb::as_ballot_dates() |> colnames()
#' data_rdb |> rdb::as_ballot_dates(cols_to_retain = region_custom) |> colnames()
#'
#' # non-standard columns to retain must actually be retainable
#' try(
#'   data_rdb |> rdb::as_ballot_dates(cols_to_retain = title_en)
#' )
as_ballot_dates <- function(data,
                            cols_to_retain = NULL) {
  
  checkmate::assert_data_frame(data)
  defused_cols_to_retain <- rlang::enquo(cols_to_retain)
  ix_cols_to_retain <- tidyselect::eval_select(expr = defused_cols_to_retain,
                                               data = data)
  names_cols_to_retain <- names(ix_cols_to_retain)
  
  # ensure date col is present
  if (!("date" %in% colnames(data))) {
    cli::cli_abort("Unable to transform to ballot-date-level data since no {.var {date}} column is present in {.arg data}.")
  }
  
  # nest data
  cols_to_nest <-
    data |>
    colnames() |>
    setdiff(c(ballot_date_colnames,
              names_cols_to_retain))
  
  result <- data |> tidyr::nest(rfrnd_data = any_of(cols_to_nest))
  
  # ensure `cols_to_retain` don't vary within ballot dates
  n_rows_nested <-
    data |>
    dplyr::summarise(n = dplyr::n(),
                     .by = any_of(ballot_date_colnames)) %$%
    n
  
  if (!identical(purrr::map_int(result$rfrnd_data,
                                nrow),
                 n_rows_nested)) {
    
    cli::cli_abort(paste0("Retaining the additional non-standard {cli::qty(length(ix_cols_to_retain))} column{?s} {.var {names_cols_to_retain}} while ",
                          "converting to ballot-date-level observations is impossible because {?(some of)} {?this/these} column{?s} var{?ies/y} within ballot ",
                          "dates."))
  }
  
  result
}

#' Unnest multi-value variable
#'
#' Unnests a multi-value variable of type list to long format. Multi-value variables can contain more than one value per observation and thus break with the
#' [tidy-data convention](https://tidyr.tidyverse.org/articles/tidy-data.html). This function allows to conveniently expand `data` to contain a single `var`
#' value per observation only, thereby increasing the number of observations (i.e. rows).
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `var`.
#' @param var `data` column to unnest. One of the multi-value variables:
#' `r data_codebook |> dplyr::filter(is_multi_valued) %$% variable_name |> pal::wrap_chr(wrap = "\x60") |> pal::as_md_list()`
#'   
#' `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family unnest
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::unnest_var(topics_tier_2)
unnest_var <- function(data,
                       var) {
  
  # tidy selection and arg check
  checkmate::assert_data_frame(data)
  defused_var <- rlang::enquo(var)
  i_var <- tidyselect::eval_select(expr = defused_var,
                                   data = data)
  name_var <- names(i_var)
  n_var <- length(i_var)
  
  if (n_var > 1L) {
    cli::cli_abort("Only {.emph one} {.arg var} can be unnested at a time, but {.val {n_var}} were provided.")
  }
  
  name_var <- rlang::arg_match0(arg = name_var,
                                arg_nm = "var",
                                values =
                                  data_codebook |>
                                  dplyr::filter(is_multi_valued) %$%
                                  variable_name)
  
  name_var_unnested <- var_name_unnested(name_var)
  
  data |>
    tidyr::unnest_longer(col = all_of(name_var),
                         values_to = name_var_unnested,
                         keep_empty = TRUE,
                         ptype = character()) |>
    dplyr::mutate(!!as.symbol(name_var_unnested) := factor(x = !!as.symbol(name_var_unnested),
                                                           levels = var_vals(name_var),
                                                           ordered = val_scale(name_var) %in% c("ordinal_ascending", "ordinal_descending")))
}

#' Count number of referendums
#'
#' Counts the number of RDB referendums, optionally by additional columns specified via `by`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns specified in `by` (if any).
#' @param by Optional `data` column(s) to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param complete_fcts Whether or not to complete the result with implicitly missing combinations of those columns specified in `by` which are of type factor.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds(by = level)
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::n_rfrnds(by = level)
n_rfrnds <- function(data,
                     by = NULL,
                     complete_fcts = TRUE) {
  # arg checks
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(complete_fcts)
  
  # tidy selection
  defused_by <- rlang::enquo(by)
  ix_by <- tidyselect::eval_select(expr = defused_by,
                                   data = data)
  names_by <- names(ix_by)
  
  result <-
    data |>
    dplyr::group_by(!!!rlang::syms(names_by)) |>
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop")
  
  if (complete_fcts) {
    result %<>% tidyr::complete(!!!rlang::syms(names_by),
                                fill = list(n = 0L))
  }
  
  result
}

#' Count number of referendums per period
#'
#' Counts the number of RDB referendums per desired period, optionally by additional columns specified via `by`.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @inheritParams n_rfrnds
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#'   compute the [period column][add_period]), plus the one(s) specified via `by` (if any).
#' @param period Type of period to count referendums by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param fill_gaps Whether or not to add zero-value rows to the result for `period` gaps in `data`.
#' @param period_floor Lower `period` limit up to which gaps are filled. If `NULL`, the lower limit is set to the minimum of `period` present in `data`. Only
#'   relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param period_ceiling Upper `period` limit up to which gaps are filled. If `NULL`, the upper limit is set to the maximum of `period` present in `data`. Only
#'   relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param descending Whether to sort the resulting table by `period` in descending or in ascending order.
#'
#' @inherit add_period details
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period()
#'
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = level)
#' 
#' # without filling gaps
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = level,
#'                            fill_gaps = FALSE)
#'
#' # per decade and by multiple columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::n_rfrnds_per_period(by = c(level, type),
#'                            period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::n_rfrnds_per_period()
n_rfrnds_per_period <- function(data,
                                by = NULL,
                                period = c("week", "month", "quarter", "year", "decade", "century"),
                                fill_gaps = TRUE,
                                period_floor = NULL,
                                period_ceiling = NULL,
                                descending = FALSE) {
  # arg checks
  checkmate::assert_data_frame(data)
  period <- rlang::arg_match(period)
  checkmate::assert_flag(fill_gaps)
  checkmate::assert_int(period_floor,
                        null.ok = TRUE)
  checkmate::assert_int(period_ceiling,
                        null.ok = TRUE)
  checkmate::assert_flag(descending)
  
  # tidy selection
  defused_by <- rlang::enquo(by)
  ix_by <- tidyselect::eval_select(expr = defused_by,
                                   data = data)
  names_by <- names(ix_by)
  
  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }
  
  result <-
    data |>
    dplyr::group_by(!!!rlang::syms(names_by), !!as.symbol(period)) |>
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop")
  # fill gaps
  # (only if input data (and thus result) is non-empty since otherwise we can't infer a sensible period range for year/decade/century)
  if (fill_gaps && nrow(result)) {
    
    # define sensible min/max period vals
    is_recurring_period <- period %in% c("week", "month", "quarter")
    period_step <- switch(EXPR = period,
                          century = 100L,
                          decade = 10L,
                          1L)
    period_min <- period |> pal::when(is.null(period_floor) && !is_recurring_period ~ pal::safe_min(data[[.]]),
                                      !is_recurring_period ~ period_floor,
                                      ~ 1L)
    period_max <- period |> pal::when(is.null(period_ceiling) && !is_recurring_period ~ pal::safe_max(data[[.]]),
                                      !is_recurring_period ~ period_ceiling,
                                      . == "week" ~ 53L,
                                      . == "month" ~ 12L,
                                      . == "quarter" ~ 4L)
    period_seq <- seq(from = (period_min %/% period_step) * period_step,
                      to = period_max,
                      by = period_step)
    result %<>%
      # reduce to results `>= period_floor` and `<= period_ceiling`
      dplyr::filter(!!as.symbol(period) %in% period_seq) %>%
      # convert period col to fct, so `tidyr::complete()` knows the missing vals
      dplyr::mutate(!!as.symbol(period) := factor(x = !!as.symbol(period),
                                                  levels = period_seq,
                                                  ordered = TRUE)) %>%
      tidyr::complete(!!!rlang::syms(names_by), !!as.symbol(period),
                      fill = list(n = 0L)) %>%
      # convert period col back to int
      dplyr::mutate(!!as.symbol(period) := as.integer(as.character(!!as.symbol(period))))
  }
  
  result |> dplyr::arrange(if (descending) dplyr::desc(!!as.symbol(period)) else !!as.symbol(period))
}

#' Prettify referendum data column names
#'
#' Renames referendum data column names to be ready for publication. Useful e.g. to create tables or visualizations.
#'
#' Note that
#' - column names unknown to this function are not changed.
#' - column *labels* are [removed][labelled::remove_var_label] so they aren't inadvertently used instead of the column *names* (i.a. relevant for [gt][gt::gt]
#'   [>= 0.9.0](https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0)).
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family prettify
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::prettify_col_names()
prettify_col_names <- function(data) {
  
  data |>
    dplyr::rename_with(.cols = everything(),
                       .fn = prettify_var_names) |>
    # we remove the var lbls so gt doesn't automatically pick them up instead of the column names
    # cf. https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0
    labelled::remove_var_label()
}

#' Referendum share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendums per period, grouped by another column.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#'   compute the [period column][add_period]), plus the column specified in `by`.
#' @param by `data` column to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param period Type of period to count referendums by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE,
#'             max_cache_age = "1 year") |>
#'   rdb::plot_rfrnd_share_per_period(by = "level")
plot_rfrnd_share_per_period <- function(data,
                                        by,
                                        period = c("week", "month", "quarter", "year", "decade", "century")) {
  period <- rlang::arg_match(period)
  
  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }
  
  # tidy selection
  defused_by <- rlang::enquo(by)
  i_by <- tidyselect::eval_select(expr = defused_by,
                                  data = data)
  n_by <- length(i_by)
  name_by <- names(i_by)
  
  # ensure `x` is < 2
  if (n_by > 1L) {
    cli::cli_abort("Only {.emph one} column can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }
  
  data %>%
    # calculate freqs
    dplyr::group_by(!!as.symbol(name_by), !!as.symbol(period)) %>%
    dplyr::summarise(n = dplyr::n(),
                     .groups = "drop") %>%
    # plot
    plot_share_per_period(x = name_by,
                          period = period)
}

#' Topic segmentation sunburst chart
#'
#' Creates a [Plotly sunburst chart](https://plotly.com/r/sunburst-charts/) that visualizes the hierarchical segmentation of referendum topic occurences.
#'
#' A *topic lineage* is the hierarchical compound of a `topic_tier_1` and optionally a grandchild `topic_tier_3` and/or a child `topic_tier_2`.
#'
#' Note that topics can be assigned on any tier to referendums (i.e. in one case, a `topic_tier_1` plus a child `topic_tier_2` is assigned, and in another case
#' only a `topic_tier_1` without any further child topic).
#'
#' Furthermore, it should be noted that not every `topic_tier_2` has potential child `topic_tier_3`s. See the [full topic hierarchy](`r url_codebook("topics")`)
#' for details.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `topics_tier_1`, `topics_tier_2` and
#'   `topics_tier_3`.
#' @param method Applied method to count the number of topic occurences. One of
#'   - **`"per_rfrnd"`**: All *referendums* have the same weight. For a referendum with n different topics of the same tier, every topic is counted 1/n.
#'   - **`"per_topic_lineage"`**: All *topic lineages* have the same weight. For a referendum with n different topics of the same tier, every topic is fully
#'     counted, meaning that e.g. a referendum with three different tier-3 topics has a tripled impact on the result compared to a referendum that only has a
#'     single one. Noticeably faster than `"per_rfrnd"`.
#'   - **`"naive"`**: Naive procedure which doesn't properly reflect topic proportions on tier 2 and 3. Based on the (wrong) assumptions that a) all referendums
#'     have the same number of topic lineages assigned and b) topics are not deduplicated per tier. By far the fastest method, though.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' # count each referendum equally
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "per_rfrnd")
#'
#' # count each topic lineage equally
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "per_topic_lineage")
#'
#' # naive count (way faster, but with misleading proportions on tier 2 and 3)
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_segmentation(method = "naive")
plot_topic_segmentation <- function(data,
                                    method = c("per_rfrnd", "per_topic_lineage", "naive")) {
  
  method <- rlang::arg_match(method)
  rlang::check_installed("plotly",
                         reason = pal::reason_pkg_required())
  is_naive <- method == "naive"
  
  # assemble necessary data structure
  if (is_naive) {
    
    ## naively
    data_plot <-
      dplyr::bind_rows(
        data$topics_tier_1 %>%
          topic_frequency(tier = 1L) %>%
          dplyr::mutate(parent_topic = ""),
        data$topics_tier_2 %>%
          topic_frequency(tier = 2L) %>%
          dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
                                                      .f = infer_topics,
                                                      tier = 1L)),
        data$topics_tier_3 %>%
          topic_frequency(tier = 3L) %>%
          dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
                                                      .f = infer_topics,
                                                      tier = 2L))
      ) %>%
      dplyr::rename(value = n)
    
  } else {
    
    is_per_rfrnd <- method == "per_rfrnd"
    data_plot <- data %>% dplyr::select(starts_with("topics_tier_"))
    
    ### per rfrnd, i.e. in fractional numbers
    if (is_per_rfrnd) {
      
      data_plot %<>%
        purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
                                              unlist(..2),
                                              unlist(..3)) %>%
                      dplyr::mutate(value = 1.0 / nrow(.))) %>%
        purrr::list_rbind()
      
      ### per topic lineage
    } else {
      
      data_plot %<>%
        purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
                                              unlist(..2),
                                              unlist(..3))) %>%
        purrr::list_rbind() %>%
        dplyr::mutate(value = 1.0)
    }
    
    data_plot <-
      dplyr::bind_rows(
        data_plot %>%
          dplyr::group_by(topic_tier_1) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_1,
                        parent_topic = "",
                        value,
                        .keep = "none"),
        data_plot %>%
          dplyr::group_by(topic_tier_2) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_2,
                        parent_topic =
                          topic %>%
                          purrr::map_chr(~ {
                            if (is.na(.x)) {
                              NA_character_
                            } else {
                              infer_topics(topics = .x,
                                           tier = 1L)
                            }}),
                        value,
                        .keep = "none"),
        data_plot %>%
          dplyr::group_by(topic_tier_3) %>%
          dplyr::summarise(value = sum(value)) %>%
          dplyr::mutate(topic = topic_tier_3,
                        parent_topic =
                          topic %>%
                          purrr::map_chr(~ {
                            if (is.na(.x)) {
                              NA_character_
                            } else {
                              infer_topics(topics = .x,
                                           tier = 2L)
                            }}),
                        value,
                        .keep = "none")
      ) %>%
      dplyr::filter(!is.na(topic))
    
    ### add NA rows filling the gaps
    data_plot %<>%
      dplyr::filter(parent_topic != "") %>%
      dplyr::group_by(parent_topic) %>%
      dplyr::summarise(value_total = sum(value),
                       .groups = "drop") %>%
      dplyr::mutate(topic = "<i>not defined</i>",
                    value = purrr::map2_dbl(.x = value_total,
                                            .y = parent_topic,
                                            .f = ~
                                              data_plot %>%
                                              dplyr::filter(topic == .y) %$%
                                              value %>%
                                              checkmate::assert_number() %>%
                                              magrittr::subtract(.x)),
                    parent_topic,
                    .keep = "none") %>%
      dplyr::bind_rows(data_plot, .) %>%
      dplyr::mutate(id = ifelse(topic == "<i>not defined</i>",
                                paste0("NA_", parent_topic),
                                topic))
  }
  
  # create plot
  plotly::plot_ly(data = data_plot,
                  type = "sunburst",
                  labels = ~topic,
                  parents = ~parent_topic,
                  ids = if (is_naive) ~topic else ~id,
                  values = ~value,
                  branchvalues = ifelse(is_naive,
                                        "remainder",
                                        "total"),
                  insidetextorientation = "radial")
}

#' Topic share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendum topic occurences per period.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `topics_tier_#` of the specified `tier`.
#' @param tier Tier of the topics variable to plot. `1L`, `2L` or `3L`.
#' @param period Type of period to count topics by. One of
#'   `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param weight_by_n_rfrnds Whether or not to weight topic occurences by number of referendums. If `TRUE`, for a referendum with n different topics of the same
#'   `tier`, every topic is counted 1/n.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::plot_topic_share_per_period(period = "decade")
plot_topic_share_per_period <- function(data,
                                        tier = 1L,
                                        period = c("week", "month", "quarter", "year", "decade", "century"),
                                        weight_by_n_rfrnds = TRUE) {
  checkmate::assert_int(tier,
                        lower = 1L,
                        upper = 3L)
  period <- rlang::arg_match(period)
  checkmate::assert_flag(weight_by_n_rfrnds)
  
  # add period col if necessary
  if (!(period %in% colnames(data))) {
    data %<>% add_period(period = period)
  }
  
  # ensure topics var is present
  var_name_topics <- glue::glue("topics_tier_{tier}")
  var_name_topic <- var_name_unnested(var_name_topics)
  
  if (!(var_name_topics %in% colnames(data))) {
    cli::cli_abort("Required column {.var {var_name_topics}} is missing from {.arg data}.")
  }
  
  data |>
    # add proper count var
    dplyr::mutate(count = if (weight_by_n_rfrnds) 1.0 / lengths(!!as.symbol(var_name_topics)) else 1.0) |>
    # unnest topics var
    unnest_var(var = var_name_topics) |>
    # calculate freqs
    dplyr::group_by(!!as.symbol(period), !!as.symbol(var_name_topic)) |>
    dplyr::summarise(n = sum(count),
                     .groups = "drop") |>
    # plot
    plot_share_per_period(x = var_name_topic,
                          period = period)
}

#' Period streamgraph
#'
#' Creates a ggplot2 [streamgraph](https://en.wikipedia.org/wiki/Streamgraph) based on [ggstream::geom_stream()] using the specified `period` as time
#' resolution.
#'
#' @inheritParams n_rfrnds_per_period
#' @param by `data` column to group by before counting number of referendums.
#' @param stacking Stacking type. One of
#'   - `"mirror"` to stack absolute values symmetrically around the zero line on the x-axis.
#'   - `"ridge"` to stack absolute values from the zero line on the x-axis upwards.
#'   - `"proportional"` to stack relative values that add up to 100 %.
#' @param bandwidth Kernel density estimation bandwidth. A numeric scalar.
#' @param y_lim Optional Y axis range limit. Only relevant if `stacking` is one of `"mirror"` or `"ridge"`. The limit applies to the upper side if
#'   `stacking = "ridge"` and to both sides if `stacking = "mirror"`. A numeric scalar equal or greater than zero.
#' @param color_palette Color palette function that when called with a single integer argument returns that many color codes.
#' @param prune_legend Whether or not to drop `by` factor levels which don't occur in `data` from the legend. Only has an effect if `by` is of type factor.
#'
#' @return `r pkgsnip::return_lbl("ggplot2_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' data_rdb <- rdb::rfrnds(quiet = TRUE)
#'
#' rdb::ggplot_streamgraph(data = data_rdb,
#'                         by = topics_tier_1,
#'                         period = "year")
#'
#' # you can specify a different color palette
#' rdb::ggplot_streamgraph(data = data_rdb,
#'                         by = topics_tier_1,
#'                         period = "year",
#'                         color_palette = viridisLite::viridis)
#'
#' # by default, only factor levels which occur in data are included in the legend
#' data_rdb |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::ggplot_streamgraph(by = topics_tier_1,
#'                           period = "decade")
#'
#' # but you can include *all* factor levels in the legend if you want to
#' data_rdb |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::ggplot_streamgraph(by = topics_tier_1,
#'                           period = "decade",
#'                           prune_legend = FALSE)
ggplot_streamgraph <- function(data,
                               by,
                               period = c("week", "month", "quarter", "year", "decade", "century"),
                               stacking = c("mirror", "ridge", "proportional"),
                               bandwidth = 0.75,
                               y_lim = NULL,
                               color_palette = viridisLite::turbo,
                               prune_legend = TRUE) {
  
  stacking <- rlang::arg_match(stacking)
  checkmate::assert_number(bandwidth)
  checkmate::assert_number(y_lim,
                           lower = 0.0,
                           finite = TRUE,
                           null.ok = TRUE)
  checkmate::assert_function(color_palette)
  checkmate::assert_flag(prune_legend)
  rlang::check_installed("ggplot2",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("ggstream",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("scales",
                         reason = pal::reason_pkg_required())
  rlang::check_installed("viridisLite",
                         reason = pal::reason_pkg_required())
  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  names_by <- names(ix_by)
  name_by <- names_by
  n_by <- length(ix_by)
  
  if (n_by > 1L) {
    cli::cli_abort("Only {.emph one} data column can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }
  
  # unnest list col if necessary
  if (is.list(data[[ix_by]])) {
    data %<>% unnest_var(var = tidyselect::all_of(names_by))
    name_by <- var_name_unnested(names_by)
  }
  
  result <- n_rfrnds_per_period(data = data,
                                period = period,
                                by = !!as.symbol(name_by))
  
  # create stable color mapping based on lvls of `by`
  # TODO: this probably could be removed and we could instead pass on a *discrete fill scale* fn (e.g. `\(...) viridis::scale_fill_viridis(..., discrete = T)`)
  #       to `ggplot2::scale_fill_discrete(type = )` once the below mentioned issue #23 is resolved
  if (is.factor(data[[name_by]])) {
    vals_by <- levels(data[[name_by]])
  } else {
    vals_by <- sort(unique(data[[name_by]]))
  }
  colors_by <- color_palette(length(vals_by))
  names(colors_by) <- vals_by
  
  # we need to remove zero-n rows (plus the corresponding colors) since ggstream doesn't handle them properly
  # TODO: remove this workaround once [issue #23](https://github.com/davidsjoberg/ggstream/issues/23) is fixed
  ## if we prune the legend, we need to prune the fill colors, too (otherwise, colors aren't matched properly)
  if (prune_legend) {
    colors_by %<>% magrittr::extract(names(.) %in% unique(result[[name_by]][result$n > 0L]))
  }
  result %<>%
    dplyr::group_by(!!as.symbol(name_by)) %>%
    dplyr::group_modify(\(d, k) if (sum(d$n) > 0L) d else d[0L, ]) %>%
    dplyr::ungroup()
  
  result <-
    ggplot2::ggplot(data = result,
                    mapping = ggplot2::aes(x = !!as.symbol(period),
                                           y = n,
                                           fill = !!as.symbol(name_by))) +
    ggstream::geom_stream(type = stacking,
                          n_grid = 10000L,
                          show.legend = TRUE,
                          bw = bandwidth) +
    ggplot2::scale_fill_discrete(type = colors_by,
                                 name = prettify_var_names(name_by),
                                 drop = prune_legend) +
    ggplot2::xlab(ggplot2::element_blank()) +
    ggplot2::ylab(ggplot2::element_blank())
  
  if (stacking == "ridge" && !is.null(y_lim)) {
    result <- result + ggplot2::coord_cartesian(ylim = c(0.0, y_lim),
                                                default = TRUE)
    
  } else if (stacking == "mirror") {
    
    if (!is.null(y_lim)) {
      result <- result + ggplot2::coord_cartesian(ylim = c(-y_lim, y_lim),
                                                  default = TRUE)
    }
    
    # make y scale absolute in both directions
    result <- result + ggplot2::scale_y_continuous(labels = \(x) abs(x))
    
  } else if (stacking == "proportional") {
    result <- result + ggplot2::scale_y_continuous(labels = scales::label_percent(suffix = "\u2009%"))
  }
  
  result
}

#' Tabulate number of referendums
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums, optionally counted `by` up to three additional variables.
#'
#' The first variable specified in `by` will be reflected in additional rows in the resulting table, i.e. expand it vertically. The second and third variables
#' will be reflected in additional columns, i.e. expand it horizontally.
#'
#' @inheritParams n_rfrnds
#' @param by Up to three additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param n_rows Maximum number of rows to be included in the resulting table. All the rows exceeding that limit are replaced by a single row of ellipses. An
#'   integer scalar or `Inf` for an unlimited number of rows.
#' @param order How to order the rows of the resulting table. One of
#'   - `"ascending"` to sort in ascending order by the number of referendums,
#'   - `"descending"` to sort in descending order by the number of referendums, or
#'   - `NULL` to leave the sorting unchanged.
#' @param incl_row_head Whether or not to include a row heading with the [prettified][prettify_var_names] name of the first `by` variable.
#' @param incl_col_head Whether or not to include column headings (in the table's [stub][gt::tab_stubhead]) with the [prettified][prettify_var_names] names of
#'   the second and third `by` variables.
#' @param add_total_row Whether or not to add a summary row at the very end of the table containing column totals. If `NULL`, a total row is added only if
#'   at least one column is provided in `by`.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#'   multiple columns are provided in `by`.
#' @param lbl_total_row Label of the summary row containing column totals. Only relevant if `add_total_row = TRUE`. A character scalar. [gt::md()] or
#'   [gt::html()] can be used to format the label text.
#' @param lbl_total_col Label of the summary column containing row totals. Only relevant if `add_total_col = TRUE`. A character scalar. [gt::md()] or
#'   [gt::html()] can be used to format the label text.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds()
#'
#' # grouped by a single column
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = level)
#'
#' # grouped by two columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = c(type, level))
#'
#' # grouped by three columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds(by = c(country_name, level, type),
#'                     n_rows = 10L,
#'                     order = "descending")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::tbl_n_rfrnds(by = c(country_name, level),
#'                     n_rows = 10L,
#'                     order = "descending")
tbl_n_rfrnds <- function(data,
                         by = NULL,
                         complete_fcts = TRUE,
                         n_rows = Inf,
                         order = NULL,
                         incl_row_head = TRUE,
                         incl_col_head = TRUE,
                         add_total_row = NULL,
                         add_total_col = NULL,
                         lbl_total_row = gt::md("**Total**"),
                         lbl_total_col = gt::md("**Total**")) {
  
  if (!isTRUE(is.infinite(n_rows))) {
    checkmate::assert_int(n_rows,
                          lower = 1L)
  }
  if (!is.null(order)) {
    rlang::arg_match0(arg = order,
                      values = c("ascending", "descending"))
  }
  checkmate::assert_flag(incl_row_head)
  checkmate::assert_flag(incl_col_head)
  checkmate::assert_flag(add_total_row,
                         null.ok = TRUE)
  checkmate::assert_flag(add_total_col,
                         null.ok = TRUE)
  checkmate::assert_string(lbl_total_row)
  checkmate::assert_string(lbl_total_col)
  rlang::check_installed("gt",
                         version = "0.9.0",
                         reason = pal::reason_pkg_required())
  
  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  n_by <- length(ix_by)
  has_by <- n_by > 0L
  has_by_rest <- n_by > 1L
  
  if (n_by > 3L) {
    cli::cli_abort("At most {.emph three} data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }
  
  if (is.null(add_total_row)) {
    add_total_row <- has_by
  }
  
  if (is.null(add_total_col)) {
    add_total_col <- has_by_rest
  }
  
  by_colname_1st <- names(ix_by[1L]) %|% ":no_by"
  by_colnames_rest <- names(ix_by[-1L])
  
  result <-
    data |>
    n_rfrnds(by = {{ by }},
             complete_fcts = complete_fcts) |>
    dplyr::mutate(dplyr::across(where(is.factor),
                                ~ forcats::fct_na_value_to_level(f = .x,
                                                                 level = "N/A"))) |>
    pal::when(has_by_rest ~ tidyr::pivot_wider(data = .,
                                               names_from = by_colnames_rest,
                                               names_sort = TRUE,
                                               values_from = n),
              ~ .) |>
    dplyr::mutate(`:total` = rowSums(x = dplyr::pick(-any_of(by_colname_1st)),
                                     na.rm = TRUE),
                  dplyr::across(everything(),
                                ~ tidyr::replace_na(data = .x,
                                                    replace = 0L)),
                  # TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
                  dplyr::across(any_of(by_colname_1st),
                                as.character)) |>
    pal::when(isTRUE(order == "descending") ~ dplyr::arrange(.data = .,
                                                             -`:total`),
              isTRUE(order == "ascending") ~ dplyr::arrange(.data = .,
                                                            `:total`),
              ~ .) |>
    pal::when(!add_total_col ~ dplyr::select(.data = .,
                                             -`:total`),
              ~ .)
  total_n <-
    result |>
    dplyr::select(-any_of(by_colname_1st)) |>
    purrr::map_int(\(x) sum(x, na.rm = TRUE))
    
  chop_rows <- n_rows < nrow(result)
  
  if (chop_rows) {
    
    result %<>%
      utils::head(n = n_rows) %>%
      # add placeholder/ellipsis row
      dplyr::mutate(dplyr::across(everything(),
                                  as.character)) %>%
      rbind("\u2026")
  }
  
  # NOTE: if we chop rows (and have multiple n cols), it's impossible to create our total row using `gt::grand_summary_rows()` since its `fns` arg only
  #       receives column content, no metadata; thus we create the total row manually
  if (add_total_row && chop_rows) {
    result %<>% rbind(c(lbl_total_row, total_n))
  }
  
  result %<>% gt::gt(rowname_col = ifelse(has_by,
                                          by_colname_1st,
                                          "rowname"),
                     process_md = TRUE)
  
  if (incl_row_head && has_by) {
    result %<>% gt::tab_row_group(label =
                                    by_colname_1st |>
                                    prettify_var_names() |>
                                    pal::wrap_chr(wrap = "*") |>
                                    gt::md(),
                                  rows = tidyselect::everything(),
                                  id = by_colname_1st)
  }
  
  if (add_total_col) {
    result %<>% gt::cols_label(`:total` = lbl_total_col)
  }
    
  if (add_total_row) {
    if (chop_rows) {
      result %<>% gt::tab_style(style = gt::cell_borders(sides = "top",
                                                         color = "#D3D3D3",
                                                         style = "double",
                                                         weight = gt::px(6L)),
                                locations = list(gt::cells_body(rows = n_rows + 2L),
                                                 gt::cells_stub(rows = n_rows + 2L)))
      
    } else {
      result %<>% gt::grand_summary_rows(fns = list(id = "total", label = "DUMMY") ~ sum(., na.rm = TRUE),
                                         fmt = ~ gt::fmt_integer(., sep_mark = ""))
      
      # TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295)
      #       is fixed.
      result$`_summary`[[1L]]$fns$total$label <- lbl_total_row
    }
  }
  
  if (incl_col_head && has_by_rest) {
    result %<>%
      gt::tab_stubhead(label =
                         by_colnames_rest |>
                         prettify_var_names() |>
                         pal::wrap_chr(wrap = "*") |>
                         paste0(collapse = "<br><br>") |>
                         gt::md()) %>%
      gt::tab_style(style = gt::cell_text(align = "right",
                                          v_align = "middle"),
                    locations = gt::cells_stubhead())
  }
  
  result |>
    gt::tab_spanner_delim(delim = "_",
                          split = "last") |>
    # right-align cols; required since they're of type chr if we chopped rows
    gt::cols_align(align = "right",
                   columns = -tidyselect::any_of(by_colname_1st)) |>
    # hide table header if there are less than two `by` cols
    pal::when(!has_by_rest ~ gt::tab_options(data = .,
                                             column_labels.hidden = TRUE),
              ~ .)
}

#' Tabulate number of referendums per period
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums per period, optionally counted `by` up to two additional columns.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @inheritParams n_rfrnds_per_period
#' @inheritParams tbl_n_rfrnds
#' @param by Up to two additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param squeeze_zero_rows Whether or not to compress consecutive zero-sum rows into single period span rows.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#'   `by` is non-empty.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(period = "decade")
#'
#' # grouped by a single additional column
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(by = level,
#'                                period = "decade")
#'
#' # grouped by two addtional columns
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::tbl_n_rfrnds_per_period(by = c(level, type),
#'                                period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#'   rdb::as_ballot_dates() |>
#'   rdb::tbl_n_rfrnds_per_period(period = "decade")
tbl_n_rfrnds_per_period <- function(data,
                                    by = NULL,
                                    period = c("week", "month", "quarter", "year", "decade", "century"),
                                    fill_gaps = TRUE,
                                    period_floor = NULL,
                                    period_ceiling = NULL,
                                    squeeze_zero_rows = TRUE,
                                    descending = TRUE,
                                    add_total_row = TRUE,
                                    add_total_col = NULL,
                                    lbl_total_row = gt::md("**Total**"),
                                    lbl_total_col = lbl_total_row) {
  
  period <- rlang::arg_match(period)
  checkmate::assert_flag(squeeze_zero_rows)
  checkmate::assert_flag(add_total_row)
  checkmate::assert_flag(add_total_col,
                         null.ok = TRUE)
  checkmate::assert_string(lbl_total_row)
  checkmate::assert_string(lbl_total_col)
  rlang::check_installed("gt",
                         version = "0.9.0",
                         reason = pal::reason_pkg_required())
  
  ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
                                   data = data)
  n_by <- length(ix_by)
  has_by <- n_by > 0L
  
  if (n_by > 2L) {
    cli::cli_abort("At most {.emph two} additional data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
  }
  
  if (is.null(add_total_col)) {
    add_total_col <- has_by
  }
  
  by_names_print <- ifelse(has_by,
                           names(ix_by) %>%
                             prettify_var_names() %>%
                             pal::wrap_chr(wrap = "*") %>%
                             paste0(collapse = "<br><br>"),
                           "")
  data_to_plot <-
    data %>%
    n_rfrnds_per_period(period = period,
                        by = {{ by }},
                        fill_gaps = fill_gaps,
                        period_floor = period_floor,
                        period_ceiling = period_ceiling,
                        descending = descending) %>%
    dplyr::mutate(dplyr::across(where(is.factor),
                                ~ forcats::fct_na_value_to_level(f = .x,
                                                                 level = "N/A"))) %>%
    pal::when(has_by ~ tidyr::pivot_wider(data = .,
                                          names_from = {{ by }},
                                          names_sort = TRUE,
                                          values_from = n),
              ~ .) %>%
    pal::when(add_total_col ~ dplyr::mutate(.data = .,
                                            `:total` = rowSums(x = dplyr::pick(-!!as.symbol(period)),
                                                               na.rm = TRUE)),
              ~ .) %>%
    dplyr::mutate(dplyr::across(everything(),
                                ~ tidyr::replace_na(data = .x,
                                                    replace = 0L)),
                  # TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
                  dplyr::across(all_of(period),
                                as.character))
  
  # squeeze consecutive all-0 rows into single row if requested
  ix <- integer()
  ix_rm <- integer()
  
  if (squeeze_zero_rows) {
    
    for (i in pal::safe_seq_len(nrow(data_to_plot))) {
      
      if (data_to_plot %>%
          dplyr::select(-any_of(c(period, ":total"))) %>%
          magrittr::extract(i, ) %>%
          sum() %>%
          magrittr::equals(0L)) {
        
        ix %<>% c(i)
        
      } else {
        if (length(ix) > 1L) {
          
          data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
          ix_rm %<>% c(ix[-1L])
        }
        ix <- integer()
      }
    }
    
    if (length(ix) > 1L) {
      data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
      ix_rm %<>% c(ix[-1L])
    }
  }
  
  # add "s" to decade/century period values
  if (period %in% c("decade", "century")) {
    data_to_plot[[period]] %<>% stringr::str_replace_all(pattern = "(\\d+)",
                                                         replacement = "\\1s")
  }
  
  data_to_plot %>%
    dplyr::filter(!(dplyr::row_number() %in% ix_rm)) %>%
    gt::gt(rowname_col = period) %>%
    pal::when(add_total_col ~ gt::cols_label(.data = .,
                                             `:total` = lbl_total_col),
              ~ .) %>%
    pal::when(add_total_row ~ {
      
      result <- gt::grand_summary_rows(data = .,
                                       fns = list(label = "DUMMY", id = "total") ~ sum(., na.rm = TRUE),
                                       fmt = ~ gt::fmt_integer(., sep_mark = ""))
      
      # TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295) is fixed.
      result$`_summary`[[1L]]$fns$total$label <- lbl_total_row
      
      result
    },
              ~ .) %>%
    gt::tab_spanner_delim(delim = "_",
                          split = "last") %>%
    gt::tab_stubhead(label = gt::md(by_names_print)) %>%
    gt::tab_style(style = gt::cell_text(align = "right",
                                        v_align = "middle"),
                  locations = gt::cells_stubhead())
}

#' List referendum territories from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum territories from sudd.ch](https://sudd.ch/list.php?mode=allareas), which means each `country_name_de` together with all the associated
#' `territory_name_de`, their search URL and their number of occurrences.
#'
#' Note that the values in the `territory_name_de` column returned by this function can differ from those in the `territory_name_de` column of
#' [sudd_rfrnds()] and [list_sudd_rfrnds()]. The latter is often more extensive and usually includes the `country_name_de` (in parentheses) for
#' subnational referendums.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_territories()
list_sudd_territories <- function() {
  
  rows <-
    httr::RETRY(verb = "GET",
                url = url_sudd("list.php"),
                query = list(mode = "allareas"),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_elements(css = "main table tr") %>%
    purrr::map(rvest::html_elements,
               css = "td")
  
  col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
  col_2 <- rows %>% purrr::map(\(x) if (length(x) > 1L) x[[2L]] else xml2::as_xml_document(list()))
  col_3 <- rows %>% purrr::map(\(x) if (length(x) > 2L) x[[3L]] else xml2::as_xml_document(list()))
  
  tibble::tibble(country_name_de = purrr::map_chr(col_1, rvest::html_text),
                 territory_name_de = purrr::map_chr(col_2, rvest::html_text),
                 filter_url = purrr::map_chr(col_2,
                                             ~ {
                                               if (length(.x) > 0L) {
                                                 .x %>%
                                                   rvest::html_element(css = "a") %>%
                                                   rvest::html_attr(name = "href") %>%
                                                   url_sudd()
                                               } else {
                                                 NA_character_
                                               }
                                             }),
                 n = purrr::map_chr(col_3, rvest::html_text)) %>%
    dplyr::mutate(country_name_de = dplyr::if_else(stringr::str_detect(string = country_name_de,
                                                                       pattern = "^\\s*$"),
                                                   NA_character_,
                                                   country_name_de)) %>%
    tidyr::fill(country_name_de,
                .direction = "down") %>%
    dplyr::filter(!dplyr::if_all(c(territory_name_de, n),
                                 is.na))
}

#' List referendum titles from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum titles from sudd.ch](https://sudd.ch/list.php?mode=alltopics), together with their search URLs and number of occurrences.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_titles() |> dplyr::filter(n > 1)
list_sudd_titles <- function() {
  
  rows <-
    httr::RETRY(verb = "GET",
                url = url_sudd("list.php"),
                query = list(mode = "alltopics"),
                times = 3L) %>%
    xml2::read_html() %>%
    rvest::html_elements(css = "main table tr") %>%
    purrr::map(rvest::html_elements,
               css = "td")
  
  col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
  col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)
  
  tibble::tibble(title_de = col_1 %>% purrr::map_chr(rvest::html_text),
                 filter_url = col_1 %>% purrr::map_chr(~ .x %>%
                                                         rvest::html_element(css = "a") %>%
                                                         rvest::html_attr(name = "href") %>%
                                                         url_sudd()),
                 n = col_2 %>% purrr::map_chr(rvest::html_text))
}

#' List referendum data from [sudd.ch](https://sudd.ch/)
#'
#' @description
#' Lists the referendum data from [sudd.ch](https://sudd.ch/) in various ways its [`list.php`](https://sudd.ch/list.php) endpoint allows. The output of this
#' function can be directly fed to [sudd_rfrnds()].
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @param mode The listing mode. One of
#' - `"by_date"`: Lists [all referendums in the sudd.ch database **by `date`**](https://sudd.ch/list.php?mode=allrefs), together with their `id_sudd`,
#'   `country_code`, `territory_name_de` and `title_de`. Specifying the sorting order of the results via the `order` parameter is supported.
#' - `"by_mod_date"`: Lists [all referendums in the sudd.ch database **by `date_last_edited`**](https://sudd.ch/list.php?mode=moddate), together with their
#'   `id_sudd`, `country_code`, `territory_name_de`, `date` and `title_de`.
#' - `"filter"`: Allows to provide additional arguments (via the `filter` parameter) that limit the results accordingly. Specifying the sorting order of the
#'   results via the `order` parameter is supported.
#' - `"random"`: Lists the `id_sudd` of five randomly selected referendums, together with their `country_code`.
#' @param order The sorting order of the results. Only relevant if `mode` is either `"by_date"` or `"filter"`.
#' @param filter A list with valid filtering arguments. Only relevant if `mode = "filter"`. The supported filtering arguments include
#' - `territory_name_de`: A string that must be (partially) matched by the referendums' `territory_name_de`. Matching is case-insensitive and no [fuzzy
#'   matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `title_de`: A string that must be (partially) matched by the referendums' `title_de`. Matching is case-insensitive and no [fuzzy
#'   matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `year_min`: The lower year limit of the referendums' `date`. A positive integer.
#' - `year_max`: The upper year limit of the referendums' `date`. A positive integer.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A [tibble][tibble::tbl_df] containing at least an `id_sudd` column.
#' @family sudd
#' @export
#'
#' @examples
#' # list all referendums by modification date (takes a while)
#' \dontrun{
#' rdb::list_sudd_rfrnds(mode = "by_mod_date")}
#' 
#' # list all referendums whose title matches "AHV"
#' rdb::list_sudd_rfrnds(mode = "filter",
#'                       filter = list(title_de = "AHV"),
#'                       quiet = TRUE)
#' 
#' # get sudd.ch referendum data from all referendums from 2020 onwards
#' rdb::list_sudd_rfrnds(mode = "filter",
#'                       filter = list(year_min = 2020),
#'                       quiet = TRUE) |>
#'   rdb::sudd_rfrnds(quiet = TRUE)
#' 
#' # get sudd.ch referendum data from five randomly picked referendums
#' rdb::list_sudd_rfrnds(mode = "random",
#'                       quiet = TRUE) |>
#'   rdb::sudd_rfrnds(quiet = TRUE)
list_sudd_rfrnds <- function(mode = c("by_date",
                                      "by_mod_date",
                                      "filter",
                                      "random"),
                             order = c("ascending",
                                       "descending"),
                             filter = list(territory_name_de = NULL,
                                           title_de = NULL,
                                           year_min = NULL,
                                           year_max = NULL),
                             use_cache = TRUE,
                             max_cache_age = "1 week",
                             quiet = FALSE) {
  # check args
  mode <- rlang::arg_match(mode)
  order <-
    rlang::arg_match(order) %>%
    dplyr::case_match(.x = .,
                      "ascending"  ~ "asc",
                      "descending" ~ "desc",
                      .default = .)
  checkmate::assert_flag(quiet)
  
  # do not cache `mode = "random"`
  if (mode == "random") use_cache <- FALSE
  
  pkgpins::with_cache(expr = {
    
    checkmate::assert_list(filter,
                           names = "unique")
    
    checkmate::assert_subset(names(filter),
                             choices = c("territory_name_de",
                                         "title_de",
                                         "year_min",
                                         "year_max"))
    
    checkmate::assert_string(filter$territory_name_de,
                             null.ok = TRUE)
    checkmate::assert_string(filter$title_de,
                             null.ok = TRUE)
    has_filter <-
      filter %>%
      purrr::map_lgl(is.null) %>%
      all() %>%
      magrittr::not()
    
    if (has_filter && mode != "filter") {
      cli::cli_alert_warning("{.arg filter} is ignored because {.arg mode} is set to {.val {mode}}.")
    }
    
    filter$year_min <- checkmate::assert_int(filter$year_min,
                                             lower = sudd_min_year,
                                             upper = sudd_max_year,
                                             null.ok = TRUE,
                                             coerce = TRUE)
    filter$year_max <- checkmate::assert_int(filter$year_max,
                                             lower = filter$year_min %||% sudd_min_year,
                                             upper = sudd_max_year,
                                             null.ok = TRUE,
                                             coerce = TRUE)
    
    is_year_missing <- purrr::map_lgl(c(filter$year_min, filter$year_max), is.null) %>% { any(.) && !all(.) }
    
    if (is_year_missing) {
      filter$year_min <- filter$year_min %||% sudd_min_year
      filter$year_max <- filter$year_max %||% sudd_max_year
    }
    
    if (mode == "filter" && all(purrr::map_lgl(filter, is.null))) {
      cli::cli_abort("At least one filtering argument must be provided in {.arg filter} when {.arg mode = \"filter\"}.")
    }
    
    names(filter) %<>% dplyr::case_match(.x = .,
                                         "territory_name_de" ~ "area",
                                         "title_de"          ~ "topic",
                                         "year_min"          ~ "first",
                                         "year_max"          ~ "last",
                                         .default = .)
    # assemble query params
    query <- c(list(mode = mode %>% dplyr::case_match(.x = .,
                                                      "by_date"     ~ "allrefs",
                                                      "by_mod_date" ~ "moddate",
                                                      .default = .))[mode != "filter"],
               list(sense = order)[mode %in% c("by_date", "filter")],
               filter[mode == "filter"])
    
    # retrieve and parse data
    if (!quiet) {
      status_msg <- "Fetching raw HTML data from {.url sudd.ch}..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    
    html <-
      httr::RETRY(verb = "GET",
                  url = url_sudd("list.php"),
                  query = query,
                  times = 3L) %>%
      xml2::read_html()
    
    if (!quiet) {
      status_msg <- "Parsing and tidying raw HTML data..."
      cli::cli_progress_step(msg = status_msg,
                             msg_done = paste(status_msg, "done"),
                             msg_failed = paste(status_msg, "failed"))
    }
    
    if (mode == "random") {
      
      result <-
        html %>%
        rvest::html_elements(css = "main ul li a") %>%
        rvest::html_attr(name = "href") %>%
        stringr::str_extract(pattern = "(?<=id=)[\\w\\d]+") %>%
        tibble::tibble(id_sudd = .)
      
    } else {
      
      rows <-
        html %>%
        rvest::html_element(css = "main table") %>%
        rvest::html_children() %>%
        purrr::map(rvest::html_elements,
                   css = "td")
      
      col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
      col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)
      col_3 <- rows %>% purrr::map(magrittr::extract2, 3L)
      col_4 <- rows %>% purrr::map(magrittr::extract2, 4L)
      
      result <-
        tibble::tibble(id_sudd = purrr::map_chr(col_4,
                                                \(x) {
                                                  x %>%
                                                    rvest::html_element(css = "a") %>%
                                                    rvest::html_attr(name = "href") %>%
                                                    stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+")
                                                }),
                       territory_name_de = purrr::map_chr(col_2, rvest::html_text),
                       !!!(col_3 %>% purrr::map_chr(rvest::html_text) %>% parse_sudd_date_de()),
                       title_de = purrr::map_chr(col_4, rvest::html_text)) %>%
        # add `date`
        dplyr::mutate(date = clock::date_build(year = year,
                                               month = month,
                                               day = day,
                                               invalid = "NA")) %>%
        dplyr::relocate(date,
                        .before = year)
      
      if (mode == "by_mod_date") {
        
        result %<>%
          tibble::add_column(date_last_edited =
                               col_1 %>%
                               purrr::map_chr(~ .x %>%
                                                rvest::html_element(css = "time") %>%
                                                rvest::html_attr(name = "datetime")) %>%
                               clock::date_parse()) %>%
          tidyr::fill(date_last_edited,
                      .direction = "down")
      }
    }
    
    result %>%
      # derive vars from `id_sudd`
      dplyr::bind_cols(.$id_sudd |>
                         purrr::map(parse_sudd_id) |>
                         purrr::list_rbind()) %>%
      dplyr::select(id_sudd,
                    starts_with("country_"),
                    is_former_country,
                    starts_with("subnational_entity_"),
                    everything())
  },
  pkg = this_pkg,
  from_fn = "list_sudd_rfrnds",
  mode,
  order,
  filter,
  use_cache = use_cache,
  max_cache_age = max_cache_age)
}

#' Get referendum data from [sudd.ch](https://sudd.ch/)
#'
#' Downloads referendum data from [sudd.ch](https://sudd.ch/).
#'
#' # About [sudd.ch](https://sudd.ch/)
#'
#' **sudd** stands for _**Su**chmaschine für **d**irekte **D**emokratie_ (German) and is operated by [Beat Müller](mailto:beat@sudd.ch). Its database content is
#' licensed under [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA
#' 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).
#'
#' @param ids_sudd The referendum identifiers assigned by [sudd.ch](https://sudd.ch/). Either as a character vector or a data frame containing a column
#'   `id_sudd`. `NA`s are ignored.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")` The column names are aligned with those of [rfrnds()] as closely as possible.
#' @family sudd
#' @importFrom rlang :=
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbc045192a21351232e596f")$id_sudd |> rdb::sudd_rfrnds()
#' 
#' rdb::rfrnds(quiet = TRUE) |>
#'   dplyr::filter(country_code == "AT") |>
#'   rdb::sudd_rfrnds()
sudd_rfrnds <- function(ids_sudd,
                        use_cache = TRUE,
                        max_cache_age = "1 week",
                        quiet = FALSE) {
  
  checkmate::assert_flag(quiet)
  
  if (purrr::pluck_depth(ids_sudd) > 1L) {
    
    if (!("id_sudd" %in% colnames(ids_sudd))) {
      cli::cli_abort(paste0("{.arg ids_sudd} must be either a character vector of valid sudd.ch referendum identifiers or a data frame with a column of such",
                            " named {.var id_sudd}."))
    }
    
    ids_sudd <- ids_sudd$id_sudd
  }
  
  ids_sudd <-
    checkmate::assert_character(ids_sudd,
                                all.missing = FALSE) %>%
    magrittr::extract(!is.na(.))
  
  pkgpins::with_cache(expr = {
    
    ids_sudd %>%
      purrr::map(.f = \(x) sudd_rfrnd(x),
                 .progress = if (quiet) FALSE else "Scraping referendum data from sudd.ch") %>%
      purrr::list_rbind() %>%
      # properly parse `date`
      dplyr::bind_cols(.$date |>
                         purrr::map(parse_sudd_date) |>
                         purrr::list_rbind()) %>%
      dplyr::mutate(date = clock::date_build(year = year,
                                             month = month,
                                             day = day,
                                             invalid = "NA")) %>%
      # add `id_sudd`
      tibble::add_column(id_sudd = ids_sudd,
                         .before = 1L) %>%
      # derive vars from `id_sudd`
      dplyr::bind_cols(ids_sudd |>
                         purrr::map(parse_sudd_id) |>
                         purrr::list_rbind()) %>%
      # reorder columns
      dplyr::relocate(id_sudd,
                      country_code,
                      country_name,
                      is_former_country,
                      subnational_entity_code,
                      territory_name_de,
                      any_of(c("territory_type_de",
                               "date",
                               "year",
                               "month",
                               "day",
                               "title_de",
                               "question_type_de",
                               "types",
                               "result_de",
                               "result_status_de",
                               "adoption_requirements_de",
                               "electorate_total",
                               "electorate_abroad",
                               "polling_cards",
                               "votes_total",
                               "votes_empty",
                               "votes_void",
                               "votes_invalid",
                               "votes_valid",
                               "votes_yes",
                               "votes_no")),
                      matches("^votes_(option_\\d+|(counter_)?proposal)(_(total|empty|void|invalid|valid|yes|no))?$"),
                      any_of("votes_option_none"),
                      matches("^$"),
                      any_of(c("subterritories",
                               "subterritories_yes",
                               "subterritories_no",
                               "files",
                               "remarks",
                               "sources",
                               "ids_sudd_simultaneous",
                               "date_last_edited")))
  },
  pkg = this_pkg,
  from_fn = "sudd_rfrnds",
  ids_sudd,
  use_cache = use_cache,
  max_cache_age = max_cache_age)
}

#' Test RDB API availability
#'
#' Checks if the RDB API server is online and operational.
#'
#' @inheritParams url_api
#' @param quiet Whether or not to suppress printing a warning in case the API is unavailable.
#'
#' @return A logical scalar.
#' @family api_status
#' @export
is_online <- function(use_testing_server = pal::pkg_config_val(key = "use_testing_server",
                                                               pkg = this_pkg),
                      quiet = FALSE) {
  
  checkmate::assert_flag(quiet)
  
  result <- FALSE
  response <- tryCatch(expr = httr::RETRY(verb = "GET",
                                          url = url_api("health",
                                                        .use_testing_server = use_testing_server),
                                          times = 3L),
                       error = function(e) e$message)
  
  if (inherits(response, "response")) {
    
    response %<>%
      # ensure we actually got a plaintext response
      pal::assert_mime_type(mime_type = "text/plain",
                            msg_suffix = mime_error_suffix) %>%
      # parse response
      httr::content(as = "text",
                    encoding = "UTF-8")
    
    if (response == "OK") {
      result <- TRUE
      
    } else if (!quiet) {
      cli::cli_alert_warning("RDB API server responded with: {.val {response}}")
    }
    
  } else {
    cli::cli_alert_warning(response)
  }
  
  result
}

#' `r this_pkg` package configuration metadata
#'
#' A [tibble][tibble::tbl_df] with metadata of all possible `r this_pkg` package configuration options. See [pal::pkg_config_val()] for more information.
#'
#' @format `r pkgsnip::return_lbl("tibble_cols", cols = colnames(pkg_config))`
#' @export
#'
#' @examples
#' rdb::pkg_config
"pkg_config"
zdaarau/c2d documentation built on Dec. 18, 2024, 1:24 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
zdaarau/c2d
Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

R/rdb.gen.R
In zdaarau/c2d: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

Defines functions assemble_query_filter as_fm_list api_failure .onLoad has_katex

Documented in assemble_query_filter

R Package Documentation

Browse R Packages

We want your feedback!

zdaarau/c2d Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

R/rdb.gen.R In zdaarau/c2d: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

Defines functions assemble_query_filter as_fm_list api_failure .onLoad has_katex

Documented in assemble_query_filter

R Package Documentation

Browse R Packages

We want your feedback!

zdaarau/c2d
Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide

R/rdb.gen.R
In zdaarau/c2d: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide