# DO NOT EDIT THIS FILE BY HAND! Instead edit the R Markdown source file `Rmd/rdb.Rmd` and run `pkgpurl::purl_rmd()`.
# See `README.md#r-markdown-format` for more information on the literate programming approach used applying the R Markdown format.
# rdb: Download Data from the Referendum Database (RDB), Which Covers Direct Democratic Votes Worldwide
# Copyright (C) 2024 Centre for Democracy Studies Aarau (ZDA)
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
utils::globalVariables(names = c(".",
# tidyselect fns
"all_of",
"any_of",
"everything",
"matches",
"starts_with",
"where",
".x",
# other
"Alpha_2",
"Alpha_3",
"Alpha_4",
"applicability_constraint",
"archive",
"Children",
"children_tier_1",
"children_tier_2",
"children_tier_3",
"Code",
"committee_name",
"Common_name",
"content-disposition",
"count",
"country_code",
"country_code_long",
"country_code_continual",
"country_name",
"country_name_de",
"country_name_long",
"date_last_edited",
"date_time_created",
"date_time_last_active",
"date_time_last_edited",
"Date_withdrawn",
"day",
"electorate_abroad",
"electorate_total",
"files",
"id",
"id_official",
"id_sudd",
"id_sudd_prefix",
"inst_has_precondition",
"inst_has_urgent_legal_basis",
"inst_is_assembly",
"inst_is_binding",
"inst_is_counter_proposal",
"inst_legal_basis_type",
"inst_object_author",
"inst_object_legal_level",
"inst_object_revision_extent",
"inst_object_revision_modes",
"inst_object_type",
"inst_precondition_actor",
"inst_precondition_decision",
"inst_quorum_approval",
"inst_quorum_turnout",
"inst_topics_excluded",
"inst_topics_only",
"inst_trigger_actor",
"inst_trigger_actor_level",
"inst_trigger_threshold",
"inst_trigger_time_limit",
"inst_trigger_type",
"is_draft",
"is_former_country",
"is_multi_valued",
"is_opt",
"is_testing_server",
"ISO_Alpha_3",
"items",
"level",
"lower_house_abstentions",
"lower_house_no",
"lower_house_yes",
"month",
"municipality",
"n",
"Name",
"name_long",
"number",
"Official_name",
"Parent",
"parent_topic",
"position_government",
"ptype",
"question",
"question_en",
"referendum_text_options",
"remarks",
"result",
"rowid",
"sources",
"subnational_entity_code",
"subnational_entity_name",
"subterritories_no",
"subterritories_yes",
"sudd_prefix",
"tags",
"topic",
"topic_tier_1",
"topic_tier_2",
"topic_tier_3",
"topics_tier_1",
"topics_tier_2",
"topics_tier_3",
"territory_name_de",
"territory_name_de_short",
"title_de",
"title_fr",
"title_en",
"turnout",
"type",
"Type",
"un_country_code",
"un_region_tier_1_code",
"un_region_tier_2_name",
"un_region_tier_3_name",
"upper_house_abstentions",
"upper_house_no",
"upper_house_yes",
"url_sudd",
"url_swissvotes",
"value_labels",
"value_scale",
"variable_name",
"variable_name_unnested",
"variable_name_print",
"variable_values",
"value",
"value_total",
"votes",
"votes_empty",
"votes_invalid",
"votes_no",
"votes_per_subterritory",
"votes_yes",
"year"))
# katex is used in documentation dynamically via `\Sexpr`, so it has to be imported to avoid an error when rendering the corresponding help page
# TODO: since the `R CMD check` warning can be considered a bug in this case, investigate the situation and possibly submit bug report
has_katex <- function() {
nchar(katex::example_math()) > 0L
}
.onLoad <- function(libname, pkgname) {
# clear pkgpins cache
tryCatch(expr = pkgpins::clear_cache(board = pkgpins::board(pkg = pkgname),
max_age = pal::pkg_config_val(key = "global_max_cache_age",
pkg = pkgname)),
error = function(e) cli::cli_alert_warning(text = "Failed to clear pkgpins cache on load of {.pkg {pkgname}}. Error message: {e$message}"))
}
api_failure <- function(parsed,
raw = NULL,
prefix = "") {
env <- parent.frame(n = 2L)
assign(x = "parsed",
value = parsed,
pos = env)
msg_part_val <- ifelse(utils::hasName(parsed$error, "value"),
paste0(": ", paste0("{.var ", names(parsed$error$value), "}: {.warn ", parsed$error$value, "}",
collapse = ", ")),
"")
msg_part_error <- ifelse(utils::hasName(parsed, "error"),
paste0("error {.err {parsed$error$id}}", msg_part_val),
"{.err {.y}}.")
cli_div_id <- cli::cli_div(theme = cli_theme)
cli::cli_alert_warning(paste0(prefix, "The API server responded with ", msg_part_error),
.envir = env)
if (!is.null(raw)) {
cli::cli_alert_info("The following JSON payload was sent: {.content {raw}}")
}
cli::cli_end(id = cli_div_id)
}
as_fm_list <- function(x) {
purrr::imap(x,
~ rlang::new_formula(lhs = .y,
rhs = .x,
env = emptyenv()))
}
#' Assemble MongoDB query filter document
#'
#' @param country_code The `country_code`(s) to be included. A character vector.
#' @param subnational_entity_name The `subnational_entity_name`(s) to be included. A character vector.
#' @param municipality The `municipality`(s) to be included. A character vector.
#' @param level The `level`(s) to be included. A character vector.
#' @param type The `type`(s) to be included. A character vector.
#' @param date_min The minimum `date` to be included. A [date][Date] or something coercible to.
#' @param date_max The maximum `date` to be included. A [date][Date] or something coercible to.
#' @param is_draft `TRUE` means to include only referendum entries with _draft_ status, `FALSE` to include only normal entries. Set to `NULL` in order to
#' include both draft and normal entries.
#' @param date_time_created_min The minimum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#' `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_created_max The maximum `date_time_created` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#' `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_min The minimum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#' `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param date_time_last_edited_max The maximum `date_time_last_edited` to be included. A [datetime][base::DateTimeClasses], or something coercible to (like
#' `"2006-01-02"` or `"2006-01-02T15:04:05Z"`; assumed to be in UTC if no timezone is given).
#' @param query_filter A valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents) which allows for
#' maximum control over what data is included. This takes precedence over all of the above listed parameters, i.e. if `query_filter` is provided, the
#' parameters `r formals(assemble_query_filter) |> names() |> setdiff(c("query_filter", "base64_encode")) |> pal::enum_str(wrap = "\x60")` are ignored.
#' @param base64_encode Whether or not to [Base64](https://en.wikipedia.org/wiki/Base64)-encode the resulting query filter document. Note that the
#' `query_filter` argument provided to other functions of this package must be Base64-encoded.
#'
#' @return A character scalar containing a valid [MongoDB JSON query filter document](https://docs.mongodb.com/manual/core/document/#query-filter-documents),
#' [Base64](https://en.wikipedia.org/wiki/Base64)-encoded if `base64_encode = TRUE`.
#' @keywords internal
assemble_query_filter <- function(country_code = NULL,
subnational_entity_name = NULL,
municipality = NULL,
level = NULL,
type = NULL,
date_min = NULL,
date_max = NULL,
is_draft = NULL,
date_time_created_min = NULL,
date_time_created_max = NULL,
date_time_last_edited_min = NULL,
date_time_last_edited_max = NULL,
query_filter = NULL,
base64_encode = TRUE) {
checkmate::assert_string(query_filter,
null.ok = TRUE)
checkmate::assert_flag(base64_encode)
# assemble JSON query filter document if `query_filter` is not provided
if (is.null(query_filter)) {
purrr::map_chr(.x = country_code,
.f = checkmate::assert_choice,
choices = val_set$country_code,
null.ok = TRUE,
.var.name = "country_code")
checkmate::assert_character(subnational_entity_name,
any.missing = FALSE,
null.ok = TRUE)
checkmate::assert_character(municipality,
any.missing = FALSE,
null.ok = TRUE)
purrr::map_chr(.x = level,
.f = checkmate::assert_choice,
choices = var_vals("level"),
null.ok = TRUE,
.var.name = "level")
purrr::map_chr(.x = type,
.f = checkmate::assert_choice,
choices = var_vals("type"),
null.ok = TRUE,
.var.name = "type")
checkmate::assert_flag(is_draft,
null.ok = TRUE)
date_min %<>% lubridate::as_date()
date_max %<>% lubridate::as_date()
date_time_created_min %<>% lubridate::as_datetime(tz = "UTC")
date_time_created_max %<>% lubridate::as_datetime(tz = "UTC")
date_time_last_edited_min %<>% lubridate::as_datetime(tz = "UTC")
date_time_last_edited_max %<>% lubridate::as_datetime(tz = "UTC")
query_filter <-
list(country_code = query_filter_in(country_code),
canton = query_filter_in(subnational_entity_name),
municipality = query_filter_in(municipality),
level = query_filter_in(level),
institution =
type %>%
pal::when(length(.) == 0L ~ .,
~ dplyr::case_match(.x = .,
"citizens' assembly" ~ "citizen assembly",
.default = .) %>%
stringr::str_to_sentence()) %>%
query_filter_in(),
date = query_filter_date(min = date_min,
max = date_max),
draft = is_draft,
created_on = query_filter_datetime(min = date_time_created_min,
max = date_time_created_max),
date_time_last_edited = query_filter_datetime(min = date_time_last_edited_min,
max = date_time_last_edited_max)) %>%
# remove `NULL` elements
purrr::compact() %>%
# convert to JSON
jsonlite::toJSON(POSIXt = "ISO8601",
auto_unbox = TRUE,
digits = NA,
pretty = FALSE)
}
if (base64_encode) {
query_filter %<>% jsonlite::base64_enc()
}
query_filter
}
assert_api_success <- function(x) {
if (!is.null(x$error$id)) {
cli_div_id <- cli::cli_div(theme = cli_theme)
cli::cli_abort("API server responded with error {.err {x$error$id}}")
cli::cli_end(id = cli_div_id)
}
invisible(x)
}
assert_cols_absent <- function(data,
type) {
type <- rlang::arg_match0(arg = type,
values = unique(unlist(data_cols_absent$type)))
cols <-
data_cols_absent %>%
dplyr::filter(purrr::map_lgl(type,
~ !!type %in% .x)) %$%
col
col_names <- colnames(data)
purrr::walk(cols,
~ {
if (.x %in% col_names) {
data_cols_absent %>%
dplyr::filter(col == !!.x & purrr::map_lgl(type,
~ !!type %in% .x)) %$%
msg %>%
cli::cli_abort()
}
})
invisible(data)
}
assert_cols_valid <- function(data,
type = c("validate", "add", "edit"),
action = cli::cli_abort,
cli_progress_id = NULL) {
type <- rlang::arg_match(type)
non_na_col_names <- c("id",
"country_code",
"date",
"level")
na_col_names <-
data %>%
dplyr::select(any_of(non_na_col_names)) %>%
dplyr::filter(dplyr::if_any(.cols = everything(),
.fns = is.na)) %>%
dplyr::select(where(~ anyNA(.x))) %>%
colnames()
n_na_col_names <- length(na_col_names)
if (n_na_col_names) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action("Detected {n_na_col_names} column{?s} in {.arg data} that contain forbidden {.val NA}s: {.var {na_col_names}}")
}
## check `id`
if ("id" %in% colnames(data) && anyDuplicated(data$id)) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action("Duplicated {.var id}s detected. IDs must be unique.")
}
## check `date`
if ("date" %in% colnames(data)) {
check <- checkmate::check_date(data$date,
any.missing = FALSE)
if (!isTRUE(check)) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action("Failed to validate {.var data$date}. {check}")
}
}
## check `level`
if ("level" %in% colnames(data)) {
check <- checkmate::check_subset(as.character(data$level),
choices = var_vals("level"))
if (!isTRUE(check)) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action("Failed to validate {.var data$level}. {check}")
}
}
## check `country_code`
if ("country_code" %in% colnames(data)) {
check <- checkmate::check_subset(as.character(data$country_code),
choices = val_set$country_code)
if (!isTRUE(check)) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action("Failed to validate {.var data$country_code}. {check}")
}
## ensure `position_government` is present for additions when `country_code = "CH" & level = "national"`
if (type == "add"
&& (data %>%
dplyr::filter(country_code == "CH" & level == "national") %>%
nrow() %>%
magrittr::is_greater_than(0L))
&& !("position_government" %in% colnames(data))) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("Referendums with {.code country_code = \"CH\" & level = \"national\"} present in {.arg data} but column {.var ",
"position_government} is missing."))
}
}
## check `subnational_entity_name`
## TODO: check `subnational_entity_code` instead once it's available
if (any(data[["level"]] != "national")) {
if (!("subnational_entity_name" %in% colnames(data))) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("Referendums of {.var level} below {.val national} present in {.arg data} but column {.var subnational_entity_name} is missing."))
}
ix_missing_subnational_entities <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter(level != "national" & is.na(subnational_entity_name)) %$%
rowid
n_missing_subnational_entities <- length(ix_missing_subnational_entities)
if (n_missing_subnational_entities) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_missing_subnational_entities} row{?s} in {.arg data} {?is/are} missing a {.var subnational_entity_name}. Affected {?is/are} ",
"the row{?s} with ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
}
}
if ("subnational_entity_name" %in% colnames(data)) {
ix_illegal_subnational_entities <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter(level == "national" & !is.na(subnational_entity_name)) %$%
rowid
n_illegal_subnational_entities <- length(ix_illegal_subnational_entities)
if (n_illegal_subnational_entities) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_illegal_subnational_entities} row{?s} in {.arg data} {?has/have} a {.var subnational_entity_name} set although they are on the ",
"national level. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_subnational_entities}}."))
}
}
## check `municipality`
if (any(data[["level"]] == "local")) {
if (!("municipality" %in% colnames(data))) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("Referendums of {.var level = \"local\"} present in {.arg data} but column {.var municipality} is missing."))
}
ix_missing_municipalities <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter(level == "local" & is.na(municipality)) %$%
rowid
n_missing_municipalities <- length(ix_missing_municipalities)
if (n_missing_municipalities) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_missing_municipalities} row{?s} in {.arg data} {?is/are} missing a {.var municipality}. Affected {?is/are} the row{?s} with ",
"ind{?ex/ices} {.val {ix_missing_subnational_entities}}."))
}
}
if ("municipality" %in% colnames(data)) {
ix_illegal_municipalities <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter(level != "local" & !is.na(municipality)) %$%
rowid
n_illegal_municipalities <- length(ix_illegal_municipalities)
if (n_illegal_municipalities) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_illegal_municipalities} row{?s} in {.arg data} {?has/have} a {.var municipality} set although they are not on the local level. ",
"Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_municipalities}}."))
}
}
## check variables that are only meant to be set for Swiss national referendums
## TODO: Remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved.
### `votes_per_subterritory`
if (all(c("votes_per_subterritory", "level", "country_code") %in% colnames(data))) {
ix_illegal_votes_per_subterritory <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter((level != "national" | country_code != "CH") & !purrr::map_lgl(votes_per_subterritory, is.null)) %$%
rowid
n_illegal_votes_per_subterritory <- length(ix_illegal_votes_per_subterritory)
if (n_illegal_votes_per_subterritory) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_illegal_votes_per_subterritory} row{?s} in {.arg data} {?has/have} a {.var position_government} set although they are not Swiss ",
"national referendums. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal_votes_per_subterritory}}."))
}
}
### non-list vars
c("lower_house_yes",
"lower_house_no",
"lower_house_abstentions",
"upper_house_yes",
"upper_house_no",
"upper_house_abstentions",
"position_government") %>%
purrr::walk(function(var_name) {
if (all(c(var_name, "level", "country_code") %in% colnames(data))) {
ix_illegal <-
data %>%
tibble::rowid_to_column() %>%
dplyr::filter((level != "national" | country_code != "CH") & !is.na(!!as.symbol(var_name))) %$%
rowid
n_illegal <- length(ix_illegal)
if (n_illegal) {
cli::cli_progress_done(id = cli_progress_id,
result = "failed")
action(paste0("{n_illegal} row{?s} in {.arg data} {?has/have} a {.var {var_name}} set although {cli::qty(n_illegal)}{?it is not a/they are not} ",
"Swiss national referendum{?s}. Affected {?is/are} the row{?s} with ind{?ex/ices} {.val {ix_illegal}}."))
}
}
})
invisible(data)
}
assert_content <- function(x) {
if (!nchar(x)) {
cli::cli_abort("Received empty response from RDB API. Please debug.",
.internal = TRUE)
}
invisible(x)
}
#' Authenticate a user session for the [RDB API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#1-reflexive-routes)
#'
#' Creates a new user session token if necessary. The token is stored in the R option `rdb.user_session_tokens`, a [tibble][tibble::tbl_df] with the columns
#' `email`, `token` and `date_time_last_active`.
#'
#' `email` and `password` default to the [package configuration options][pkg_config] `api_username` and `api_password` respectively.
#'
#' User session tokens expire automatically after 15 days of inactivity.
#'
#' @inheritParams url_api
#' @param email The e-mail address of the user for which a session should be created. A character scalar.
#' @param password The password of the user for which a session should be created. A character scalar.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return The user session token as a character scalar, invisibly.
#' @keywords internal
auth_session <- function(email = pal::pkg_config_val(key = "api_username",
pkg = this_pkg),
password = pal::pkg_config_val(key = "api_password",
pkg = this_pkg),
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg),
quiet = FALSE) {
checkmate::assert_string(email,
min.chars = 3L)
checkmate::assert_string(password,
min.chars = 1L)
checkmate::assert_flag(quiet)
# get existing tokens or initialize empty tibble
tokens <-
getOption("rdb.user_session_tokens") %>%
pal::when(all(c("email", "token", "date_time_last_active") %in% colnames(.)) ~ .,
~ tibble::tibble(email = character(),
token = character(),
is_testing_server = logical(),
date_time_last_active = as.POSIXct(NULL)))
# extract latest token
token <- tokens %>% dplyr::filter(email == !!email & is_testing_server == !!use_testing_server)
if (nrow(token)) {
token %<>% dplyr::filter(date_time_last_active == max(date_time_last_active))
token %<>% .[1L, ]
}
# ensure token is not expired (checked if older than 14 days), else set to `NULL`
if (nrow(token) &&
checkmate::test_string(token$token, min.chars = 1L) &&
((token$date_time_last_active > clock::add_days(clock::date_now(zone = "UTC"), -14L)) || !is_session_expired(token = token$token,
use_testing_server = use_testing_server))) {
token <- token$token
} else {
token <- NULL
}
# create new session if necessary
if (is.null(token)) {
if (!quiet) {
status_msg <- "Authenticating new user session"
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
token <-
httr::RETRY(verb = "POST",
url = url_api("users/session",
.use_testing_server = use_testing_server),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
times = 3L,
encode = "json",
body = list(email = email,
password = password)) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# parse response
httr::content(as = "parsed") %$%
token
if (!quiet) {
cli::cli_progress_done()
}
}
# update `rdb.user_session_tokens` option
options(rdb.user_session_tokens =
tokens %>%
dplyr::filter(token != !!token) %>%
tibble::add_row(email = email,
token = token,
is_testing_server = use_testing_server,
date_time_last_active = clock::date_now(zone = "UTC")))
# return token
invisible(token)
}
md_link_codebook <- function(var_names) {
purrr::map_chr(var_names,
\(x) paste0("[`", x, "`](", url_codebook(x), ")"))
}
country_code_to_name <- function(country_code) {
purrr::map2_chr(.x = country_code,
.y = nchar(as.character(country_code)) > 2L,
.f = ~ {
if (isTRUE(.y)) {
result <-
data_iso_3166_3 %>%
dplyr::filter(Alpha_4 == !!.x) %$%
name_short
} else {
result <-
data_iso_3166_1 %>%
dplyr::filter(Alpha_2 == !!.x) %$%
name_short
}
if (length(result) == 0L) {
result <- NA_character_
}
result
})
}
field_to_var_name <- function(x) {
x %>% purrr::map_chr(~ var_names[[.x]] %||% .x)
}
derive_country_vars <- function(country_code,
date) {
country_code %<>% as.character()
subnational_entity_code <- NA_character_
# handle subnational entities
## Ascension
if (country_code == "AC") {
country_code <- "SH"
subnational_entity_code <- "SH-AC"
}
# assign canonical pseudo codes
## Kosovo
country_code %<>% dplyr::case_match(.x = .,
"KS" ~ "XK",
.default = .)
data_former <-
data_iso_3166_3 %>%
dplyr::filter(Alpha_2 == !!country_code & !!date <= (clock::add_years(Date_withdrawn, 50L))) %>%
dplyr::filter(Date_withdrawn == pal::safe_max(Date_withdrawn))
is_former <- nrow(data_former) > 0L
is_current <- !is_former && country_code %in% data_iso_3166_1$Alpha_2
if (!(is_former || is_current) && !(country_code %in% country_codes_sudd_invalid)) {
cli::cli_alert_warning("Neither ISO 3166-1 alpha-2 nor ISO 3166-3 alpha-4 {.var country_code} found for {.val {country_code}}.")
}
country_code <-
country_code %>%
pal::when(is_former ~
data_former %>%
dplyr::filter(Date_withdrawn == min(Date_withdrawn)) %>%
assertr::verify(nrow(.) == 1L) %$%
Alpha_4,
is_current ~
country_code,
~
NA_character_)
tibble::tibble(country_code = country_code,
country_name = country_code_to_name(country_code),
is_former_country = is_former,
subnational_entity_code = subnational_entity_code)
}
drop_disabled_vars <- function(data,
to_drop) {
to_drop_present <- intersect(to_drop, colnames(data))
n_to_drop_present <- length(to_drop_present)
if (n_to_drop_present) {
cli::cli_alert_warning(paste0("The {cli::qty(n_to_drop_present)} column{?s} {.var {to_drop_present}} in {.arg data} are ignored because setting/altering ",
"the corresponding values is disabled."))
data %<>% dplyr::select(-any_of(to_drop))
}
data
}
drop_implicit_vars <- function(data,
type = c("add", "edit")) {
type <- rlang::arg_match(type)
to_drop <-
data_cols_absent %>%
dplyr::filter(purrr::map_lgl(type,
~ !!type %in% .x)) %$%
col
data %>% dplyr::select(-any_of(to_drop))
}
drop_non_applicable_vars <- function(data) {
if ("level" %in% colnames(data)) {
if (data$level != "local") {
data %<>% dplyr::select(-any_of("municipality"))
}
if (data$level == "national") {
data %<>% dplyr::select(-any_of("subnational_entity_name"))
}
# TODO: remove this as soon as [issue #52](https://github.com/zdaarau/c2d-app/issues/52) is resolved
if (data$level != "national" || data$country_code != "CH") {
data %<>% dplyr::select(-any_of(c("votes_per_subterritory",
"lower_house_yes",
"lower_house_no",
"lower_house_abstentions",
"upper_house_yes",
"upper_house_no",
"upper_house_abstentions",
"position_government")))
}
}
data %<>% dplyr::select(-any_of(c(
"files",
"is_former_country",
# TODO: remove this as soon as [issue #81](https://github.com/zdaarau/c2d-app/issues/81) is fixed
"sources"
)))
data
}
fct_flip <- function(x) {
checkmate::assert_factor(x,
n.levels = 2L)
flip_map <-
levels(x) %>%
magrittr::set_names(value = rev(.)) %>%
as.list()
x %>% forcats::fct_recode(!!!flip_map)
}
flatten_array_as_is <- function(x) {
x %<>% unlist()
if (!is.null(x)) {
x %<>% I()
}
x
}
is_session_expired <- function(token,
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
response <-
httr::RETRY(verb = "GET",
url = url_api("users/profile",
.use_testing_server = use_testing_server),
config = httr::add_headers(Authorization = paste("Bearer", token),
Origin = url_admin_portal(.use_testing_server = use_testing_server)),
times = 3L) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# parse response
httr::content(as = "parsed")
is.null(response[["profile"]])
}
#' Lowercase non-abbreviations
#'
#' @param x A character vector.
#'
#' @return A character vector of the same length as `x`.
#' @keywords internal
lower_non_abbrs <- function(x) {
x %>%
stringr::str_split(pattern = "\\b") %>%
purrr::map_chr(~ .x %>%
dplyr::if_else(stringr::str_detect(string = .,
pattern = "^[^[:lower:]]+$"),
.,
stringr::str_to_lower(.)) %>%
paste0(collapse = ""))
}
order_rfrnd_cols <- function(data) {
data %>% dplyr::relocate(any_of(rfrnd_cols_order))
}
parse_datetime <- function(x) {
x %<>% unlist(use.names = FALSE)
if (is.character(x) && stringr::str_detect(string = x,
pattern = "^-?\\d+$",
negate = TRUE)) {
result <-
x %>%
clock::naive_time_parse(format = "%Y-%m-%dT%H:%M:%SZ",
precision = "millisecond") %>%
clock::as_date_time(zone = "UTC")
} else {
result <-
as.numeric(x) %>%
magrittr::divide_by(1000L) %>%
as.POSIXct(origin = "1970-01-01",
tz = "UTC")
}
result
}
plot_share_per_period <- function(data_freq,
x,
period) {
rlang::check_installed("plotly",
reason = pal::reason_pkg_required())
grid_step <- switch(EXPR = period,
week = 4L,
year = 50L,
decade = 50L,
century = 100L,
1L)
grid_x <- seq(from = ceiling(pal::safe_min(data_freq[[period]])[1L] / grid_step) * grid_step,
to = floor(pal::safe_max(data_freq[[period]])[1L] / grid_step) * grid_step,
by = grid_step)
plotly::plot_ly(data = data_freq,
type = "scatter",
mode = "none",
stackgroup = "one",
groupnorm = "percent",
x = ~eval(as.symbol(period)),
y = ~n,
name = ~eval(as.symbol(x))) %>%
plotly::layout(hovermode = "x",
legend = list(orientation = "h"),
xaxis = list(dtick = switch(EXPR = period,
week = 1L,
month = 1L,
quarter = 1L,
year = 10L,
decade = 10L,
century = 100L),
showgrid = FALSE,
ticklabelstep = switch(EXPR = period,
week = 4L,
month = 1L,
quarter = 1L,
year = 5L,
decade = 5L,
century = 1L),
range = switch(EXPR = period,
week = c(1L, 53L),
month = c(1L, 12L),
quarter = c(1L, 4L),
NULL),
ticks = "outside",
title = list(text = NULL)),
yaxis = list(fixedrange = TRUE,
hoverformat = ".1f",
showgrid = FALSE,
ticksuffix = "\u2009% ",
title = list(text = NULL)),
# draw custom grid
shapes =
grid_x %>%
purrr::map(~ list(type = "line",
y0 = 0L,
y1 = 1L,
yref = "paper",
x0 = .x,
x1 = .x,
line = list(color = "#fff",
width = 0.2))))
}
restore_topics <- function(topics_tier_1,
topics_tier_2,
topics_tier_3) {
list(topics_tier_1,
topics_tier_2,
topics_tier_3) %>%
purrr::pmap(~ {
..1 %>%
unlist() %>%
as.character() %>%
checkmate::assert_character(any.missing = FALSE,
max.len = 3L,
.var.name = "topics_tier_1")
..2 %>%
unlist() %>%
as.character() %>%
checkmate::assert_character(any.missing = FALSE,
max.len = 3L,
.var.name = "topics_tier_2")
..3 %>%
unlist() %>%
as.character() %>%
checkmate::assert_character(any.missing = FALSE,
max.len = 3L,
.var.name = "topics_tier_3")
topics_hierarchy <- hierarchize_topics(tibble::tibble(topics_tier_1 = list(..1),
topics_tier_2 = list(..2),
topics_tier_3 = list(..3)))
topics <- topics_hierarchy$topic_tier_3 %>% setdiff(NA_character_)
if (length(topics) < 3L) {
topics <-
topics_hierarchy %>%
dplyr::filter(is.na(topic_tier_3)) %$%
topic_tier_2 %>%
setdiff(NA_character_) %>%
c(topics)
}
if (length(topics) < 3L) {
topics <-
topics_hierarchy %>%
dplyr::filter(is.na(topic_tier_3) & is.na(topic_tier_2)) %$%
topic_tier_1 %>%
setdiff(NA_character_) %>%
c(topics)
}
topics
})
}
topic_frequency <- function(topics,
tier) {
topics %>%
purrr::list_c(ptype = character()) %>%
factor(levels = topics(tiers = tier)) %>%
tibble::tibble(topic = .) %>%
dplyr::group_by(topic) %>%
dplyr::summarise(n = dplyr::n())
}
#' Tidy "raw" RDB API referendum data
#'
#' Converts the "raw" MongoDB data from the RDB API to the tidied [rfrnds()] schema.
#'
#' You can reverse this function again using [untidy_rfrnds()].
#'
#' @param data The MongoDB data as a list (converted from the JSON returned by the RDB API using [jsonlite::fromJSON()]).
#' @param tidy Whether or not to tidy the referendum data, i.e. apply various data cleansing tasks and add additional variables. If `FALSE`, the raw MongoDB
#' referendum data will only be modified just enough to be able to return it as a [tibble][tibble::tbl_df]. Note that untidy data doesn't conform to the
#' [codebook][data_codebook] (i.a. different variable names).
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @keywords internal
tidy_rfrnds <- function(data,
tidy = TRUE) {
checkmate::assert_flag(tidy)
this_env <- rlang::current_env()
data %<>%
# unnest columns and ensure list type for multi-value columns
# NOTE that despite of the [speed-up in v1.1.4](https://github.com/tidyverse/tidyr/releases/tag/v1.1.4), `tidyr::unnest()` is still much slower than our
# custom function
purrr::map(.f = function(l,
category_names = names(l$categories),
context_names = names(l$context),
title_langs = names(l$title)) {
for (name in category_names) {
l[[paste0("categories.", name)]] <- l$categories[[name]]
}
for (name in context_names) {
l[[paste0("context.", name)]] <- l$context[[name]]
}
for (lang in title_langs) {
l[[paste0("title.", lang)]] <- l$title[[lang]]
}
l$categories <- NULL
l$context <- NULL
l$title <- NULL
for (name in c("tags",
"categories.action",
"categories.special_topics",
"categories.excluded_topics")) {
l[[name]] %<>% purrr::list_c(ptype = character()) %>% list()
}
for (name in c("archive",
"files",
"context.votes_per_canton")) {
l[[name]] %<>% list()
}
l
}) %>%
# drop empty fields
purrr::modify_depth(.depth = 1L,
.f = purrr::compact) %>%
# convert to tibble
purrr::map(tibble::as_tibble_row) %>%
purrr::list_rbind()
# tidy data
if (nrow(data) > 0L && tidy) {
data %<>%
# rename variables (mind that the MongoDB-based API doesn't demand a fixed schema)
pal::rename_from(dict = var_names) %>%
# create/recode variables
dplyr::mutate(
# ensure all supposed to floating-point numbers are actually of type double (JSON API is not reliable in this respect)
dplyr::across(any_of(c("subterritories_no",
"subterritories_yes",
# TODO: remove/adapt next two lines once [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
"date_time_created"["date_time_created" %in% colnames(.)
&& any(purrr::map_lgl(.$date_time_created, is.numeric))],
"date_time_last_edited"["date_time_last_edited" %in% colnames(.)
&& any(purrr::map_lgl(.$date_time_last_edited, is.numeric))])),
~ purrr::map_dbl(.x, ~ if (is.null(.x)) NA_real_ else as.double(.x))),
# use explicit NA values
dplyr::across(where(is.integer),
~ dplyr::if_else(.x %in% c(-1L, -2L),
NA_integer_,
.x)),
dplyr::across(where(is.character),
~ dplyr::if_else(.x %in% c("", "-1", "-2"),
NA_character_,
.x)),
dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
~ dplyr::if_else(.x %in% c(-1.0, -2.0),
NA_real_,
.x)),
dplyr::across(any_of("result"),
~ dplyr::if_else(.x %in% c("Unknown", "Not provided"),
NA_character_,
.x)),
# convert all values to lowercase
## vectors
dplyr::across(any_of(c("result",
"type",
"inst_legal_basis_type",
"inst_object_type",
"inst_object_legal_level",
"inst_object_revision_extent",
"inst_trigger_type",
"inst_trigger_actor_level",
"inst_trigger_time_limit",
"inst_quorum_approval",
"inst_precondition_decision")),
stringr::str_to_lower),
## lists
dplyr::across(any_of(c("inst_object_revision_modes",
"inst_topics_only",
"inst_topics_excluded")),
~ purrr::map(.x = .x,
.f = stringr::str_to_lower)),
# convert only non-abbreviated values to lowercase
dplyr::across(any_of(c("inst_object_author",
"inst_trigger_actor",
"inst_precondition_actor")),
~ purrr::map_chr(.x = .x,
.f = lower_non_abbrs)),
# specific recodings
## binary (dummies)
dplyr::across(any_of("position_government"),
~ dplyr::case_when(.x == "Acceptance" ~ "yes",
.x == "Rejection" ~ "no",
.default = NA_character_)),
dplyr::across(any_of("inst_has_urgent_legal_basis"),
~ dplyr::case_when(.x == "Urgent" ~ TRUE,
.x == "Normal" ~ FALSE,
.default = NA)),
dplyr::across(any_of("inst_is_binding"),
~ dplyr::case_when(.x == "Binding" ~ TRUE,
.x == "Non-binding" ~ FALSE,
.default = NA)),
dplyr::across(any_of("inst_is_counter_proposal"),
~ dplyr::case_when(.x == "Yes" ~ TRUE,
.x == "No" ~ FALSE,
.default = NA)),
dplyr::across(any_of("inst_is_assembly"),
~ dplyr::case_when(.x == "Assembly" ~ TRUE,
.x == "Ballot" ~ FALSE,
.default = NA)),
dplyr::across(any_of("inst_has_precondition"),
~ dplyr::case_when(.x == "Exists" ~ TRUE,
.x == "Does not exist" ~ FALSE,
.default = NA)),
## nominal
### flatten `id`
id = purrr::list_c(id,
ptype = character()),
### split `tags` into separate per-tier vars
topics_tier_1 = tags %>% purrr::map(infer_topics,
tier = 1L),
topics_tier_2 = tags %>% purrr::map(infer_topics,
tier = 2L),
topics_tier_3 = tags %>% purrr::map(~ .x[.x %in% topics_tier_3_]),
### various cleanups
dplyr::across(any_of("type"),
~ dplyr::case_match(.x = .x,
"citizen assembly" ~ "citizens' assembly",
"not provided" ~ NA_character_,
.default = .x)),
dplyr::across(any_of(c("inst_trigger_actor",
"inst_object_author")),
~ dplyr::case_match(.x = .x,
"institution" ~ "other institution",
.default = .x)),
dplyr::across(any_of("inst_object_type"),
~ dplyr::case_match(.x = .x,
"legal text (ausformulierter vorschlag)" ~ "legal text (formulated proposal)",
"legal text (allg. anregung)" ~ "legal text (general proposal)",
.default = .x)),
dplyr::across(any_of("inst_topics_only"),
~ purrr::map(.x = .x,
.f = \(x) dplyr::case_match(.x = x,
"infrastructural act" ~ "infrastructural acts",
"competence shift" ~ "competence shifts",
"financial act" ~ "financial acts",
"financial act (expenses)" ~ "financial acts (expenses)",
"financial act (taxes)" ~ "financial acts (taxes)",
"financial act (obligations)" ~ "financial acts (obligations)",
"total revision of the constitution" ~ "total revisions of the constitution",
.default = x))),
dplyr::across(any_of("inst_topics_excluded"),
~ purrr::map(.x = .x,
.f = \(x) dplyr::case_match(.x = x,
"budget" ~ "budgets",
"parliamentary competence" ~ "everything within parliamentary competence",
.default = x))),
dplyr::across(any_of("inst_quorum_turnout"),
~ stringr::str_replace_all(string = .x,
pattern = c("^(\\s+)?>(\\s+)?" = ">\u202f",
"(\\s+)?%(\\s+)?$" = "\u202f%"))),
## ordinal
## interval
# TODO: Remove else-clauses once [this](https://github.com/zdaarau/c2d-app/commit/6b72d1928e0182f01b188f3973ba15482fc8c04a) is deployed to
# production
date = if (is.list(date)) {
clock::as_date(parse_datetime(date))
} else {
clock::date_parse(date)
},
dplyr::across(any_of(c("date_time_created",
"date_time_last_edited")),
parse_datetime),
## undefined
files = files %>% purrr::map(~ .x %>% purrr::map(~ .x %>%
# unnest and restore `date`
purrr::modify_in(.where = "date",
.f = parse_datetime) %>%
# change subvariable names
pal::rename_from(dict = sub_var_names$files))))
# complement `id_official` and `id_sudd` (a two-letter country code plus a 6-digit number) by old `number`
# TODO: once [issue #?](https://github.com/zdaarau/c2d-app/issues/?) is resolved:
# - correct this upstream using `edit_rfrnds()`
# - remove corresponding code below
# - file issue to completely get rid of field `number`
if ("number" %in% colnames(data)) {
data %<>% dplyr::mutate(number = dplyr::if_else(number %in% c("0", ""),
NA_character_,
number),
dplyr::across(any_of("id_official"),
~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\d"),
number,
.x)),
dplyr::across(any_of("id_sudd"),
~ dplyr::if_else(is.na(.x) & stringr::str_detect(number, "^\\D"),
# everything beyond the 8th char seems to be manually added -> strip!
stringr::str_sub(string = number,
end = 8L),
.x)))
}
# ensure `id_official` and `id_sudd` are present
if (!("id_official" %in% colnames(data))) data$id_official <- NA_character_
if (!("id_sudd" %in% colnames(data))) data$id_sudd <- NA_character_
# TODO: remove this once [issue #]() has been resolved
# create `inst_is_variable/divisible` if necessary
if ("categories.referendum_text_options" %in% colnames(data)) {
if (!("inst_is_variable" %in% colnames(data))) {
data %<>% dplyr::mutate(inst_is_variable = dplyr::case_when(
categories.referendum_text_options %in% c("Variants possible", "Variants / splitting up possible") ~ TRUE,
is.na(categories.referendum_text_options) ~ NA,
.default = FALSE
))
}
if (!("inst_is_divisible" %in% colnames(data))) {
data %<>% dplyr::mutate(inst_is_divisible = dplyr::case_when(
categories.referendum_text_options %in% c("Splitting up possible", "Variants / splitting up possible") ~ TRUE,
is.na(categories.referendum_text_options) ~ NA,
.default = FALSE
))
}
}
# ensure all country codes are known and assign canonical country name
data %<>% add_country_name()
data %<>%
# remove obsolete vars
dplyr::select(-any_of(c("categories.referendum_text_options",
"country_code_historical",
"is_past_jurisdiction",
"number",
"tags"))) %>%
# convert to (ordered) factor where appropriate
## based on codebook
dplyr::mutate(dplyr::across(everything(),
~ {
metadata <- data_codebook %>% dplyr::filter(variable_name == dplyr::cur_column())
if (nrow(metadata) != 1L) {
cli::cli_abort("Missing codebook metadata! Please debug",
.internal = TRUE)
}
if (is.factor(unlist(metadata$ptype))) {
lvls <- levels(unlist(metadata$ptype))
is_ordered <- metadata$value_scale %in% c("ordinal_ascending", "ordinal_descending")
if (is.list(.x)) {
.x %>% purrr::map(.f = factor,
levels = lvls,
ordered = is_ordered)
} else {
factor(x = .x,
levels = lvls,
ordered = is_ordered)
}
} else {
.x
}
})) %>%
## fctrs without explicit variable_values set in codebook
dplyr::mutate(
### fctrs where we defined a finite set of values
country_code = factor(x = country_code,
levels = val_set$country_code,
ordered = FALSE),
### fctrs where we did not define a finite set of values (yet)
dplyr::across(any_of(c("subnational_entity_name",
"municipality")),
as.factor)
) %>%
# add vars which aren't always included and coerce to proper types
vctrs::tib_cast(to =
data_codebook %>%
dplyr::filter(!is_opt) %$%
magrittr::set_names(x = ptype,
value = variable_name) %>%
tibble::as_tibble(),
call = this_env) %>%
# harmonize col order
order_rfrnd_cols()
}
# convert nested list cols to tibbles
data %>%
dplyr::mutate(dplyr::across(any_of(c("files",
"votes_per_subterritory")),
~ purrr::map(.x,
\(x) if (length(x) > 0L) x %>% purrr::map(tibble::as_tibble) %>% purrr::list_rbind() else NULL)),
dplyr::across(any_of("archive"),
~ purrr::map(.x,
\(x) if (length(x) > 0L) tibble::as_tibble(x) else NULL))) %>%
# add variable labels (must be done at last since mutations above drop attrs)
labelled::set_variable_labels(.labels = var_lbls,
.strict = FALSE)
}
untidy_date <- function(x) {
as.numeric(x) %>%
magrittr::multiply_by(1000.0) %>%
as.list() %>%
magrittr::set_names(rep("$date",
times = length(.)))
}
#' Untidy into "raw" RDB API referendum data
#'
#' Converts from the tidied [rfrnds()] to the "raw" MongoDB schema used by the RDB API. Basically reverts [tidy_rfrnds()].
#'
#' @param data The data to untidy as returned by [rfrnds()].
#' @param as_tibble Whether or not to return the result as a [tibble][tibble::tbl_df]. If `FALSE`, a list is returned.
#'
#' @return
#' If `as_tibble = FALSE`, a list with one element per referendum, suitable to be converted [jsonlite::toJSON()] and then fed to the RDB API.
#'
#' Otherwise a [tibble][tibble::tbl_df] of the same format as returned by [`rfrnds(tidy = FALSE)`][rfrnds].
#' @keywords internal
untidy_rfrnds <- function(data,
as_tibble = FALSE) {
checkmate::assert_flag(as_tibble)
var_names_inverse <-
names(var_names) %>%
magrittr::set_names(purrr::list_c(var_names, ptype = character()))
sub_var_names_files_inverse <-
names(sub_var_names$files) %>%
magrittr::set_names(purrr::list_c(sub_var_names$files, ptype = character()))
# restore `number`
if (all(c("id_official", "id_sudd") %in% colnames(data))) {
data %<>% dplyr::mutate(id_sudd = dplyr::if_else(is.na(id_sudd),
id_official,
id_sudd))
}
data %<>%
# remove variable labels
labelled::remove_var_label() %>%
dplyr::mutate(
# restore strings
dplyr::across(c(any_of("date"),
where(is.factor)),
as.character),
dplyr::across(where(is.list),
~ {
if (is.factor(.x[[1L]])) {
.x %>% purrr::map(as.character)
} else {
.x
}
}),
# restore dates
dplyr::across(any_of(c("date_time_created",
"date_time_last_edited")),
untidy_date),
# restore individual variables
## `files`
dplyr::across(any_of("files"),
~ purrr::map(.x = .x,
.f = \(x) {
if ("date_time_attached" %in% colnames(x)) {
x$date_time_attached %<>% untidy_date()
}
x %<>% pal::rename_from(dict = sub_var_names_files_inverse)
})),
## `inst_topics_excluded`
dplyr::across(any_of("inst_topics_excluded"),
~ purrr::map(.x = .x,
.f = \(x) dplyr::case_match(.x = x,
"budgets" ~ "budget",
.default = x))),
## `inst_topics_only`
dplyr::across(any_of("inst_topics_only"),
~ purrr::map(.x = .x,
.f = \(x) dplyr::case_match(.x = x,
"infrastructural acts" ~ "infrastructural act",
"competence shifts" ~ "competence shift",
"financial acts" ~ "financial act",
"financial acts (expenses)" ~ "financial act (expenses)",
"financial acts (taxes)" ~ "financial act (taxes)",
"financial acts (obligations)" ~ "financial act (obligations)",
"total revisions of the constitution" ~ "total revision of the constitution",
.default = x))),
## `inst_object_type`
dplyr::across(any_of("inst_object_type"),
~ dplyr::case_match(.x = .x,
"legal text (formulated proposal)" ~ "legal text (ausformulierter vorschlag)",
"legal text (general proposal)" ~ "legal text (allg. anregung)",
.default = .x)),
## `inst_trigger_actor`, `inst_object_author`
dplyr::across(any_of(c("inst_trigger_actor",
"inst_object_author")),
~ dplyr::case_match(.x = .x,
"other institution" ~ "institution",
.default = .x)),
## `inst_precondition_actor`
dplyr::across(any_of("inst_precondition_actor"),
~ dplyr::case_match(.x = .x,
"parliament and president" ~ "parliament and President",
"parliament and government" ~ "parliament and Government",
.default = .x)),
## `type`
dplyr::across(any_of("type"),
~ dplyr::case_match(.x = .x,
"citizens' assembly" ~ "citizen assembly",
NA_character_ ~ "not provided",
.default = .x)),
## `id`
dplyr::across(any_of("id"),
~ as.list(.x) %>% magrittr::set_names(rep("$oid", times = length(.)))),
## binary (dummies)
dplyr::across(any_of("position_government"),
~ dplyr::case_match(.x = .x,
"yes" ~ "Acceptance",
"no" ~ "Rejection",
.default = .x)),
dplyr::across(any_of("inst_has_urgent_legal_basis"),
~ dplyr::if_else(.x,
"Urgent",
"Normal")),
dplyr::across(any_of("inst_is_binding"),
~ dplyr::if_else(.x,
"Binding",
"Non-binding")),
dplyr::across(any_of("inst_is_counter_proposal"),
~ dplyr::if_else(.x,
"Yes",
"No")),
dplyr::across(any_of("inst_is_assembly"),
~ dplyr::if_else(.x,
"Assembly",
"Ballot")),
dplyr::across(any_of("inst_has_precondition"),
~ dplyr::if_else(.x,
"Exists",
"Does not exist")),
# uppercase first letter of various vars
dplyr::across(any_of(c("result",
"type",
"inst_legal_basis_type",
"inst_object_type",
"inst_object_legal_level",
"inst_object_revision_extent",
"inst_trigger_type",
"inst_trigger_actor_level",
"inst_trigger_time_limit",
"inst_quorum_approval",
"inst_precondition_decision",
# vars containing uppercase abbreviations
"inst_object_author",
"inst_trigger_actor",
"inst_precondition_actor")),
~ pal::sentenceify(x = .x,
punctuation_mark = "")),
dplyr::across(any_of(c("inst_object_revision_modes",
"inst_topics_only",
"inst_topics_excluded")),
~ purrr::map(.x = .x,
.f = pal::sentenceify,
punctuation_mark = "")),
# restore NA values
dplyr::across(where(is.character) & !any_of("result"),
~ tidyr::replace_na(data = .x,
replace = "")),
## implicit NAs (i.e. 'not provided' (-2))
dplyr::across(where(is.integer) & !any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
~ tidyr::replace_na(data = .x,
replace = -2L)),
## explicit NAs (i.e. 'unknown' (-1))
dplyr::across(any_of("result"),
~ tidyr::replace_na(data = .x,
replace = "Unknown")),
dplyr::across(where(is.integer) & any_of(field_to_var_name(union(rfrnd_fields$required_for_additions, rfrnd_fields$required_for_edits))),
~ tidyr::replace_na(data = .x,
replace = -1L)),
dplyr::across(any_of(c("subterritories_yes", "subterritories_no")),
~ tidyr::replace_na(data = .x,
replace = -1.0))
) %>%
# restore variable names
pal::rename_from(dict = var_names_inverse)
# restore `referendum_text_options`
if (all(c("inst_is_divisible", "inst_is_variable") %in% colnames(data))) {
data %<>% dplyr::mutate(referendum_text_options = dplyr::case_when(!inst_is_divisible & !inst_is_variable ~ "Whole text only",
inst_is_divisible & inst_is_variable ~ "Variants / splitting up possible",
inst_is_divisible ~ "Splitting up possible",
inst_is_variable ~ "Variants possible",
.default = NA_character_))
}
# restore `tags`
topics_var_names <- paste0("topics_tier_", 1:3)
topics_vars_present <- topics_var_names %in% colnames(data)
if (any(topics_vars_present)) {
if (!all(topics_vars_present)) {
topics_vars_missing <- topics_var_names %>% setdiff(topics_vars_present)
cli::cli_abort(paste0("{cli::qty(topics_vars_missing)}The following {.var {'topics_tier_#'}} variable{?s} {?is/are} missing from {.arg data}: ",
"{.var {topics_vars_missing}}"))
}
data %<>%
dplyr::mutate(tags = restore_topics(topics_tier_1,
topics_tier_2,
topics_tier_3)) %>%
dplyr::select(-any_of(topics_var_names))
}
# remove unknown columns
data %<>% dplyr::select(any_of(rfrnd_fields$all_flat))
if (!as_tibble) {
# remove nested field prefixes
data %<>% dplyr::rename_with(.cols = matches("^(categories|context|title)\\."),
.fn = ~ stringr::str_remove(string = .x,
pattern = "^\\w+?\\."))
# restore nested structure
categories_fields_present <-
names(var_names) %>%
stringr::str_subset(pattern = "^categories\\.") %>%
stringr::str_remove(pattern = "^categories\\.") %>%
intersect(colnames(data))
context_fields_present <-
names(var_names) %>%
stringr::str_subset(pattern = "^context\\.") %>%
stringr::str_remove(pattern = "^context\\.") %>%
intersect(colnames(data))
title_fields_present <-
names(var_names) %>%
stringr::str_subset(pattern = "^title\\.") %>%
stringr::str_remove(pattern = "^title\\.") %>%
intersect(colnames(data))
if (length(categories_fields_present)) {
data %<>% tidyr::nest(categories = all_of(categories_fields_present))
}
if (length(context_fields_present)) {
data %<>% tidyr::nest(context = all_of(context_fields_present))
}
if (length(title_fields_present)) {
data %<>% tidyr::nest(title = all_of(title_fields_present))
}
data %<>%
# convert to list
dplyr::group_split(dplyr::row_number(),
.keep = FALSE) %>%
purrr::map(as.list) %>%
# tweak list structure
purrr::modify_depth(.depth = 1L,
.f = ~
.x %>%
# flatten unnecessarily nested elements
purrr::modify_at(.at = "tags",
.f = flatten_array_as_is) %>%
# convert nested tibbles to lists
purrr::modify_at(.at = "files",
.f = ~
.x[[1L]] %>%
pal::when(is.null(.) ~ list(),
~ dplyr::group_split(.tbl = .,
dplyr::row_number(),
.keep = FALSE) %>%
purrr::map(as.list))) %>%
purrr::modify_at(.at = c("archive",
"categories",
"context",
"title"),
.f = ~
.x %>%
purrr::map(as.list) %>%
unlist(recursive = FALSE)) %>%
# reduce nesting of nested tibble
pal::when(is.null(purrr::pluck(., "context", "votes_per_canton")) ~ .,
~ purrr::modify_in(.x = .,
.where = c("context", "votes_per_canton"),
.f = dplyr::first)) %>%
# reduce nesting of array fields
pal::when(is.null(purrr::pluck(., "categories", "action")) ~ .,
~ purrr::modify_in(.x = .,
.where = c("categories", "action"),
.f = flatten_array_as_is)) %>%
pal::when(is.null(purrr::pluck(., "categories", "excluded_topics")) ~ .,
~ purrr::modify_in(.x = .,
.where = c("categories", "excluded_topics"),
.f = flatten_array_as_is)) %>%
pal::when(is.null(purrr::pluck(., "categories", "special_topics")) ~ .,
~ purrr::modify_in(.x = .,
.where = c("categories", "special_topics"),
.f = flatten_array_as_is)))
}
data
}
#' Assemble RDB Services API URL
#'
#' @param ... Optional path components added to the base URL.
#' @param .use_testing_server `r pkg_config$description[pkg_config$key == "use_testing_server"]`
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_api("health")
url_api <- function(...,
.use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_flag(.use_testing_server)
ifelse(.use_testing_server,
"stagservices.c2d.ch",
"services.c2d.ch") %>%
fs::path(...) %>%
paste0("https://", .)
}
#' Assemble RDB admin portal URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_admin_portal("referendum/5bbbfd7b92a21351232e46b5")
url_admin_portal <- function(...,
.use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_flag(.use_testing_server)
ifelse(.use_testing_server,
"c2d-admin.netlify.app",
"admin.c2d.ch") %>%
fs::path(...) %>%
paste0("https://", .)
}
#' Assemble codebook URL
#'
#' @param var Optional variable name to add as the [fragment identifier](https://en.wikipedia.org/wiki/URI_fragment) of the returned URL, which leads to a
#' direct link to the relevant codebook section. Must be either one of the column names of [`data_codebook`], or a valid fragment identifier of a codebook
#' section above the individual variables (`r pal::enum_str(codebook_fragments, last = " or ")`).
#'
#' @return A character scalar.
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_codebook("level")
#' rdb:::url_codebook("topics")
url_codebook <- function(var = NULL) {
checkmate::assert_string(var,
null.ok = TRUE)
if (!is.null(var)) {
var %<>%
rlang::arg_match0(values = c(data_codebook$variable_name,
# additional HTML anchors
codebook_fragments)) %>%
stringr::str_replace_all(pattern = stringr::fixed("_"),
replacement = "-")
}
paste0("https://rdb.rpkg.dev/articles/codebook.html", "#"[!is.null(var)], var)
}
#' Assemble website URL
#'
#' @inheritParams url_api
#'
#' @inherit url_api return
#' @family url_assembly
#' @keywords internal
#'
#' @examples
#' rdb:::url_website("referendum/CH/5bbc04f692a21351232e5a01")
url_website <- function(...,
.use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_flag(.use_testing_server)
ifelse(.use_testing_server,
"c2d-site.netlify.app",
"c2d.ch") %>%
fs::path(...) %>%
paste0("https://", .)
}
query_filter_date <- function(min,
max) {
list(`$gte` = min,
`$lte` = max) %>%
purrr::compact()
}
query_filter_datetime <- function(min,
max) {
list(`$gte` = purrr::compact(list(`$date` = min)),
`$lte` = purrr::compact(list(`$date` = max))) %>%
purrr::compact()
}
query_filter_in <- function(x) {
x %>% pal::when(length(.) == 0L ~ NULL,
length(.) == 1L ~ .,
~ list(`$in` = .))
}
parse_sudd_date <- function(x) {
x_parts <- stringr::str_split_1(string = x,
pattern = "-")
to_int <- function(x) {
x %<>% as.integer()
x[x == 0L] <- NA_integer_
x
}
tibble::tibble(year = to_int(x_parts[1L]),
month = to_int(x_parts[2L]),
day = to_int(x_parts[3L]))
}
parse_sudd_date_de <- function(x) {
components <- stringr::str_split_fixed(string = x,
pattern = "\\s+",
n = 3L)
list(year = components[, 3L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer(),
month = components[, 2L] %>% dplyr::case_match(!!!months_de_fms),
day = components[, 1L] %>% stringr::str_extract(pattern = "\\d+") %>% as.integer())
}
parse_sudd_id <- function(id_sudd) {
sudd_year <-
id_sudd %>%
stringr::str_extract(pattern = "\\d{4}$") %>%
as.integer()
sudd_country_code <-
id_sudd %>%
stringr::str_sub(end = 2L) %>%
stringr::str_to_upper()
derive_country_vars(country_code = sudd_country_code,
date = clock::date_build(year = sudd_year,
month = 1L,
day = 1L))
}
sudd_rfrnd <- function(id_sudd) {
checkmate::assert_string(id_sudd)
html <-
httr::RETRY(verb = "GET",
url = url_sudd("event.php"),
query = list(id = id_sudd),
times = 3L) %>%
xml2::read_html() %>%
rvest::html_element(css = "main table") %>%
rvest::html_children()
field_names <-
html %>%
rvest::html_elements(css = "td.feld") %>%
rvest::html_text()
# handle fields with duplicated/ambiguous names
if (anyDuplicated(field_names)) {
## simple duplicates (probably data errors)
if (id_sudd == "gr011862") {
ix_to_drop <- c(which(field_names == "\u2517\u2501 Republik")[2L],
which(field_names == "\u2517\u2501 Russischer Prinz")[2L])
html %<>% .[-ix_to_drop]
field_names %<>% .[-ix_to_drop]
## competing / mutually exclusive proposals, e.g. proposals with direct counter proposal and (optionally) tie-breaker question (CH and LI)
} else {
option_names <-
html %>%
rvest::html_elements(css = "td.feld strong") %>%
rvest::html_text()
if (length(option_names) < 2L) {
cli::cli_abort("Unknown table layout detected for referendum with {.arg id_sudd = {id_sudd}}. Please debug.",
.internal = TRUE)
}
ix_option_names <- which(field_names %in% option_names)
option_names_counter <- c("Gegenentwurf", "Gegenvorschlag")
option_names_tie_breaker <- "Stichfrage"
option_names_special <- c(option_names_counter, option_names_tie_breaker)
has_counter_proposal <- any(option_names_counter %in% option_names)
n_proposals_original <-
option_names %>%
setdiff(option_names_special) %>%
length()
ix_option_field_names <-
ix_option_names[-length(ix_option_names)] %>%
purrr::imap(~ (.x + 1L):(ix_option_names[.y + 1L] - 1L)) %>%
c(list((dplyr::last(ix_option_names) + 1L):(min(length(field_names), which(field_names %in% c("Medien",
"Bemerkungen",
"Gleichzeitig mit",
"Quellen",
"Vollst\u00e4ndigkeit",
"Letzte \u00c4nderung"))) - 1L)))
# rename field names
option_suffixes <-
option_names %>%
purrr::imap_chr(~ .x %>% pal::when(. %in% option_names_counter ~ "counter_proposal",
. %in% option_names_tie_breaker ~ "tie_breaker",
has_counter_proposal && n_proposals_original == 1L ~ "proposal",
~ glue::glue("option_{.y}")))
renamings <-
purrr::map2(.x = setdiff(option_names,
option_names_tie_breaker),
.y = setdiff(option_suffixes,
"tie_breaker"),
.f = ~ rlang::list2(!!paste("\u2517\u2501", .x) := glue::glue("votes_tie_breaker_{.y}"),
!!paste("\u2517\u2501 St\u00e4nde", .x) := glue::glue("subterritories_{.y}_tie_breaker"))) %>%
purrr::list_flatten() %>%
as_fm_list()
for (i in seq_along(option_names)) {
field_names[ix_option_field_names[[i]]] %<>%
dplyr::case_match(.x = .,
!!!c(renamings,
list("Abgegebene Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_total"),
"Stimmen ausser Betracht" ~ glue::glue("votes_{option_suffixes[i]}_invalid"),
"Ohne Antwort" ~ glue::glue("votes_{option_suffixes[i]}_empty"),
"G\u00fcltige (= massgebende) Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_valid"),
"\u2517\u2501 Ja-Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_yes"),
"\u2517\u2501 Nein-Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_no"),
"Ja-Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_yes"),
"Nein-Stimmen" ~ glue::glue("votes_{option_suffixes[i]}_no"),
"St\u00e4nde (Kantone)" ~ glue::glue("subterritories_{option_suffixes[i]}"),
"\u2517\u2501 Annehmende St\u00e4nde" ~ glue::glue("subterritories_{option_suffixes[i]}_yes"),
"\u2517\u2501 Verwerfende St\u00e4nde" ~ glue::glue("subterritories_{option_suffixes[i]}_no"))),
.default = .)
}
# drop obsolete fields
html %<>% .[-ix_option_names]
field_names %<>% .[-ix_option_names]
}
}
# handle other special cases
if (id_sudd %in% c("li011954",
"li031985")) {
field_names %<>% dplyr::case_match(.x = .,
"\u2517\u2501 Initiative" ~ "votes_proposal",
"\u2517\u2501 Gegenvorschlag" ~ "votes_counter_proposal",
"\u2517\u2501 Nein-Stimmen" ~ "votes_option_none",
.default = .)
}
## move content of exotic fields to `remarks`
remarks_field <-
html[field_names == "Bemerkungen"] %>%
pal::when(length(.) > 0L ~ rvest::html_elements(x = ., css = "td")[[2L]],
~ .)
remarks_list_col <- list(list(text = rvest::html_text2(remarks_field),
urls =
remarks_field %>%
rvest::html_elements(css = "a") %>%
purrr::map_chr(~ .x %>%
rvest::html_attr(name = "href") %>%
url_sudd()),
html =
remarks_field %>%
xml2::xml_contents() %>%
as.character() %>%
paste0(collapse = "")))
ix_fields_to_remarks <-
field_names %>%
stringr::str_detect(pattern = paste0("^",
pal::fuse_regex("\u2517\u2501\u2501\u2501 .+Stimmen( .+)?",
"Unklare Stimmen",
"Unstimmigkeiten",
"G\u00fcltig stimmende Personen"),
"$")) %>%
which()
if (length(ix_fields_to_remarks) > 0L) {
addendum <- field_names[ix_fields_to_remarks]
if (length(addendum) > 0L) {
addendum %<>%
stringr::str_extract("\\w.*") %>%
paste0(": ",
html[ix_fields_to_remarks] %>%
rvest::html_elements(css = "td") %>%
magrittr::extract2(2L) %>%
rvest::html_elements(css = "data") %>%
rvest::html_attr("value"),
collapse = "\n\n")
}
remarks_list_col[[1L]]$text %<>% paste0(addendum, "\n\n"[length(addendum) > 0L], .)
remarks_list_col[[1L]]$html %<>% paste0("<p>\n", addendum, "\n</p>", .)
html %<>% .[-ix_fields_to_remarks]
field_names %<>% .[-ix_fields_to_remarks]
}
# remove unnecessary fields
ix_to_drop <- which(field_names %in% c("Nicht eingelegte Stimmzettel",
"Nicht eingelegte Stimmenzettel",
"Nicht teilgenommen"))
if (length(ix_to_drop)) {
html %<>% .[-ix_to_drop]
field_names %<>% .[-ix_to_drop]
}
field_names %<>%
dplyr::case_match(.x = .,
"Gebiet" ~ "territory_name_de",
"\u2517\u2501 Stellung" ~ "territory_type_de",
"Datum" ~ "date",
"Titel" ~ "title_de",
"Vorlage" ~ "title_de",
"\u2517\u2501 Fragemuster" ~ "question_type_de",
"\u2517\u2501 Abstimmungstyp" ~ "types",
"Ergebnis" ~ "result_de",
"Vollst\u00e4ndigkeit" ~ "result_status_de",
"\u2517\u2501 Mehrheiten" ~ "adoption_requirements_de",
"Stimmberechtigte" ~ "electorate_total",
"\u2517\u2501 Davon im Ausland" ~ "electorate_abroad",
"Stimmausweise" ~ "polling_cards",
"Ausgegebene Stimmzettel" ~ "polling_cards",
"Stimmbeteiligung" ~ "votes_total",
"Stimmen ausser Betracht" ~ "votes_invalid",
"Stimmzettel ausser Betracht" ~ "votes_invalid",
"Leere Stimmen" ~ "votes_empty",
"\u2517\u2501 Leere Stimmen" ~ "votes_empty",
"\u2517\u2501 Leere Stimmzettel" ~ "votes_empty",
"Ung\u00fcltige Stimmen" ~ "votes_void",
"\u2517\u2501 Ung\u00fcltige Stimmen" ~ "votes_void",
"\u2517\u2501 Ung\u00fcltige Stimmzettel" ~ "votes_void",
"Ung\u00fcltig eingelegte Stimmzettel" ~ "votes_void",
"Ganz ung\u00fcltige Stimmzettel" ~ "votes_void",
"G\u00fcltige (= massgebende) Stimmen" ~ "votes_valid",
"\u2517\u2501 Ja-Stimmen" ~ "votes_yes",
"\u2517\u2501 Nein-Stimmen" ~ "votes_no",
"\u2517\u2501 Nein zu beiden Vorschl\u00e4gen" ~ "votes_option_none",
"\u2517\u2501 Stimmen ausser Betracht" ~ "votes_invalid",
"Staaten" ~ "subterritories",
"\u2517\u2501 Annehmende Staaten" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Staaten" ~ "subterritories_no",
"Gebiete" ~ "subterritories",
"\u2517\u2501 Annehmende Gebiete" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Gebiete" ~ "subterritories_no",
"Provinzen" ~ "subterritories",
"\u2517\u2501 Annehmende Provinzen" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Provinzen" ~ "subterritories_no",
"Inseln" ~ "subterritories",
"\u2517\u2501 Annehmende Inseln" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Inseln" ~ "subterritories_no",
"St\u00e4nde (Kantone)" ~ "subterritories",
"\u2517\u2501 Annehmende St\u00e4nde" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende St\u00e4nde" ~ "subterritories_no",
"Regionen / St\u00e4dte" ~ "subterritories",
"\u2517\u2501 Annehmende Regionen / St\u00e4dte" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Regionen / St\u00e4dte" ~ "subterritories_no",
"Wahlkreise" ~ "subterritories",
"\u2517\u2501 Annehmende Wahlkreise" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Wahlkreise" ~ "subterritories_no",
"Senatswahlkreise" ~ "subterritories",
"\u2517\u2501 Annehmende Senatswahlkreise" ~ "subterritories_yes",
"\u2517\u2501 Verwerfende Senatswahlkreise" ~ "subterritories_no",
"Medien" ~ "files",
"Bemerkungen" ~ "remarks",
"Gleichzeitig mit" ~ "ids_sudd_simultaneous",
"Quellen" ~ "sources",
"Letzte \u00c4nderung" ~ "date_last_edited",
.default = .) %>%
# assert field names are unique
checkmate::assert_character(any.missing = FALSE,
unique = TRUE,
.var.name = "field_names") %>%
# referendum-option-specific recodings (sequentially numbered `votes_option_#` columns)
# TODO: adapt this once we can properly capture more than yes/no answer options, cf. https://gitlab.com/zdaarau/rpkgs/rdb/-/issues/5
purrr::map_at(.at = which(startsWith(., "\u2517\u2501 ")),
.f = function(old_name, old_names) paste0("votes_option_", which(old_names == old_name)),
old_names = stringr::str_subset(string = .,
pattern = "^\u2517\u2501 ")) %>%
purrr::list_c(ptype = character())
# assert no original uppercase field names are left over
ix_field_names_unknown <-
field_names %>%
stringr::str_detect(pattern = "[:upper:]") %>%
which()
if (length(ix_field_names_unknown)) {
cli::cli_abort(paste0("Unknown {cli::qty(length(ix_field_names_unknown))} field{?s} {.field {field_names[ix_field_names_unknown]}} present in data for ",
"referendum with {.arg {paste0('id_sudd = ', id_sudd)}}."),
.internal = TRUE)
}
purrr::map2_dfc(.x = html,
.y = field_names,
.f = function(html, col_name) {
cells <- html %>% rvest::html_elements(css = "td")
col_text <- rvest::html_text2(cells[[2L]])
# extract hyperlinks if necessary
if (col_name %in% c("remarks",
"ids_sudd_simultaneous",
"sources")) {
urls <-
cells[[2L]] %>%
rvest::html_elements(css = "a") %>%
purrr::map_chr(~ .x %>%
rvest::html_attr(name = "href") %>%
url_sudd())
}
tibble::tibble(!!col_name :=
col_name %>%
pal::when(
# character scalars
. %in% c("territory_name_de",
"territory_type_de",
"title_de",
"question_type_de",
"result_de",
"result_status_de") ~
col_text,
# integer scalars
stringr::str_detect(
string = .,
pattern = paste0(
"^",
pal::fuse_regex(
"electorate_total",
"electorate_abroad",
"polling_cards",
"votes_total",
"votes_invalid",
"votes_empty",
"votes_void",
"votes_valid",
"votes_yes",
"votes_no",
"votes_proposal",
"votes_counter_proposal",
paste0("votes_",
pal::fuse_regex("option_(\\d+|none)",
"(counter_)?proposal",
"tie_breaker(_(option_\\d+|(counter_)?proposal))?"),
paste0("(_",
pal::fuse_regex("total",
"empty",
"void",
"invalid",
"valid",
"yes",
"no"),
")?"))),
"$")) ~
cells[[2L]] %>%
rvest::html_elements(css = "data") %>%
rvest::html_attr("value") %>%
# fall back to parsing text if no semantic data could be extracted
pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
~ .) %>%
as.integer(),
startsWith(., "subterritories") ~
cells[[2L]] %>%
rvest::html_elements(css = "data") %>%
rvest::html_attr("value") %>%
# fall back to parsing text if no semantic data could be extracted
pal::when(length(.) == 0L ~ col_text %>% stringr::str_remove_all(pattern = "[^\\d]"),
~ .) %>%
as.numeric(),
# date scalars
. == "date" ~
cells[[2L]] %>%
rvest::html_element(css = "time") %>%
rvest::html_attr(name = "datetime"),
. == "date_last_edited" ~
cells[[2L]] %>%
rvest::html_element(css = "time") %>%
rvest::html_attr(name = "datetime") %>%
clock::date_parse(),
# lists (multi-value cols)
. == "remarks" ~
remarks_list_col,
. == "sources" ~
list(list(text = col_text,
urls = urls,
html =
cells[[2L]] %>%
xml2::xml_contents() %>%
as.character() %>%
paste0(collapse = ""))),
. == "types" ~
col_text %>% stringr::str_split(pattern = "\\s*\u2192\\s*"),
. == "adoption_requirements_de" ~
col_text %>% stringr::str_split(pattern = ",\\s*"),
. == "files" ~
cells[[2L]] %>%
rvest::html_elements(css = "a") %>%
purrr::map(~ .x %>%
rvest::html_attr(name = "href") %>%
url_sudd() %>%
tibble::tibble(description = rvest::html_text(.x),
url = .)) %>%
purrr::list_rbind() %>%
list(),
. == "ids_sudd_simultaneous" ~
urls %>%
stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+") %>%
list(),
~ "PARSING ERROR; PLEASE DEBUG"
))
})
}
url_sudd <- function(x = "") {
purrr::map_chr(x,
\(x) {
if (!is.na(x) && stringr::str_detect(x, "https?:")) {
x
} else {
httr::modify_url(url = x %|% "",
scheme = "https",
hostname = "sudd.ch")
}
})
}
this_pkg <- utils::packageName()
cli_theme <-
cli::builtin_theme() %>%
purrr::list_modify(span.err = list(color = "red",
`font-weight` = "bold"),
span.warn = list(color = "orange",
`font-weight` = "bold"),
span.content = list(color = "mediumorchid"))
date_backup_rdb <- pal::path_mod_time("data-raw/backups/rdb.rds") |> clock::as_date()
codebook_fragments <- c("institution-level-variables",
"referendum-level-variables",
"topics")
data_cols_absent <-
tibble::tibble(col = character(),
type = list(),
msg = character()) %>%
tibble::add_row(col = "id",
type = list("add"),
msg = "an {.var id} column. It is automatically set by the RDB API back-end. Did you mean to {.fun edit_rfrnds} instead?") %>%
tibble::add_row(col = "country_name",
type = list(c("add", "edit")),
msg = "a {.var country_name} column. It is automatically set by the RDB API back-end based on {.var country_code}.") %>%
tibble::add_row(col = "date_time_created",
type = list(c("add", "edit")),
msg = "a {.var date_time_created} column. This date is automatically set by the RDB API back-end and not supposed to be changed.") %>%
tibble::add_row(col = "date_time_last_edited",
type = list(c("add", "edit")),
msg = paste0("a {.var date_time_last_edited} column. This date is automatically set by the RDB API back-end and not supposed to be changed ",
"manually.")) %>%
dplyr::mutate(msg = paste0("{.arg data} mustn't contain ", msg))
ballot_date_colnames <- c("country_code",
"country_code_long",
"country_code_continual",
"country_name",
"country_name_long",
"subnational_entity_code",
"subnational_entity_name",
"municipality",
"level",
"date",
"week",
"month",
"quarter",
"year",
"decade",
"century",
"era",
"wave_of_democracy",
"is_former_country",
"un_country_code",
"un_region_tier_1_code",
"un_region_tier_1_name",
"un_region_tier_2_code",
"un_region_tier_2_name",
"un_region_tier_3_code",
"un_region_tier_3_name",
"un_subregion")
rfrnd_fields <- list()
rfrnd_fields$all <- c("_id",
"archive",
"canton",
"categories",
"citizens_abroad",
"committee_name",
"context",
"country_code",
"country_code_historical",
"country_name",
"created_on",
"date",
"date_time_last_edited",
"draft",
"files",
"id_official",
"id_sudd",
"institution",
"is_past_jurisdiction",
"level",
"question",
"question_en",
"municipality",
"number",
"remarks",
"result",
"sources",
"tags",
"title",
"total_electorate",
"votes_empty",
"votes_invalid",
"votes_no",
"votes_yes")
rfrnd_fields$all_flat <-
rfrnd_fields$all %>%
setdiff(c("categories", "context", "title")) %>%
union(c("categories.action",
"categories.author_of_the_vote_object",
"categories.counter_proposal",
"categories.decision_quorum",
"categories.degree_of_revision",
"categories.excluded_topics",
"categories.hierarchy_of_the_legal_norm",
"categories.institutional_precondition",
"categories.institutional_precondition_decision",
"categories.institutional_precondition_decision_actor",
"categories.legal_act_type",
"categories.official_status",
"categories.referendum_text_options",
"categories.special_topics",
"categories.turnout_quorum",
"categories.vote_object",
"categories.vote_result_status",
"categories.vote_trigger",
"categories.vote_trigger_actor",
"categories.vote_trigger_number",
"categories.vote_trigger_state_level",
"categories.vote_trigger_time",
"categories.vote_venue",
"context.national_council_abstentions",
"context.national_council_no",
"context.national_council_yes",
"context.recommendation",
"context.states_council_abstentions",
"context.states_council_no",
"context.states_council_yes",
"context.states_no",
"context.states_yes",
"context.votes_per_canton",
"title.de",
"title.en",
"title.fr"))
rfrnd_fields$required_for_edits <- c("_id",
"draft",
"total_electorate",
"citizens_abroad",
"votes_yes",
"votes_no",
"votes_empty",
"votes_invalid")
rfrnd_fields$required_for_additions <- c("country_code",
"level",
"date",
"title.en",
"result",
"total_electorate",
"citizens_abroad",
"votes_yes",
"votes_no",
"votes_empty",
"votes_invalid",
"draft",
"institution")
rfrnd_fields$never_empty <- c("_id",
"country_code",
"country_name",
"created_on",
"level",
"total_electorate",
"citizens_abroad",
"votes_yes",
"votes_no",
"votes_empty",
"votes_invalid",
"draft")
# old name new name
var_names <- list(`_id` = "id",
canton = "subnational_entity_name",
title.de = "title_de",
title.en = "title_en",
title.fr = "title_fr",
context.states_no = "subterritories_no",
context.states_yes = "subterritories_yes",
total_electorate = "electorate_total",
citizens_abroad = "electorate_abroad",
context.votes_per_canton = "votes_per_subterritory",
context.national_council_yes = "lower_house_yes",
context.national_council_no = "lower_house_no",
context.national_council_abstentions = "lower_house_abstentions",
context.states_council_yes = "upper_house_yes",
context.states_council_no = "upper_house_no",
context.states_council_abstentions = "upper_house_abstentions",
context.recommendation = "position_government",
draft = "is_draft",
created_on = "date_time_created",
institution = "type",
categories.official_status = "inst_legal_basis_type",
categories.legal_act_type = "inst_has_urgent_legal_basis",
categories.vote_result_status = "inst_is_binding",
categories.counter_proposal = "inst_is_counter_proposal",
categories.vote_venue = "inst_is_assembly",
categories.vote_trigger = "inst_trigger_type",
categories.vote_trigger_actor = "inst_trigger_actor",
categories.vote_trigger_state_level = "inst_trigger_actor_level",
categories.vote_trigger_number = "inst_trigger_threshold",
categories.vote_trigger_time = "inst_trigger_time_limit",
categories.vote_object = "inst_object_type",
categories.author_of_the_vote_object = "inst_object_author",
categories.hierarchy_of_the_legal_norm = "inst_object_legal_level",
categories.degree_of_revision = "inst_object_revision_extent",
categories.action = "inst_object_revision_modes",
categories.turnout_quorum = "inst_quorum_turnout",
categories.decision_quorum = "inst_quorum_approval",
categories.institutional_precondition = "inst_has_precondition",
categories.institutional_precondition_decision_actor = "inst_precondition_actor",
categories.institutional_precondition_decision = "inst_precondition_decision",
categories.special_topics = "inst_topics_only",
categories.excluded_topics = "inst_topics_excluded")
# old name new name
sub_var_names <- list(files = list(date = "date_time_attached",
object_key = "s3_object_key",
size = "file_size",
deleted = "is_deleted"))
# create additional formula-lists (mainly to be fed to `dplyr::case_match()`)
var_names_fms <- as_fm_list(var_names)
sub_var_names_fms <- purrr::imap(sub_var_names,
~ as_fm_list(.x))
mime_error_suffix <- "This indicates either some network issue or a change in the RDB API."
sudd_years <-
url_sudd("index.php") %>%
xml2::read_html() %>%
rvest::html_element(css = "select[id='first']") %>%
rvest::html_elements("option") %>%
rvest::html_attr("value") %>%
as.integer()
sudd_max_year <- pal::safe_max(sudd_years)
sudd_min_year <- pal::safe_min(sudd_years)
rm(sudd_years)
#' Get referendum data (old version)
#'
#' Downloads the referendum data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams assemble_query_filter
#' @inheritParams tidy_rfrnds
#' @inheritParams url_api
#' @param incl_archive Whether or not to include an `archive` column containing data from an earlier, obsolete state of the Referendum Database (RDB).
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds()
#'
#' # get only referendums in Austria and Australia on subnational level
#' rdb::rfrnds(country_code = c("AT", "AU"),
#' level = "subnational",
#' quiet = TRUE)
#'
#' # get referendums in 2020
#' rdb::rfrnds(date_min = "2020-01-01",
#' date_max = "2020-12-31",
#' quiet = TRUE)
#'
#' # get referendums added to the database during the last 30 days
#' rdb::rfrnds(date_time_created_min = clock::date_today(zone = "UTC") |> clock::add_days(-30L),
#' date_time_created_max = clock::date_today(zone = "UTC"),
#' quiet = TRUE)
#'
#' # provide custom `query_filter` for more complex queries like regex matches
#' # cf. https://docs.mongodb.com/manual/reference/operator/query/regex/
#' rdb::rfrnds(query_filter = '{"country_code":{"$regex":"A."}}',
#' quiet = TRUE)
rfrnds <- function(country_code = NULL,
subnational_entity_name = NULL,
municipality = NULL,
level = NULL,
type = NULL,
date_min = NULL,
date_max = NULL,
is_draft = FALSE,
date_time_created_min = NULL,
date_time_created_max = NULL,
date_time_last_edited_min = NULL,
date_time_last_edited_max = NULL,
query_filter = NULL,
incl_archive = FALSE,
tidy = TRUE,
use_cache = TRUE,
max_cache_age = "1 week",
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg),
quiet = FALSE) {
checkmate::assert_flag(incl_archive)
checkmate::assert_flag(quiet)
# TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
if (isTRUE(use_testing_server)) cli::cli_abort("{.code mode=stream} is not yet supported on the testing servers.")
result <- pkgpins::with_cache(expr = {
if (!quiet) {
status_msg <- "Fetching JSON data from RDB API..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
data <-
httr::RETRY(verb = "GET",
url = url_api("referendums",
.use_testing_server = use_testing_server),
query = list(mode = "stream",
format = "json",
filter = assemble_query_filter(country_code = country_code,
subnational_entity_name = subnational_entity_name,
municipality = municipality,
level = level,
type = type,
date_min = date_min,
date_max = date_max,
is_draft = is_draft,
date_time_created_min = date_time_created_min,
date_time_created_max = date_time_created_max,
date_time_last_edited_min = date_time_last_edited_min,
date_time_last_edited_max = date_time_last_edited_max,
query_filter = query_filter)),
if (!quiet) httr::progress(type = "down"),
times = 3L) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# extract JSON
httr::content(as = "text",
encoding = "UTF-8") %>%
# ensure body is not empty
assert_content()
if (!quiet) {
status_msg <- "Converting JSON to list data..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
# NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result (depending on `query`)
data %<>%
jsonlite::fromJSON(simplifyVector = FALSE,
simplifyDataFrame = FALSE,
simplifyMatrix = FALSE,
flatten = FALSE) %$%
items
if (!quiet) {
status_msg <- "Tidying data..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
data %>% tidy_rfrnds(tidy = tidy)
},
pkg = this_pkg,
from_fn = "rfrnds",
country_code,
subnational_entity_name,
municipality,
level,
type,
date_min,
date_max,
is_draft,
date_time_created_min,
date_time_created_max,
date_time_last_edited_min,
date_time_last_edited_max,
query_filter,
tidy,
use_testing_server,
use_cache = use_cache,
max_cache_age = max_cache_age)
# exclude `archive` if requested
if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))
result
}
#' Get referendum data from backup
#'
#' Downloads the referendum data from the Referendum Database (RDB) backup [in the `zdaarau/rpkgs/rdb`
#' repository](https://gitlab.com/zdaarau/rpkgs/rdb/-/blob/master/data-raw/backups/rdb.rds?ref_type=heads). See the [`codebook`][codebook] for a detailed
#' description of all variables.
#'
#' @inheritParams rfrnds
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family rfrnd
#' @export
#'
#' @examples
#' # get all referendums (excl. drafts)
#' rdb::rfrnds_bkp()
rfrnds_bkp <- function(is_draft = FALSE,
incl_archive = FALSE,
use_cache = TRUE,
max_cache_age = "1 week",
quiet = FALSE) {
checkmate::assert_flag(is_draft,
null.ok = TRUE)
checkmate::assert_flag(incl_archive)
checkmate::assert_flag(quiet)
result <- pkgpins::with_cache(
expr = {
if (!quiet) {
pal::cli_progress_step_quick(msg = "Fetching latest RDB backup")
}
path_temp <- fs::file_temp(pattern = "rdb-", ext = "rds")
utils::download.file(url = "https://gitlab.com/zdaarau/rpkgs/rdb/-/raw/master/data-raw/backups/rdb.rds?ref_type=heads&inline=false",
destfile = path_temp,
quiet = TRUE,
mode = "wb")
readRDS(file = path_temp)
},
pkg = this_pkg,
from_fn = "rfrnds",
use_cache = use_cache,
max_cache_age = max_cache_age
)
# exclude `archive` if requested
if (!incl_archive) result %<>% dplyr::select(-any_of("archive"))
# respect `is_draft`
if (!is.null(is_draft)) {
result %<>% dplyr::filter(is_draft == !!is_draft)
}
result
}
#' Get a single referendum's data
#'
#' Downloads a single referendum's data from the Referendum Database (RDB). See the [`codebook`][codebook] for a detailed description of all variables.
#'
#' @inheritParams rfrnds
#' @param id Referendum's unique [identifier](`r url_codebook("id")`).
#'
#' @inherit rfrnds return
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f")
rfrnd <- function(id,
incl_archive = FALSE,
tidy = TRUE,
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_string(id,
min.chars = 1L)
checkmate::assert_flag(incl_archive)
checkmate::assert_flag(tidy)
# retrieve data
data <-
httr::RETRY(verb = "GET",
url = url_api("referendums", id,
.use_testing_server = use_testing_server),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
times = 3L) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# extract JSON
httr::content(as = "text",
encoding = "UTF-8") %>%
# ensure body is not empty
assert_content() %>%
# convert JSON to list
# NOTE that we cannot rely on params `simplify*` and `flatten` because of varying field lengths in API result
jsonlite::fromJSON(simplifyVector = FALSE,
simplifyDataFrame = FALSE,
simplifyMatrix = FALSE,
flatten = FALSE) %>%
# ensure no error occured
assert_api_success() %>%
# tidy data
tidy_rfrnds(tidy = tidy)
# exclude `archive` if requested
if (!incl_archive) data %<>% dplyr::select(-any_of("archive"))
# return data
data
}
#' Download file attachment
#'
#' Downloads a file attachment from the Referendum Database (RDB). The necessary `s3_object_key`s identifying individual files are found in the `files` list
#' column returned by [rfrnds()].
#'
#' @inheritParams url_api
#' @param s3_object_key Key uniquely identifying the file in the RDB [Amazon S3 bucket](https://en.wikipedia.org/wiki/Amazon_S3#Design). A character scalar.
#' @param path Path where the downloaded file is written to.
#' @param use_original_filename Whether to save the file attachment using its original filename as uploaded. Note that original filenames are **not unique**,
#' i.e. there are multiple file attachments with the same original filename (but differing content, of course). If `FALSE`, `s3_object_key` is used as
#' filename. Only relevant if `path` is a directory.
#'
#' @return A [response object][httr::response], invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' # get object keys
#' obj_keys <-
#' rdb::rfrnds()$files |>
#' purrr::list_rbind() |>
#' dplyr::filter(!is_deleted) |>
#' _$s3_object_key[1:3]
#'
#' # download them to the current working dir
#' purrr::walk(obj_keys,
#' rdb::download_file_attachment)
#'
#' # and delete them again
#' fs::file_delete(obj_keys)
download_file_attachment <- function(s3_object_key,
path = ".",
use_original_filename = FALSE,
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_string(s3_object_key)
checkmate::assert_atomic(path)
checkmate::assert_flag(use_original_filename)
# TODO: remove this check as soon as [issue #78](https://github.com/zdaarau/c2d-app/issues/78) is resolved
if (isTRUE(use_testing_server)) cli::cli_abort("Accessing file attachments is not yet supported on the testing servers.")
is_dir <- fs::is_dir(path)
if (is_dir) {
checkmate::assert_directory_exists(path,
access = "rw")
} else {
checkmate::assert_path_for_output(path,
overwrite = TRUE)
}
temp_path <- fs::file_temp()
response <- httr::RETRY(verb = "GET",
url = url_api("s3_objects", s3_object_key,
.use_testing_server = use_testing_server),
httr::write_disk(path = temp_path),
times = 3L)
if (is_dir) {
if (use_original_filename) {
final_path <-
response %>%
httr::headers() %$%
`content-disposition` %>%
stringr::str_extract(pattern = "(?<=filename=\").+?(?=\")") %>%
fs::path(path, .)
} else {
final_path <- fs::path(path, s3_object_key)
}
} else {
final_path <- path
}
fs::file_move(path = temp_path,
new_path = final_path)
invisible(response)
}
#' Add new referendums to the RDB
#'
#' Adds new referendum entries to the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @details
#' Note that adding/editing the column `files` is not supported, i.e. it is simply dropped from `data`.
#'
#' @inheritParams url_api
#' @param data The new referendum data. A [tibble][tibble::tbl_df] that in any case must contain the columns
#' `r rfrnd_fields$required_for_additions %>% dplyr::case_match(.x = ., !!!var_names_fms, .default = .) %>% md_link_codebook() %>% pal::as_md_list()`
#'
#' plus the column [`subnational_entity_name`](`r url_codebook("subnational_entity_name")`) for referendums of
#' [`level`](`r url_codebook("subnational_entity_name")`) below `"national"`, and the column [`municipality`](`r url_codebook("municipality")`) for referendums
#' of `level = "local"`,
#'
#' plus any additional [valid][codebook] columns containing the values for the corresponding database fields.
#' @param email The e-mail address of the RDB API user account to be used for authentication. A character scalar.
#' @param password The password of the RDB API user account to be used for authentication. A character scalar.
#' @param quiet Whether or not to print the newly created referendum IDs to console.
#'
#' @return A character vector of newly created referendum IDs.
#' @family rfrnd
#' @export
add_rfrnds <- function(data,
email = pal::pkg_config_val(key = "api_username",
pkg = this_pkg),
password = pal::pkg_config_val(key = "api_password",
pkg = this_pkg),
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg),
quiet = FALSE) {
checkmate::assert_data_frame(data,
min.rows = 1L)
checkmate::assert_flag(quiet)
## ensure forbidden columns are absent
assert_cols_absent(data = data,
type = "add")
## ensure mandatory columns are present
rfrnd_fields$required_for_additions %>%
dplyr::case_match(.x = .,
!!!var_names_fms,
.default = .) %>%
purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))
# drop non-applicable columns (they're supposed to be absent in MongoDB)
data %<>% drop_non_applicable_vars()
## ensure remaining columns are valid
assert_cols_valid(data = data,
type = "add")
# convert data to MongoDB schema
json_items <-
data %>%
# restore MongoDB fields
untidy_rfrnds() %>%
# convert to JSON
purrr::map(jsonlite::toJSON,
auto_unbox = TRUE,
digits = NA)
# add data to the MongoDB via HTTP POST on `/referendums` API endpoint
responses <-
json_items %>%
purrr::map(\(x) {
httr::RETRY(verb = "POST",
url = url_api("referendums",
.use_testing_server = use_testing_server),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
Authorization = paste("Bearer", auth_session(email = email,
password = password,
use_testing_server = use_testing_server))),
body = x,
times = 3L,
httr::content_type_json()) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# extract JSON string
httr::content(as = "text",
encoding = "UTF-8") %>%
# ensure body is not empty
assert_content() %>%
# convert to list
jsonlite::fromJSON(simplifyDataFrame = FALSE,
simplifyMatrix = FALSE)
}) %>%
# ensure no error occured
assert_api_success()
# throw warnings for unsuccessful API calls
purrr::walk2(.x = responses,
.y = seq_along(responses),
.f = ~ if (!is.list(.x) || !isTRUE(nchar(.x$`_id`$`$oid`) > 0L)) {
api_failure(.x,
raw = json_items[[.y]],
prefix = "Failed to add the {.y}. referendum. ")
})
ids_new <- unlist(responses,
use.names = FALSE)
if (!quiet) {
cli::cli_alert_info("New referendum entries created with {.var id}s:")
cli::cli_li(ids_new)
}
invisible(ids_new)
}
#' Edit existing referendums in the RDB
#'
#' Edits existing referendum entries in the API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inherit add_rfrnds details
#'
#' @inheritParams add_rfrnds
#' @param data Updated referendum data. A [tibble][tibble::tbl_df] that must contain an [`id`](`r url_codebook("id")`) column
#' identifying the referendums to be edited plus any additional columns containing the new values to update the corresponding database fields with. Note that
#' due to [current API requirements](https://github.com/zdaarau/c2d-app/issues/50#issuecomment-1222660683), the following columns must always be supplied:
#'
#' ```{r, echo = FALSE, results = "asis"}
#' rfrnd_fields$required_for_edits %>%
#' dplyr::case_match(.x = .,
#' !!!var_names_fms,
#' .default = .) |>
#' setdiff("id") |>
#' md_link_codebook() |>
#' pal::as_md_list() |>
#' cat()
#' ```
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
edit_rfrnds <- function(data,
email = pal::pkg_config_val(key = "api_username",
pkg = this_pkg),
password = pal::pkg_config_val(key = "api_password",
pkg = this_pkg),
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
# ensure `data` is valid
checkmate::assert_data_frame(data,
min.rows = 1L)
## ensure forbidden columns are absent
assert_cols_absent(data = data,
type = "edit")
## ensure mandatory columns are present
rfrnd_fields$required_for_edits %>%
dplyr::case_match(.x = .,
!!!var_names_fms,
.default = .) %>%
c("id") %>%
purrr::walk(~ if (!(.x %in% colnames(data))) cli::cli_abort(paste0("Mandatory column {.var ", .x, "} is missing from {.arg data}.")))
# drop non-applicable columns (they're absent in MongoDB)
data %<>% drop_non_applicable_vars()
## ensure remaining columns are valid
assert_cols_valid(data,
type = "edit")
# convert data to MongoDB schema
ids <- data$id
json_items <-
data %>%
# drop `id`
dplyr::select(-id) %>%
# restore MongoDB fields
untidy_rfrnds() %>%
# convert to JSON
purrr::map(jsonlite::toJSON,
auto_unbox = TRUE,
digits = NA)
# edit data in the MongoDB via HTTP PUT on `/referendums/{id}` API endpoint
responses <- purrr::map2(.x = ids,
.y = json_items,
.f = ~
httr::RETRY(verb = "PUT",
url = url_api("referendums", .x,
.use_testing_server = use_testing_server),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server),
Authorization = paste("Bearer", auth_session(email = email,
password = password,
use_testing_server = use_testing_server))),
body = .y,
times = 3L,
httr::content_type_json()) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# extract JSON string
httr::content(as = "text",
encoding = "UTF-8") %>%
# ensure body is not empty
assert_content())
# throw warnings for unsuccessful API calls
purrr::walk(.x = seq_along(ids),
.f = ~ {
parsed <- jsonlite::fromJSON(responses[[.x]])
if (!isTRUE(parsed$ok)) {
api_failure(parsed,
raw = json_items[[.x]],
prefix = paste0("Failed to edit referendum with {.var id} {.val ", ids[.x], "}. "))
}
})
invisible(data)
}
#' Delete referendums in the RDB
#'
#' Deletes existing referendum entries in the Referendum Database (RDB) via [its
#' API](https://github.com/zdaarau/c2d-app/blob/master/docs/services.md#3-referendum-routes).
#'
#' @inheritParams add_rfrnds
#' @param ids IDs of the referendums to be deleted. A character vector.
#'
#' @return `ids`, invisibly.
#' @family rfrnd
#' @export
delete_rfrnds <- function(ids,
email = pal::pkg_config_val(key = "api_username",
pkg = this_pkg),
password = pal::pkg_config_val(key = "api_password",
pkg = this_pkg),
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_character(ids,
min.chars = 1L,
any.missing = FALSE,
unique = TRUE)
# TODO: remove this as soon as https://github.com/zdaarau/c2d-app/issues/45 is deployed to master
if (!use_testing_server) {
cli::cli_abort("Referendum deletions are not yet supported on the production servers.")
}
responses <- purrr::map(.x = ids,
.f = ~
httr::RETRY(verb = "DELETE",
url = url_api("referendums", .x,
.use_testing_server = use_testing_server),
config = httr::add_headers(Authorization = paste("Bearer", auth_session(email = email,
password = password,
use_testing_server = use_testing_server))),
times = 3L) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# extract JSON string
httr::content(as = "text",
encoding = "UTF-8") %>%
# ensure body is not empty
assert_content())
# throw warnings for unsuccessful API calls
purrr::walk2(.x = ids,
.y = responses,
.f = ~ {
parsed <- jsonlite::fromJSON(.y)
if (!isTRUE(parsed$ok)) {
api_failure(parsed,
prefix = "Failed to delete referendum with {.var id} {.val {.x}}. ")
}
})
invisible(ids)
}
#' Validate referendum data
#'
#' Performs various data validation steps to ensure there are no errors in the supplied `data`.
#'
#' @param data Referendum data to validate, as returned by [rfrnds()].
#' @param check_applicability_constraint Whether or not to check that no applicability constraints as defined in the [codebook][data_codebook] are violated.
#' @param check_id_sudd_prefix Whether or not to check that all [`id_sudd`](`r url_codebook("id_sudd")`) prefixes are valid.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
validate_rfrnds <- function(data,
check_applicability_constraint = TRUE,
check_id_sudd_prefix = TRUE) {
checkmate::assert_data_frame(data,
min.rows = 1L)
checkmate::assert_subset(colnames(data),
choices = rfrnd_cols_order)
checkmate::assert_flag(check_applicability_constraint)
checkmate::assert_flag(check_id_sudd_prefix)
# check columns
status_msg <- "Checking basic column validity..."
cli_progress_id <- cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
assert_cols_valid(data = data,
type = "validate",
action = cli::cli_alert_warning,
cli_progress_id = cli_progress_id)
# check applicability constraints
if (check_applicability_constraint) {
status_msg <- "Asserting applicability constraints..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
var_names_violated <-
data_codebook %>%
dplyr::filter(variable_name %in% colnames(data)
& !is.na(applicability_constraint)) %$%
purrr::map2_lgl(.x = magrittr::set_names(x = variable_name,
value = variable_name),
.y = applicability_constraint,
.f = ~ {
data %>%
dplyr::filter(!eval(parse(text = .y))) %$%
eval(as.symbol(.x)) %>%
{ is.na(.) | purrr::map_lgl(., is.null) } %>%
all()
}) %>%
magrittr::extract(!.) %>%
names()
n_var_names_violated <- length(var_names_violated)
if (n_var_names_violated) {
cli::cli_progress_done(result = "failed")
cli::cli_alert_warning("Applicability constraints are violated for {n_var_names_violated} variable{?s}:")
paste0("{.var ", var_names_violated, "}") %>%
magrittr::set_names(rep("x",
times = length(.))) %>%
cli::cli_bullets()
first_var_name_violated <- var_names_violated[1L]
cli::cli({
cli::cli_text("\nTo get the applicability constraint of e.g. {.var {first_var_name_violated}}, run:")
cli::cli_text("")
cli::cli_code(c("rdb::data_codebook %>%",
glue::glue(" dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%"),
" applicability_constraint"))
cli::cli_text("")
cli::cli_text("To inspect the entries in violation of the above applicability constraint, run:")
cli::cli_text("")
cli::cli_code(c("data %>%",
glue::glue(" dplyr::filter(rdb::data_codebook %>%\n",
" dplyr::filter(variable_name == \"{first_var_name_violated}\") %$%\n",
" applicability_constraint %>%\n",
" parse(text = .) %>%\n",
" eval() %>%\n",
" magrittr::not()) %>%\n",
" dplyr::select(id, {first_var_name_violated})",
.trim = FALSE)))
})
}
}
# check `id_sudd` prefix if requested
if (check_id_sudd_prefix) {
status_msg <- "Validating `id_sudd` prefixes..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
if (!all(c("country_code", "id_sudd") %in% colnames(data))) {
cli::cli_progress_done(result = "failed")
cli::cli_abort("Columns {.var country_code} and {.var id_sudd} must be present in {.arg data}.")
}
# define allowed exceptions
allowed_exceptions <- tibble::tribble(
~country_code, ~id_sudd_prefix,
# Curacao
"CW", "an",
# Szeklerland, cf. https://sudd.ch/event.php?id=hu042008
"RO", "hu"
)
# assemble target country codes
country_codes <-
data$country_code %>%
as.character() %>%
as.list()
for (country_code in allowed_exceptions$country_code) {
additional_country_codes <-
allowed_exceptions %>%
dplyr::filter(country_code == !!country_code) %$%
id_sudd_prefix %>%
stringr::str_to_upper()
ix_country_codes <-
country_codes %>%
purrr::map_lgl(~ country_code %in% .x) %>%
which()
for (i in ix_country_codes) {
country_codes[[i]] <- unique(c(country_codes[[i]], additional_country_codes))
}
}
# add dummy indicating if target country codes match
# TODO: instead of modifying input data, print cli msg with all relevant info!
data$matches_id_sudd_prefix <-
data$id_sudd %>%
stringr::str_extract(pattern = "^..") %>%
stringr::str_to_upper() %>%
purrr::map2_lgl(.y = country_codes,
.f = ~ .x %in% .y)
data$matches_id_sudd_prefix[is.na(data$id_sudd)] <- NA
}
invisible(data)
}
#' Count number of referendums
#'
#' Counts the number of referendums per [`level`](`r url_codebook("level")`) in the Referendum Database (RDB).
#'
#' @inheritParams assemble_query_filter
#' @inheritParams url_api
#'
#' @return A named list with `level` as names and referendum counts as values.
#' @family rfrnd
#' @export
#'
#' @examples
#' # the whole database (excl. drafts)
#' rdb::count_rfrnds()
#'
#' # only Swiss and Austrian referendums
#' rdb::count_rfrnds(country_code = c("CH", "AT"))
#'
#' # only Swiss referendums created between 2020 and 2021
#' rdb::count_rfrnds(country_code = "CH",
#' date_time_created_min = "2020-01-01",
#' date_time_created_max = "2021-01-01")
count_rfrnds <- function(is_draft = FALSE,
country_code = NULL,
subnational_entity_name = NULL,
municipality = NULL,
level = NULL,
type = NULL,
date_min = NULL,
date_max = NULL,
date_time_created_min = NULL,
date_time_created_max = NULL,
date_time_last_edited_min = NULL,
date_time_last_edited_max = NULL,
query_filter = NULL,
use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
httr::RETRY(verb = "GET",
url = url_api("referendums/stats",
.use_testing_server = use_testing_server),
query = list(filter = assemble_query_filter(country_code = country_code,
subnational_entity_name = subnational_entity_name,
municipality = municipality,
level = level,
type = type,
date_min = date_min,
date_max = date_max,
is_draft = is_draft,
date_time_created_min = date_time_created_min,
date_time_created_max = date_time_created_max,
date_time_last_edited_min = date_time_last_edited_min,
date_time_last_edited_max = date_time_last_edited_max,
query_filter = query_filter)),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = use_testing_server)),
times = 3L) %>%
# ensure we actually got a JSON response
pal::assert_mime_type(mime_type = "application/json",
msg_suffix = mime_error_suffix) %>%
# parse response
httr::content(as = "parsed") %$%
votes %>%
magrittr::set_names(names(.) %>% dplyr::case_match(.x = ., "sub_national" ~ "subnational", .default = .))
}
#' Test if referendum ID exists
#'
#' Tests whether the referendum with the supplied `id` exists or not.
#'
#' @inheritParams rfrnd
#' @inheritParams url_api
#'
#' @return A logical scalar.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd_exists("6303a4cba52c3995043a8c24")
rfrnd_exists <- function(id,
.use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg)) {
checkmate::assert_string(id,
min.chars = 1L)
httr::GET(url = url_api("referendums", id,
.use_testing_server = .use_testing_server),
config = httr::add_headers(Origin = url_admin_portal(.use_testing_server = .use_testing_server))) %>%
httr::http_error() %>%
magrittr::not()
}
#' Assert referendum variables are present
#'
#' Asserts the specified `vars` are present in the supplied referendum `data`. Depending on `vars`, additional integrity checks are performed.
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#' @param vars Names of the variables to check. A character vector.
#'
#' @return `data`, invisibly.
#' @family rfrnd
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |> rdb::assert_vars(vars = "country_code")
#'
#' try(
#' tibble::tibble(country_code = "AN") |> rdb::assert_vars(vars = "country_code")
#' )
assert_vars <- function(data,
vars) {
vars %>% purrr::walk(~ {
msg_suffix <- switch(EXPR = .x,
country_code = " with ISO 3166-1 alpha-2 or ISO 3166-3 alpha-4 codes.",
"")
if (!(.x %in% colnames(data))) {
cli::cli_abort(paste0("{.arg data} must contain a column {.var {.x}}", msg_suffix))
}
# run additional content check
assert_content <- switch(EXPR = .x,
country_code = \(x) {
checkmate::assert_vector(x = x,
.var.name = "data$country_code")
check <- checkmate::check_subset(x = as.character(x),
choices = val_set$country_code)
if (!isTRUE(check)) {
expired_codes <- intersect(as.character(x),
data_iso_3166_3$Alpha_2)
cli::cli_abort(paste0(
"Assertion on {.var data$country_code} failed: ",
ifelse(length(expired_codes),
paste0("The following country codes have been deleted from ISO 3166-1 and were moved to ISO ",
"3166-3 (former countries) instead: {.val {expired_codes}}"),
# escape curly braces from checkmate msg
stringr::str_replace_all(string = check,
pattern = "([\\{\\}])",
replacement = "\\1\\1"))))
}
},
\(x) TRUE)
assert_content(data[[.x]])
})
invisible(data)
}
#' RDB Codebook
#'
#' A tibble containing the complete metadata of all [rfrnds()] variables. The codebook below is also available [online](`r url_codebook()`).
#'
#' # Codebook
#'
#' ```{r, child = "vignettes/codebook.Rmd"}
#' ```
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @aliases codebook
#' @family metadata
#' @export
#'
#' @examples
#' rdb::data_codebook
"data_codebook"
#' Get set of possible *value labels* of referendum data variable
#'
#' Returns a character vector of value labels of a specific [rfrnds()] column, in the same order as [var_vals()], or of length `0` if `var_name`'s values are
#' not restricted to a predefined set or no value labels are defined in the [codebook][data_codebook].
#'
#' @param var_name Variable name present in [`data_codebook`] for which the labels are to be returned. A character scalar.
#' @param incl_affixes Whether or not to add the corresponding `value_label_prefix` and `value_label_suffix` to the returned labels.
#'
#' @return A character vector. Of length `0` if `var_name`'s values are not restricted to a predefined set or no value labels are defined in the
#' [codebook][data_codebook].
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_lbls("result",
#' incl_affixes = FALSE)
#' rdb::val_lbls("result")
#'
#' # Convert the labels to sentence case with trailing dot
#' rdb::val_lbls("result") |> pal::sentenceify()
val_lbls <- function(var_name,
incl_affixes = TRUE) {
var_name <- rlang::arg_match0(arg = var_name,
values = rfrnd_cols_order)
metadata <- data_codebook |> dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name)
result <- metadata$value_labels |> purrr::list_c(ptype = character())
if (incl_affixes) {
if (!is.na(metadata$value_label_prefix)) result <- paste(metadata$value_label_prefix, result)
if (!is.na(metadata$value_label_suffix)) result <- paste(metadata$value_label_suffix, result)
}
result
}
#' Get *value scale* of referendum data variables
#'
#' Returns the value scale of the specified [rfrnds()] columns.
#'
#' @param var_names Variable name(s) present in [`data_codebook`] for which the value scale is to be returned. A character vector.
#'
#' @return A character scalar.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::val_scale("level")
#' paste0("topics_tier_", 1:3) |> rdb::val_scale()
val_scale <- function(var_names) {
var_name <- rlang::arg_match(arg = var_names,
values = rfrnd_cols_order,
multiple = TRUE)
c(data_codebook$value_scale,
data_codebook$value_scale)[match(x = var_names,
table = c(data_codebook$variable_name,
data_codebook$variable_name_unnested))]
}
#' Get set of possible *values* of referendum data variable
#'
#' Returns a vector of the possible predefined values a specific column in [rfrnds()] can hold. If the variable values aren't restricted to a predefined
#' set, `NULL` is returned.
#'
#' @param var_name Variable name present in [`data_codebook`]. A character scalar.
#'
#' @return
#' If `var_name`'s values are restricted to a predefined set and
#' - `var_name` is *not* of type list, a vector of the same type as `var_name`.
#' - `var_name` is of type list, a vector of the same type as the elements of `var_name`.
#'
#' Else `NULL`.
#' @family metadata
#' @export
#'
#' @examples
#' rdb::var_vals("result")
#' rdb::var_vals("id")
var_vals <- function(var_name) {
var_name <- rlang::arg_match0(arg = var_name,
values = rfrnd_cols_order)
data_codebook |>
dplyr::filter(variable_name == !!var_name | variable_name_unnested == !!var_name) %$%
variable_values |>
unlist()
}
#' Get unnested variable names
#'
#' Returns the unnested analogue(s) of the specified variable name(s), which result from [unnesting][unnest_var]. For variable names that do *not* refer to
#' nested list columns, `var_names` is simply returned as-is.
#'
#' @inheritParams prettify_var_names
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family unnest
#' @export
#'
#' @examples
#' rdb::var_name_unnested("inst_object_revision_modes")
#' rdb::var_name_unnested(paste0("topics_tier_", 1:3))
var_name_unnested <- function(var_names) {
var_names <- rlang::arg_match(arg = var_names,
values = data_codebook$variable_name,
multiple = TRUE)
data_codebook |>
dplyr::filter(variable_name %in% !!var_names) %$%
variable_name_unnested
}
#' Prettify referendum data variable names
#'
#' Converts referendum data variable names to their ready-for-publication version. Variable names that are unknown, i.e. not present in [`data_codebook`]), are
#' left untouched.
#'
#' @param var_names Variable name(s). Those not present in [`data_codebook`] remain untouched. A character vector.
#'
#' @return A character vector of the same length as `var_names`.
#' @family metadata
#' @family prettify
#' @export
#'
#' @examples
#' rdb::prettify_var_names("topics_tier_1")
#'
#' # also supports unnested var names
#' rdb::prettify_var_names("topic_tier_1")
#'
#' # unknown var names are left untouched
#' rdb::prettify_var_names(var_names = c("topic_tier_1", "topic_tier_99"))
prettify_var_names <- function(var_names) {
checkmate::assert_character(var_names)
c(data_codebook$variable_name_print,
data_codebook$variable_name_unnested_print)[match(x = var_names,
table = c(data_codebook$variable_name,
data_codebook$variable_name_unnested))] %|% var_names
}
#' Topic hierarchy
#'
#' A tibble reflecting the complete [referendum topics hierarchy](`r url_codebook("topics")`).
#'
#' @format `r pkgsnip::return_lbl("tibble")`
#' @family topics
#' @export
#'
#' @examples
#' rdb::data_topics
"data_topics"
#' List available topics
#'
#' Lists the set of available [referendum topics](`r url_codebook("topics")`) on the specified `tiers`.
#'
#' @param tiers Tiers to include topics from. An integerish vector.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::topics(tiers = 1:2)
topics <- function(tiers = 1:3) {
checkmate::assert_integerish(tiers,
lower = 1L,
upper = 3L,
any.missing = FALSE,
unique = TRUE)
topic_set <- character()
if (1L %in% tiers) {
topic_set %<>% c(data_topics$topic_tier_1)
}
if (2L %in% tiers) {
topic_set %<>% c(data_topics$topic_tier_2)
}
if (3L %in% tiers) {
topic_set %<>% c(data_topics$topic_tier_3)
}
topic_set %>%
setdiff(NA_character_) %>%
unique()
}
#' Hierarchize topics
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Can also be used to simply
#' determine the parent topic(s) of any topic.
#'
#' @param x The topics to hierarchize. Either a character vector of topics or a single-row data frame containing at least the columns `topics_tier_1`,
#' `topics_tier_2` and `topics_tier_3`.
#'
#' @return A [tibble][tibble::tbl_df] with the columns `topic_tier_1`, `topic_tier_2` and `topic_tier_3`.
#' @family topics
#' @export
#'
#' @examples
#' rdb::hierarchize_topics("territorial questions")
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' dplyr::filter(country_code == "AT") |>
#' dplyr::group_split(id) |>
#' purrr::map(rdb::hierarchize_topics)
hierarchize_topics <- function(x) {
test_char <- checkmate::test_character(x, any.missing = FALSE)
if (!test_char) {
topic_var_names <- paste0("topics_tier_", 1:3)
test_df <- checkmate::test_data_frame(x,
min.rows = 1L,
max.rows = 1L)
has_topic_vars <- all(topic_var_names %in% colnames(x))
if (!test_df || !has_topic_vars) {
cli::cli_abort(paste0("{.arg x} must be either a character vector of topics or a single-row data frame containing at least the columns ",
"{.field topics_tier_1}, {.field topics_tier_2} and {.field topics_tier_3}."))
}
x <- unlist(x[, topic_var_names],
use.names = FALSE)
}
checkmate::assert_subset(x,
choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_),
empty.ok = TRUE)
topics_tier_1 <- x[x %in% topics_tier_1_]
topics_tier_2 <- x[x %in% topics_tier_2_]
topics_tier_3 <- x[x %in% topics_tier_3_]
inferred_topics_tier_1 <- infer_topics(topics = c(topics_tier_2, topics_tier_3),
tier = 1L)
inferred_topics_tier_2 <- infer_topics(topics = topics_tier_3,
tier = 2L)
non_parent_topics_tier_1 <- setdiff(topics_tier_1, inferred_topics_tier_1)
non_parent_topics_tier_2 <- setdiff(topics_tier_2, inferred_topics_tier_2)
# 0. initialize empty tibble
result <- tibble::tibble(topic_tier_1 = character(),
topic_tier_2 = character(),
topic_tier_3 = character())
# 1. add third-tier topics
result <-
topics_tier_3 %>%
purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
tier = 1L),
topic_tier_2 = infer_topics(topics = .x,
tier = 2L),
topic_tier_3 = .x)) %>%
purrr::list_rbind() %>%
dplyr::bind_rows(result)
# 2. add remaining second-tier topics
result <-
non_parent_topics_tier_2 %>%
purrr::map(~ tibble::tibble(topic_tier_1 = infer_topics(topics = .x,
tier = 1L),
topic_tier_2 = .x,
topic_tier_3 = NA_character_)) %>%
purrr::list_rbind() %>%
dplyr::bind_rows(result)
# 3. add remaining first-tier topics
result %>%
dplyr::bind_rows(tibble::tibble(topic_tier_1 = non_parent_topics_tier_1,
topic_tier_2 = NA_character_,
topic_tier_3 = NA_character_)) %>%
# sort result
dplyr::arrange(topic_tier_1, topic_tier_2, topic_tier_3)
}
#' Hierarchize topics (fast)
#'
#' Reconstructs the hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3`. Other than
#' [hierarchize_topics()], this function assumes that the three topic variables are always *complete*, i.e. that no (grand)parent topics of lower-tier topics
#' are missing. This assumption is met by [rfrnds()] and [rfrnd()].
#'
#' @param topics_tier_1 First-tier topics. A character vector.
#' @param topics_tier_2 Second-tier topics. A character vector.
#' @param topics_tier_3 Third-tier topics. A character vector.
#'
#' @inherit hierarchize_topics return
#' @family topics
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") %$%
#' rdb::hierarchize_topics_fast(unlist(topics_tier_1),
#' unlist(topics_tier_2),
#' unlist(topics_tier_3))
#'
#' # hierarchize the topics of all Austrian referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' dplyr::filter(country_code == "AT") |>
#' dplyr::group_split(id) |>
#' purrr::map(~ rdb::hierarchize_topics_fast(unlist(.x$topics_tier_1),
#' unlist(.x$topics_tier_2),
#' unlist(.x$topics_tier_3)))
hierarchize_topics_fast <- function(topics_tier_1 = character(),
topics_tier_2 = character(),
topics_tier_3 = character()) {
checkmate::assert_subset(topics_tier_1,
choices = topics_tier_1_)
checkmate::assert_subset(topics_tier_2,
choices = topics_tier_2_)
checkmate::assert_subset(topics_tier_3,
choices = topics_tier_3_)
# add tier-3 hierarchy
result <- data_topics[data_topics$topic_tier_3 %in% topics_tier_3, ]
# add non-parent tier-2 hierarchy
topics_tier_2 %<>% setdiff(result$topic_tier_2)
result %<>% dplyr::bind_rows(unique(data_topics[data_topics$topic_tier_2 %in% topics_tier_2, 1:2]))
# add non-parent tier-1 topics
topics_tier_1 %<>% setdiff(result$topic_tier_1)
result %>% dplyr::bind_rows(tibble::tibble(topic_tier_1 = topics_tier_1))
}
#' Infer higher-tier topics
#'
#' Determines the top-tier (`tier = 1L`) or second-tier (`tier = 2L`) topics corresponding to `topics` in the
#' [hierarchy][data_topics], i.e. either `topics` themselves or their (grand)parent topics.
#'
#' @param topics Topics from which the corresponding (grand)parent topics are to be determined. A factor or character vector.
#' @param tier Tier of the inferred topics. Either `1L` or `2L`.
#'
#' @return A character vector.
#' @family topics
#' @export
#'
#' @examples
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#' tier = 1L)
#' rdb::infer_topics(topics = c("EU", "animal protection"),
#' tier = 2L)
#'
#' # topics of different tiers can mixed in `topics`
#' rdb::infer_topics(topics = c("EU", "environment"),
#' tier = 2L)
#'
#' # but `topics` of a higher tier than `tier` will be ignored
#' rdb::infer_topics(topics = "foreign policy",
#' tier = 2L)
infer_topics <- function(topics,
tier = 1L) {
if (is.factor(topics)) topics <- as.character(topics)
checkmate::assert_subset(topics,
choices = c(topics_tier_1_, topics_tier_2_, topics_tier_3_))
checkmate::assert_int(tier,
lower = 1L,
upper = 2L)
# inferred from lower-tier topics
result <- data_topics[data_topics$topic_tier_2 %in% topics | data_topics$topic_tier_3 %in% topics, ]
result %<>% .[[paste0("topic_tier_", tier)]]
# plus top-tier topics
if (tier == 1L) result %<>% c(topics[topics %in% topics_tier_1_])
unique(result)
}
#' Add `is_former_country` flag to referendum data
#'
#' Augments `data` with an additional column `is_former_country` indicating whether or not the column `country_code` holds an [ISO 3166-3
#' alpha-4 code](https://en.wikipedia.org/wiki/ISO_3166-3) referring to a historical country which ceased to exist. `is_former_country` being `FALSE` means
#' `country_code` holds an [ISO 3166-1 alpha-2 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) instead.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds() |>
#' rdb:::add_former_country_flag() |>
#' dplyr::select(id,
#' starts_with("country_"),
#' is_former_country)
add_former_country_flag <- function(data) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
data %>%
dplyr::mutate(is_former_country = nchar(as.character(country_code)) > 2L) %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls["is_former_country"])
}
#' Add continual country code to referendum data
#'
#' Augments `data` with an additional column `country_code_continual` holding the current or future [ISO 3166-1
#' alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code of the country where the referendum took place. If the country still exists,
#' `country_code_continual` is identical to `country_code`, otherwise it is the `country_code` of the successor country. If the country was succeeded by
#' multiple countries, the code of the largest one in terms of population is taken.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_country_code_continual() |>
#' dplyr::select(id,
#' starts_with("country_"))
add_country_code_continual <- function(data) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
data %>%
dplyr::mutate(country_code_continual = factor(x = purrr::map2_chr(.x = as.character(country_code),
.y = add_former_country_flag(data)$is_former_country,
.f = ~ {
if (.y) {
data_iso_3166_3$Alpha_2_new_main[data_iso_3166_3$Alpha_4 == .x]
} else {
.x
}
}),
levels = val_set$country_code_continual,
ordered = FALSE)) %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls["country_code_continual"])
}
#' Add long country code to referendum data
#'
#' Augments `data` with an additional column holding the current or former three-letter [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)
#' code of the country in which the referendum took place (see [ISO 3166-3](https://en.wikipedia.org/wiki/ISO_3166-3_alpha-3) for former country codes).
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb:::add_country_code_long() |>
#' dplyr::select(id,
#' starts_with("country_"))
add_country_code_long <- function(data) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
data %>%
# remove possibly existing long country code
dplyr::select(-any_of("country_code_long")) %>%
# add long country code
dplyr::mutate(country_code_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
.y = add_former_country_flag(data)$is_former_country,
.f = ~ if (.y) {
data_iso_3166_3$Alpha_3[data_iso_3166_3$Alpha_4 == .x]
} else {
data_iso_3166_1$Alpha_3[data_iso_3166_1$Alpha_2 == .x]
}),
levels = val_set$country_code_long,
ordered = FALSE)) %>%
# ensure no NAs
assertr::assert(predicate = assertr::not_na,
country_code_long) %>%
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls["country_code_long"])
}
#' Add short country name to referendum data
#'
#' Augments `data` with an additional column holding the common English name of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb:::add_country_name() |>
#' dplyr::select(id,
#' starts_with("country_"))
add_country_name <- function(data) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
data %>%
# remove possibly existing country name
dplyr::select(-any_of("country_name")) %>%
# add country name
dplyr::mutate(country_name = factor(x = purrr::map2_chr(.x = as.character(country_code),
.y = add_former_country_flag(data)$is_former_country,
.f = ~ if (.y) {
data_iso_3166_3$name_short[data_iso_3166_3$Alpha_4 == .x]
} else {
data_iso_3166_1$name_short[data_iso_3166_1$Alpha_2 == .x]
}),
levels = val_set$country_name,
ordered = FALSE)) %>%
# ensure no NAs
assertr::assert(predicate = assertr::not_na,
country_name) %>%
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls["country_name"])
}
#' Add long country name to referendum data
#'
#' Augments `data` with an additional column holding the official full English name(s) of the country in which the referendum took place.
#'
#' @inheritParams add_world_regions
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb:::add_country_name_long() |>
#' dplyr::select(id,
#' starts_with("country_name"))
add_country_name_long <- function(data) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
data %>%
# remove possibly existing long country name
dplyr::select(-any_of("country_name_long")) %>%
# add long country name
dplyr::mutate(country_name_long = factor(x = purrr::map2_chr(.x = as.character(country_code),
.y = add_former_country_flag(data)$is_former_country,
.f = ~ if (.y) {
data_iso_3166_3$name_long[data_iso_3166_3$Alpha_4 == .x]
} else {
data_iso_3166_1$name_long[data_iso_3166_1$Alpha_2 == .x]
}),
levels = val_set$country_name_long,
ordered = FALSE)) %>%
# ensure no NAs
assertr::assert(predicate = assertr::not_na,
country_name_long) %>%
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls["country_name_long"])
}
#' Add period to referendum data
#'
#' Augments `data` with an additional column holding the specified period in which the referendum took place. The new column is named after `period` and its
#' values are always of type integer.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param period Type of period to add. One of
#' `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_period() |>
#' dplyr::select(id, date, week)
#'
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_period("year") |>
#' dplyr::select(id, date, year)
add_period <- function(data,
period = c("week", "month", "quarter", "year", "decade", "century")) {
checkmate::assert_data_frame(data)
period <- rlang::arg_match(period)
assert_vars(data = data,
vars = "date")
# define necessary date transformations
get_period <- switch(EXPR = period,
week = function(x) clock::as_iso_year_week_day(x) %>% clock::get_week(),
month = function(x) clock::get_month(x),
quarter = function(x) clock::as_year_quarter_day(x) %>% clock::get_quarter(),
year = function(x) clock::get_year(x),
decade = function(x) (clock::get_year(x) %/% 10L) * 10L,
century = function(x) (clock::get_year(x) %/% 100L) * 100L)
# define lbl parts
period_lbl <- switch(EXPR = period,
week = glue::glue("{period} (1\u201353)"),
month = glue::glue("{period} (1\u201312)"),
quarter = glue::glue("{period} (1\u20134)"),
period)
data %>%
# add period
dplyr::mutate(!!as.symbol(period) := get_period(date)) %>%
# harmonize col order
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(.labels = var_lbls[period])
}
#' Add turnout to referendum data
#'
#' @description
#' Augments `data` with an additional column `turnout` containing the voter turnout calculated as:
#'
#' \Sexpr[results=rd, stage=build]{
#' katex::math_to_rd(tex = "\\\\frac{votes\\\\_yes+votes\\\\_no+votes\\\\_empty+votes\\\\_invalid}{electorate\\\\_total}",
#' ascii = "(votes_yes + votes_no + votes_empty + votes_invalid) / electorate_total",
#' displayMode = TRUE)
#' }
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `electorate_total`, `votes_yes`, `votes_no`,
#' `votes_empty` and `votes_invalid`.
#' @param rough Whether to fall back on a "rough" calculation of the turnout in case any of the variables `votes_empty` or `votes_invalid` is unknown (`NA`), or
#' to be strict and return `NA` in such a case.
#' @param excl_dubious Whether or not to exclude obviously dubious turnout numbers (those > 1.0) by setting them to `NA`. Such numbers stem either from
#' data errors or (officially) tampered numbers.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' # rough turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_turnout() |>
#' dplyr::select(id,
#' electorate_total,
#' starts_with("votes_"),
#' turnout)
#'
#' # strict turnout numbers
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_turnout(rough = FALSE) |>
#' dplyr::select(id,
#' electorate_total,
#' starts_with("votes_"),
#' turnout)
add_turnout <- function(data,
rough = TRUE,
excl_dubious = TRUE) {
checkmate::assert_data_frame(data)
checkmate::assert_flag(rough)
checkmate::assert_flag(excl_dubious)
assert_vars(data = data,
vars = c("electorate_total",
"votes_yes",
"votes_no",
"votes_empty",
"votes_invalid"))
data %>%
dplyr::rowwise() %>%
dplyr::mutate(turnout = sum(votes_yes, votes_no, votes_empty, votes_invalid, na.rm = rough) / electorate_total) %>%
dplyr::ungroup() %>%
# set dubious turnout numbers to NA if requested
dplyr::mutate(turnout = dplyr::if_else(excl_dubious & turnout > 1.0,
NA_real_,
turnout)) %>%
# harmonize col order
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(turnout = var_lbls[["turnout"]] %>% ifelse(test = rough,
yes = stringr::str_replace(string = .,
pattern = stringr::fixed("turnout"),
replacement = "turnout (rough)"),
no = .))
}
#' Add UN world regions to referendum data
#'
#' @description
#' Augments `data` with information about the [United Nations (UN) geoscheme](https://en.wikipedia.org/wiki/United_Nations_geoscheme) on three different
#' grouping tiers based on the [UN M49 area code hierarchy](https://en.wikipedia.org/wiki/UN_M49#Code_lists).
#'
#' In total, eight different columns are added:
#' - `un_country_code`: UN M49 country code
#' - `un_region_tier_1_code`: UN tier-1 region's M49 area code
#' - `un_region_tier_1_name`: UN tier-1 region's English name
#' - `un_region_tier_2_code`: UN tier-2 region's M49 area code
#' - `un_region_tier_2_name`: UN tier-2 region's English name
#' - `un_region_tier_3_code`: UN tier-3 region's M49 area code
#' - `un_region_tier_3_name`: UN tier-3 region's English name
#' - `un_subregion`: Combinatiorial English UN subregion name which, except for Northern Europe, corresponds to the lowest `un_region_tier_*_name`.
#'
#' Tier-1 regions are the highest, i.e. most aggregated UN regions, commonly referred to as continents. Tier-2 regions are also known as "subregions" and tier-3
#' regions as "sub-subregions".
#'
#' Only part of all UN tier-2 regions are further divided into UN tier-3 regions, meaning that not all countries are part of a UN tier-3 region. If a country
#' doesn't belong to any UN tier-3 region, the corresponding `un_region_tier_3_*` values will simply be `NA`. The `un_subregion` column specifically addresses
#' this issue by providing a uniform combination of `un_region_tier_2_name` and `un_region_tier_3_name`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `country_code` (with [ISO 3166-1
#' alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) or [ISO 3166-3 alpha-4](https://en.wikipedia.org/wiki/ISO_3166-3) codes).
#' @param add_un_country_code Whether or not to also add a column `un_country_code` holding the UN M49 code of the country in which the referendum took place.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbbe26a92a21351232dd73f") |>
#' rdb::add_world_regions() |>
#' dplyr::select(id,
#' starts_with("country_"),
#' starts_with("un_"))
add_world_regions <- function(data,
add_un_country_code = TRUE) {
# ensure minimal validity
checkmate::assert_data_frame(data)
assert_vars(data = data,
vars = "country_code")
checkmate::assert_flag(add_un_country_code)
has_country_code_continual <- "country_code_continual" %in% colnames(data)
# add UN regions to input data
data %<>%
# temporarily add required base var `country_code_continual` if necessary
add_country_code_continual() %>%
# remove possibly existing UN region vars
dplyr::select(-any_of(setdiff(colnames(un_regions),
"country_code"))) %>%
# add UN regions
dplyr::left_join(y = un_regions,
by = c(country_code_continual = "country_code")) %>%
# ensure every row got at least a UN tier-1 region assigned
assertr::assert(predicate = assertr::not_na,
un_region_tier_1_code) %>%
# harmonize col order
order_rfrnd_cols() %>%
# add var lbl
labelled::set_variable_labels(.labels = purrr::keep_at(x = var_lbls,
at = c("un_country_code",
"un_region_tier_1_code",
"un_region_tier_1_name",
"un_region_tier_2_code",
"un_region_tier_2_name",
"un_region_tier_3_code",
"un_region_tier_3_name",
"un_subregion")))
# drop vars if necessary/requested
if (!has_country_code_continual) {
data %<>% dplyr::select(-country_code_continual)
}
if (!add_un_country_code) {
data %<>% dplyr::select(-un_country_code)
}
data
}
#' Add various URLs to referendum data
#'
#' Augments `data` with additional columns holding URLs of the specified `types`. The new columns will be named after `types`, prefixed with `url_`, so
#' `types = "sudd"` will add the column `url_sudd` etc.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `id_sudd` for `types = "sudd"` and the columns
#' `country_code`, `level` and `id_official` for `types = "swissvotes"`.
#' @param types Type(s) of URLs to add. One or more of
#' `r pal::fn_param_defaults(fn = add_urls, param = "types") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family augment
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' dplyr::filter(country_code == "CH" & level == "national") |>
#' rdb::add_urls() |>
#' dplyr::select(id,
#' country_code,
#' level,
#' starts_with("id_"),
#' starts_with("url_"))
add_urls <- function(data,
types = c("sudd", "swissvotes")) {
checkmate::assert_data_frame(data)
types <- rlang::arg_match(arg = types,
multiple = TRUE)
if ("sudd" %in% types) {
assert_vars(data = data,
vars = "id_sudd")
data %<>% dplyr::mutate(url_sudd = dplyr::if_else(is.na(id_sudd),
NA_character_,
url_sudd(glue::glue("event.php?id={id_sudd}"))))
}
if ("swissvotes" %in% types) {
assert_vars(data = data,
vars = c("country_code",
"level",
"id_official"))
data %<>% dplyr::mutate(url_swissvotes = dplyr::if_else(country_code == "CH" & level == "national" & !is.na(id_official),
paste0("https://swissvotes.ch/vote/", id_official), # nolint: paste_linter
NA_character_))
}
data
}
#' Transform to ballot-date-level observations
#'
#' Transforms referendum-level observations to ones on the level of ballot date and jurisdiction via [nesting][tidyr::nest] of referendum-level columns. The
#' individual values of all the referendums on a specific ballot date in a specific jurisdiction are preserved in a list column named `rfrnd_data`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `date`.
#' @param cols_to_retain Additional non-standard columns to be preserved as top-level columns instead of being nested in the list column `rfrnd_data`. They
#' mustn't vary within ballot-date-level observations. `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' # standard RDB columns are retained as far as possible
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::as_ballot_dates()
#'
#' # non-standard columns must be explicitly specified in order to be retained
#' data_rdb <-
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::add_world_regions() |>
#' dplyr::mutate(region_custom =
#' factor(x = dplyr::if_else(country_code == "CH",
#' "Switzerland & Liechtenstein",
#' un_region_tier_1_name),
#' levels = c("Switzerland & Liechtenstein",
#' levels(un_region_tier_1_name))) |>
#' forcats::fct_relevel("Switzerland & Liechtenstein",
#' after = 3L) |>
#' forcats::fct_recode("rest of Europe" = "Europe"))
#'
#' data_rdb |> rdb::as_ballot_dates() |> colnames()
#' data_rdb |> rdb::as_ballot_dates(cols_to_retain = region_custom) |> colnames()
#'
#' # non-standard columns to retain must actually be retainable
#' try(
#' data_rdb |> rdb::as_ballot_dates(cols_to_retain = title_en)
#' )
as_ballot_dates <- function(data,
cols_to_retain = NULL) {
checkmate::assert_data_frame(data)
defused_cols_to_retain <- rlang::enquo(cols_to_retain)
ix_cols_to_retain <- tidyselect::eval_select(expr = defused_cols_to_retain,
data = data)
names_cols_to_retain <- names(ix_cols_to_retain)
# ensure date col is present
if (!("date" %in% colnames(data))) {
cli::cli_abort("Unable to transform to ballot-date-level data since no {.var {date}} column is present in {.arg data}.")
}
# nest data
cols_to_nest <-
data |>
colnames() |>
setdiff(c(ballot_date_colnames,
names_cols_to_retain))
result <- data |> tidyr::nest(rfrnd_data = any_of(cols_to_nest))
# ensure `cols_to_retain` don't vary within ballot dates
n_rows_nested <-
data |>
dplyr::summarise(n = dplyr::n(),
.by = any_of(ballot_date_colnames)) %$%
n
if (!identical(purrr::map_int(result$rfrnd_data,
nrow),
n_rows_nested)) {
cli::cli_abort(paste0("Retaining the additional non-standard {cli::qty(length(ix_cols_to_retain))} column{?s} {.var {names_cols_to_retain}} while ",
"converting to ballot-date-level observations is impossible because {?(some of)} {?this/these} column{?s} var{?ies/y} within ballot ",
"dates."))
}
result
}
#' Unnest multi-value variable
#'
#' Unnests a multi-value variable of type list to long format. Multi-value variables can contain more than one value per observation and thus break with the
#' [tidy-data convention](https://tidyr.tidyverse.org/articles/tidy-data.html). This function allows to conveniently expand `data` to contain a single `var`
#' value per observation only, thereby increasing the number of observations (i.e. rows).
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `var`.
#' @param var `data` column to unnest. One of the multi-value variables:
#' `r data_codebook |> dplyr::filter(is_multi_valued) %$% variable_name |> pal::wrap_chr(wrap = "\x60") |> pal::as_md_list()`
#'
#' `r pkgsnip::param_lbl("tidy_select_support")`
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family unnest
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::unnest_var(topics_tier_2)
unnest_var <- function(data,
var) {
# tidy selection and arg check
checkmate::assert_data_frame(data)
defused_var <- rlang::enquo(var)
i_var <- tidyselect::eval_select(expr = defused_var,
data = data)
name_var <- names(i_var)
n_var <- length(i_var)
if (n_var > 1L) {
cli::cli_abort("Only {.emph one} {.arg var} can be unnested at a time, but {.val {n_var}} were provided.")
}
name_var <- rlang::arg_match0(arg = name_var,
arg_nm = "var",
values =
data_codebook |>
dplyr::filter(is_multi_valued) %$%
variable_name)
name_var_unnested <- var_name_unnested(name_var)
data |>
tidyr::unnest_longer(col = all_of(name_var),
values_to = name_var_unnested,
keep_empty = TRUE,
ptype = character()) |>
dplyr::mutate(!!as.symbol(name_var_unnested) := factor(x = !!as.symbol(name_var_unnested),
levels = var_vals(name_var),
ordered = val_scale(name_var) %in% c("ordinal_ascending", "ordinal_descending")))
}
#' Count number of referendums
#'
#' Counts the number of RDB referendums, optionally by additional columns specified via `by`.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns specified in `by` (if any).
#' @param by Optional `data` column(s) to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param complete_fcts Whether or not to complete the result with implicitly missing combinations of those columns specified in `by` which are of type factor.
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::n_rfrnds(by = level)
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::as_ballot_dates() |>
#' rdb::n_rfrnds(by = level)
n_rfrnds <- function(data,
by = NULL,
complete_fcts = TRUE) {
# arg checks
checkmate::assert_data_frame(data)
checkmate::assert_flag(complete_fcts)
# tidy selection
defused_by <- rlang::enquo(by)
ix_by <- tidyselect::eval_select(expr = defused_by,
data = data)
names_by <- names(ix_by)
result <-
data |>
dplyr::group_by(!!!rlang::syms(names_by)) |>
dplyr::summarise(n = dplyr::n(),
.groups = "drop")
if (complete_fcts) {
result %<>% tidyr::complete(!!!rlang::syms(names_by),
fill = list(n = 0L))
}
result
}
#' Count number of referendums per period
#'
#' Counts the number of RDB referendums per desired period, optionally by additional columns specified via `by`.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @inheritParams n_rfrnds
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#' compute the [period column][add_period]), plus the one(s) specified via `by` (if any).
#' @param period Type of period to count referendums by. One of
#' `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param fill_gaps Whether or not to add zero-value rows to the result for `period` gaps in `data`.
#' @param period_floor Lower `period` limit up to which gaps are filled. If `NULL`, the lower limit is set to the minimum of `period` present in `data`. Only
#' relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param period_ceiling Upper `period` limit up to which gaps are filled. If `NULL`, the upper limit is set to the maximum of `period` present in `data`. Only
#' relevant if `fill_gaps = TRUE` and `period` is set to a unique timespan type (`"year"`, `"decade"` or `"century"`).
#' @param descending Whether to sort the resulting table by `period` in descending or in ascending order.
#'
#' @inherit add_period details
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::n_rfrnds_per_period()
#'
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::n_rfrnds_per_period(by = level)
#'
#' # without filling gaps
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::n_rfrnds_per_period(by = level,
#' fill_gaps = FALSE)
#'
#' # per decade and by multiple columns
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::n_rfrnds_per_period(by = c(level, type),
#' period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::as_ballot_dates() |>
#' rdb::n_rfrnds_per_period()
n_rfrnds_per_period <- function(data,
by = NULL,
period = c("week", "month", "quarter", "year", "decade", "century"),
fill_gaps = TRUE,
period_floor = NULL,
period_ceiling = NULL,
descending = FALSE) {
# arg checks
checkmate::assert_data_frame(data)
period <- rlang::arg_match(period)
checkmate::assert_flag(fill_gaps)
checkmate::assert_int(period_floor,
null.ok = TRUE)
checkmate::assert_int(period_ceiling,
null.ok = TRUE)
checkmate::assert_flag(descending)
# tidy selection
defused_by <- rlang::enquo(by)
ix_by <- tidyselect::eval_select(expr = defused_by,
data = data)
names_by <- names(ix_by)
# add period col if necessary
if (!(period %in% colnames(data))) {
data %<>% add_period(period = period)
}
result <-
data |>
dplyr::group_by(!!!rlang::syms(names_by), !!as.symbol(period)) |>
dplyr::summarise(n = dplyr::n(),
.groups = "drop")
# fill gaps
# (only if input data (and thus result) is non-empty since otherwise we can't infer a sensible period range for year/decade/century)
if (fill_gaps && nrow(result)) {
# define sensible min/max period vals
is_recurring_period <- period %in% c("week", "month", "quarter")
period_step <- switch(EXPR = period,
century = 100L,
decade = 10L,
1L)
period_min <- period |> pal::when(is.null(period_floor) && !is_recurring_period ~ pal::safe_min(data[[.]]),
!is_recurring_period ~ period_floor,
~ 1L)
period_max <- period |> pal::when(is.null(period_ceiling) && !is_recurring_period ~ pal::safe_max(data[[.]]),
!is_recurring_period ~ period_ceiling,
. == "week" ~ 53L,
. == "month" ~ 12L,
. == "quarter" ~ 4L)
period_seq <- seq(from = (period_min %/% period_step) * period_step,
to = period_max,
by = period_step)
result %<>%
# reduce to results `>= period_floor` and `<= period_ceiling`
dplyr::filter(!!as.symbol(period) %in% period_seq) %>%
# convert period col to fct, so `tidyr::complete()` knows the missing vals
dplyr::mutate(!!as.symbol(period) := factor(x = !!as.symbol(period),
levels = period_seq,
ordered = TRUE)) %>%
tidyr::complete(!!!rlang::syms(names_by), !!as.symbol(period),
fill = list(n = 0L)) %>%
# convert period col back to int
dplyr::mutate(!!as.symbol(period) := as.integer(as.character(!!as.symbol(period))))
}
result |> dplyr::arrange(if (descending) dplyr::desc(!!as.symbol(period)) else !!as.symbol(period))
}
#' Prettify referendum data column names
#'
#' Renames referendum data column names to be ready for publication. Useful e.g. to create tables or visualizations.
#'
#' Note that
#' - column names unknown to this function are not changed.
#' - column *labels* are [removed][labelled::remove_var_label] so they aren't inadvertently used instead of the column *names* (i.a. relevant for [gt][gt::gt]
#' [>= 0.9.0](https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0)).
#'
#' @param data RDB referendum data as returned by [rfrnds()].
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family transform
#' @family prettify
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::prettify_col_names()
prettify_col_names <- function(data) {
data |>
dplyr::rename_with(.cols = everything(),
.fn = prettify_var_names) |>
# we remove the var lbls so gt doesn't automatically pick them up instead of the column names
# cf. https://gt.rstudio.com/news/index.html#minor-improvements-and-bug-fixes-0-9-0
labelled::remove_var_label()
}
#' Referendum share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendums per period, grouped by another column.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column specified in `period` or the column `date` (to
#' compute the [period column][add_period]), plus the column specified in `by`.
#' @param by `data` column to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param period Type of period to count referendums by. One of
#' `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE,
#' max_cache_age = "1 year") |>
#' rdb::plot_rfrnd_share_per_period(by = "level")
plot_rfrnd_share_per_period <- function(data,
by,
period = c("week", "month", "quarter", "year", "decade", "century")) {
period <- rlang::arg_match(period)
# add period col if necessary
if (!(period %in% colnames(data))) {
data %<>% add_period(period = period)
}
# tidy selection
defused_by <- rlang::enquo(by)
i_by <- tidyselect::eval_select(expr = defused_by,
data = data)
n_by <- length(i_by)
name_by <- names(i_by)
# ensure `x` is < 2
if (n_by > 1L) {
cli::cli_abort("Only {.emph one} column can be specified in {.arg by}, but {.val {n_by}} were provided.")
}
data %>%
# calculate freqs
dplyr::group_by(!!as.symbol(name_by), !!as.symbol(period)) %>%
dplyr::summarise(n = dplyr::n(),
.groups = "drop") %>%
# plot
plot_share_per_period(x = name_by,
period = period)
}
#' Topic segmentation sunburst chart
#'
#' Creates a [Plotly sunburst chart](https://plotly.com/r/sunburst-charts/) that visualizes the hierarchical segmentation of referendum topic occurences.
#'
#' A *topic lineage* is the hierarchical compound of a `topic_tier_1` and optionally a grandchild `topic_tier_3` and/or a child `topic_tier_2`.
#'
#' Note that topics can be assigned on any tier to referendums (i.e. in one case, a `topic_tier_1` plus a child `topic_tier_2` is assigned, and in another case
#' only a `topic_tier_1` without any further child topic).
#'
#' Furthermore, it should be noted that not every `topic_tier_2` has potential child `topic_tier_3`s. See the [full topic hierarchy](`r url_codebook("topics")`)
#' for details.
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the columns `topics_tier_1`, `topics_tier_2` and
#' `topics_tier_3`.
#' @param method Applied method to count the number of topic occurences. One of
#' - **`"per_rfrnd"`**: All *referendums* have the same weight. For a referendum with n different topics of the same tier, every topic is counted 1/n.
#' - **`"per_topic_lineage"`**: All *topic lineages* have the same weight. For a referendum with n different topics of the same tier, every topic is fully
#' counted, meaning that e.g. a referendum with three different tier-3 topics has a tripled impact on the result compared to a referendum that only has a
#' single one. Noticeably faster than `"per_rfrnd"`.
#' - **`"naive"`**: Naive procedure which doesn't properly reflect topic proportions on tier 2 and 3. Based on the (wrong) assumptions that a) all referendums
#' have the same number of topic lineages assigned and b) topics are not deduplicated per tier. By far the fastest method, though.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' # count each referendum equally
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::plot_topic_segmentation(method = "per_rfrnd")
#'
#' # count each topic lineage equally
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::plot_topic_segmentation(method = "per_topic_lineage")
#'
#' # naive count (way faster, but with misleading proportions on tier 2 and 3)
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::plot_topic_segmentation(method = "naive")
plot_topic_segmentation <- function(data,
method = c("per_rfrnd", "per_topic_lineage", "naive")) {
method <- rlang::arg_match(method)
rlang::check_installed("plotly",
reason = pal::reason_pkg_required())
is_naive <- method == "naive"
# assemble necessary data structure
if (is_naive) {
## naively
data_plot <-
dplyr::bind_rows(
data$topics_tier_1 %>%
topic_frequency(tier = 1L) %>%
dplyr::mutate(parent_topic = ""),
data$topics_tier_2 %>%
topic_frequency(tier = 2L) %>%
dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
.f = infer_topics,
tier = 1L)),
data$topics_tier_3 %>%
topic_frequency(tier = 3L) %>%
dplyr::mutate(parent_topic = purrr::map_chr(.x = as.character(topic),
.f = infer_topics,
tier = 2L))
) %>%
dplyr::rename(value = n)
} else {
is_per_rfrnd <- method == "per_rfrnd"
data_plot <- data %>% dplyr::select(starts_with("topics_tier_"))
### per rfrnd, i.e. in fractional numbers
if (is_per_rfrnd) {
data_plot %<>%
purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
unlist(..2),
unlist(..3)) %>%
dplyr::mutate(value = 1.0 / nrow(.))) %>%
purrr::list_rbind()
### per topic lineage
} else {
data_plot %<>%
purrr::pmap(~ hierarchize_topics_fast(unlist(..1),
unlist(..2),
unlist(..3))) %>%
purrr::list_rbind() %>%
dplyr::mutate(value = 1.0)
}
data_plot <-
dplyr::bind_rows(
data_plot %>%
dplyr::group_by(topic_tier_1) %>%
dplyr::summarise(value = sum(value)) %>%
dplyr::mutate(topic = topic_tier_1,
parent_topic = "",
value,
.keep = "none"),
data_plot %>%
dplyr::group_by(topic_tier_2) %>%
dplyr::summarise(value = sum(value)) %>%
dplyr::mutate(topic = topic_tier_2,
parent_topic =
topic %>%
purrr::map_chr(~ {
if (is.na(.x)) {
NA_character_
} else {
infer_topics(topics = .x,
tier = 1L)
}}),
value,
.keep = "none"),
data_plot %>%
dplyr::group_by(topic_tier_3) %>%
dplyr::summarise(value = sum(value)) %>%
dplyr::mutate(topic = topic_tier_3,
parent_topic =
topic %>%
purrr::map_chr(~ {
if (is.na(.x)) {
NA_character_
} else {
infer_topics(topics = .x,
tier = 2L)
}}),
value,
.keep = "none")
) %>%
dplyr::filter(!is.na(topic))
### add NA rows filling the gaps
data_plot %<>%
dplyr::filter(parent_topic != "") %>%
dplyr::group_by(parent_topic) %>%
dplyr::summarise(value_total = sum(value),
.groups = "drop") %>%
dplyr::mutate(topic = "<i>not defined</i>",
value = purrr::map2_dbl(.x = value_total,
.y = parent_topic,
.f = ~
data_plot %>%
dplyr::filter(topic == .y) %$%
value %>%
checkmate::assert_number() %>%
magrittr::subtract(.x)),
parent_topic,
.keep = "none") %>%
dplyr::bind_rows(data_plot, .) %>%
dplyr::mutate(id = ifelse(topic == "<i>not defined</i>",
paste0("NA_", parent_topic),
topic))
}
# create plot
plotly::plot_ly(data = data_plot,
type = "sunburst",
labels = ~topic,
parents = ~parent_topic,
ids = if (is_naive) ~topic else ~id,
values = ~value,
branchvalues = ifelse(is_naive,
"remainder",
"total"),
insidetextorientation = "radial")
}
#' Topic share per period stacked area chart
#'
#' Creates a [Plotly stacked area chart](https://plotly.com/r/filled-area-plots/#stacked-area-chart-with-cumulative-values) that visualizes the share of
#' referendum topic occurences per period.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @param data RDB referendum data as returned by [rfrnds()]. A data frame that at minimum contains the column `topics_tier_#` of the specified `tier`.
#' @param tier Tier of the topics variable to plot. `1L`, `2L` or `3L`.
#' @param period Type of period to count topics by. One of
#' `r pal::fn_param_defaults(fn = add_period, param = "period") |> pal::wrap_chr("\x60") |> cli::ansi_collapse(sep2 = " or ", last = " or ")`.
#' @param weight_by_n_rfrnds Whether or not to weight topic occurences by number of referendums. If `TRUE`, for a referendum with n different topics of the same
#' `tier`, every topic is counted 1/n.
#'
#' @return `r pkgsnip::param_lbl("plotly_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::plot_topic_share_per_period(period = "decade")
plot_topic_share_per_period <- function(data,
tier = 1L,
period = c("week", "month", "quarter", "year", "decade", "century"),
weight_by_n_rfrnds = TRUE) {
checkmate::assert_int(tier,
lower = 1L,
upper = 3L)
period <- rlang::arg_match(period)
checkmate::assert_flag(weight_by_n_rfrnds)
# add period col if necessary
if (!(period %in% colnames(data))) {
data %<>% add_period(period = period)
}
# ensure topics var is present
var_name_topics <- glue::glue("topics_tier_{tier}")
var_name_topic <- var_name_unnested(var_name_topics)
if (!(var_name_topics %in% colnames(data))) {
cli::cli_abort("Required column {.var {var_name_topics}} is missing from {.arg data}.")
}
data |>
# add proper count var
dplyr::mutate(count = if (weight_by_n_rfrnds) 1.0 / lengths(!!as.symbol(var_name_topics)) else 1.0) |>
# unnest topics var
unnest_var(var = var_name_topics) |>
# calculate freqs
dplyr::group_by(!!as.symbol(period), !!as.symbol(var_name_topic)) |>
dplyr::summarise(n = sum(count),
.groups = "drop") |>
# plot
plot_share_per_period(x = var_name_topic,
period = period)
}
#' Period streamgraph
#'
#' Creates a ggplot2 [streamgraph](https://en.wikipedia.org/wiki/Streamgraph) based on [ggstream::geom_stream()] using the specified `period` as time
#' resolution.
#'
#' @inheritParams n_rfrnds_per_period
#' @param by `data` column to group by before counting number of referendums.
#' @param stacking Stacking type. One of
#' - `"mirror"` to stack absolute values symmetrically around the zero line on the x-axis.
#' - `"ridge"` to stack absolute values from the zero line on the x-axis upwards.
#' - `"proportional"` to stack relative values that add up to 100 %.
#' @param bandwidth Kernel density estimation bandwidth. A numeric scalar.
#' @param y_lim Optional Y axis range limit. Only relevant if `stacking` is one of `"mirror"` or `"ridge"`. The limit applies to the upper side if
#' `stacking = "ridge"` and to both sides if `stacking = "mirror"`. A numeric scalar equal or greater than zero.
#' @param color_palette Color palette function that when called with a single integer argument returns that many color codes.
#' @param prune_legend Whether or not to drop `by` factor levels which don't occur in `data` from the legend. Only has an effect if `by` is of type factor.
#'
#' @return `r pkgsnip::return_lbl("ggplot2_obj")`
#' @family visualize
#' @export
#'
#' @examples
#' data_rdb <- rdb::rfrnds(quiet = TRUE)
#'
#' rdb::ggplot_streamgraph(data = data_rdb,
#' by = topics_tier_1,
#' period = "year")
#'
#' # you can specify a different color palette
#' rdb::ggplot_streamgraph(data = data_rdb,
#' by = topics_tier_1,
#' period = "year",
#' color_palette = viridisLite::viridis)
#'
#' # by default, only factor levels which occur in data are included in the legend
#' data_rdb |>
#' dplyr::filter(country_code == "AT") |>
#' rdb::ggplot_streamgraph(by = topics_tier_1,
#' period = "decade")
#'
#' # but you can include *all* factor levels in the legend if you want to
#' data_rdb |>
#' dplyr::filter(country_code == "AT") |>
#' rdb::ggplot_streamgraph(by = topics_tier_1,
#' period = "decade",
#' prune_legend = FALSE)
ggplot_streamgraph <- function(data,
by,
period = c("week", "month", "quarter", "year", "decade", "century"),
stacking = c("mirror", "ridge", "proportional"),
bandwidth = 0.75,
y_lim = NULL,
color_palette = viridisLite::turbo,
prune_legend = TRUE) {
stacking <- rlang::arg_match(stacking)
checkmate::assert_number(bandwidth)
checkmate::assert_number(y_lim,
lower = 0.0,
finite = TRUE,
null.ok = TRUE)
checkmate::assert_function(color_palette)
checkmate::assert_flag(prune_legend)
rlang::check_installed("ggplot2",
reason = pal::reason_pkg_required())
rlang::check_installed("ggstream",
reason = pal::reason_pkg_required())
rlang::check_installed("scales",
reason = pal::reason_pkg_required())
rlang::check_installed("viridisLite",
reason = pal::reason_pkg_required())
ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
data = data)
names_by <- names(ix_by)
name_by <- names_by
n_by <- length(ix_by)
if (n_by > 1L) {
cli::cli_abort("Only {.emph one} data column can be specified in {.arg by}, but {.val {n_by}} were provided.")
}
# unnest list col if necessary
if (is.list(data[[ix_by]])) {
data %<>% unnest_var(var = tidyselect::all_of(names_by))
name_by <- var_name_unnested(names_by)
}
result <- n_rfrnds_per_period(data = data,
period = period,
by = !!as.symbol(name_by))
# create stable color mapping based on lvls of `by`
# TODO: this probably could be removed and we could instead pass on a *discrete fill scale* fn (e.g. `\(...) viridis::scale_fill_viridis(..., discrete = T)`)
# to `ggplot2::scale_fill_discrete(type = )` once the below mentioned issue #23 is resolved
if (is.factor(data[[name_by]])) {
vals_by <- levels(data[[name_by]])
} else {
vals_by <- sort(unique(data[[name_by]]))
}
colors_by <- color_palette(length(vals_by))
names(colors_by) <- vals_by
# we need to remove zero-n rows (plus the corresponding colors) since ggstream doesn't handle them properly
# TODO: remove this workaround once [issue #23](https://github.com/davidsjoberg/ggstream/issues/23) is fixed
## if we prune the legend, we need to prune the fill colors, too (otherwise, colors aren't matched properly)
if (prune_legend) {
colors_by %<>% magrittr::extract(names(.) %in% unique(result[[name_by]][result$n > 0L]))
}
result %<>%
dplyr::group_by(!!as.symbol(name_by)) %>%
dplyr::group_modify(\(d, k) if (sum(d$n) > 0L) d else d[0L, ]) %>%
dplyr::ungroup()
result <-
ggplot2::ggplot(data = result,
mapping = ggplot2::aes(x = !!as.symbol(period),
y = n,
fill = !!as.symbol(name_by))) +
ggstream::geom_stream(type = stacking,
n_grid = 10000L,
show.legend = TRUE,
bw = bandwidth) +
ggplot2::scale_fill_discrete(type = colors_by,
name = prettify_var_names(name_by),
drop = prune_legend) +
ggplot2::xlab(ggplot2::element_blank()) +
ggplot2::ylab(ggplot2::element_blank())
if (stacking == "ridge" && !is.null(y_lim)) {
result <- result + ggplot2::coord_cartesian(ylim = c(0.0, y_lim),
default = TRUE)
} else if (stacking == "mirror") {
if (!is.null(y_lim)) {
result <- result + ggplot2::coord_cartesian(ylim = c(-y_lim, y_lim),
default = TRUE)
}
# make y scale absolute in both directions
result <- result + ggplot2::scale_y_continuous(labels = \(x) abs(x))
} else if (stacking == "proportional") {
result <- result + ggplot2::scale_y_continuous(labels = scales::label_percent(suffix = "\u2009%"))
}
result
}
#' Tabulate number of referendums
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums, optionally counted `by` up to three additional variables.
#'
#' The first variable specified in `by` will be reflected in additional rows in the resulting table, i.e. expand it vertically. The second and third variables
#' will be reflected in additional columns, i.e. expand it horizontally.
#'
#' @inheritParams n_rfrnds
#' @param by Up to three additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param n_rows Maximum number of rows to be included in the resulting table. All the rows exceeding that limit are replaced by a single row of ellipses. An
#' integer scalar or `Inf` for an unlimited number of rows.
#' @param order How to order the rows of the resulting table. One of
#' - `"ascending"` to sort in ascending order by the number of referendums,
#' - `"descending"` to sort in descending order by the number of referendums, or
#' - `NULL` to leave the sorting unchanged.
#' @param incl_row_head Whether or not to include a row heading with the [prettified][prettify_var_names] name of the first `by` variable.
#' @param incl_col_head Whether or not to include column headings (in the table's [stub][gt::tab_stubhead]) with the [prettified][prettify_var_names] names of
#' the second and third `by` variables.
#' @param add_total_row Whether or not to add a summary row at the very end of the table containing column totals. If `NULL`, a total row is added only if
#' at least one column is provided in `by`.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#' multiple columns are provided in `by`.
#' @param lbl_total_row Label of the summary row containing column totals. Only relevant if `add_total_row = TRUE`. A character scalar. [gt::md()] or
#' [gt::html()] can be used to format the label text.
#' @param lbl_total_col Label of the summary column containing row totals. Only relevant if `add_total_col = TRUE`. A character scalar. [gt::md()] or
#' [gt::html()] can be used to format the label text.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds()
#'
#' # grouped by a single column
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds(by = level)
#'
#' # grouped by two columns
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds(by = c(type, level))
#'
#' # grouped by three columns
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds(by = c(country_name, level, type),
#' n_rows = 10L,
#' order = "descending")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::as_ballot_dates() |>
#' rdb::tbl_n_rfrnds(by = c(country_name, level),
#' n_rows = 10L,
#' order = "descending")
tbl_n_rfrnds <- function(data,
by = NULL,
complete_fcts = TRUE,
n_rows = Inf,
order = NULL,
incl_row_head = TRUE,
incl_col_head = TRUE,
add_total_row = NULL,
add_total_col = NULL,
lbl_total_row = gt::md("**Total**"),
lbl_total_col = gt::md("**Total**")) {
if (!isTRUE(is.infinite(n_rows))) {
checkmate::assert_int(n_rows,
lower = 1L)
}
if (!is.null(order)) {
rlang::arg_match0(arg = order,
values = c("ascending", "descending"))
}
checkmate::assert_flag(incl_row_head)
checkmate::assert_flag(incl_col_head)
checkmate::assert_flag(add_total_row,
null.ok = TRUE)
checkmate::assert_flag(add_total_col,
null.ok = TRUE)
checkmate::assert_string(lbl_total_row)
checkmate::assert_string(lbl_total_col)
rlang::check_installed("gt",
version = "0.9.0",
reason = pal::reason_pkg_required())
ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
data = data)
n_by <- length(ix_by)
has_by <- n_by > 0L
has_by_rest <- n_by > 1L
if (n_by > 3L) {
cli::cli_abort("At most {.emph three} data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
}
if (is.null(add_total_row)) {
add_total_row <- has_by
}
if (is.null(add_total_col)) {
add_total_col <- has_by_rest
}
by_colname_1st <- names(ix_by[1L]) %|% ":no_by"
by_colnames_rest <- names(ix_by[-1L])
result <-
data |>
n_rfrnds(by = {{ by }},
complete_fcts = complete_fcts) |>
dplyr::mutate(dplyr::across(where(is.factor),
~ forcats::fct_na_value_to_level(f = .x,
level = "N/A"))) |>
pal::when(has_by_rest ~ tidyr::pivot_wider(data = .,
names_from = by_colnames_rest,
names_sort = TRUE,
values_from = n),
~ .) |>
dplyr::mutate(`:total` = rowSums(x = dplyr::pick(-any_of(by_colname_1st)),
na.rm = TRUE),
dplyr::across(everything(),
~ tidyr::replace_na(data = .x,
replace = 0L)),
# TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
dplyr::across(any_of(by_colname_1st),
as.character)) |>
pal::when(isTRUE(order == "descending") ~ dplyr::arrange(.data = .,
-`:total`),
isTRUE(order == "ascending") ~ dplyr::arrange(.data = .,
`:total`),
~ .) |>
pal::when(!add_total_col ~ dplyr::select(.data = .,
-`:total`),
~ .)
total_n <-
result |>
dplyr::select(-any_of(by_colname_1st)) |>
purrr::map_int(\(x) sum(x, na.rm = TRUE))
chop_rows <- n_rows < nrow(result)
if (chop_rows) {
result %<>%
utils::head(n = n_rows) %>%
# add placeholder/ellipsis row
dplyr::mutate(dplyr::across(everything(),
as.character)) %>%
rbind("\u2026")
}
# NOTE: if we chop rows (and have multiple n cols), it's impossible to create our total row using `gt::grand_summary_rows()` since its `fns` arg only
# receives column content, no metadata; thus we create the total row manually
if (add_total_row && chop_rows) {
result %<>% rbind(c(lbl_total_row, total_n))
}
result %<>% gt::gt(rowname_col = ifelse(has_by,
by_colname_1st,
"rowname"),
process_md = TRUE)
if (incl_row_head && has_by) {
result %<>% gt::tab_row_group(label =
by_colname_1st |>
prettify_var_names() |>
pal::wrap_chr(wrap = "*") |>
gt::md(),
rows = tidyselect::everything(),
id = by_colname_1st)
}
if (add_total_col) {
result %<>% gt::cols_label(`:total` = lbl_total_col)
}
if (add_total_row) {
if (chop_rows) {
result %<>% gt::tab_style(style = gt::cell_borders(sides = "top",
color = "#D3D3D3",
style = "double",
weight = gt::px(6L)),
locations = list(gt::cells_body(rows = n_rows + 2L),
gt::cells_stub(rows = n_rows + 2L)))
} else {
result %<>% gt::grand_summary_rows(fns = list(id = "total", label = "DUMMY") ~ sum(., na.rm = TRUE),
fmt = ~ gt::fmt_integer(., sep_mark = ""))
# TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295)
# is fixed.
result$`_summary`[[1L]]$fns$total$label <- lbl_total_row
}
}
if (incl_col_head && has_by_rest) {
result %<>%
gt::tab_stubhead(label =
by_colnames_rest |>
prettify_var_names() |>
pal::wrap_chr(wrap = "*") |>
paste0(collapse = "<br><br>") |>
gt::md()) %>%
gt::tab_style(style = gt::cell_text(align = "right",
v_align = "middle"),
locations = gt::cells_stubhead())
}
result |>
gt::tab_spanner_delim(delim = "_",
split = "last") |>
# right-align cols; required since they're of type chr if we chopped rows
gt::cols_align(align = "right",
columns = -tidyselect::any_of(by_colname_1st)) |>
# hide table header if there are less than two `by` cols
pal::when(!has_by_rest ~ gt::tab_options(data = .,
column_labels.hidden = TRUE),
~ .)
}
#' Tabulate number of referendums per period
#'
#' Creates a ready-to-print [gt][gt::gt] table with the number of referendums per period, optionally counted `by` up to two additional columns.
#'
#' ```{r, child = "snippets/period_note.Rmd"}
#' ```
#'
#' @inheritParams n_rfrnds_per_period
#' @inheritParams tbl_n_rfrnds
#' @param by Up to two additional `data` columns to group by before counting number of referendums. `r pkgsnip::param_lbl("tidy_select_support")`
#' @param squeeze_zero_rows Whether or not to compress consecutive zero-sum rows into single period span rows.
#' @param add_total_col Whether or not to add a summary column at the very end of the table containing row totals. If `NULL`, a total column is added only if
#' `by` is non-empty.
#'
#' @return `r pkgsnip::return_lbl("gt_obj")`
#' @family tabulate
#' @export
#'
#' @examples
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds_per_period(period = "decade")
#'
#' # grouped by a single additional column
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds_per_period(by = level,
#' period = "decade")
#'
#' # grouped by two addtional columns
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::tbl_n_rfrnds_per_period(by = c(level, type),
#' period = "decade")
#'
#' # count ballot dates instead of referendums
#' rdb::rfrnds(quiet = TRUE) |>
#' rdb::as_ballot_dates() |>
#' rdb::tbl_n_rfrnds_per_period(period = "decade")
tbl_n_rfrnds_per_period <- function(data,
by = NULL,
period = c("week", "month", "quarter", "year", "decade", "century"),
fill_gaps = TRUE,
period_floor = NULL,
period_ceiling = NULL,
squeeze_zero_rows = TRUE,
descending = TRUE,
add_total_row = TRUE,
add_total_col = NULL,
lbl_total_row = gt::md("**Total**"),
lbl_total_col = lbl_total_row) {
period <- rlang::arg_match(period)
checkmate::assert_flag(squeeze_zero_rows)
checkmate::assert_flag(add_total_row)
checkmate::assert_flag(add_total_col,
null.ok = TRUE)
checkmate::assert_string(lbl_total_row)
checkmate::assert_string(lbl_total_col)
rlang::check_installed("gt",
version = "0.9.0",
reason = pal::reason_pkg_required())
ix_by <- tidyselect::eval_select(expr = rlang::enquo(by),
data = data)
n_by <- length(ix_by)
has_by <- n_by > 0L
if (n_by > 2L) {
cli::cli_abort("At most {.emph two} additional data columns can be specified in {.arg by}, but {.val {n_by}} were provided.")
}
if (is.null(add_total_col)) {
add_total_col <- has_by
}
by_names_print <- ifelse(has_by,
names(ix_by) %>%
prettify_var_names() %>%
pal::wrap_chr(wrap = "*") %>%
paste0(collapse = "<br><br>"),
"")
data_to_plot <-
data %>%
n_rfrnds_per_period(period = period,
by = {{ by }},
fill_gaps = fill_gaps,
period_floor = period_floor,
period_ceiling = period_ceiling,
descending = descending) %>%
dplyr::mutate(dplyr::across(where(is.factor),
~ forcats::fct_na_value_to_level(f = .x,
level = "N/A"))) %>%
pal::when(has_by ~ tidyr::pivot_wider(data = .,
names_from = {{ by }},
names_sort = TRUE,
values_from = n),
~ .) %>%
pal::when(add_total_col ~ dplyr::mutate(.data = .,
`:total` = rowSums(x = dplyr::pick(-!!as.symbol(period)),
na.rm = TRUE)),
~ .) %>%
dplyr::mutate(dplyr::across(everything(),
~ tidyr::replace_na(data = .x,
replace = 0L)),
# TODO: remove type conversion below once [issue #1305](https://github.com/rstudio/gt/issues/1305) is fixed
dplyr::across(all_of(period),
as.character))
# squeeze consecutive all-0 rows into single row if requested
ix <- integer()
ix_rm <- integer()
if (squeeze_zero_rows) {
for (i in pal::safe_seq_len(nrow(data_to_plot))) {
if (data_to_plot %>%
dplyr::select(-any_of(c(period, ":total"))) %>%
magrittr::extract(i, ) %>%
sum() %>%
magrittr::equals(0L)) {
ix %<>% c(i)
} else {
if (length(ix) > 1L) {
data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
ix_rm %<>% c(ix[-1L])
}
ix <- integer()
}
}
if (length(ix) > 1L) {
data_to_plot[ix[1L], period] <- paste0(data_to_plot[ix[length(ix)], period], "\u2013", data_to_plot[ix[1L], period])
ix_rm %<>% c(ix[-1L])
}
}
# add "s" to decade/century period values
if (period %in% c("decade", "century")) {
data_to_plot[[period]] %<>% stringr::str_replace_all(pattern = "(\\d+)",
replacement = "\\1s")
}
data_to_plot %>%
dplyr::filter(!(dplyr::row_number() %in% ix_rm)) %>%
gt::gt(rowname_col = period) %>%
pal::when(add_total_col ~ gt::cols_label(.data = .,
`:total` = lbl_total_col),
~ .) %>%
pal::when(add_total_row ~ {
result <- gt::grand_summary_rows(data = .,
fns = list(label = "DUMMY", id = "total") ~ sum(., na.rm = TRUE),
fmt = ~ gt::fmt_integer(., sep_mark = ""))
# TODO: remove this workaround and replace `"DUMMY"` with `lbl_total_row` above as soon as [#1295](https://github.com/rstudio/gt/issues/1295) is fixed.
result$`_summary`[[1L]]$fns$total$label <- lbl_total_row
result
},
~ .) %>%
gt::tab_spanner_delim(delim = "_",
split = "last") %>%
gt::tab_stubhead(label = gt::md(by_names_print)) %>%
gt::tab_style(style = gt::cell_text(align = "right",
v_align = "middle"),
locations = gt::cells_stubhead())
}
#' List referendum territories from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum territories from sudd.ch](https://sudd.ch/list.php?mode=allareas), which means each `country_name_de` together with all the associated
#' `territory_name_de`, their search URL and their number of occurrences.
#'
#' Note that the values in the `territory_name_de` column returned by this function can differ from those in the `territory_name_de` column of
#' [sudd_rfrnds()] and [list_sudd_rfrnds()]. The latter is often more extensive and usually includes the `country_name_de` (in parentheses) for
#' subnational referendums.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_territories()
list_sudd_territories <- function() {
rows <-
httr::RETRY(verb = "GET",
url = url_sudd("list.php"),
query = list(mode = "allareas"),
times = 3L) %>%
xml2::read_html() %>%
rvest::html_elements(css = "main table tr") %>%
purrr::map(rvest::html_elements,
css = "td")
col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
col_2 <- rows %>% purrr::map(\(x) if (length(x) > 1L) x[[2L]] else xml2::as_xml_document(list()))
col_3 <- rows %>% purrr::map(\(x) if (length(x) > 2L) x[[3L]] else xml2::as_xml_document(list()))
tibble::tibble(country_name_de = purrr::map_chr(col_1, rvest::html_text),
territory_name_de = purrr::map_chr(col_2, rvest::html_text),
filter_url = purrr::map_chr(col_2,
~ {
if (length(.x) > 0L) {
.x %>%
rvest::html_element(css = "a") %>%
rvest::html_attr(name = "href") %>%
url_sudd()
} else {
NA_character_
}
}),
n = purrr::map_chr(col_3, rvest::html_text)) %>%
dplyr::mutate(country_name_de = dplyr::if_else(stringr::str_detect(string = country_name_de,
pattern = "^\\s*$"),
NA_character_,
country_name_de)) %>%
tidyr::fill(country_name_de,
.direction = "down") %>%
dplyr::filter(!dplyr::if_all(c(territory_name_de, n),
is.na))
}
#' List referendum titles from [sudd.ch](https://sudd.ch/)
#'
#' Lists [all referendum titles from sudd.ch](https://sudd.ch/list.php?mode=alltopics), together with their search URLs and number of occurrences.
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family sudd
#' @export
#'
#' @examples
#' rdb::list_sudd_titles() |> dplyr::filter(n > 1)
list_sudd_titles <- function() {
rows <-
httr::RETRY(verb = "GET",
url = url_sudd("list.php"),
query = list(mode = "alltopics"),
times = 3L) %>%
xml2::read_html() %>%
rvest::html_elements(css = "main table tr") %>%
purrr::map(rvest::html_elements,
css = "td")
col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)
tibble::tibble(title_de = col_1 %>% purrr::map_chr(rvest::html_text),
filter_url = col_1 %>% purrr::map_chr(~ .x %>%
rvest::html_element(css = "a") %>%
rvest::html_attr(name = "href") %>%
url_sudd()),
n = col_2 %>% purrr::map_chr(rvest::html_text))
}
#' List referendum data from [sudd.ch](https://sudd.ch/)
#'
#' @description
#' Lists the referendum data from [sudd.ch](https://sudd.ch/) in various ways its [`list.php`](https://sudd.ch/list.php) endpoint allows. The output of this
#' function can be directly fed to [sudd_rfrnds()].
#'
#' @inheritSection sudd_rfrnds About [sudd.ch](https://sudd.ch/)
#' @param mode The listing mode. One of
#' - `"by_date"`: Lists [all referendums in the sudd.ch database **by `date`**](https://sudd.ch/list.php?mode=allrefs), together with their `id_sudd`,
#' `country_code`, `territory_name_de` and `title_de`. Specifying the sorting order of the results via the `order` parameter is supported.
#' - `"by_mod_date"`: Lists [all referendums in the sudd.ch database **by `date_last_edited`**](https://sudd.ch/list.php?mode=moddate), together with their
#' `id_sudd`, `country_code`, `territory_name_de`, `date` and `title_de`.
#' - `"filter"`: Allows to provide additional arguments (via the `filter` parameter) that limit the results accordingly. Specifying the sorting order of the
#' results via the `order` parameter is supported.
#' - `"random"`: Lists the `id_sudd` of five randomly selected referendums, together with their `country_code`.
#' @param order The sorting order of the results. Only relevant if `mode` is either `"by_date"` or `"filter"`.
#' @param filter A list with valid filtering arguments. Only relevant if `mode = "filter"`. The supported filtering arguments include
#' - `territory_name_de`: A string that must be (partially) matched by the referendums' `territory_name_de`. Matching is case-insensitive and no [fuzzy
#' matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `title_de`: A string that must be (partially) matched by the referendums' `title_de`. Matching is case-insensitive and no [fuzzy
#' matching](https://en.wikipedia.org/wiki/Approximate_string_matching) is supported.
#' - `year_min`: The lower year limit of the referendums' `date`. A positive integer.
#' - `year_max`: The upper year limit of the referendums' `date`. A positive integer.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A [tibble][tibble::tbl_df] containing at least an `id_sudd` column.
#' @family sudd
#' @export
#'
#' @examples
#' # list all referendums by modification date (takes a while)
#' \dontrun{
#' rdb::list_sudd_rfrnds(mode = "by_mod_date")}
#'
#' # list all referendums whose title matches "AHV"
#' rdb::list_sudd_rfrnds(mode = "filter",
#' filter = list(title_de = "AHV"),
#' quiet = TRUE)
#'
#' # get sudd.ch referendum data from all referendums from 2020 onwards
#' rdb::list_sudd_rfrnds(mode = "filter",
#' filter = list(year_min = 2020),
#' quiet = TRUE) |>
#' rdb::sudd_rfrnds(quiet = TRUE)
#'
#' # get sudd.ch referendum data from five randomly picked referendums
#' rdb::list_sudd_rfrnds(mode = "random",
#' quiet = TRUE) |>
#' rdb::sudd_rfrnds(quiet = TRUE)
list_sudd_rfrnds <- function(mode = c("by_date",
"by_mod_date",
"filter",
"random"),
order = c("ascending",
"descending"),
filter = list(territory_name_de = NULL,
title_de = NULL,
year_min = NULL,
year_max = NULL),
use_cache = TRUE,
max_cache_age = "1 week",
quiet = FALSE) {
# check args
mode <- rlang::arg_match(mode)
order <-
rlang::arg_match(order) %>%
dplyr::case_match(.x = .,
"ascending" ~ "asc",
"descending" ~ "desc",
.default = .)
checkmate::assert_flag(quiet)
# do not cache `mode = "random"`
if (mode == "random") use_cache <- FALSE
pkgpins::with_cache(expr = {
checkmate::assert_list(filter,
names = "unique")
checkmate::assert_subset(names(filter),
choices = c("territory_name_de",
"title_de",
"year_min",
"year_max"))
checkmate::assert_string(filter$territory_name_de,
null.ok = TRUE)
checkmate::assert_string(filter$title_de,
null.ok = TRUE)
has_filter <-
filter %>%
purrr::map_lgl(is.null) %>%
all() %>%
magrittr::not()
if (has_filter && mode != "filter") {
cli::cli_alert_warning("{.arg filter} is ignored because {.arg mode} is set to {.val {mode}}.")
}
filter$year_min <- checkmate::assert_int(filter$year_min,
lower = sudd_min_year,
upper = sudd_max_year,
null.ok = TRUE,
coerce = TRUE)
filter$year_max <- checkmate::assert_int(filter$year_max,
lower = filter$year_min %||% sudd_min_year,
upper = sudd_max_year,
null.ok = TRUE,
coerce = TRUE)
is_year_missing <- purrr::map_lgl(c(filter$year_min, filter$year_max), is.null) %>% { any(.) && !all(.) }
if (is_year_missing) {
filter$year_min <- filter$year_min %||% sudd_min_year
filter$year_max <- filter$year_max %||% sudd_max_year
}
if (mode == "filter" && all(purrr::map_lgl(filter, is.null))) {
cli::cli_abort("At least one filtering argument must be provided in {.arg filter} when {.arg mode = \"filter\"}.")
}
names(filter) %<>% dplyr::case_match(.x = .,
"territory_name_de" ~ "area",
"title_de" ~ "topic",
"year_min" ~ "first",
"year_max" ~ "last",
.default = .)
# assemble query params
query <- c(list(mode = mode %>% dplyr::case_match(.x = .,
"by_date" ~ "allrefs",
"by_mod_date" ~ "moddate",
.default = .))[mode != "filter"],
list(sense = order)[mode %in% c("by_date", "filter")],
filter[mode == "filter"])
# retrieve and parse data
if (!quiet) {
status_msg <- "Fetching raw HTML data from {.url sudd.ch}..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
html <-
httr::RETRY(verb = "GET",
url = url_sudd("list.php"),
query = query,
times = 3L) %>%
xml2::read_html()
if (!quiet) {
status_msg <- "Parsing and tidying raw HTML data..."
cli::cli_progress_step(msg = status_msg,
msg_done = paste(status_msg, "done"),
msg_failed = paste(status_msg, "failed"))
}
if (mode == "random") {
result <-
html %>%
rvest::html_elements(css = "main ul li a") %>%
rvest::html_attr(name = "href") %>%
stringr::str_extract(pattern = "(?<=id=)[\\w\\d]+") %>%
tibble::tibble(id_sudd = .)
} else {
rows <-
html %>%
rvest::html_element(css = "main table") %>%
rvest::html_children() %>%
purrr::map(rvest::html_elements,
css = "td")
col_1 <- rows %>% purrr::map(magrittr::extract2, 1L)
col_2 <- rows %>% purrr::map(magrittr::extract2, 2L)
col_3 <- rows %>% purrr::map(magrittr::extract2, 3L)
col_4 <- rows %>% purrr::map(magrittr::extract2, 4L)
result <-
tibble::tibble(id_sudd = purrr::map_chr(col_4,
\(x) {
x %>%
rvest::html_element(css = "a") %>%
rvest::html_attr(name = "href") %>%
stringr::str_extract(pattern = "(?<=[\\?&]id=)[\\w\\d]+")
}),
territory_name_de = purrr::map_chr(col_2, rvest::html_text),
!!!(col_3 %>% purrr::map_chr(rvest::html_text) %>% parse_sudd_date_de()),
title_de = purrr::map_chr(col_4, rvest::html_text)) %>%
# add `date`
dplyr::mutate(date = clock::date_build(year = year,
month = month,
day = day,
invalid = "NA")) %>%
dplyr::relocate(date,
.before = year)
if (mode == "by_mod_date") {
result %<>%
tibble::add_column(date_last_edited =
col_1 %>%
purrr::map_chr(~ .x %>%
rvest::html_element(css = "time") %>%
rvest::html_attr(name = "datetime")) %>%
clock::date_parse()) %>%
tidyr::fill(date_last_edited,
.direction = "down")
}
}
result %>%
# derive vars from `id_sudd`
dplyr::bind_cols(.$id_sudd |>
purrr::map(parse_sudd_id) |>
purrr::list_rbind()) %>%
dplyr::select(id_sudd,
starts_with("country_"),
is_former_country,
starts_with("subnational_entity_"),
everything())
},
pkg = this_pkg,
from_fn = "list_sudd_rfrnds",
mode,
order,
filter,
use_cache = use_cache,
max_cache_age = max_cache_age)
}
#' Get referendum data from [sudd.ch](https://sudd.ch/)
#'
#' Downloads referendum data from [sudd.ch](https://sudd.ch/).
#'
#' # About [sudd.ch](https://sudd.ch/)
#'
#' **sudd** stands for _**Su**chmaschine für **d**irekte **D**emokratie_ (German) and is operated by [Beat Müller](mailto:beat@sudd.ch). Its database content is
#' licensed under [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA
#' 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).
#'
#' @param ids_sudd The referendum identifiers assigned by [sudd.ch](https://sudd.ch/). Either as a character vector or a data frame containing a column
#' `id_sudd`. `NA`s are ignored.
#' @param use_cache `r pkgsnip::param_lbl("use_cache")`
#' @param max_cache_age `r pkgsnip::param_lbl("max_cache_age")`
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return `r pkgsnip::return_lbl("tibble")` The column names are aligned with those of [rfrnds()] as closely as possible.
#' @family sudd
#' @importFrom rlang :=
#' @export
#'
#' @examples
#' rdb::rfrnd(id = "5bbc045192a21351232e596f")$id_sudd |> rdb::sudd_rfrnds()
#'
#' rdb::rfrnds(quiet = TRUE) |>
#' dplyr::filter(country_code == "AT") |>
#' rdb::sudd_rfrnds()
sudd_rfrnds <- function(ids_sudd,
use_cache = TRUE,
max_cache_age = "1 week",
quiet = FALSE) {
checkmate::assert_flag(quiet)
if (purrr::pluck_depth(ids_sudd) > 1L) {
if (!("id_sudd" %in% colnames(ids_sudd))) {
cli::cli_abort(paste0("{.arg ids_sudd} must be either a character vector of valid sudd.ch referendum identifiers or a data frame with a column of such",
" named {.var id_sudd}."))
}
ids_sudd <- ids_sudd$id_sudd
}
ids_sudd <-
checkmate::assert_character(ids_sudd,
all.missing = FALSE) %>%
magrittr::extract(!is.na(.))
pkgpins::with_cache(expr = {
ids_sudd %>%
purrr::map(.f = \(x) sudd_rfrnd(x),
.progress = if (quiet) FALSE else "Scraping referendum data from sudd.ch") %>%
purrr::list_rbind() %>%
# properly parse `date`
dplyr::bind_cols(.$date |>
purrr::map(parse_sudd_date) |>
purrr::list_rbind()) %>%
dplyr::mutate(date = clock::date_build(year = year,
month = month,
day = day,
invalid = "NA")) %>%
# add `id_sudd`
tibble::add_column(id_sudd = ids_sudd,
.before = 1L) %>%
# derive vars from `id_sudd`
dplyr::bind_cols(ids_sudd |>
purrr::map(parse_sudd_id) |>
purrr::list_rbind()) %>%
# reorder columns
dplyr::relocate(id_sudd,
country_code,
country_name,
is_former_country,
subnational_entity_code,
territory_name_de,
any_of(c("territory_type_de",
"date",
"year",
"month",
"day",
"title_de",
"question_type_de",
"types",
"result_de",
"result_status_de",
"adoption_requirements_de",
"electorate_total",
"electorate_abroad",
"polling_cards",
"votes_total",
"votes_empty",
"votes_void",
"votes_invalid",
"votes_valid",
"votes_yes",
"votes_no")),
matches("^votes_(option_\\d+|(counter_)?proposal)(_(total|empty|void|invalid|valid|yes|no))?$"),
any_of("votes_option_none"),
matches("^$"),
any_of(c("subterritories",
"subterritories_yes",
"subterritories_no",
"files",
"remarks",
"sources",
"ids_sudd_simultaneous",
"date_last_edited")))
},
pkg = this_pkg,
from_fn = "sudd_rfrnds",
ids_sudd,
use_cache = use_cache,
max_cache_age = max_cache_age)
}
#' Test RDB API availability
#'
#' Checks if the RDB API server is online and operational.
#'
#' @inheritParams url_api
#' @param quiet Whether or not to suppress printing a warning in case the API is unavailable.
#'
#' @return A logical scalar.
#' @family api_status
#' @export
is_online <- function(use_testing_server = pal::pkg_config_val(key = "use_testing_server",
pkg = this_pkg),
quiet = FALSE) {
checkmate::assert_flag(quiet)
result <- FALSE
response <- tryCatch(expr = httr::RETRY(verb = "GET",
url = url_api("health",
.use_testing_server = use_testing_server),
times = 3L),
error = function(e) e$message)
if (inherits(response, "response")) {
response %<>%
# ensure we actually got a plaintext response
pal::assert_mime_type(mime_type = "text/plain",
msg_suffix = mime_error_suffix) %>%
# parse response
httr::content(as = "text",
encoding = "UTF-8")
if (response == "OK") {
result <- TRUE
} else if (!quiet) {
cli::cli_alert_warning("RDB API server responded with: {.val {response}}")
}
} else {
cli::cli_alert_warning(response)
}
result
}
#' `r this_pkg` package configuration metadata
#'
#' A [tibble][tibble::tbl_df] with metadata of all possible `r this_pkg` package configuration options. See [pal::pkg_config_val()] for more information.
#'
#' @format `r pkgsnip::return_lbl("tibble_cols", cols = colnames(pkg_config))`
#' @export
#'
#' @examples
#' rdb::pkg_config
"pkg_config"
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.