Referendum-level variables

For the following variables, the unit of observation is the individual referendum.

#| label: referendum-level-vars
#| echo: false
#| results: asis
#| message: false

# setup
library(rlang)
library(magrittr)

rlang::check_installed("salim")

# determine whether Pandoc rendering is involved which supports extended Pandoc MD syntax (like footnotes)
to_pandoc <- checkmate::test_string(knitr::pandoc_to(),
                                    min.chars = 1L)
# define functions
## flatten list column values
flatten_list_col <- function(x) {

  x %>%
    unlist() %>%
    pal::when(is.character(.) && length(.) > 0L ~ pal::wrap_chr(.),
              ~ .)
}

## prettify value scale
prettify_value_scale <- function(x) {

  x %>%
    stringr::str_replace(pattern = "_(.*)",
                         replacement = " (\\1)") %>%
    dplyr::case_match(.x = .,
                      "binary" ~ "nominal (binary)",
                      .default = .)
}

## strip MD anchor links (recursively)
strip_anchors <- function(x) {

  if (purrr::pluck_depth(x) > 1L) {
    x %>% purrr::map(strip_anchors)
  } else if (is.character(x)) {
    x %>% stringr::str_replace_all(pattern = "(\\[)([^\\]]+?)(\\]\\(#[^\\)]+?\\))",
                                   replacement = "\\2")
  } else {
    x
  }
}

## assemble variable sections in Markdown
var_sections <- function(data,
                         # useful when target format doesn't support full Pandoc Markdown syntax like GFM in roxygen2 documentation
                         strip_extended_pandoc = FALSE,
                         strip_anchors = FALSE) {

  data %>% purrr::pmap(function(variable_name,
                                variable_name_print,
                                variable_name_unnested,
                                variable_name_unnested_print,
                                variable_label,
                                is_multi_valued,
                                is_nested,
                                applicability_constraint,
                                variable_values,
                                value_label_prefix,
                                value_labels,
                                value_label_suffix,
                                value_default,
                                value_scale,
                                ptype,
                                derived_from,
                                gen_fn,
                                is_opt) {

    variable_values %<>% flatten_list_col()
    value_default %<>% flatten_list_col()
    value_scale %<>% prettify_value_scale()
    value_labels %<>%
      unlist() %>%
      pal::when(is.na(value_label_prefix) ~ .,
                ~ paste(value_label_prefix, .)) %>%
      pal::when(is.na(value_label_suffix) ~ .,
                ~ paste(., value_label_suffix)) %>%
      pal::sentenceify() %>%
      salim::n_ify()

    if (strip_extended_pandoc) {
      variable_label %<>% pal::strip_md_footnotes()
      value_labels %<>% pal::strip_md_footnotes()
      value_label_prefix %<>% pal::strip_md_footnotes()
      value_label_suffix %<>% pal::strip_md_footnotes()
    }

    is_derived <- length(unlist(derived_from)) > 0L
    has_gen_fn <- !is.na(gen_fn)

    list(
      if (variable_name == "topics_tier_1") {
        c("### Topics",
          "",
          "The political topics to classify referendums are organized on a three-tier hierarchy as follows:",
          "",
          unique(rdb::data_topics$topic_tier_1) %>% purrr::map_chr(function(topic_tier_1) {

            result <- ifelse(strip_extended_pandoc,
                             paste0("\n- ", topic_tier_1),
                             paste0("\n- [", topic_tier_1, "]{.color-azure}"))

            result %<>% paste0(rdb::data_topics %>%
                                 dplyr::filter(topic_tier_1 == !!topic_tier_1) %$%
                                 topic_tier_2 %>%
                                 unique() %>%
                                 purrr::map_chr(function(topic_tier_2) {

                                   result <- ifelse(strip_extended_pandoc,
                                                    paste0("\n  - ", topic_tier_2),
                                                    paste0("\n  - [", topic_tier_2, "]{.color-violet}"))

                                   result %<>% paste0(rdb::data_topics %>%
                                                        dplyr::filter(topic_tier_2 == !!topic_tier_2) %$%
                                                        topic_tier_3 %>%
                                                        setdiff(NA_character_) %>%
                                                        purrr::map_chr(~ ifelse(strip_extended_pandoc,
                                                                                paste0("\n    - ", .x),
                                                                                paste0("\n    - [", .x, "]{.color-rose}"))) %>%
                                                        paste0(collapse = ""))
                                   result
                                 }) %>%
                                 paste0(collapse = ""))
            result
          }),
          "",
          paste0("<sup>[`topics_tier_1`](#topics_tier_1){.color-azure} are colored in [azure]{.color-azure}, [`topics_tier_2`](#topics_tier_2){.color-violet} ",
                 "in [violet]{.color-violet} and [`topics_tier_3`](#topics_tier_3){.color-rose} in [rose]{.color-rose}.</sup>")[!strip_extended_pandoc],
          "",
          paste0("The hierarchical relations between the three topic variables `topics_tier_1`, `topics_tier_2` and `topics_tier_3` can be reconstructed at ",
                 "any time using ",
                 ifelse(strip_extended_pandoc,
                        "[hierarchize_topics()]",
                        "[`hierarchize_topics()`](https://rdb.rpkg.dev/reference/hierarchize_topics.html)"),
                 ". This function can also be used to simply determine the ",
                 "parent topic(s) of any topic."),
          "")
      },
      c("::: {.codebook-item}",
        "")[to_pandoc],
      glue::glue(ifelse(stringr::str_detect(string = variable_name,
                                            pattern = "^topics_tier_\\d+$"),
                        "#",
                        ""),
                 "### `{variable_name}`"),
      "",
      variable_label %>% pal::sentenceify(),
      "",
      if (is_derived) {
        c("#### Origin",
          "",
          paste0("This variable is derived from ", pal::enum_str(rdb:::md_link_codebook(unlist(derived_from))),
                 ifelse(has_gen_fn,
                        paste0(" and can be generated on demand via ",
                               glue::glue(ifelse(strip_extended_pandoc,
                                                 "[{gen_fn}()]",
                                                 "[`{gen_fn}()`](https://rdb.rpkg.dev/reference/{gen_fn}.html)"))),
                        ""),
                 "."),
          "")
      },
      if (length(variable_values)) {
        c(paste0("#### Possible values", glue::glue(" {{#{variable_name}-values}}")[!strip_extended_pandoc]),
          "",
          if (length(value_labels)) {
            c("| Value | Meaning |",
              "| :---- | :------ |",
              variable_values %>% purrr::map2_chr(.y = value_labels,
                                                  .f = ~ glue::glue("| `{.x}` | {.y} |")))
          } else {
            c("| Value |",
              "| :---- |",
              variable_values %>% purrr::map_chr(~ glue::glue("| `{.x}` |")))
          },
          # if (!is.null(value_default)) c("", glue::glue("The default value is: `{value_default}`")),
          "")
      },
      if (is_multi_valued) {
        c(paste0("_This is a multi-value variable", "[^multi-val-var]"[!strip_extended_pandoc], " and thus of type `list`. When unnested via ",
                 "[`unnest_var()`](https://rdb.rpkg.dev/reference/unnest_var.html), its name becomes `{variable_name_unnested}`._"),
          "")
      },
      if (!is.na(applicability_constraint)) {
        c(paste0("#### Applicability constraint", glue::glue(" {{#{variable_name}-constraint}}")[!strip_extended_pandoc]),
          "",
          "This variable is only applicable under the following condition:",
          "",
          "```r",
          applicability_constraint,
          "```",
          "")
      },
      if (value_scale != "undefined") {
        c(paste0("#### Level of measurement", glue::glue(" {{#{variable_name}-scale}}")[!strip_extended_pandoc]),
          "",
          glue::glue("This variable's [scale of measure](https://en.wikipedia.org/wiki/Level_of_measurement) is the **{value_scale}** level."),
          "")
      },
      c(":::",
        "")[to_pandoc]
    ) %>%
      pal::when(strip_anchors ~ strip_anchors(.),
                ~ .)
  })
}

rdb::data_codebook %>%
  dplyr::filter(stringr::str_detect(string = variable_name,
                                    pattern = "^inst_",
                                    negate = TRUE)) %>%
  var_sections(strip_extended_pandoc = !to_pandoc,
               strip_anchors = !to_pandoc) %>%
  pal::cat_lines()

Institution-level variables

#| label: institution-level-vars
#| echo: false
#| results: asis

"For the following variables, the [unit of observation](https://en.wikipedia.org/wiki/Unit_of_observation) is the referendum's *institutional instrument*, a
combination of the referendum's [`type`](#type) and the jurisdiction it took place.

The set of variables that constitute the *geographical* component of the jurisdiction is dependent on the referendum's [`level`](#level):

| `level`       | Geographical jurisdiction is given by                                                                                     |
|:--------------|:--------------------------------------------------------------------------------------------------------------------------|
| `national`    | [`country_code`](#country_code)                                                                                           |
| `subnational` | [`country_code`](#country_code) + [`subnational_entity_name`](#subnational_entity_name)                                   |
| `local`       | [`country_code`](#country_code) + [`subnational_entity_name`](#subnational_entity_name) + [`municipality`](#municipality) |

The actual jurisdiction is also dependent on a *temporal* component since the legal setting in a geographical jurisdiction can vary over time. Thus, the actual
jurisdicion is dependent on the geographical jurisdiction plus the [`date`](#date) a referendum took place (more specifically the date *range* in between a
specific legal setting is in force).

" %>%
  pal::when(!to_pandoc ~ strip_anchors(.),
            ~ .) %>%
  cat()

rdb::data_codebook %>%
  dplyr::filter(stringr::str_detect(string = variable_name,
                                    pattern = "^inst_")) %>%
  var_sections(strip_extended_pandoc = !to_pandoc,
               strip_anchors = !to_pandoc) %>%
  pal::cat_lines()
#| label: footnotes
#| echo: false
#| results: asis
#| include: !expr to_pandoc

cat("
[^multi-val-var]: A multi-value variable allows for more than one value per observation and thus breaks with the [tidy-data
    convention](https://tidyr.tidyverse.org/articles/tidy-data.html). Use `rdb::unnest_var()` to convert data containing such a variable to long format. To
    unnest `topics_tier_1` to `topic_tier_1` for example, use:

    \`\`\`r
    rdb::rfrnds() |> rdb::unnest_var(var = topics_tier_1)
    \`\`\`
")


zdaarau/c2d documentation built on Dec. 18, 2024, 1:24 p.m.