R/census_vectors.R

Defines functions child_census_vectors parent_census_vectors list_census_vectors

Documented in child_census_vectors list_census_vectors parent_census_vectors

#' Query the CensusMapper API for available vectors for a given dataset.
#'
#' @param dataset The dataset to query for available vectors, e.g.
#'   \code{"CA16"}.
#' @param use_cache If set to TRUE (the default), data will be read from a local cache
#'   that is maintained for the duration of the R session, if
#'   available. If set to FALSE, query the API for the data, and
#'   refresh the local cache with the result.
#' @param quiet When FALSE, shows messages and warnings. Set to TRUE by default.
#'
#' @return
#' Returns a data frame detailing the available Census vectors (i.e. variables) for a given Census
#' dataset. This data frame has columns \code{vector} containing the short code for the
#' variable, \code{type} describing whether it's a female, male, or total aggregate, \code{label}
#' indicating the name of the variable, \code{units} indicating whether the value represents a
#' numeric integer, percentage, dollar figure, or ratio, \code{parent_vector} to show hierarchical
#' relationship, \code{aggregation} indicating whether the value is additive or a transformation,
#' and a column \code{details} with a detailed description of the variable generated by traversing
#' all labels within its hierarchical structure.
#'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' # List all vectors for a given Census dataset in CensusMapper
#' list_census_vectors('CA16')
#' }
list_census_vectors <- function(dataset, use_cache = TRUE, quiet = TRUE) {
  dataset <- translate_dataset(dataset)
  cache_file <- file.path(tempdir(),paste0(dataset, "_vectors.rda"))
  if (!use_cache || !file.exists(cache_file)) {
    url <- paste0(cancensus_base_url(),"/api/v1/vector_info/", dataset, ".csv")
    response <- if (!quiet) {
      message("Querying CensusMapper API for vectors data...")
      httr::GET(url, httr::progress())
    } else {
      httr::GET(url)
    }
    handle_cm_status_code(response, NULL)
    content <- httr::content(response, type = "text", encoding = "UTF-8")
    result <- if (!requireNamespace("readr", quietly = TRUE)) {
      dplyr::as_tibble(utils::read.csv(textConnection(content), stringsAsFactors = FALSE),
                       .name_repair = "minimal")
    } else {
      readr::read_csv(content,col_types=readr::cols(.default="c"))
    }
    result <- dplyr::mutate(
      result, type = factor(.data$type),
      units = factor(units, levels = as.character(1:6),
                     labels = c("Number", "Percentage ratio (0.0-1.0)",
                                "Currency", "Ratio", "Percentage (0-100)","Currency (1000s)")),
      aggregation = dplyr::case_when(
        add == "1" ~ "Additive",
        add == "0" ~ "Not additive",
        grepl("^2.", add) ~ gsub(".", ", ", gsub("^2\\.", "Average of ", add),
                                 fixed = TRUE),
        grepl("^3.", add) ~ gsub(".", ", ", gsub("^3\\.", "Median of ", add),
                                 fixed = TRUE),
        grepl("^4.", add) ~ gsub(".", ", ", gsub("^4\\.", "Average to ", add),
                                 fixed = TRUE),
        grepl("^9.", add) ~ gsub(".", ", ", gsub("^9\\.", "Standard error based on ", add),
                                 fixed = TRUE)
      )) %>%
      dplyr::select(.data$vector, .data$type, .data$label, .data$units,
                    parent_vector = .data$parent, .data$aggregation,
                    .data$details)
    attr(result, "last_updated") <- Sys.time()
    attr(result, "dataset") <- dataset
    save(result, file = cache_file)
    result
  } else {
    if (!quiet) message("Reading vector information from local cache.")
    load(file = cache_file)
    result
  }
}

#' List all parent variables from vector hierarchies given either a list of Census
#' variables returned by
#' \code{list_census_vectors}, \code{search_census_vectors}, \code{find_census_vectors}, or a direct string reference to the vector code.
#'
#' @param vector_list The list of vectors to be used, either a character vector or a filtered tibble
#' as returned from \code{list_census_vectors}.
#'
#' @export
#'
#' @examples
#' # Query parent vectors directly using vector identifier
#' parent_census_vectors("v_CA16_2519")
#' \dontrun{
#' # Example using multiple vectors coerced into a list
#' parent_census_vectors(c("v_CA16_2519","v_CA16_2520","v_CA16_2521"))
#'
#' # or, equivalently
#' selected_vectors <- c("v_CA16_2519","v_CA16_2520","v_CA16_2521")
#' parent_census_vectors(selected_vectors)
#'
#' # Example using dplyr and piped arguments
#' library(dplyr, warn.conflicts = FALSE)
#'
#' list_census_vectors("CA16") %>%
#'   filter(vector == "v_CA16_2519") %>%
#'   parent_census_vectors()
#' }
parent_census_vectors <- function(vector_list){
  dataset <- dataset_from_vector_list(vector_list)
  vector_list <- clean_vector_list(vector_list,dataset)
  base_list <- vector_list
  n=0
  vector_list <-
    list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
    dplyr::filter(vector %in% base_list$parent_vector) %>%
    dplyr::distinct(vector, .keep_all = TRUE)
  while (n!=nrow(vector_list)) {
    n=nrow(vector_list)
    new_list <- list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
      dplyr::filter(vector %in% vector_list$parent_vector)
    vector_list <- vector_list %>% rbind(new_list) %>%
      dplyr::distinct(vector, .keep_all = TRUE)
  }
  attr(vector_list, "dataset") <- dataset
  return(vector_list)
}

#' List all child variables from vector hierarchies given either a list of Census
#' variables returned by
#' \code{list_census_vectors}, \code{search_census_vectors}, \code{find_census_vectors}, or a direct string reference to the vector code.
#'
#' @param vector_list the list of vectors to be used, either a character vector or a filtered tibble
#'   as returned from \code{list_census_vectors}.
#' @param leaves_only boolean flag to indicate if only final leaf vectors should be returned,
#' i.e. terminal vectors that themselves do not have children.
#' @param max_level optional, maximum depth to look for child vectors. Default is \code{NA} and will return all
#' child census vectors.
#' @param keep_parent optional, also return parent vector in list of results. Default is set to \code{FALSE}.
#'
#' @export
#'
#' @examples
#' # Query parent vectors directly using vector identifier
#' child_census_vectors("v_CA16_2510")
#'
#' \dontrun{
#'
#' # Example using multiple vectors coerced into a list
#' child_census_vectors(c("v_CA16_2510","v_CA16_2511","v_CA16_2512"))
#'
#' # or, equivalently
#' selected_vectors <- c("v_CA16_2510","v_CA16_2511","v_CA16_2512")
#' child_census_vectors(selected_vectors)
#'
#' # Example using dplyr and piped arguments
#' library(dplyr, warn.conflicts = FALSE)
#'
#' list_census_vectors("CA16") %>%
#'   filter(vector == "v_CA16_2510") %>%
#'   child_census_vectors(TRUE)
#'
#' # this will return the equivalent of c("v_CA16_2510", child_census_vectors("v_CA16_2510"))
#' list_census_vectors("CA16") %>%
#'   filter(vector == "v_CA16_2510") %>%
#'   child_census_vectors(TRUE, keep_parent = TRUE)
#'}
#'
child_census_vectors <- function(vector_list, leaves_only=FALSE,max_level=NA,keep_parent = FALSE){
  vector_list <- clean_vector_list(vector_list)
  base_list <- vector_list
  dataset <- dataset_from_vector_list(vector_list)
  n <- 0
  child_level <- 1
  if (!is.null(dataset)) {
    vector_list <-
      list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
      dplyr::filter(.data$parent_vector %in% base_list$vector) %>%
      dplyr::distinct(vector, .keep_all = TRUE)
    while (n!=nrow(vector_list) && (is.na(max_level) || child_level<max_level)) {
      child_level <- child_level+1
      n=nrow(vector_list)
      new_list <- list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
        dplyr::filter(.data$parent_vector %in% vector_list$vector)
      vector_list <- vector_list %>% rbind(new_list) %>%
        dplyr::distinct(vector, .keep_all = TRUE)
    }
    # only keep leaves if leaves_only==TRUE
    if (leaves_only) {
      vector_list <- vector_list %>%
        dplyr::filter(!(vector %in% list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE)$parent_vector))
    }
    if (keep_parent) {
      vector_list <- dplyr::bind_rows(base_list,vector_list)
    }
    attr(vector_list, "dataset") <- dataset
  }
  return(vector_list)
}
mountainMath/cancensus documentation built on Feb. 11, 2024, 5:13 p.m.