#' Query the CensusMapper API for available vectors for a given dataset.
#'
#' @param dataset The dataset to query for available vectors, e.g.
#' \code{"CA16"}.
#' @param use_cache If set to TRUE (the default), data will be read from a local cache
#' that is maintained for the duration of the R session, if
#' available. If set to FALSE, query the API for the data, and
#' refresh the local cache with the result.
#' @param quiet When FALSE, shows messages and warnings. Set to TRUE by default.
#'
#' @return
#' Returns a data frame detailing the available Census vectors (i.e. variables) for a given Census
#' dataset. This data frame has columns \code{vector} containing the short code for the
#' variable, \code{type} describing whether it's a female, male, or total aggregate, \code{label}
#' indicating the name of the variable, \code{units} indicating whether the value represents a
#' numeric integer, percentage, dollar figure, or ratio, \code{parent_vector} to show hierarchical
#' relationship, \code{aggregation} indicating whether the value is additive or a transformation,
#' and a column \code{details} with a detailed description of the variable generated by traversing
#' all labels within its hierarchical structure.
#'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' # List all vectors for a given Census dataset in CensusMapper
#' list_census_vectors('CA16')
#' }
list_census_vectors <- function(dataset, use_cache = TRUE, quiet = TRUE) {
dataset <- translate_dataset(dataset)
cache_file <- file.path(tempdir(),paste0(dataset, "_vectors.rda"))
if (!use_cache || !file.exists(cache_file)) {
url <- paste0(cancensus_base_url(),"/api/v1/vector_info/", dataset, ".csv")
response <- if (!quiet) {
message("Querying CensusMapper API for vectors data...")
httr::GET(url, httr::progress())
} else {
httr::GET(url)
}
handle_cm_status_code(response, NULL)
content <- httr::content(response, type = "text", encoding = "UTF-8")
result <- if (!requireNamespace("readr", quietly = TRUE)) {
dplyr::as_tibble(utils::read.csv(textConnection(content), stringsAsFactors = FALSE),
.name_repair = "minimal")
} else {
readr::read_csv(content,col_types=readr::cols(.default="c"))
}
result <- dplyr::mutate(
result, type = factor(.data$type),
units = factor(units, levels = as.character(1:6),
labels = c("Number", "Percentage ratio (0.0-1.0)",
"Currency", "Ratio", "Percentage (0-100)","Currency (1000s)")),
aggregation = dplyr::case_when(
add == "1" ~ "Additive",
add == "0" ~ "Not additive",
grepl("^2.", add) ~ gsub(".", ", ", gsub("^2\\.", "Average of ", add),
fixed = TRUE),
grepl("^3.", add) ~ gsub(".", ", ", gsub("^3\\.", "Median of ", add),
fixed = TRUE),
grepl("^4.", add) ~ gsub(".", ", ", gsub("^4\\.", "Average to ", add),
fixed = TRUE),
grepl("^9.", add) ~ gsub(".", ", ", gsub("^9\\.", "Standard error based on ", add),
fixed = TRUE)
)) %>%
dplyr::select(.data$vector, .data$type, .data$label, .data$units,
parent_vector = .data$parent, .data$aggregation,
.data$details)
attr(result, "last_updated") <- Sys.time()
attr(result, "dataset") <- dataset
save(result, file = cache_file)
result
} else {
if (!quiet) message("Reading vector information from local cache.")
load(file = cache_file)
result
}
}
#' List all parent variables from vector hierarchies given either a list of Census
#' variables returned by
#' \code{list_census_vectors}, \code{search_census_vectors}, \code{find_census_vectors}, or a direct string reference to the vector code.
#'
#' @param vector_list The list of vectors to be used, either a character vector or a filtered tibble
#' as returned from \code{list_census_vectors}.
#'
#' @export
#'
#' @examples
#' # Query parent vectors directly using vector identifier
#' parent_census_vectors("v_CA16_2519")
#' \dontrun{
#' # Example using multiple vectors coerced into a list
#' parent_census_vectors(c("v_CA16_2519","v_CA16_2520","v_CA16_2521"))
#'
#' # or, equivalently
#' selected_vectors <- c("v_CA16_2519","v_CA16_2520","v_CA16_2521")
#' parent_census_vectors(selected_vectors)
#'
#' # Example using dplyr and piped arguments
#' library(dplyr, warn.conflicts = FALSE)
#'
#' list_census_vectors("CA16") %>%
#' filter(vector == "v_CA16_2519") %>%
#' parent_census_vectors()
#' }
parent_census_vectors <- function(vector_list){
dataset <- dataset_from_vector_list(vector_list)
vector_list <- clean_vector_list(vector_list,dataset)
base_list <- vector_list
n=0
vector_list <-
list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
dplyr::filter(vector %in% base_list$parent_vector) %>%
dplyr::distinct(vector, .keep_all = TRUE)
while (n!=nrow(vector_list)) {
n=nrow(vector_list)
new_list <- list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
dplyr::filter(vector %in% vector_list$parent_vector)
vector_list <- vector_list %>% rbind(new_list) %>%
dplyr::distinct(vector, .keep_all = TRUE)
}
attr(vector_list, "dataset") <- dataset
return(vector_list)
}
#' List all child variables from vector hierarchies given either a list of Census
#' variables returned by
#' \code{list_census_vectors}, \code{search_census_vectors}, \code{find_census_vectors}, or a direct string reference to the vector code.
#'
#' @param vector_list the list of vectors to be used, either a character vector or a filtered tibble
#' as returned from \code{list_census_vectors}.
#' @param leaves_only boolean flag to indicate if only final leaf vectors should be returned,
#' i.e. terminal vectors that themselves do not have children.
#' @param max_level optional, maximum depth to look for child vectors. Default is \code{NA} and will return all
#' child census vectors.
#' @param keep_parent optional, also return parent vector in list of results. Default is set to \code{FALSE}.
#'
#' @export
#'
#' @examples
#' # Query parent vectors directly using vector identifier
#' child_census_vectors("v_CA16_2510")
#'
#' \dontrun{
#'
#' # Example using multiple vectors coerced into a list
#' child_census_vectors(c("v_CA16_2510","v_CA16_2511","v_CA16_2512"))
#'
#' # or, equivalently
#' selected_vectors <- c("v_CA16_2510","v_CA16_2511","v_CA16_2512")
#' child_census_vectors(selected_vectors)
#'
#' # Example using dplyr and piped arguments
#' library(dplyr, warn.conflicts = FALSE)
#'
#' list_census_vectors("CA16") %>%
#' filter(vector == "v_CA16_2510") %>%
#' child_census_vectors(TRUE)
#'
#' # this will return the equivalent of c("v_CA16_2510", child_census_vectors("v_CA16_2510"))
#' list_census_vectors("CA16") %>%
#' filter(vector == "v_CA16_2510") %>%
#' child_census_vectors(TRUE, keep_parent = TRUE)
#'}
#'
child_census_vectors <- function(vector_list, leaves_only=FALSE,max_level=NA,keep_parent = FALSE){
vector_list <- clean_vector_list(vector_list)
base_list <- vector_list
dataset <- dataset_from_vector_list(vector_list)
n <- 0
child_level <- 1
if (!is.null(dataset)) {
vector_list <-
list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
dplyr::filter(.data$parent_vector %in% base_list$vector) %>%
dplyr::distinct(vector, .keep_all = TRUE)
while (n!=nrow(vector_list) && (is.na(max_level) || child_level<max_level)) {
child_level <- child_level+1
n=nrow(vector_list)
new_list <- list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE) %>%
dplyr::filter(.data$parent_vector %in% vector_list$vector)
vector_list <- vector_list %>% rbind(new_list) %>%
dplyr::distinct(vector, .keep_all = TRUE)
}
# only keep leaves if leaves_only==TRUE
if (leaves_only) {
vector_list <- vector_list %>%
dplyr::filter(!(vector %in% list_census_vectors(dataset, use_cache = TRUE, quiet = TRUE)$parent_vector))
}
if (keep_parent) {
vector_list <- dplyr::bind_rows(base_list,vector_list)
}
attr(vector_list, "dataset") <- dataset
}
return(vector_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.