Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----echo=FALSE, results="hide"-----------------------------------------------
library(vcr)
vcr_dir <- "fixtures"
have_api_access <- TRUE
if (!nzchar(Sys.getenv("IPUMS_API_KEY"))) {
if (dir.exists(vcr_dir) && length(dir(vcr_dir)) > 0) {
# Fake API token to fool ipumsr API functions
Sys.setenv("IPUMS_API_KEY" = "foobar")
} else {
# If there are no mock files nor API token, can't run API tests
have_api_access <- FALSE
}
}
vcr_configure(
filter_sensitive_data = list(
"<<<IPUMS_API_KEY>>>" = Sys.getenv("IPUMS_API_KEY")
),
write_disk_path = vcr_dir,
dir = vcr_dir
)
check_cassette_names()
# We do not expose detailed pagination options to users, but we do not want
# to save a full record of summary metadata in a .yml fixture for this
# vignette. This helper allows us to request just a few records, which
# we pretend is the full set of records for the purposes of the vignette.
get_truncated_metadata <- function(collection,
type,
page_size = 10,
max_pages = 1,
api_key = Sys.getenv("IPUMS_API_KEY")) {
url <- ipumsr:::api_request_url(
collection = collection,
path = ipumsr:::metadata_request_path(collection, type),
queries = list(pageNumber = 1, pageSize = page_size)
)
responses <- ipumsr:::ipums_api_paged_request(
url = url,
max_pages = max_pages,
delay = 0,
api_key = api_key
)
metadata <- purrr::map_dfr(
responses,
function(res) {
content <- jsonlite::fromJSON(
httr::content(res, "text"),
simplifyVector = TRUE
)
content$data
}
)
# Recursively convert all metadata data.frames to tibbles and all
# camelCase names to snake_case
ipumsr:::convert_metadata(metadata)
}
## ----message=FALSE------------------------------------------------------------
library(ipumsr)
library(dplyr)
library(purrr)
## ----echo=FALSE, results="hide", message=FALSE--------------------------------
insert_cassette("nhgis-metadata-summary")
## -----------------------------------------------------------------------------
ds <- get_metadata_nhgis(type = "datasets")
head(ds)
## -----------------------------------------------------------------------------
ds %>%
filter(
group == "1900 Census",
grepl("Agriculture", description)
)
## ----echo=FALSE, results="hide", message=FALSE--------------------------------
# Secretly get truncated number of tst records because otherwise the .yml
# fixture becomes very large.
# Make sure that any code that uses this metadata is consistent with the output
# that would be obtained were the entire metadata set loaded!
tst <- get_truncated_metadata("nhgis", "time_series_tables")
## ----eval=FALSE---------------------------------------------------------------
# tst <- get_metadata_nhgis("time_series_tables")
## -----------------------------------------------------------------------------
head(tst)
## -----------------------------------------------------------------------------
tst$years[[1]]
tst$geog_levels[[1]]
## -----------------------------------------------------------------------------
# Iterate over each `years` entry, identifying whether that entry
# contains "1840" in its `name` column.
tst %>%
filter(map_lgl(years, ~ "1840" %in% .x$name))
## ----echo=FALSE, results="hide", message=FALSE--------------------------------
eject_cassette("nhgis-metadata-summary")
## ----echo=FALSE, results="hide", message=FALSE--------------------------------
insert_cassette("nhgis-metadata-detailed")
## -----------------------------------------------------------------------------
cAg_meta <- get_metadata_nhgis(dataset = "1900_cAg")
## -----------------------------------------------------------------------------
cAg_meta$data_tables
cAg_meta$geog_levels
## -----------------------------------------------------------------------------
get_metadata_nhgis(dataset = "1900_cAg", data_table = "NT2")
## ----echo=FALSE, results="hide", message=FALSE--------------------------------
eject_cassette("nhgis-metadata-detailed")
## -----------------------------------------------------------------------------
cAg_meta$data_tables
## -----------------------------------------------------------------------------
dataset <- ds_spec(
"1900_cAg",
data_tables = c("NT1", "NT2"),
geog_levels = "state"
)
str(dataset)
## -----------------------------------------------------------------------------
nhgis_ext <- define_extract_nhgis(
description = "Example farm data in 1900",
datasets = dataset
)
nhgis_ext
## -----------------------------------------------------------------------------
define_extract_nhgis(
description = "Example time series table request",
time_series_tables = tst_spec(
"CW3",
geog_levels = c("county", "tract"),
years = c("1990", "2000")
)
)
## -----------------------------------------------------------------------------
define_extract_nhgis(
description = "Example shapefiles request",
shapefiles = c("us_county_2021_tl2021", "us_county_2020_tl2020")
)
## -----------------------------------------------------------------------------
define_extract_nhgis(
description = "Slightly more complicated extract request",
datasets = list(
ds_spec("2018_ACS1", "B01001", "state"),
ds_spec("2019_ACS1", "B01001", "state")
),
shapefiles = c("us_state_2018_tl2018", "us_state_2019_tl2019")
)
## -----------------------------------------------------------------------------
ds_names <- c("2019_ACS1", "2018_ACS1")
tables <- c("B01001", "B01002")
geogs <- c("county", "state")
# For each dataset to include, create a specification with the
# data tabels and geog levels indicated above
datasets <- purrr::map(
ds_names,
~ ds_spec(name = .x, data_tables = tables, geog_levels = geogs)
)
nhgis_ext <- define_extract_nhgis(
description = "Slightly more complicated extract request",
datasets = datasets
)
nhgis_ext
## ----eval=FALSE---------------------------------------------------------------
# nhgis_ext_submitted <- submit_extract(nhgis_ext)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.