Nothing
#'
#' Download and load LODES data into a data frame (tibble)
#'
#' @param state US state abbreviation in lower case, as character.
#' Can be a vector of states, like c("or","md","tx") for Oregon, Maryland,
#' and Texas.
#' @param year Year of the lodes data, as numerical.
#' Can be a vector of years, like c(2014, 2020) for 2014 and 2020.
#' @param version The LODES version to use.
#' Version 8 (the default, use "LODES8") is enumerated at 2020 Census blocks.
#' "LODES7" is enumerated at 2010 Census blocks, but ends in 2019;
#' LODES5" is enumerated at 2000 Census blocks, but ends in 2009.
#' @param lodes_type The LODES table type.
#' Values can be the default origin-destination ("od"),residential association
#' ("rac"), or workplace association ("wac").
#' od files give a home and destination census block for workers. Residential
#' files give job totals of worker home census blocks and workplace files
#' give job totals of worker job census blocks
#' @param job_type Jobtype: "JT00" for all jobs, "JT01" for Primary Jobs,
#' "JT02" for All Private Jobs, "JT03" for Private Primary jobs, "JT04" for
#' All Federal jobs, "JT05" for Federal Primary jobs
#' @param segment Segment of the workforce.
#' "S000" total number of jobs for workers, "SA01" number of jobs for workers
#' aged 29 or younger, "SA02" number of jobs for workers aged 30-54,"SA03"
#' number of jobs for workers 55 and older, "SE01" number of jobs with
#' earnings $1,250/month or less, "SE02" number of jobs with earnings $1,251
#' to $3,333/month, "SE03" number of jobs with earnings greater than
#' $3,333/month, "SI01" number of jobs in Goods Producing industry sectors,
#' "SI02" number of jobs in Trade, Transportation, & Utilities industry
#' sectors, "SI03" number of jobs in All Other Services industry sectors.
#' @param state_part Part of the state file, required for "od" lodes_type.
#' Can have values of "main" or "aux" in OD files. Using "main" includes
#' workers with their workplace and residence in the state. Using "aux"
#' includes workers with residences out of state and workplace in the state
#' of interest
#' @param agg_geo Aggregate to a geography other than Census Block (default).
#' Values can be "bg" for block group, "tract", "county", or "state".
#' @param download_dir Directory where lodes table will be downloaded.
#' @param use_cache Boolean indicating whether or not to check whether or not
#' the file had already been downloaded. Defaults to FALSE. Setting to TRUE
#' will allow lehdr to reuse existing files that were already downloaded.
#'
#' @description Download LODES OD, RAC, and WAC tables
#' @return a dataframe (tibble) of block or tract level LODES files
#' @import dplyr
#' @import magrittr
#' @import httr2
#' @importFrom readr read_csv cols col_character
#' @importFrom glue glue
#' @importFrom stats na.omit
#' @importFrom stringr str_sub str_extract
#'
#' @examples
#' \donttest{
#' # download and load 2014 block level O-D data for Vermont
#' blk_df_or_od <- grab_lodes(state = 'vt', year = 2014, lodes_type = "od", job_type = "JT01",
#' segment = "SA01", state_part = "main")
#'
#' # download and load 2014 O-D data for Vermont and aggregate
#' # to the tract level
#' trt_df_or_od <- grab_lodes(state = 'vt', year = 2014, lodes_type = "od", job_type = "JT01",
#' segment = "SA01", state_part = "main", agg_geo = "tract")
#'
#' # download and load 2020 RAC data for Vermont and aggregate
#' # to the tract level
#' trt_df_or_rac <- grab_lodes(state = 'vt', year = 2014, lodes_type = "rac", job_type = "JT01",
#' segment = "SA01", agg_geo = "tract")
#'
#' # download and load 2020 WAC data for Vermont and aggregate
#' # to the tract level
#' trt_df_or_wac <- grab_lodes(state = 'vt', year = 2014, lodes_type = "wac", job_type = "JT01",
#' segment = "SA01", agg_geo = "tract")
#' }
#' @export
grab_lodes <- function(state, year,
version = c("LODES8", "LODES7", "LODES5"),
lodes_type = c("od", "rac", "wac"),
job_type = c("JT00", "JT01", "JT02", "JT03", "JT04", "JT05"),
segment = c("S000", "SA01", "SA02", "SA03", "SE01", "SE02",
"SE03", "SI01", "SI02", "SI03"),
agg_geo = c("block", "bg", "tract", "county", "state"),
state_part = c("","main","aux"),
download_dir = normalizePath(file.path(tools::R_user_dir("lehdr", which="cache")),
mustWork = FALSE),
use_cache = FALSE) { # Thanks to Kyle Walker for this
if (length(state) > 1 | length(year) > 1) {
## Handle multiple states x years
state_year <- expand.grid(state=state, year=year)
results <- apply(state_year, 1, function(df_row) grab_lodes(df_row[1], df_row[2],
version = version,
lodes_type = lodes_type,
job_type = job_type,
segment = segment,
agg_geo = agg_geo,
state_part = state_part,
download_dir = download_dir)
)
return(dplyr::bind_rows(results))
}
# Format inputs
state <- tolower(state)
# Handle errors and set default arguments
version <- rlang::arg_match(version)
lodes_type <- rlang::arg_match(lodes_type)
agg_geo_to <- rlang::arg_match(agg_geo)
job_type <- rlang::arg_match(job_type)
segment <- rlang::arg_match(segment)
state_part <- rlang::arg_match(state_part)
# Only proceed if use_cache is boolean
if(!is.logical(use_cache)) { stop("The use_cache paramater must be either TRUE or FALSE") }
# If someone uses od, but doesn't set state_part
if(lodes_type == "od" && !(state_part %in% c("main","aux"))) {
state_part <- "main"
warning("'state_part' is required when setting lodes_type =\"od\", defaulting to state_part=\"main\"")
}
# Setup col_types and url variables, used in the if statement
cols_types <- cols()
url <- as.character("")
# Read LODES_types and set col_types
if (lodes_type == "od") {
# Set url for od
url <- glue::glue("https://lehd.ces.census.gov/data/lodes/{version}/{state}/{lodes_type}/{state}_{lodes_type}_{state_part}_{job_type}_{year}.csv.gz")
# Set column types for od
col_types <- cols(w_geocode = col_character(),
h_geocode = col_character(),
createdate = col_character())
} else {
# Set url for rac/wac
url <- glue::glue("https://lehd.ces.census.gov/data/lodes/{version}/{state}/{lodes_type}/{state}_{lodes_type}_{segment}_{job_type}_{year}.csv.gz")
# Set column types for rac/wac -- h_ is home, w_ is work (from LODES)
if (lodes_type == "rac") {
col_types <- cols(h_geocode = col_character(),
createdate = col_character())
} else if (lodes_type == "wac") {
col_types <- cols(w_geocode = col_character(),
createdate = col_character())
}
}
# Set download directory and file name
if (!dir.exists(download_dir)) {
dir.create(download_dir, recursive=TRUE)
}
fil <- normalizePath(file.path(download_dir, basename(url)), mustWork = FALSE)
# Create httr2 request from url
lodes_req <- request(url)
# Check for cache
if(use_cache) { # User set use_cache to TRUE
if(file.exists(fil)) { # If there is a cached file, use it
rlang::inform(glue::glue("Using cached version of file found in: {fil}"))
} else { # No cached file
# Perform request and handle connection errors, writing response to disk
withCallingHandlers(
lodes_resp <- lodes_req |>
req_error(is_error = \(lodes_resp) FALSE) |>
req_perform(path = fil),
httr2_failure = function(cnd) {
rlang::abort(c("lehdr: Could not establish connection to LODES FTP server.",
"i" = "Please check internet connection."))
}
)
if(lodes_resp$status_code >= 400) {
rlang::abort(c(glue::glue("lehdr: Server error while downloading: {lodes_req$url}"),
"i" = glue::glue("{lodes_resp$status_code} status returned."),
"i" = "Please consult the most recent LEHD Technical Document to verify state/year combination availability.",
"i" = "https://lehd.ces.census.gov/data/lodes/8/"))
} else if(length(lodes_resp$body) < 1) {
rlang::abort(c(glue::glue("lehdr: Connection error while downloading: {lodes_req$url}"),
"i" = "Empty response."))
} else {
rlang::inform(glue::glue("Download complete for {fil}"))
}
}
} else { # User did not allow cache to be used
if(file.exists(fil)) { # But there is a cached file
# Existing file found, inform user of use_cache
rlang::inform(glue::glue("Cached version of file found in: {fil}"))
rlang::inform(glue::glue("Consider setting use_cache=TRUE to use previously downloaded files."))
}
# Download (and overwite if necessary) data from server
rlang::inform(glue::glue("Downloading {url} to {fil} now..."))
# Perform request and handle connection errors, writing response to disk
withCallingHandlers(
lodes_resp <- lodes_req |>
req_error(is_error = \(lodes_resp) FALSE) |>
req_perform(path = fil),
httr2_failure = function(cnd) {
rlang::abort(c("lehdr: Could not establish connection to LODES FTP server.",
"i" = "Please check internet connection."))
}
)
if(lodes_resp$status_code >= 400) {
rlang::abort(c(glue::glue("lehdr: Server error while downloading: {lodes_req$url}"),
"i" = glue::glue("{lodes_resp$status_code} status returned."),
"i" = "Please consult the most recent LEHD Technical Document to verify state/year combination availability.",
"i" = "https://lehd.ces.census.gov/data/lodes/8/"))
} else if(length(lodes_resp$body) < 1) {
rlang::abort(c(glue::glue("lehdr: Connection error while downloading: {lodes_req$url}"),
"i" = "Empty response."))
} else {
rlang::inform(glue::glue("Download complete for {fil}"))
}
}
# Read in the data
lehdr_df <- suppressMessages(readr::read_csv(fil, col_types = col_types))
# Remove temp files if the user did not set use_cache = TRUE
if(!use_cache) {
if(unlink(fil)) { # 0 for success, 1 for failure, invisibly.
rlang::inform(glue::glue("Could not clear {fil} from cache."))
} else {
rlang::inform(glue::glue("{fil} cleared from cache."))
# Now check to see if the cache directory is empty, remove it if it is
if(length(list.files(download_dir)) == 0) {
unlink(download_dir, recursive = TRUE)
}
}
}
# Mutate the data based on year and state
lehdr_df <- lehdr_df %>% mutate(
year=year,
state=toupper(state))
# Group by geography
# If the agg_geo_to variable is "block" or NULL (not passed via args),
# then skip this step as the data is already in block format
if(agg_geo_to != "block" && !is.null(agg_geo_to)) {
id_len <- switch(agg_geo_to,
"bg" = 12,
"tract" = 11,
"county" = 5,
"state" = 2)
lehdr_df <- aggregate_lodes_df(lehdr_df, id_len, agg_geo_to)
}
return(lehdr_df)
}
#'
#' Aggregate data to a certain level dictated by inputs. Internal function.
#'
#' @param lehdr_df Data frame (tibble) to be aggregated
#' @param geoid_to The number of characters to do the aggregation on from the geoid
#' @param aggname What the level is called, like "tract" etc
#' @description Helper function for lehdr which aggregates block geographies based on the block id.
#' @import dplyr
#' @importFrom glue glue
#' @importFrom stats na.omit
#' @importFrom stringr str_sub str_extract
aggregate_lodes_df <- function(lehdr_df, geoid_to, aggname) {
lehdr_df <- lehdr_df %>%
mutate(across(ends_with("_geocode"),
~stringr::str_sub(., 1, geoid_to))) %>%
rename_with(~stringr::str_replace(., "geocode", aggname),
ends_with("_geocode")
)
## Group by column(s) ending with "_{aggname}" as passed
geoid_cols <- stringr::str_extract(names(lehdr_df), glue::glue(".*_{aggname}$")) %>% na.omit()
group_cols <- c("year", "state", geoid_cols)
lehdr_df <- lehdr_df %>%
group_by(across({{group_cols}})) %>%
summarise_if(is.numeric, sum) %>%
ungroup()
return(lehdr_df)
}
## quiets concerns of R CMD check re: non-standard evaluation via tidyverse
## per suggestion of Hadley Wickham (https://stackoverflow.com/a/12429344/688693)
if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.