Nothing
#' Generic BLS Dataset Download
#'
#' This function generalizes a method to download all BLS data for a given time series database.
#' These files are accessed from https://download.bls.gov/pub/time.series/ and several datasets
#' are available. A summary of an identified database can be generated using the `bls_overiew()`
#' function. When multiple potential data files exist (common in large data sets), the function
#' will prompt for an input of which file to use.
#'
#' @param database_code This is the two digit character identifier for the desired database.
#' Some Valid options are:
#' \itemize{
#' \item "ce" - National Current Employment Statistics Data
#' \item "sm" - State and Metro area Current Employment Statistics Data
#' \item "mp" - Major Sector Total Factor Productivity
#' \item "ci" - Employment Cost Index
#' \item "eb" - Employee Benefits Survey
#' }
#'
#' @param return_full This argument defaults to FALSE. If set to TRUE it will return
#' a list of the elements of data retrieved from the BLS separating the data, series, and
#' mapping values downloaded.
#'
#' @param simplify_table This parameter defaults to TRUE. When TRUE it will remove all
#' columns from the date with "_code" in the column name, as well as a series of internal
#' identifiers which provide general information about the series but which are not needed for
#' performing time series analysis. This parameter also converts the column "value" to numeric
#' and generates a date column from the year and period columns in the data.
#'
#' @param suppress_warnings Logical. If TRUE, suppress individual download warnings during processing.
#'
#' @param which_data Character string or NULL. Defaults to NULL.
#' \itemize{
#' \item "all" - Automatically selects the data file containing ".1.All" (e.g., "bd.data.1.AllItems" or "le.data.1.AllData").
#' \item "current" - Automatically selects the data file containing "Current" (e.g., "ce.data.0.Current").
#' \item NULL - Default behavior. Prompts the user to select a file if multiple exist, or selects the single available file.
#' }
#' If the requested pattern is not found, the function falls back to the default behavior, prompting the user to select a file.
#'
#' @param cache Logical. Uses USE_BLS_CACHE environment variable, or defaults to FALSE. If TRUE, will download a cached file from BLS server and update cache if BLS server indicates an updated file.
#'
#' @returns This function will return either a bls_data_collection object (if return_full is FALSE or not provided)
#' or a named list of the returned data including the bls_data_collection object.
#'
#' @export
#' @importFrom data.table data.table
#' @importFrom data.table fcase
#' @importFrom data.table rbindlist
#' @importFrom data.table :=
#' @importFrom rvest read_html html_elements html_attr
#' @importFrom httr GET add_headers stop_for_status content
#' @importFrom dplyr left_join
#' @importFrom stats setNames
#' @importFrom utils head
#'
#' @examples
#' \dontrun{
#' # Import All Data
#' fm_import <- load_bls_dataset("fm", which_data = "all")
#'
#' # Get $data element
#' fm_data <- fm_import$data
#'
#' # Filter to a Series
#' # Families with Children Under 6 and No Employed Parent
#'
#' u6_no_emp <- fm_data |>
#' dplyr::filter(series_title == "Total families with children under 6 - with no parent employed") |>
#' dplyr:: select(year, value, fchld_text, fhlf_text, tdat_text)
#'
#'
#' head(u6_no_emp)
#' }
#'
#' \dontrun{
#' # Examples requiring manual intervention in the console
#' # Download Employer Cost Index Data
#' cost_index <- load_bls_dataset("ci")
#'
#' # Download separated data, series, and mapping columns
#' benefits <- load_bls_dataset("eb", return_full = TRUE)
#'
#' # Download data without removing excess columns and value conversions
#' productivity <- load_bls_dataset("mp", simplify_table = FALSE)
#'
#' # Check for download issues
#' if (has_bls_issues(cost_index)) {
#' print_bls_warnings(cost_index, detailed = TRUE)
#' }
#'
#' }
load_bls_dataset <- function(database_code, return_full = FALSE, simplify_table = TRUE, suppress_warnings = FALSE, which_data = NULL, cache = check_bls_cache_env()) {
# Validate inputs
if (!is.character(database_code) || length(database_code) != 1) {
stop("database_code must be a single character string")
}
# Validate which_data input
if (!is.null(which_data) && (!is.character(which_data) || length(which_data) != 1 || !which_data %in% c("all", "current"))) {
stop("which_data must be NULL, 'all', or 'current'")
}
base_url <- sprintf("https://download.bls.gov/pub/time.series/%s/", database_code)
# Function to scrape directory contents with proper headers
get_directory_files <- function(url, prefix) {
tryCatch({
# Set up headers to avoid 403 errors
headers <- get_bls_headers()
# Make request with headers
response <- httr::GET(url, httr::add_headers(.headers = headers))
httr::stop_for_status(response)
# Exit function if download failed.
if(is.null(downloads)){
stop("Download of BLS data failed. Please run with suppress_warnings = FALSE for additional status messages. Consider setting the BLS_USER_AGENT environment variable to your email address to avoid Status 403 errors from BLS.")
}
# Parse HTML content
page <- rvest::read_html(httr::content(response, as = "text"))
links <- rvest::html_elements(page, "a")
hrefs <- rvest::html_attr(links, "href")
# Extract just the filename from the full path
# hrefs will be like "/pub/time.series/ce/ce.data.0.AllCESSeries"
filenames <- basename(hrefs)
# Filter for files that start with the prefix and exclude unwanted extensions
valid_files <- filenames[
grepl(paste0("^", prefix, "\\."), filenames) &
!grepl("\\.(contacts|txt|footnote)$", filenames) &
!is.na(filenames) &
filenames != ""
]
return(valid_files)
}, error = function(e) {
stop("Could not access BLS directory: ", url, "\nError: ", e$message)
})
}
# Get all valid files from the directory
file_names <- get_directory_files(base_url, database_code)
if (length(file_names) == 0) {
stop("No valid files found in the BLS database directory for code: ", database_code)
}
# Create file table and classify by pattern
file_table <- data.table(file_name = file_names)
file_table[, file_type := fcase(
grepl("\\.data\\.", file_name), "data",
grepl("\\.series($|\\.)", file_name), "series",
grepl("\\.aspect($|\\.)", file_name), "aspect",
default = "mapping"
)]
# Identify files
mapping_files <- file_table[file_type == "mapping", file_name]
data_files <- file_table[file_type == "data", file_name]
series_file <- file_table[file_type == "series", file_name]
aspect_files <- file_table[file_type == "aspect", file_name]
if (length(series_file) == 0) {
stop("Could not find a series file in the BLS database directory.")
}
# Handle multiple series files (prompt user to choose)
if (length(series_file) > 1) {
message("Multiple series files found. Please select a file to load:\n")
for (i in seq_along(series_file)) {
message(i, ": ", series_file[i], "\n")
}
# Get user input for series file selection
selected_series_index <- as.integer(readline(prompt = "Enter the number of the series file you want to load: "))
# Validate the input
if (is.na(selected_series_index) || selected_series_index < 1 || selected_series_index > length(series_file)) {
stop("Invalid selection. Please run the function again and enter a valid number.")
}
# Get the selected series file name
series_file <- series_file[selected_series_index]
message("Loading series file:", series_file, "\n")
} else if (length(series_file) == 1) {
message("Loading series file:", series_file, "\n")
}
# --- Logic for data file selection ---
selected_data_file <- NULL
# 1. Attempt Auto-selection if requested
if (!is.null(which_data)) {
pattern <- NULL
if (which_data == "all") {
pattern <- ".1.All"
} else if (which_data == "current") {
pattern <- "Current"
}
if (!is.null(pattern)) {
# Look for files containing the pattern (case insensitive)
# We check if pattern is in the filename
matches <- grep(pattern, data_files, ignore.case = TRUE, value = TRUE)
if (length(matches) > 0) {
# Use the first match
selected_data_file <- matches[1]
message("Auto-selected '", which_data, "' file: ", selected_data_file, "\n")
} else {
message("Warning: No file matching '", pattern, "' found. Falling back to manual selection/default.\n")
}
}
}
# 2. Fallback to Prompt or Single File Logic if not auto-selected
if (is.null(selected_data_file)) {
if (length(data_files) > 1) {
# If there are multiple data files and auto-select didn't find a match, prompt the user
message("Multiple data files found. Please select a file to load:\n")
for (i in seq_along(data_files)) {
message(i, ": ", data_files[i], "\n")
}
# Get user input for file selection
selected_index <- as.integer(readline(prompt = "Enter the number of the file you want to load: "))
# Validate the input
if (is.na(selected_index) || selected_index < 1 || selected_index > length(data_files)) {
stop("Invalid selection. Please run the function again and enter a valid number.")
}
# Get the selected file name
selected_data_file <- data_files[selected_index]
message("Loading:", selected_data_file, "\n")
} else if (length(data_files) == 1) {
# If there is only one data file, use it directly
selected_data_file <- data_files[1]
message("Loading:", selected_data_file, "\n")
} else {
stop("No data files found in the BLS database directory.")
}
}
# --- Logic for aspect file selection ---
selected_aspect_file <- NULL
if (length(aspect_files) > 1) {
message("Multiple aspect files found. Please select a file to load:\n")
for (i in seq_along(aspect_files)) {
message(i, ": ", aspect_files[i], "\n")
}
# Get user input for aspect file selection
selected_aspect_index <- as.integer(readline(prompt = "Enter the number of the aspect file you want to load: "))
# Validate the input
if (is.na(selected_aspect_index) || selected_aspect_index < 1 || selected_aspect_index > length(aspect_files)) {
stop("Invalid selection. Please run the function again and enter a valid number.")
}
# Get the selected aspect file name
selected_aspect_file <- aspect_files[selected_aspect_index]
message("Loading aspect file:", selected_aspect_file, "\n")
} else if (length(aspect_files) == 1) {
# If there is only one aspect file, use it directly
selected_aspect_file <- aspect_files[1]
message("Loading aspect file:", selected_aspect_file, "\n")
} else if (length(aspect_files) == 0) {
if (!suppress_warnings) message("No aspect files found in the BLS database directory.")
}
# Create URLs for downloading
urls <- c(
setNames(paste0(base_url, selected_data_file), selected_data_file),
setNames(paste0(base_url, series_file), series_file)
)
# Add aspect file URL if it exists
if (!is.null(selected_aspect_file)) {
aspect_url <- setNames(paste0(base_url, selected_aspect_file), selected_aspect_file)
urls <- c(urls, aspect_url)
}
# Add mapping file URLs
if (length(mapping_files) > 0) {
mapping_urls <- setNames(paste0(base_url, mapping_files), mapping_files)
urls <- c(urls, mapping_urls)
}
# Download all files using the new system
downloads <- download_bls_files(urls, suppress_warnings = suppress_warnings, cache = cache)
# Extract data from downloads
data_dt <- get_bls_data(downloads[[selected_data_file]])
series_dt <- get_bls_data(downloads[[series_file]])
# Remove unwanted columns from all files
columns_to_remove <- c("display_level", "sort_sequence", "selectable", "footnote_codes")
data_dt <- data_dt |> dplyr::select(-tidyselect::any_of(columns_to_remove))
series_dt <- series_dt |> dplyr::select(-tidyselect::any_of(columns_to_remove))
# Track processing steps
processing_steps <- character(0)
# STEP 1: Join data to series first to get lookup codes
if (!suppress_warnings) message("Joining data to series file...")
full_dt <- left_join(data_dt, series_dt, by = "series_id")
processing_steps <- c(processing_steps, "joined_data_to_series")
# STEP 2: Join aspect file if it exists (after series, before mapping files)
if (!is.null(selected_aspect_file) && selected_aspect_file %in% names(downloads)) {
tryCatch({
if (!suppress_warnings) message("Joining aspect file...")
aspect_dt <- get_bls_data(downloads[[selected_aspect_file]])
# Remove unwanted columns from aspect file
aspect_dt <- aspect_dt |> dplyr::select(-tidyselect::any_of(columns_to_remove))
# Rename the value column in aspect file to aspect_value to avoid conflicts
if ("value" %in% names(aspect_dt)) {
aspect_dt <- aspect_dt |>
dplyr::rename(aspect_value = value)
}
# Join aspect file on series_id, year, and period
join_cols <- c("series_id", "year", "period")
available_join_cols <- intersect(join_cols, names(aspect_dt))
if (length(available_join_cols) > 0) {
full_dt <- left_join(full_dt, aspect_dt, by = available_join_cols)
processing_steps <- c(processing_steps, "joined_aspect_file")
if (!suppress_warnings) message("Aspect file joined successfully on: ", paste(available_join_cols, collapse = ", "))
} else {
if (!suppress_warnings) message("Warning: Could not join aspect file - no matching columns found")
}
}, error = function(e) {
if (!suppress_warnings) message("Error processing aspect file ", selected_aspect_file, ": ", e$message)
})
}
# STEP 3: Now join mapping files to the combined table
for (map_file in mapping_files) {
if (map_file %in% names(downloads)) {
tryCatch({
map_dt <- get_bls_data(downloads[[map_file]])
# Remove unwanted columns from mapping file
map_dt <- map_dt |> dplyr::select(-tidyselect::any_of(columns_to_remove))
if (ncol(map_dt) == 2) {
# For mapping files with exactly 2 columns, assume first is join column
join_col <- names(map_dt)[1]
if (join_col %in% names(full_dt)) {
if (!suppress_warnings) message("Joining mapping file ", map_file, " on column: ", join_col)
full_dt <- left_join(full_dt, map_dt, by = join_col)
processing_steps <- c(processing_steps, paste0("joined_mapping_", gsub("\\.", "_", map_file)))
} else {
if (!suppress_warnings) message("Skipping mapping file ", map_file, " - join column '", join_col, "' not found in data")
}
} else {
# For mapping files with >2 columns, use all except last as potential join columns
potential_join_cols <- names(map_dt)[1:(ncol(map_dt) - 1)]
join_cols <- intersect(potential_join_cols, names(full_dt))
if (length(join_cols) > 0) {
if (!suppress_warnings) message("Joining mapping file ", map_file, " on column(s): ", paste(join_cols, collapse = ", "))
full_dt <- left_join(full_dt, map_dt, by = join_cols)
processing_steps <- c(processing_steps, paste0("joined_mapping_", gsub("\\.", "_", map_file)))
} else {
if (!suppress_warnings) message("Skipping mapping file ", map_file, " - no join columns found in data")
}
}
}, error = function(e) {
if (!suppress_warnings) message("Error processing mapping file ", map_file, ": ", e$message)
})
}
}
# STEP 4: Apply table simplification if requested
if (simplify_table) {
if (!suppress_warnings) message("Simplifying table structure...")
full_dt <- full_dt |>
dplyr::mutate(value = as.numeric(value),
period_type_code = substr(period,1,1),
date = case_when(
period %in% c("M13", "Q05") ~ lubridate::ym(paste0(year,"-01")),
period_type_code == "Q" ~ lubridate::yq(paste(year, "Q", substr(period,3,3))),
TRUE ~ lubridate::ym(paste0(year,period))
)
) |>
dplyr::select(-tidyselect::contains("_code"))
processing_steps <- c(processing_steps, "simplified_table")
}
# Create the BLS data collection object
bls_collection <- create_bls_object(
data = full_dt,
downloads = downloads,
data_type = paste0("BLS-", toupper(database_code)),
processing_steps = processing_steps
)
# Print summary unless suppressed
if (!suppress_warnings) {
if (has_bls_issues(bls_collection)) {
message("\n")
print_bls_warnings(bls_collection, detailed = FALSE)
message("\nUse print_bls_warnings(result, detailed = TRUE) for detailed diagnostics\n")
} else {
message("\nDownload completed successfully with no issues detected.\n")
}
}
# Return based on return_full parameter
if (return_full) {
return(list(
bls_collection = bls_collection,
full_file = get_bls_data(bls_collection),
data = data_dt,
series = series_dt,
aspect = if (!is.null(selected_aspect_file)) get_bls_data(downloads[[selected_aspect_file]]) else NULL,
mapping_files = mapping_files,
file_table = file_table,
downloads = downloads
))
} else {
return(bls_collection)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.