source("data-raw/data_raw.R")
#' Get BEA state GDP data (including GVA, Employment Compensation, Tax, and GOS)
#' for a specified year.
#' @param dataname A text indicating what state data to get.
#' Can be GVA, employee compensation, taxes, and gross operating surplus.
#' @param year A numeric value specifying year of interest.
#' @return A data frame of BEA state data for the specified year.
getBEAStateGDPData <- function(dataname, year) {
# Create the placeholder file
StateGVAzip <- file.path(stateio_dir, "SAGDP.zip")
dir <- file.path(stateio_dir, "SAGDP")
dir.create(dir, showWarnings = FALSE)
# Download all BEA IO tables into the placeholder file
if (!file.exists(StateGVAzip)) {
tryCatch({
utils::download.file("https://apps.bea.gov/regional/zip/SAGDP.zip",
StateGVAzip, mode = "wb")
# Get the name of all files in the zip archive
tmp <- unzip(StateGVAzip, list = TRUE)
fname <- tmp[tmp$Length > 0, ]$Name
# Unzip the file to the designated directory
unzip(StateGVAzip, files = fname, exdir = dir, overwrite = TRUE)
}, error = function(e) {
stop("Error in downloading or unzipping the file:", e$message)
})
}
# Determine data filename
if (dataname == "GVA") {
FileName <- list.files(dir, pattern = "SAGDP2N__ALL_AREAS.*\\.csv")
} else if (dataname == "Tax") {
FileName <- list.files(dir, pattern = "SAGDP3N__ALL_AREAS.*\\.csv")
} else if (dataname == "Compensation") {
FileName <- list.files(dir, pattern = "SAGDP4N__ALL_AREAS.*\\.csv")
} else if (dataname == "GOS") {
FileName <- list.files(dir, pattern = "SAGDP7N__ALL_AREAS.*\\.csv")
}
print("data filename determined")
if (length(FileName) == 0) {
stop("No files found matching the pattern for", dataname)
}
FullFileName <- file.path(dir, FileName)
# Get date_accessed
date_accessed <- as.character(as.Date(file.mtime(StateGVAzip)))
# Find latest data year
file_split <- unlist(stringr::str_split(FileName, pattern = "_"))
endyear <- sub(".csv", "", file_split[length(file_split)])
# Load state data
if (year <= endyear) {
StateData <- utils::read.table(FullFileName, sep = ",",
header = TRUE, stringsAsFactors = FALSE,
check.names = FALSE, fill = TRUE)
# Get date_last_modified
date_last_modified <- stringr::str_match(StateData[grep("Last updated: ",
StateData$GeoFIPS),
"GeoFIPS"],
"Last updated: (.*?)--")[2]
StateData <- StateData[!is.na(StateData$LineCode), ]
# Assign NaN to (L) Less than $50,000.
StateData[StateData == "(L)"] <- NaN
# Assign NA to (D) Not shown to avoid disclosure of confidential information;
# estimates are included in higher-level totals.
StateData[StateData == "(D)"] <- NA
# Replace (NA) with NA
StateData[StateData == "(NA)"] <- NA
# Convert all the other values to numeric
year_col <- as.character(year)
StateData[, year_col] <- sapply(StateData[, year_col], as.numeric)
# Convert values to current US $
if (unique(StateData$Unit) == "Millions of current dollars") {
StateData[, year_col] <- StateData[, year_col]*1E6
} else if (unique(StateData$Unit) == "Thousands of dollars") {
StateData[, year_col] <- StateData[, year_col]*1E3
}
# Keep state-level data
geo_names <- c(state.name, "District of Columbia", "United States *")
# Save data
df <- StateData[StateData$GeoName %in% geo_names,
c("GeoName", "LineCode", "Description", year)]
# Write data to .rds
data_name <- paste("State", dataname, year,
utils::packageDescription("stateior", fields = "Version"),
sep = "_")
saveRDS(object = df,
file = paste0(file.path("data", data_name), ".rds"))
# Write metadata to JSON
useeior:::writeMetadatatoJSON(package = "stateior",
name = data_name,
year = year,
source = "US Bureau of Economic Analysis",
url = "https://apps.bea.gov/regional/zip/SAGDP.zip",
date_last_modified = date_last_modified,
date_accessed = date_accessed)
} else {
logging::logwarn(paste(year, "state", dataname, "data is not avaliable from BEA.",
"Nothing is returned."))
}
}
# Download, save and document BEA state economic data
for (dataname in c("GVA", "Tax", "Compensation", "GOS")) {
getBEAStateGDPData(dataname, year)
}
#' Get BEA state PCE (personal consumption expenditures) data for a specified year.
#' @param year A numeric value specifying year of interest.
#' @return A data frame of BEA state PCE data for the specified year.
#' available.
getBEAStatePCE <- function(year) {
# Create the placeholder file
StatePCEzip <- file.path(stateio_dir, "SAPCE.zip")
dir <- file.path(stateio_dir, "SAPCE")
# Download all BEA IO tables into the placeholder file
if (!file.exists(StatePCEzip)) {
utils::download.file("https://apps.bea.gov/regional/zip/SAPCE.zip",
StatePCEzip, mode = "wb")
# Get the name of all files in the zip archive
tmp <- unzip(StatePCEzip, list = TRUE)
fname <- tmp[tmp$Length > 0, ]$Name
# Unzip the file to the designated directory
unzip(StatePCEzip, files = fname, exdir = dir, overwrite = TRUE)
}
# Define FileName and FullFileName
FileName <- list.files(dir, pattern = "SAPCE1__ALL_AREAS")
FullFileName <- file.path(dir, FileName)
# Get date_accessed
date_accessed <- as.character(as.Date(file.mtime(StatePCEzip)))
# Find latest data year
file_split <- unlist(stringr::str_split(FileName, pattern = "_"))
endyear <- sub(".csv", "", file_split[length(file_split)])
# Load state PCE data
if (year <= endyear) {
StatePCE <- utils::read.table(FullFileName, sep = ",",
header = TRUE, stringsAsFactors = FALSE,
check.names = FALSE, fill = TRUE)
# Get date_last_modified
date_last_modified <- stringr::str_match(StatePCE[grep("Last updated: ",
StatePCE$GeoFIPS),
"GeoFIPS"],
"Last updated: (.*?)--")[2]
StatePCE <- StatePCE[!is.na(StatePCE$Line), ]
# Replace NA with zero
StatePCE[is.na(StatePCE)] <- 0
# Convert values to current US $
year_col <- as.character(year)
StatePCE[, year_col] <- StatePCE[, year_col]*1E6
# Keep state-level data
geo_names <- c(state.name, "District of Columbia", "United States")
# Save data
df <- StatePCE[StatePCE$GeoName %in% geo_names,
c("GeoName", "LineCode", "Description", year)]
# Write data to .rds
data_name <- paste("State_PCE", year,
utils::packageDescription("stateior", fields = "Version"),
sep = "_")
saveRDS(object = df,
file = paste0(file.path("data", data_name), ".rds"))
# Write metadata to JSON
useeior:::writeMetadatatoJSON(package = "stateior",
name = data_name,
year = year,
source = "US Bureau of Economic Analysis",
url = "https://apps.bea.gov/regional/zip/SAPCE.zip",
date_last_modified = date_last_modified,
date_accessed = date_accessed)
} else {
logging::logwarn(paste(year, "state PCE data is not avaliable from BEA.",
"Nothing is returned."))
}
}
# Download, save and document BEA state PCE from 2010 to the latest year available.
getBEAStatePCE(year)
#' Get BEA state foreign travel and expenditures by U.S. residents and
#' expenditures in the U.S. by nonresidents data from 2010 to the latest year
#' available.
#' @return A data frame of BEA state foreign travel and expenditures by U.S.
#' residents and expenditures in the U.S. by nonresidents data from 2010 to
#' the latest year available.
getBEAStateResidentAndNonresidentSpending <- function() {
# Create the placeholder file
StatePCEzip <- file.path(stateio_dir, "SAPCE.zip")
dir <- file.path(stateio_dir, "SAPCE")
# Download all BEA IO tables into the placeholder file
if (!file.exists(StatePCEzip)) {
utils::download.file("https://apps.bea.gov/regional/zip/SAPCE.zip",
StatePCEzip, mode = "wb")
# Get the name of all files in the zip archive
tmp <- unzip(StatePCEzip, list = TRUE)
fname <- tmp[tmp$Length > 0, ]$Name
# Unzip the file to the designated directory
unzip(StatePCEzip, files = fname, exdir = dir, overwrite = TRUE)
}
# Define FileName and FullFileName
FileName <- list.files(dir, pattern = "SAPCE4__ALL_AREAS")
FullFileName <- file.path(dir, FileName)
# Get date_accessed
date_accessed <- as.character(as.Date(file.mtime(StatePCEzip)))
# Find latest data year
file_split <- unlist(stringr::str_split(FileName, pattern = "_"))
end_year <- sub(".csv", "", file_split[length(file_split)])
# Create year_cols
year_cols <- as.character(2010:end_year)
# Load state PCE data
StatePCE <- utils::read.table(FullFileName, sep = ",",
header = TRUE, stringsAsFactors = FALSE,
check.names = FALSE, fill = TRUE)
# Get date_last_modified
date_last_modified <- stringr::str_match(StatePCE[grep("Last updated: ",
StatePCE$GeoFIPS),
"GeoFIPS"],
"Last updated: (.*?)--")[2]
# Get resident and nonresident spending
Spending <- StatePCE[StatePCE$LineCode %in% c(129, 130), ]
# Replace NA with zero
Spending[is.na(Spending)] <- 0
# Convert values to current US $
Spending[, year_cols] <- sapply(Spending[, year_cols], as.numeric)*1E6
# Keep state-level data
geo_names <- c(state.name, "District of Columbia", "United States")
# Save data
for (year in year_cols) {
for (linecode in c(129:130)) {
df <- Spending[Spending$GeoName %in% geo_names & Spending$LineCode == linecode,
c("GeoName", "LineCode", "Description", year)]
# Write data to .rds
data_name <- paste("State",
ifelse(linecode == 129,
"ForeignExpenditureByResident",
"DomesticExpenditureByNonresident"),
year,
utils::packageDescription("stateior", fields = "Version"),
sep = "_")
saveRDS(object = df,
file = paste0(file.path("data", data_name), ".rds"))
# Write metadata to JSON
useeior:::writeMetadatatoJSON(package = "stateior",
name = data_name,
year = year,
source = "US Bureau of Economic Analysis",
url = "https://apps.bea.gov/regional/zip/SAPCE.zip",
date_last_modified = date_last_modified,
date_accessed = date_accessed)
}
}
}
# Download, save and document BEA state foreign travel and expenditures by U.S.
# residents and expenditures in the U.S. by nonresidents data from 2010 to
# the latest year available.
getBEAStateResidentAndNonresidentSpending()
#' Download BEA US Gov Expenditure data (NIPA table).
#' @return A data frame of BEA US Gov Expenditure data (NIPA table).
downloadBEAGovExpenditure <- function() {
TableName <- "Section3All_xls.xlsx"
dir <- file.path(stateio_dir, "StateLocalGovFinances")
dir.create(dir, showWarnings = FALSE)
FullFileName <- file.path(dir, TableName)
url <- "https://apps.bea.gov/national/Release/XLS/Survey/Section3All_xls.xlsx"
# Download NIPA table file
if (!file.exists(FullFileName)) {
utils::download.file(url, FullFileName, mode = "wb")
}
# Create output
ls <- list("url" = url,
"date_accessed" = as.character(as.Date(file.mtime(FullFileName))))
return(ls)
}
#' Get BEA US Gov Investment data by state (Table 3.9.5 annual) for a specified
#' year.
#' @param year A numeric value specifying year of interest.
#' @return A data frame of BEA US Gov Investment data for the specified year.
getBEAGovInvestment <- function(year) {
# Download US Gov Expenditure (NIPA table) from BEA
url <- downloadBEAGovExpenditure()[["url"]]
date_accessed <- downloadBEAGovExpenditure()[["date_accessed"]]
# Load Gov Investment table
TableName <- "Section3All_xls.xlsx"
FullFileName <- file.path(stateio_dir, "StateLocalGovFinances", TableName)
# Get date_last_modified
notes <- as.data.frame(readxl::read_excel(FullFileName,
sheet = "T30905-A",
range = "A1:A7"))
date_last_modified <- stringr::str_match(notes[grep("File created ",
notes[, 1]),
1],
"File created (.*?) ")[2]
# Load data
GovInvestment <- as.data.frame(readxl::read_excel(FullFileName,
sheet = "T30905-A",
col_names = TRUE,
skip = 7))
# Assign column name to the description column
colnames(GovInvestment)[2] <- "Description"
# Create year_col
year_col <- as.character(year)
# Save data
if (year %in% colnames(GovInvestment)) {
df <- GovInvestment[complete.cases(GovInvestment),
c("Line", "Description", year)]
# Convert values from million $ to $
df[, year_col] <- df[, year_col]*1E6
# Write data to .rds
data_name <- paste("GovInvestment", year,
utils::packageDescription("stateior", fields = "Version"),
sep = "_")
saveRDS(object = df,
file = paste0(file.path("data", data_name), ".rds"))
# Write metadata to JSON
useeior:::writeMetadatatoJSON(package = "stateior",
name = data_name,
year = year,
source = "US Bureau of Economic Analysis",
url = url,
date_last_modified = date_last_modified,
date_accessed = date_accessed)
} else {
logging::logwarn(paste(year, "state-level US government investment data is not",
"avaliable from BEA. Nothing is returned."))
}
}
# Download, save and document BEA US government investment data.
getBEAGovInvestment(year)
#' Get BEA US Gov Consumption data by state (Table 3.10.5 annual) for a specified
#' year.
#' @param year A numeric value specifying year of interest.
#' @return A data frame of BEA US Gov Consumption by state data for the specified
#' year.
getBEAGovConsumption <- function(year) {
# Download US Gov Expenditure (NIPA table) from BEA
url <- downloadBEAGovExpenditure()[["url"]]
date_accessed <- downloadBEAGovExpenditure()[["date_accessed"]]
# Load Gov Consumption table
TableName <- "Section3All_xls.xlsx"
FullFileName <- file.path(stateio_dir, "StateLocalGovFinances", TableName)
# Get date_last_modified
notes <- as.data.frame(readxl::read_excel(FullFileName,
sheet = "T31005-A",
range = "A1:A7"))
date_last_modified <- stringr::str_match(notes[grep("File created ",
notes[, 1]),
1],
"File created (.*?) ")[2]
# Load data
GovConsumption <- as.data.frame(readxl::read_excel(FullFileName,
sheet = "T31005-A",
col_names = TRUE,
skip = 7))
# Assign column name to the description column
colnames(GovConsumption)[2] <- "Description"
# Create year_col
year_col <- as.character(year)
# Save data
if (year %in% colnames(GovConsumption)) {
df <- GovConsumption[complete.cases(GovConsumption),
c("Line", "Description", year)]
# Convert values from million $ to $
GovConsumption[, year_col] <- GovConsumption[, year_col]*1E6
# Write data to .rds
data_name <- paste("GovConsumption", year,
utils::packageDescription("stateior", fields = "Version"),
sep = "_")
saveRDS(object = df,
file = paste0(file.path("data", data_name), ".rds"))
# Write metadata to JSON
useeior:::writeMetadatatoJSON(package = "stateior",
name = data_name,
year = year,
source = "US Bureau of Economic Analysis",
url = url,
date_last_modified = date_last_modified,
date_accessed = date_accessed)
} else {
logging::logwarn(paste(year, "state-level US government consumption data is not",
"avaliable from BEA. Nothing is returned."))
}
}
# Download, save and document BEA US government consumption by state data
getBEAGovConsumption(year)
#####################################################################################
################## BELOW ARE COUNTY MODEL DATA PROCESSING FUNCTIONS #################
#####################################################################################
#' getBEACountySectorGDP
#'
#' It returns the original county GDP of one specified state at BEA-sector(Line Code)
#' level with NAs.
#'
#' @param year A numeric value between 2001 and 2018 specifying the year of interest. If 0 ,return a dataframe with data from all years available
#' @param state A string character specifying the state of interest, 'GA'
#' @param axis A numeric value, 0,1. if 0, each county will be a col, if 1, row, default 0. This parameter only works when you specify one year
#' @return A data frame contains selected county GDP by BEA sector industries at a specific year.
getBEACountySectorGDP = function(year, state, axis = 0) {
# Create the placeholder file
CountyGDPzip = "inst/extdata/CAGDP2.zip"
# Download all BEA IO tables into the placeholder file
if(!file.exists(CountyGDPzip)) {
utils::download.file("https://apps.bea.gov/regional/zip/CAGDP2.zip", CountyGDPzip, mode = "wb")
# Get the name of all files in the zip archive
fname = unzip(CountyGDPzip, list = TRUE)[unzip(CountyGDPzip, list = TRUE)$Length > 0, ]$Name
# Unzip the file to the designated directory
unzip(CountyGDPzip, files = fname, exdir = "inst/extdata/CAGDP2", overwrite = TRUE)
}
# filter for specified state
fileName = paste0('inst/extdata/CAGDP2/CAGDP2_', paste0(getStateAbbreviation(state),'_2001_2018.csv'))
# read data
countyData = utils::read.table(fileName, sep = ",", header = TRUE,
stringsAsFactors = FALSE, check.names = FALSE,
fill = TRUE)
# drop last four rows (notes in original file)
countyData = countyData[!is.na(countyData$Region), ]
# select only BEA-sector-level rows
sectorLevelLineCode = c(1,3,6,10,11,12,34,35,36,45,50,59,68,75,82,83) #######TODO: have a crosswalk file in extdata and read from it, instead of hardcoding?
countyData = countyData[countyData$LineCode %in% sectorLevelLineCode, ]
# convert data type (string to numeric) Note: NAs introduced by coercion, which is ok
year_range = seq(2001,2018,1)
countyData[, as.character(year_range)] = sapply(countyData[, as.character(year_range)],
as.numeric)
# Decision1: return all-year table or one-year table
if (year == 0) {
# retain all year columns
countyDataAllYear = countyData[, c('GeoFIPS', 'GeoName', 'LineCode', as.character(year_range))]
# convert unit
for (yr in as.character(year_range)) {
countyDataAllYear[[yr]] = 1000 * countyDataAllYear[[yr]]
}
return(countyDataAllYear)
} else {
# retain specified year only
countyDataOneYear = countyData[, c('GeoName', 'LineCode', year)]
countyDataOneYear[[as.character(year)]] = 1000 * countyDataOneYear[[as.character(year)]]
}
# Decision2: transpose the table? (county as column or as row)
if (axis == 0) {
countyDataOneYearColumn = countyDataOneYear
colnames(countyDataOneYearColumn)[ncol(countyDataOneYearColumn)] = 'GDP'
# drop State Total Rows (you can still find it in row table)
dropStateFilter = sapply(attributes(countyDataOneYearColumn)$row.names, as.numeric) >= 36 # because state total always takes the first 35 rows(indices) of each table
countyDataOneYearColumn = countyDataOneYearColumn[dropStateFilter, ]
# WHY ARRANGE? It is because there is a discrepancy between rank by FIPS (default) and Name.
# We decide to stick with character string ranking (For instance: in Georgia, we arrange 'Macon' before 'McIntosh' while Macon in fact has a larger fips than McIntosh)
countyDataOneYearColumn = countyDataOneYearColumn %>%
dplyr::arrange(GeoName) %>%
dplyr::spread(GeoName, GDP)
return(countyDataOneYearColumn)
} else {
return(countyDataOneYear)
}
}
# CountyGA_BEASectorGDP_2001_2018 = getBEACountySectorGDP(year = 0, state = 'Georgia', axis = 1)
# usethis::use_data(CountyGA_BEASectorGDP_2001_2018, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.