#' DOD OMB Group
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
add_dod_omb_group <-
function(data) {
match_cols <-
data %>%
select(one_of(
c(
"idCGAC",
"nameAgencyCGAC",
"nameBureauOMB",
"nameSubFunctionOMB",
"nameAccountOMB"
)
)) %>%
ncol()
if (match_cols < 3) {
return(data)
}
data <-
data %>%
mutate_if(is.numeric, as.numeric) %>%
select(-matches("groupDOD|isDefenseAccount"))
df_accounts <-
data %>%
distinct(idCGAC,
nameAgencyCGAC,
nameBureauOMB,
nameSubFunctionOMB,
nameAccountOMB) %>%
arrange(idCGAC) %>%
mutate(isDefenseAccount = case_when(
nameAgencyCGAC %>% str_detect("ARMY|NAVY|SPACE FORCE|AIR FORCE|DEFENSE") ~ TRUE,
TRUE ~ FALSE
))
df_accounts <-
df_accounts %>%
mutate(
groupDOD = case_when(
nameBureauOMB %>% str_detect("PROCUREMENT") ~ "PROCUREMENT",
nameBureauOMB %>% str_detect("CONSTRUCTION|FAMILY HOUSING") ~ "CONSTRUCTION",
nameBureauOMB %>% str_detect("RESEARCH") ~ "RDT&E",
nameBureauOMB == "MILITARY PERSONNEL" ~ "MILITARY PERSONNEL",
nameBureauOMB %>% str_detect("OPERATION AND MAINTENANCE") ~ "OPERATIONS AND MAINTENANCE",
nameBureauOMB %>% str_detect("DEPARTMENT OF DEFENSE--MILITARY PROGRAMS") ~ "MILITARY PROGRAMS",
nameSubFunctionOMB %>% str_detect(
"RESEARCH, DEVELOPMENT,|SCIENCE AND TECHNOLOGY|GENERAL SCIENCE|AGRICULTURAL RESEARCH"
) ~ "RDT&E",
nameAccountOMB %>% str_detect("PROCUREMENT") ~ "PROCUREMENT",
nameAccountOMB %>% str_detect("CONSTRUCTION|FAMILY HOUSING") ~ "CONSTRUCTION",
nameAccountOMB %>% str_detect("RETIREMENT") ~ "RETIREMENT",
nameAccountOMB %>% str_detect("RESEARCH") ~ "RDT&E",
nameAccountOMB == "MILITARY PERSONNEL" ~ "MILITARY PERSONNEL",
nameAccountOMB %>% str_detect("WORKING CAPITAL") ~ "WORKING CAPITAL",
nameAccountOMB %>% str_detect("OPERATION AND MAINTENANCE") ~ "OPERATIONS AND MAINTENANCE",
nameAccountOMB %>% str_detect("DEPARTMENT OF DEFENSE--MILITARY PROGRAMS") ~ "MILITARY PROGRAMS",
TRUE ~ "OTHER"
)
)
data <-
data %>%
left_join(
df_accounts,
by = c(
"idCGAC",
"nameAgencyCGAC",
"nameBureauOMB",
"nameAccountOMB",
"nameSubFunctionOMB"
)
)
data
}
.join_tas_symbols <-
function(data) {
df_tas <- dictionary_treasury_account_symbols()
df_tas <-
df_tas %>%
distinct(
idFederalAccount,
idCGAC,
codeBureauTAS = codeBureauOMB,
nameBureauTAS = nameBureauOMB,
nameAgencyTAS = nameAgency,
nameFinancialReportingEntity,
codeFinancialReportingEntity,
datetimeProgramEstablished,
datetimeProgramEnded
) %>%
group_by(idFederalAccount) %>%
filter(datetimeProgramEstablished == max(datetimeProgramEstablished)) %>%
slice(1) %>%
ungroup()
data <-
data %>%
mutate(idCGAC = as.numeric(idCGAC)) %>%
left_join(df_tas, by = c("idFederalAccount", "idCGAC"))
data <-
data %>%
munge_data()
data
}
# utils -------------------------------------------------------------------
.add_omb_account_group <-
function(data) {
data <-
data %>%
mutate(
groupOMBAccount = case_when(
nameAccountOMB %>% str_detect("SALARIES|COMPENSATION OF|^COMPENSATION|SEPARATION PAY|PERSONNEL") ~ "SALARY",
nameAccountOMB %>% str_detect(
"RESEARCH PROJECTS|RESEARCH, DEVELOPMENT|^RESEARCH AND DEVELOPMENT|^SCIENCE|EXPLORATION TECHNOLOGY|INFORMATION TECHNOLOGY|RESEARCH AND DEVELOPMENT|^HEALTH RESEARCH|^RESEARCH AND|^MAJOR RESEARCH"
) ~ "RESEARCH, SCIENCE AND TECHNOLOGY",
nameAccountOMB %>% str_detect("PROCUREMENT|PURCHASES|WEAPONS ACTIVITIES") ~ "PROCUREMENT",
nameAccountOMB %>% str_detect("ASSISTANCE") ~ "ASSISTANCE",
nameAccountOMB %>% str_detect("OPERATIONS AND SUPPORT|OPERATION AND MAINTENANCE") ~ "OPERATIONS AND MAINTENANCE",
nameAccountOMB %>% str_detect(
"PROGRAM ADMINISTRATION|POLICY AND ADMINISTRATION|DEPARTMENTAL ADMINISTRATION|GENERAL ADMINISTRATION"
) ~ "PROGRAM ADMINISTRATION",
nameAccountOMB %>% str_detect("CAPITAL IMPROVEMENT") ~ "CAPITAL IMPROVMENT",
nameAccountOMB %>% str_detect("WORKING CAPITAL|REVOLVING FUND") ~ "WORKING CAPITAL",
nameAccountOMB %>% str_detect("OPERATING EXPENSES|^EXPENSES ") ~ "OPERATING EXPENSES",
nameAccountOMB %>% str_detect("LEASES|SALES") ~ "SALES AND LEASES",
nameAccountOMB %>% str_detect("MILITARY SALE") ~ "MILITARY SALES",
nameAccountOMB %>% str_detect("FEES") ~ "FEES",
nameAccountOMB %>% str_detect(
"COURT SECURITY|BORDER SECURITY|CYBER SECURITY|CYBERSECURITY|MARITIME SECURITY|PROTECTION OF FOREIGN MISSIONS AND OFFICIALS"
) ~ "SECURITY",
nameAccountOMB %>% str_detect("TRANSMISSION") ~ "POWER TRANSMISSION",
nameAccountOMB %>% str_detect("HEALTH PROGRAM") ~ "HEALTH CARE",
nameAccountOMB %>% str_detect("PENSION|RETIREMENT|READJUSTMENT BENEFITS|RETIREMENT BENEFITS|BENEFIT PLAN") ~ "RETIREMENT BENEFITS",
nameAccountOMB %>% str_detect("ANNUITY") ~ "ANNUITIES",
nameAccountOMB %>% str_detect(" FUND,| FUND |FUND ") ~ "FUND",
nameAccountOMB %>% str_detect("EXPLORATION") ~ "EXPLORATION",
nameAccountOMB %>% str_detect(
"DRUG INTERDICTION|CRIME AND DRUG ENFORCEMENT AND ENFORCEMENT|DRUG TRAFFICKING|DRUG TRAFFICKING"
) ~ "DRUG INTERDICTION AND ENFORCEMENT",
nameAccountOMB %>% str_detect("^RENT |^RENTS |^RENTAL PAYMENTS") ~ "RENTS",
nameAccountOMB %>% str_detect("SUBSIDIES|SUBSIDY") ~ "SUBSIDY",
nameAccountOMB %>% str_detect("REFUNDS") ~ "REFUNDS",
nameAccountOMB %>% str_detect("SCHOLARSHIP") ~ "SCHOLARSHIP",
nameAccountOMB %>% str_detect(" GRANT |GRANT | GRANTS |GRANTS ") ~ "GRANTS",
nameAccountOMB %>% str_detect("CONTRIBUTION TO|CONTRIBUTIONS TO|^CONTRIBUTIONS|ALLIED CONTRIB|AFGHANISTAN SECURITY|COOPERATIVE THREAT REDUCTION|COUNTER-ISLAMIC|BASE CLOSURE|WORKFORCE DEVELOPMENT") ~ "CONTRIBUTIONS",
nameAccountOMB %>% str_detect(
"COMMISSIONS|UNITED STATES-CHINA ECONOMIC AND SECURITY REVIEW COMMISSION|UNITED STATES COMMISSION ON INTERNATIONAL RELIGIOUS FREEDOM
DENALI COMMISSION"
) ~ "COMMISSION",
nameAccountOMB %>% str_detect("MEDICARE") ~ "MEDICARE",
nameAccountOMB %>% str_detect("NUTRITION") ~ "NUTRITION",
nameAccountOMB %>% str_detect("FINANCING") ~ "FINANCING",
nameAccountOMB %>% str_detect("LOAN PROGRAM") ~ "LOAN PROGRAM",
nameAccountOMB %>% str_detect("FIRE") ~ "FIRE MANAGEMENT",
nameAccountOMB %>% str_detect("SCHOOL IMPROVEMENT") ~ "SCHOOL IMPROVEMENT",
nameAccountOMB %>% str_detect("PROFIT|EARNINGS") ~ "PROFITS AND EARNINGS",
nameAccountOMB %>% str_detect("FRAUD") ~ "FRAUD PREVENTION",
nameAccountOMB %>% str_detect("SUPPLEMENTAL") ~ "SUPPLEMENTAL ASSISTANCE",
nameAccountOMB %>% str_detect("INSPECTOR GENERAL") ~ "INSPECTOR GENERAL",
nameAccountOMB %>% str_detect("^PAYMENT TO") ~ "PAYMENTS",
nameAccountOMB %>% str_detect(
"ECONOMIC DEVELOPMENT|ECONOMIC SUPPORT AND DEVELOPMENT FUND|BUSINESS DEVELOPMENT|ENTREPRENEURIAL DEVELOPMENT|ECONOMIC SUPPORT |COMMUNITY DEVELOPMENT"
) ~ "ECONOMIC DEVELOPMENT",
nameAccountOMB %>% str_detect("CAPITAL IMPROVEMENTS") ~ "CAPITAL IMPROVEMENTS",
nameAccountOMB %>% str_detect("RECEIVABLES") ~ "RECEIVABLES",
nameAccountOMB %>% str_detect("NOT OTHERWISE CLASSIFIED") ~ "NOT OTHERWISE CLASSIFIED",
nameAccountOMB %>% str_detect("BUILDINGS AND FACILITIES|CARE OF THE BUILDING AND GROUNDS") ~ "BUILDINGS AND FACILITIES",
nameAccountOMB %>% str_detect("INSURANCE") ~ "INSURANCE",
nameAccountOMB %>% str_detect("LIQUIDATING ACCOUNT") ~ "LIQUIDATING ACCOUNT",
nameAccountOMB %>% str_detect("CLEARNING ACCOUNT") ~ "CLEARNING ACCOUNT",
nameAccountOMB %>% str_detect("INTEREST ON") ~ "INTEREST",
nameAccountOMB %>% str_detect("^DONATIONS") ~ "DONATIONS",
nameAccountOMB %>% str_detect("MILITARY CONSTRUCTION|FAMILY HOUSING") ~ "MILITARY CONSTRUCTION",
nameAccountOMB %>% str_detect("REIMBURSEMENT") ~ "REIMBURSEMENTS",
nameAccountOMB %>% str_detect("INNOVATION") ~ "INNOVATION",
nameAccountOMB %>% str_detect("GUARANTEE") ~ "GUARANTEES",
nameAccountOMB %>% str_detect("FORFEITURE") ~ "FORFEITURES",
nameAccountOMB %>% str_detect("REVENUE") ~ "REVENUES",
nameAccountOMB %>% str_detect("REFUGEE") ~ "REFUGEES",
nameAccountOMB %>% str_detect("ALLOWANCE") ~ "ALLOWANCE",
nameAccountOMB %>% str_detect("MEDICAID") ~ "MEDICAID",
TRUE ~ "OTHER"
)
)
data <-
data %>%
mutate(
groupOMBAccount = case_when(
nameSubFunctionOMB %>% str_detect("MEDICARE") ~ "MEDICARE",
nameSubFunctionOMB %>% str_detect("CENTRAL FISCAL") ~ "SALARIES",
nameSubFunctionOMB %>% str_detect("ATOMIC ENERGY|EMERGENCY ENERGY|ENERGY CONSERV|ENERGY SUPPLY") ~ "ENERGY",
nameSubFunctionOMB %>% str_detect("SOCIAL SERVICES") ~ "SOCIAL SERVICES",
nameSubFunctionOMB %>% str_detect("SOCIAL SECURITY") ~ "SOCIAL SECURITY",
nameSubFunctionOMB %>% str_detect("INTEREST") ~ "INTEREST",
nameSubFunctionOMB %>% str_detect("FARM INCOME") ~ "SUBSIDY",
nameSubFunctionOMB %>% str_detect("TRANSPORT") ~ "TRANSPORTATION",
nameSubFunctionOMB %>% str_detect("FOREIGN INFORMATION|FOREIGN AFFAIRS|INTERNATIONAL FINANC") ~ "FOREIGN AFFAIRS",
nameSubFunctionOMB %>% str_detect("HEALTH CARE|HOSPITAL AND") ~ "HEALTH CARE",
nameSubFunctionOMB %>% str_detect("EDUCATION") ~ "EDUCTATION",
nameSubFunctionOMB %>% str_detect("ASSISTANCE$") ~ "ASSISTANCE",
nameSubFunctionOMB %>% str_detect("OTHER ADVANCEMENT") ~ "CONTRIBUTIONS",
nameSubFunctionOMB %>% str_detect("DISASTER RELIEF") ~ "DISASTER RELIEF",
nameSubFunctionOMB %>% str_detect("INCOME SECURITY|BENEFITS AND SERVICES") ~ "RETIREMENT BENEFITS",
nameSubFunctionOMB %>% str_detect("UNEMPLOYMENT|GENERAL RETIRE") ~ "UNEMPLOYMENT",
nameSubFunctionOMB %>% str_detect("HEALTH RESEARCH|SPACE FLIGHT,|RESEARCH,") ~ "RESEARCH, SCIENCE AND TECHNOLOGY",
nameSubFunctionOMB %>% str_detect("POLLUTION|ENVIRONMENTAL") ~ "ENVIRONMENT AND POLLUTION",
nameBureauOMB %>% str_detect("PROCUREMENT") ~ "PROCUREMENT",
nameSubFunctionOMB %>% str_detect("NATURAL RESOURCES") ~ "NATURAL RESOURCES",
nameSubFunctionOMB %>% str_detect("RECREATION") ~ "RECREATION",
nameSubFunctionOMB %>% str_detect("TRAINING AND EMPLOYMENT") ~ "JOB TRAINING",
nameSubFunctionOMB %>% str_detect("AREA AND REGIONAL DEVELOPMENT|COMMUNITY DEVELOPMENT") ~ "ECONOMIC DEVELOPMENT",
nameSubFunctionOMB %>% str_detect("LAND MANAGEMENT") ~ "LAND MANAGEMENT",
nameSubFunctionOMB %>% str_detect("WATER") ~ "WATER",
nameSubFunctionOMB %>% str_detect("LAW ENFORCEMENT") ~ "LAW ENFORCEMENT",
nameSubFunctionOMB %>% str_detect("JUDICIAL ACTIV") ~ "JUDICIAL ACTIVITIES",
nameSubFunctionOMB %>% str_detect("RESEARCH, DEVELOPMENT,|SCIENCE AND TECHNOLOGY|GENERAL SCIENCE|AGRICULTURAL RESEARCH") ~ "RESEARCH, SCIENCE AND TECHNOLOGY",
TRUE ~ groupOMBAccount
)
)
data
}
#' Munge Treasury Account Symbol Data
#'
#' @param data
#' @param research_terms
#' @param research_columns
#'
#' @return
#' @export
#'
#' @examples
munge_account_symbols <-
function(data,
research_terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
research_columns = c("nameAgencyOMB", "nameBureauOMB", "nameAccountOMB")) {
if (data %>% hasName("idCGAC")) {
data <- data %>%
mutate(hasNoCGAC = is.na(idCGAC))
}
if (data %>% hasName("nameAgencyCGAC") &
data %>% hasName("idCGAC")) {
data <- data %>%
mutate(idCGAC = as.character(idCGAC)) %>%
select(-one_of("slugCGAC")) %>%
left_join(.cgac_codes() %>% mutate(idCGAC = as.character(idCGAC))
,
by = "idCGAC") %>%
select(names(data), everything())
}
has_research_cols <- length(research_columns) > 0
has_research_search <-
length(research_terms) > 0
if (has_research_cols & has_research_search) {
research_columns %>%
walk(function(col) {
data <<- .add_research_science_flags(data = data,
terms = research_terms,
column_name = col)
})
}
data
}
.add_research_science_flags <-
function(data,
terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
column_name = "nameAccountOMB") {
if (!data %>% hasName(column_name)) {
return(data)
}
terms <-
terms %>% str_to_upper() %>% unique() %>% str_c(collapse = "|")
new_col <-
column_name %>% str_replace_all("name", "is") %>% str_c("ResearchFlag")
data <-
data %>%
mutate(!!sym(new_col) := !!sym(column_name) %>% str_detect(terms))
data
}
.dictionary_omb_names <-
function(data) {
tibble(
nameOMB = c(
"Agency Code",
"Agency Name",
"Bureau Code",
"Bureau Name",
"Account Code",
"Account Name",
"Treasury Agency Code",
"Subfunction Code",
"Subfunction Title",
"BEA Category",
"On- or Off- Budget",
"yearBudget",
"amount",
"Grant/non-grant split",
"Source Category Code",
"Source category name",
"Source subcategory",
"Source subcategory name",
"Agency code",
"Agency name",
"Bureau code",
"Bureau name",
"Account code",
"Account name",
"Treasury Agency code",
"On- or off- budget"
),
nameActual = c(
"codeAgencyOMB",
"nameAgencyOMB",
"codeBureauOMB",
"nameBureauOMB",
"codeAccountOMB",
"nameAccountOMB",
"idCGAC",
"codeSubFunctionOMB",
"nameSubFunctionOMB",
"typeCategoryBEA",
"isOnBudget",
"yearBudget",
"amount",
"typeFunding",
"codeSource",
"nameSource",
"codeSubFunctionBudget",
"nameSubFunctionBudget",
"codeAgencyOMB",
"nameAgencyOMB",
"codeBureauOMB",
"nameBureauOMB",
"codeAccountOMB",
"nameAccountOMB",
"idCGAC",
"isOnBudget"
)
)
}
.munge_omb_names <-
function(data) {
names_dict <- names(data)
dict <- .dictionary_omb_names()
actual_names <-
names_dict %>%
map_chr(function(name) {
df_row <-
dict %>% filter(nameOMB == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
.munge_general <-
function(data,
base_row = 2,
table_name,
year_plus = 2,
year_plus_long = 1,
is_wide = F) {
df_types <-
data %>% dplyr::slice(base_row) %>% select(2:ncol(data)) %>%
gather(column, type) %>%
fill(type) %>%
mutate(idRow = 1:n()) %>%
select(idRow, type) %>%
.remove_na()
if (df_types %>% ncol() == 1 | is_wide) {
years <-
data %>% dplyr::slice(base_row + year_plus_long) %>% select(2:ncol(data)) %>% as.character()
df_features <-
data %>%
select(1) %>%
slice((base_row + 2):nrow(data)) %>%
mutate(idRow = 1:n()) %>%
setNames(c("metric", "idRow"))
df_subparents <-
df_features %>%
filter(metric %>% str_detect("\\:")) %>%
rename(subparent = metric) %>%
filter(!subparent %>% str_detect("Note:"))
df_parents <-
df_features %>%
filter(metric %>% str_detect("\\:")) %>%
select(idRow) %>%
mutate(idRow = idRow + 1) %>%
left_join(df_features, by = "idRow") %>%
mutate(idRow = idRow - 1) %>%
rename(parent = metric)
data <-
data %>%
slice((base_row + 2):nrow(data)) %>%
setNames(c("metric", years)) %>%
mutate(idRow = 1:n()) %>%
select(idRow, everything())
df_parents <-
data %>%
select(idRow, metric) %>%
filter(metric %>% str_detect(":")) %>%
rename(parent = metric)
d <-
data %>%
left_join(df_parents, by = "idRow") %>%
select(idRow, parent, metric, everything()) %>%
fill(parent)
group <-
d %>% filter(is.na(parent)) %>% select(metric) %>% pull()
if (length(group) == 1) {
d <-
d %>%
mutate(group = group)
d <-
d %>%
filter(!is.na(parent)) %>%
filter(!metric %>% str_detect(":")) %>%
select(-idRow) %>%
gather(yearBudget, amount, -c(group, parent, metric)) %>%
mutate(amount = amount %>% readr::parse_number()) %>%
filter(!is.na(amount)) %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
) %>%
select(nameTable,
isEstimate,
yearBudget,
group,
parent,
metric,
everything()) %>%
filter(!metric %>% str_detect("Total")) %>%
mutate(amount = as.character(amount)) %>%
.convert_amount() %>%
.summarise_sum(
columns = c(
"nameTable",
"isEstimate",
"yearBudget",
"group",
"parent",
"metric"
)
)
} else {
d <- d %>%
filter(!metric %>% str_detect(":")) %>%
select(-idRow) %>%
gather(yearBudget, amount, -c(parent, metric)) %>%
mutate(amount = amount %>% readr::parse_number()) %>%
filter(!is.na(amount)) %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
) %>%
select(nameTable,
isEstimate,
yearBudget,
parent,
metric,
everything()) %>%
filter(!metric %>% str_detect("Total"))
d <-
d %>%
mutate_at(c("parent",
"metric"),
list(function(x) {
x %>% str_remove_all("\\:|\\(|\\)") %>% str_replace_all("\\-", " ") %>% str_squish()
}))
return(d)
}
d <- d %>%
mutate_at(c("group",
"parent",
"metric"),
list(function(x) {
x %>% str_remove_all("\\:|\\(|\\)") %>% str_replace_all("\\-", " ") %>% str_squish()
}))
return(d)
}
df_outlays <-
data %>% dplyr::slice(base_row + 1) %>% select(2:ncol(data)) %>%
gather(column, outlay) %>%
mutate(idRow = 1:n(),
outlay = coalesce(outlay, "")) %>%
select(idRow, outlay) %>%
mutate(outlay = outlay %>% str_replace_all("\n", " "))
df_cols <-
df_outlays %>%
left_join(df_types, by = "idRow") %>%
unite(type, type, outlay, sep = "_")
d <-
data %>%
dplyr::slice((base_row + year_plus):nrow(data))
data <-
data %>%
set_names(c("yearBudget", df_cols$type)) %>%
gather(type, amount, -yearBudget) %>%
separate(
type,
into = c("typeBudget", "typeCashFlow"),
sep = "_",
extra = "merge"
) %>%
filter(typeBudget != "Total",
!typeCashFlow %>% str_detect("Surplus")) %>%
mutate(yearBudget = case_when(yearBudget == "TQ" ~ "1977",
TRUE ~ yearBudget)) %>%
mutate(amount = amount %>% readr::parse_number()) %>%
filter(!is.na(amount)) %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
)
data
}
.unite_filter_metric <-
function(data, budget_filter = NULL) {
if (length(budget_filter) > 0) {
data <- data %>%
filter(typeBudget %in% budget_filter)
}
data %>%
unite(metric, typeBudget, typeCashFlow, sep = "")
}
.summarise_pct <-
function(data,
columns = c("yearBudget", "isEstimate", "metric")) {
data %>%
group_by(!!!syms(columns)) %>%
summarise_all(mean) %>%
ungroup() %>%
arrange(yearBudget)
}
.summarise_sum <-
function(data,
columns = c("yearBudget", "isEstimate", "metric"),
amount_col = "amount") {
data %>%
group_by(!!!syms(columns)) %>%
summarise_at(amount_col, sum) %>%
ungroup() %>%
arrange(yearBudget)
}
.summarise_mean <-
function(data,
columns = c("yearBudget", "isEstimate", "metric"),
amount_col = "amount") {
data %>%
group_by(!!!syms(columns)) %>%
summarise_at(amount_col, mean) %>%
ungroup() %>%
arrange(yearBudget)
}
.omb_munge <- function(data) {
if (data %>% hasName("yearBudget") &
!data %>% hasName("isEstimate")) {
data <-
data %>%
mutate(
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate") %>% parse_number()
) %>%
fill(yearBudget)
}
amt_mill <- data %>% select(matches("amountMillions")) %>% names()
amt_bil <-
data %>% select(matches("amountBillions")) %>% names()
if (length(amt_mill) > 0) {
data <-
data %>%
mutate_at(amt_mill, function(x) {
x * 1000000 %>% currency(digits = 0)
})
names(data) <-
names(data) %>% str_replace_all("amountMillions", "amount")
}
pct_cols <- data %>% select(matches("pct")) %>% names()
if (length(pct_cols) > 0) {
data <- data %>% mutate_at(pct_cols, list(function(x) {
x / 100
}))
data <-
data %>%
mutate_at(amt_bil, function(x) {
x * 1000000000 %>% currency(digits = 0)
})
names(data) <-
names(data) %>% str_replace_all("amountBillions", "amount")
}
data
}
.omb_chop <-
function(data, base_col, start_row = 4) {
df_base <-
data %>%
dplyr::slice(start_row:nrow(data))
df_headers <-
data %>% select(2:ncol(data)) %>% dplyr::slice(1:(start_row - 1))
df_headers <- df_headers %>% t() %>%
as_tibble() %>%
.remove_na()
names(df_headers) <- 1:ncol(df_headers) %>% str_c("V", .)
other_cols <- df_headers %>%
tidyr::fill(V1) %>%
separate(V1, sep = "\\(", into = c("V1", "remove")) %>%
select(-remove) %>%
mutate_all(list(function(x) {
x %>% str_remove_all("\\-|\\(|\\)|\\ ") %>% str_squish()
})) %>%
mutate_if(is.character,
list(function(x) {
ifelse(is.na(x), "", x)
})) %>%
mutate(V1 = str_to_lower(V1)) %>%
unite(type, V1, V2, sep = "") %>%
pull()
data <-
df_base %>%
setNames(c(base_col, other_cols)) %>%
gather(item, amount, -c(base_col))
data
}
.add_table_no <- function(data, table_number = 1) {
data %>%
mutate(numberTable = table_number) %>%
select(numberTable, everything())
}
.nest_omb <-
function(data) {
data %>%
group_by(numberTable, nameTable, slugTable) %>%
nest() %>%
ungroup() %>%
mutate(countRows = data %>% map_dbl(nrow),
countColumns = data %>% map_dbl(ncol)) %>%
select(-data, everything())
}
.convert_amount <- function(data, amount_col = "amount") {
data %>%
mutate_at(amount_col,
list(function(x) {
x %>% parse_number() * 1000000 %>% as.integer() %>% currency(digits = 0)
})) %>%
filter(!is.na(!!!syms(amount_col)))
}
.unpivot_omb <-
function(data,
data_start_row = 5,
column_names = c("metric", "parent", "item"),
header_rows = 2:4,
column_start = 2) {
df_columns <-
data %>% dplyr::slice(header_rows) %>% dplyr::select(column_start:ncol(data)) %>%
t() %>%
as_tibble() %>%
mutate_all(as.character)
names(df_columns) %>%
walk(function(x) {
df_columns <<- df_columns %>%
fill(!!sym(x))
})
d <-
data %>% dplyr::slice(data_start_row:nrow(data))
years <-
d %>% t() %>% as_tibble() %>% slice(1) %>% as.character()
d <- d %>% t() %>% as_tibble()
d <- d %>% slice(2:nrow(d)) %>%
setNames(years)
d <-
df_columns %>%
setNames(column_names) %>%
bind_cols(d)
d
}
.fix_year <-
function(data,
year_column = "yearBudget",
current_budget_year = 2020) {
data <-
data %>%
mutate(
UQ(year_column) := case_when(!!sym(year_column) == "TQ" ~ "1977",
TRUE ~ !!sym(year_column)),
isEstimate = !!sym(year_column) %>% str_detect("estimate"),
UQ(year_column) := !!sym(year_column) %>% str_remove_all("estimate") %>% as.numeric()
) %>%
select(isEstimate, one_of(year_column), everything())
if (data %>% hasName("yearBudget")) {
data <- data %>%
mutate(
dateFiscalYearEnd = case_when(
yearBudget < 1977 ~ glue("{yearBudget}-12-31") %>% as.character() %>% ymd(),
TRUE ~ glue("{yearBudget}-09-30") %>% as.character() %>% ymd()
)
) %>%
select(isEstimate,
dateFiscalYearEnd,
one_of(year_column),
everything())
}
data <-
data %>%
mutate(isEstimate = yearBudget > current_budget_year)
data
}
.munge_metrics <-
function(data) {
data %>%
mutate(
metric = case_when(
metric == "Addendum: Composite Deflator" ~ "ratioDeflator",
metric == "In Constant (FY 2012) Dollars" ~ "amountBillions2012Dollars",
metric %>% str_detect(
"In Millions of Current Dollars|In Millions of Dollars|In Current Dollars"
) ~ "amountMillionsNominal",
metric %in% c("In of Dollars") ~ "amountMillionsNominal",
metric %>% str_detect("In Billions of Constant") ~ "amountBillions2012Dollars",
metric %>% str_detect("Percentages of GDP") ~ "pctGDP",
metric %>% str_detect(
"Percentages of Total Outlays|As Percentages of Federal Outlays"
) ~ "pctBudget",
metric %>% str_detect("Composite Outlay Deflators") ~ "ratio",
metric %in% c("In Billions of Current Dollars", "In Billions of Dollars") ~ "amountBillionsNominal",
metric == "GDP (Chained) Price Index" ~ "ratio",
metric == "Addendum: Fiscal Year GDP" ~ "amountMillionsGDP",
metric == "GDP (in billions of dollars)" ~ "amountBillions2012Dollars",
TRUE ~ metric
)
)
}
.munge_parents <-
function(data) {
if (!data %>% hasName("parent")) {
return(data)
}
data <-
data %>%
mutate(
parent = case_when(
parent == "Addendum: Direct Capital" ~ "Direct Capital",
parent %>% str_detect("Other nondefense") ~ "Non Defense",
parent %>% str_detect("Natural resources and environment") ~ "Natural Resources and Environment",
parent %>% str_detect("Community and regional development") ~ "Community Development",
parent %>% str_detect("Direct Federal") ~ "Federal Direct",
parent %>% str_detect("Atomic Energy and Other Defense") ~ "Atomic Energy",
parent %>% str_detect("Department of Defense") ~ "Department of Defense",
parent %>% str_detect("National defense") ~ "Defense",
parent %>% str_detect("General science, space, and technology") ~ "Science, Space and Technology",
TRUE ~ parent
)
)
data
}
.munge_omb_items <-
function(data) {
if (!data %>% hasName("item")) {
return(data)
}
data <-
data %>%
mutate(item = item %>% str_squish()) %>%
mutate(
item = case_when(
item %>% str_detect("Nondefense") ~ "Non Defense",
item %>% str_detect("Urban renewal") ~ "Urban Renewal",
item %>% str_detect("Pollution control facilities") ~ "Polluition Control Facilities",
item %>% str_detect("Block grants") ~ "Block Grants",
item %>% str_detect("Housing assistance") ~ "Housing
Assistance",
item %>% str_detect("National Defense|National defense") ~ "Defense",
item %>% str_detect("Construction and Rehabilitation of Physical Assets") ~ "Asset Construction and Rehabilitation",
item %>% str_detect(
"Education, training, employment, and social services|Education, Training and Services"
) ~ "Education, Training and Services",
item %>% str_detect("Public works acceleration/local public works") ~ "Public Works",
item %>% str_detect("All other") ~ "Other",
item %>% str_detect("DOT") ~ "DOT",
item %>% str_detect("Other national defense") ~ "Other Defense",
item %>% str_detect("Natural resources and environment") ~ "Natural Resources and Environment",
item %>% str_detect("Atomic energy general science") ~ "Atomic Energy",
TRUE ~ item
)
)
data
}
.parse_41 <-
function(data, exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
data %>% .unpivot_omb(data_start_row = 6, header_rows = 3:5)
df <- df %>%
mutate_if(is.character, str_squish)
df <-
df %>%
mutate(
item =
case_when(
metric == "GDP (in billions of dollars)" ~ "GDP",
metric == "GDP (Chained) Price Index" ~ "ratioGDP",
parent == "Total" ~ "Total",
parent == "Total Defense" ~ "Defense",
parent == "Total Nondefense" ~ "Non Defense",
TRUE ~ item
)
) %>%
.munge_metrics() %>%
mutate(
parent = parent %>% str_remove_all("Total |\\Addendum: ") %>% str_replace_all(
"Undis- tributed Offsetting Receipts",
"Undistributed Recepits"
)
)
df <-
df %>%
gather(yearBudget, amount, -c("metric", "parent", "item")) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_omb_items() %>%
.fix_omb_amount()
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <-
df %>%
filter(!is.na(yearBudget)) %>%
.munge_parents()
df
}
.fix_omb_amount <-
function(data) {
has_cols <- data %>% hasName("metric") && data %>% hasName("amount")
if (!has_cols) {
return(data)
}
data <- data %>%
mutate(
amount = parse_number(amount),
amount = case_when(
metric %>% str_detect("^pct") ~ amount / 100,
metric %>% str_detect("Millions") ~ amount * 1000000,
metric %>% str_detect("Billions") ~ amount * 1000000000,
TRUE ~ amount
),
metric = metric %>% str_remove_all("Millions|Billions")
)
data
}
.munge_for_column_parents <- function(df_items) {
df_parents <- df_items %>%
filter(item %>% str_detect(":")) %>%
select(numberColumn, parent = item) %>%
mutate(numberColumn = numberColumn + 1)
df_items %>%
left_join(df_parents, by = "numberColumn") %>%
fill(parent) %>%
mutate(
parent = parent %>% str_remove_all("\\:"),
item = item %>% str_remove_all("\\(|\\)|[0-9]")
) %>%
mutate_if(is.character, str_squish) %>%
filter(!item %>% str_detect(":"))
}
.munge_omb_wide <- function(data,
year_row = 3,
column_start = 2,
remove_column = NULL,
year_offset = 1,
gather_columns = c("parent", "item"),
metric = "amountMillionsNominal",
id_ratio_threshold = .015,
filter_total = T) {
years <-
data %>%
dplyr::slice(year_row) %>%
dplyr::select(column_start:ncol(data)) %>%
as.character()
if (years %>% is.na() %>% sum() / length(years) > .1) {
years <-
tibble(year = years) %>%
fill(year) %>%
pull(year)
}
d <-
data %>% slice((year_row + year_offset):nrow(data))
if (length(remove_column) > 0) {
d <- d %>% select(-remove_column)
}
d <-
d %>% t() %>% as_tibble() %>%
mutate_all(as.character)
items <-
d %>% dplyr::slice(1) %>% t() %>% as.character()
d <- d %>% dplyr::slice(2:nrow(d)) %>% .remove_na()
df_items <- tibble(item = items) %>%
mutate(numberColumn = 1:n()) %>%
select(numberColumn, everything()) %>%
mutate(idColumn = glue("V{numberColumn}") %>% as.character()) %>%
select(idColumn, everything())
id_ratio <-
items %>% str_detect("[0-9][0-9][0-9]") %>% sum() / length(items)
id_ratio <- case_when(is.na(id_ratio) ~ 0,
TRUE ~ id_ratio)
if (id_ratio > id_ratio_threshold) {
df_items <-
df_items %>%
mutate(
isFunction = item %>% str_detect("^[0-9][0-9][0-9]"),
idFunction = case_when(isFunction ~ item %>% substr(1, 3),
TRUE ~ NA_character_),
item = case_when(isFunction ~ item %>% substr(4, nchar(item)),
TRUE ~ item) %>% str_squish()
) %>%
select(idColumn, numberColumn, idFunction, everything()) %>%
select(-isFunction)
}
df_items <-
df_items %>% .munge_for_column_parents()
df_items <- df_items %>%
mutate(
parent = case_when(
item == "Direct Payments for Individuals" ~ "Direct Payments for Individuals",
TRUE ~ parent
)
) %>%
fill(parent)
d <-
d %>% t() %>% data.frame() %>% tibble::rownames_to_column(var = "idColumn") %>% as_tibble() %>%
mutate_all(as.character)
bad_row <-
d %>% slice(1) %>% select(2:ncol(d)) %>% as.character() %>%
str_detect("[0-9]") %>% sum() == 0
bad_row <- case_when(is.na(bad_row) ~ F,
TRUE ~ bad_row)
if (bad_row) {
d <- d %>% slice(2:nrow(d))
}
d <-
d %>%
left_join(df_items %>% select(-numberColumn), by = "idColumn") %>%
select(-idColumn) %>%
select(one_of(c("parent", "item", "idFunction")), everything())
start_names <-
names(d)[names(d) %in% c("parent", "item", "idFunction")]
d <-
d %>%
setNames(c(start_names, years))
d <-
d %>%
gather(yearBudget, amount , -c(start_names)) %>%
filter(!is.na(amount)) %>%
mutate(metric = metric) %>%
select(metric, everything()) %>%
.fix_year() %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
filter(!is.na(amount))
d <- d %>%
mutate(
metric = case_when(
item %>% str_detect("As a percentage of total outlays") ~ "pctOutlays",
item %>% str_detect("In billions of constant FY dollars") ~ "amountBillions2012Dollars",
TRUE ~ metric
)
)
d <-
d %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
d <-
d %>%
mutate(item = item %>% str_remove_all("\\*"))
if (filter_total) {
d <-
d %>%
filter(!item %>% str_detect("Total|TOTAL|Subtotal"))
}
d
}
.calc_omb_summary <-
function(data) {
data <-
data %>%
filter(metric %>% str_detect("count|amount")) %>%
.summarise_sum(amount_col = "amount",
columns =
data %>% select(-amount) %>% names()) %>%
bind_rows(
data %>%
filter(metric %>% str_detect("pct|ratio")) %>%
.summarise_mean(
amount_col = "amount",
columns =
data %>% select(-amount) %>% names()
)
) %>%
mutate_if(is.character, str_squish) %>%
.remove_na() %>%
arrange(yearBudget)
data
}
#' OMB Budget Supplement URLs
#'
#' Includes links to the most recent supplemental
#' budget tables and links.
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_omb_supplements()
dictionary_omb_supplements <-
memoise::memoise(function() {
page <-
read_html("https://www.whitehouse.gov/omb/supplemental-materials/")
nodes <- page %>% html_nodes("p a")
files <- nodes %>% html_text()
urls <-
nodes %>% html_attr("href")
data <- tibble(nameFile = files, urlData = urls)
data <-
data %>%
filter(urlData %>% str_detect(".pdf$|.xlsx$|.csv$")) %>%
separate(
nameFile,
into = c("typeFile", "descriptionFile"),
remove = F,
extra = "merge",
sep = "\\:",
fill = "right"
) %>%
mutate_if(is.character, str_squish) %>%
mutate_at(c("nameFile", "typeFile", "descriptionFile"), str_to_upper)
data
})
# dicts -------------------------------------------------------------------
# https://www.whitehouse.gov/omb/historical-tables/
#' OMB Budget Tables
#'
#' List of OMB budget tables
#'
#' @param url url of file location
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_omb_tables()
dictionary_omb_tables <-
function(url = "https://www.whitehouse.gov/omb/historical-tables/") {
page <- read_html(url)
file_links <-
page %>% html_nodes(".editor__module-eop+ p a") %>% html_attr("href") %>% .[[1]]
zip <-
page %>% html_nodes("h2+ p a") %>% html_attr("href")
nodes <-
page %>%
html_nodes("p+ p a")
files <-
nodes %>%
html_text() %>%
str_squish() %>%
str_replace("\\—", "\\|")
urls <- nodes %>% html_attr("href")
data <-
tibble(files) %>%
mutate(numberTable = 1:n()) %>%
select(numberTable, everything()) %>%
separate(files, into = c("idTable", "file"), sep = "\\|") %>%
mutate(idTable = idTable %>% str_remove_all("Table ")) %>%
separate(file,
into = c("nameFile", "yearsData"),
sep = "\\:") %>%
separate(yearsData,
into = c("yearsData", "formatData"),
sep = "\\(") %>%
mutate_if(is.character, str_squish) %>%
separate(
yearsData,
sep = "\\–",
into = c("yearStart", "yearEnd"),
remove = F
) %>%
mutate(formatData = formatData %>% str_remove_all("\\)")) %>%
mutate_at(c("yearStart", "yearEnd"), as.integer) %>%
mutate(urlFile = urls,
urlBudgetZIP = zip,
urlBudgetPDF = file_links)
data
}
.add_table_name <- function(data) {
data[1, 1] %>% as.character() %>% str_squish()
}
# parsers -----------------------------------------------------------------
.parse_32 <-
function(data) {
table_name <-
.add_table_name(data)
df_columns <-
data %>% dplyr::slice(2:3) %>% select(2:ncol(data)) %>%
t() %>%
as_tibble() %>%
fill(V1) %>%
mutate(
V1 = case_when(
V1 %>% str_detect("In Millions of Dollars") ~ "amountMillions",
V1 %>% str_detect("In Billions") ~ "amountBillions2012Dollars",
V1 %>% str_detect("Percentages of GDP") ~ "pctGDP",
V1 %>% str_detect("Percentages of Total Outlays") ~ "pctOutlays"
)
) %>%
mutate(V2 = V2 %>% str_remove_all("\n")) %>%
unite(item, V1, V2, sep = "") %>%
mutate(item = item %>% str_remove_all("\\ "))
data <-
data %>%
dplyr::slice(4:nrow(data)) %>%
setNames(c("yearBudget", df_columns$item)) %>%
mutate(isEstimate = yearBudget %>% str_detect("estimate")) %>%
select(isEstimate, everything()) %>%
select(-matches("Total")) %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
) %>%
select(nameTable, everything()) %>%
mutate(yearBudget = as.numeric(yearBudget)) %>%
fill(yearBudget)
amt_m <- data %>% select(matches("amountMillions")) %>%
names()
amt_b <-
data %>% select(matches("amountBill")) %>%
names()
pct_cols <- data %>% select(matches("pct")) %>% names()
data <-
data %>%
mutate_at(pct_cols, list(function(x) {
x %>% parse_number() / 100
})) %>%
mutate_at(amt_m, list(function(x) {
x %>% parse_number() * 1000000 %>% currency(digits = 0)
})) %>%
mutate_at(amt_b, list(function(x) {
x %>% parse_number() * 1000000000 %>% currency(digits = 0)
}))
names(data) <-
names(data) %>% str_remove_all("Millions|Billions")
names(data) <-
names(data) %>% str_replace_all("Nondefense", "NonDefense")
data
data <-
data %>%
select(nameTable:yearBudget, matches("pct")) %>%
group_by(nameTable, isEstimate, yearBudget) %>%
summarise_all(mean) %>%
left_join(
data %>%
select(nameTable:yearBudget, matches("amount")) %>%
group_by(nameTable, isEstimate, yearBudget) %>%
summarise_all(sum),
by = c("nameTable", "isEstimate", "yearBudget")
) %>%
ungroup()
data
}
.parse_45 <-
function(data, exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
data %>% .unpivot_omb(
data_start_row = 4,
header_rows = 2:3,
column_names = c("metric", "item")
) %>%
.munge_metrics()
df <- df %>%
mutate_if(is.character, str_squish)
df <-
df %>%
gather(yearBudget, amount, -c("metric", "item")) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_omb_items() %>%
.fix_omb_amount()
if (exclude_total) {
df <- df %>%
filter(!item %>% str_detect("Total"))
}
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <-
df %>%
filter(!is.na(yearBudget))
df
}
.parse_44 <-
function(data) {
table_name <-
.add_table_name(data)
df_years <-
data %>% dplyr::slice(3) %>% select(2:ncol(data)) %>%
gather(column, year) %>%
fill(year) %>%
mutate(idRow = 1:n()) %>%
select(idRow, year)
df_types <-
data %>% dplyr::slice(4) %>% select(2:ncol(data)) %>%
gather(column, type) %>%
mutate(idRow = 1:n()) %>%
select(idRow, type)
df_cols <- df_years %>%
left_join(df_types, by = "idRow") %>%
mutate(type = str_to_lower(type)) %>%
unite(type, type, year, sep = "_")
data <- data %>%
dplyr::slice(5:nrow(data))
names(data)[[1]] <- "nameProgram"
names(data)[2:ncol(data)] <- df_cols$type
data <-
data %>%
gather(period, amount, -nameProgram) %>%
separate(period,
into = c("typeFunding", "yearBudget"),
extra = "merge") %>%
filter(typeFunding != "total") %>%
filter(!nameProgram %>% str_detect("Total,"))
data <- data %>% filter(!nameProgram %>% str_detect("\\*"))
data <- data %>%
mutate(idRow = 1:n())
df_parents <-
data %>%
filter(is.na(amount) | nameProgram %>% str_detect(":")) %>%
filter(is.na(amount)) %>%
rename(parent = nameProgram) %>%
mutate(idRow = 1 + idRow) %>%
mutate(parent = parent %>% str_remove_all("\\:")) %>%
select(idRow, parent)
data <- data %>%
left_join(df_parents, by = "idRow") %>%
fill(parent) %>%
select(parent, everything()) %>%
select(-idRow) %>%
filter(!is.na(parent)) %>%
mutate(metric = "amountNominalMillions") %>%
select(metric, everything()) %>%
.fix_omb_amount() %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
mutate(
nameProgram = nameProgram %>% str_remove_all("\\:|\\)|\\(") %>% str_remove_all("formerly Food stampsincluding Puerto Rico|WIC and CSFP|\\.") %>% str_replace_all("\\--|\\-|\\/", " ")
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(amount = currency(amount, digits = 0)) %>%
rename(item = nameProgram) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_1 <-
function(data) {
table_name <-
.add_table_name(data)
df_types <-
data %>% dplyr::slice(3) %>% select(2:ncol(data)) %>%
gather(column, type) %>%
fill(type) %>%
mutate(idRow = 1:n()) %>%
select(idRow, type) %>%
mutate(
type = case_when(
type %>% str_detect("On-Budget") ~ "OnBudget",
type %>% str_detect("Off-Budget") ~ "OffBudget" ,
TRUE ~ "Total"
)
)
df_outlays <-
data %>% dplyr::slice(4) %>% select(2:ncol(data)) %>%
gather(column, outlay) %>%
mutate(idRow = 1:n()) %>%
select(idRow, outlay)
df_cols <-
df_outlays %>%
left_join(df_types, by = "idRow") %>%
unite(type, type, outlay, sep = "_")
data <-
data %>% dplyr::slice(5:nrow(data)) %>%
set_names(c("yearBudget", df_cols$type)) %>%
gather(type, amount, -yearBudget) %>%
separate(type,
into = c("typeBudget", "typeCashFlow"),
extra = "merge") %>%
filter(typeBudget != "Total",
!typeCashFlow %>% str_detect("Surplus")) %>%
mutate(yearBudget = case_when(yearBudget == "TQ" ~ "1977",
TRUE ~ yearBudget)) %>%
.convert_amount() %>%
group_by(yearBudget, typeBudget, typeCashFlow) %>%
summarise_all(sum) %>%
ungroup() %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
) %>%
mutate_if(is.character, str_squish) %>%
arrange(yearBudget) %>%
select(nameTable, everything())
data
}
.parse_2 <-
function(data) {
table_name <-
.add_table_name(data)
df_types <-
data %>% dplyr::slice(2) %>% select(2:ncol(data)) %>%
gather(column, type) %>%
fill(type) %>%
mutate(idRow = 1:n()) %>%
select(idRow, type) %>%
mutate(
type = case_when(
type %>% str_detect("On-Budget") ~ "OnBudget",
type %>% str_detect("Off-Budget") ~ "OffBudget" ,
type %>% str_detect("GDP") ~ "GDP",
TRUE ~ "Total"
)
)
df_outlays <-
data %>% dplyr::slice(3) %>% select(2:ncol(data)) %>%
gather(column, outlay) %>%
mutate(idRow = 1:n(),
outlay = coalesce(outlay, "")) %>%
select(idRow, outlay)
df_cols <-
df_outlays %>%
left_join(df_types, by = "idRow") %>%
unite(type, type, outlay, sep = "_")
data <-
data %>% dplyr::slice(4:nrow(data)) %>%
set_names(c("yearBudget", df_cols$type)) %>%
gather(type, amount, -yearBudget) %>%
separate(type,
into = c("typeBudget", "typeCashFlow"),
extra = "merge") %>%
filter(typeBudget != "Total",
!typeCashFlow %>% str_detect("Surplus")) %>%
mutate(yearBudget = case_when(yearBudget == "TQ" ~ "1977",
TRUE ~ yearBudget)) %>%
mutate(amount = amount %>% readr::parse_number()) %>%
filter(!is.na(amount))
data <-
data %>%
.fix_year()
df_gdp <-
data %>%
filter(typeBudget == "GDP") %>%
select(-typeCashFlow) %>%
mutate(amount = as.character(amount)) %>%
.convert_amount() %>%
rename(item = typeBudget) %>%
mutate(metric = "amountNominal",
amount = as.numeric(amount)) %>%
.calc_omb_summary() %>%
mutate(amount = amount * 1000)
df_non_gdp <-
data %>%
filter(typeBudget != "GDP") %>%
rename(item = typeCashFlow) %>%
rename(parent = typeBudget) %>%
mutate(amount = amount / 100) %>%
mutate(metric = "pctGDP") %>%
.calc_omb_summary() %>%
select(isEstimate:yearBudget, metric, everything())
data <-
df_gdp %>%
bind_rows(df_non_gdp) %>%
mutate(nameTable = table_name) %>%
select(nameTable, yearBudget, everything())
data <- data %>%
select(
nameTable,
isEstimate,
yearBudget,
dateFiscalYearEnd,
metric,
parent,
item,
everything()
)
data
}
.parse_3 <-
function(data) {
table_name <-
.add_table_name(data)
df_types <-
data %>% dplyr::slice(3) %>% select(2:ncol(data)) %>%
gather(column, type) %>%
fill(type) %>%
mutate(idRow = 1:n()) %>%
select(idRow, type) %>%
mutate(
type = case_when(
type %>% str_detect("Current Dollars") ~ "amountNominal",
type %>% str_detect("FY 2012") ~ "amount2012" ,
type %>% str_detect("Percentages of GDP") ~ "pctGDP",
type %>% str_detect("Addendum") ~ "ratioDeflator"
)
)
df_outlays <-
data %>% dplyr::slice(4) %>% select(2:ncol(data)) %>%
gather(column, outlay) %>%
mutate(idRow = 1:n(),
outlay = coalesce(outlay, "")) %>%
select(idRow, outlay)
df_cols <-
df_outlays %>%
left_join(df_types, by = "idRow") %>%
unite(type, type, outlay, sep = "_")
data <-
data %>%
dplyr::slice(5:nrow(data)) %>%
set_names(c("yearBudget", df_cols$type)) %>%
gather(type, amount, -yearBudget) %>%
separate(
type,
into = c("typeBudget", "typeCashFlow"),
sep = "_",
extra = "merge"
) %>%
filter(typeBudget != "Total",
!typeCashFlow %>% str_detect("Surplus")) %>%
mutate(yearBudget = case_when(yearBudget == "TQ" ~ "1977",
TRUE ~ yearBudget)) %>%
mutate(amount = amount %>% readr::parse_number()) %>%
filter(!is.na(amount)) %>%
mutate(
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
)
df_gdp <-
data %>%
filter(typeBudget == "pctGDP") %>%
unite(metric, typeBudget, typeCashFlow, sep = "") %>%
.summarise_pct() %>%
mutate(amount = amount / 100)
df_deflator <-
data %>%
.unite_filter_metric(budget_filter = "ratioDeflator") %>%
.summarise_pct()
df_amts <-
data %>%
.unite_filter_metric(budget_filter = c("amount2012", "amountNominal")) %>%
mutate(amount = as.character(amount)) %>%
.convert_amount() %>%
.summarise_sum() %>%
mutate_if(is.numeric, as.numeric)
data <-
list(df_amts, df_gdp, df_deflator) %>%
map_dfr(bind_rows) %>%
mutate(yearBudget = as.numeric(yearBudget),
nameTable = table_name) %>%
select(nameTable, yearBudget, everything())
data
}
.parse_4 <-
function(data,
column_names = c("parent", "item"),
metric = "amountNominalMillions") {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = 5,
header_rows = 3:4,
column_names = column_names
) %>%
mutate(metric = metric) %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.munge_metrics() %>%
mutate_all(str_squish) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_metrics() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <- df %>%
mutate_at(c("parent", "item"),
list(function(x) {
x %>% str_remove_all("\\(") %>% str_remove_all("\\)") %>%
str_replace_all("\\ - ", " ") %>% str_remove_all("[0-9]") %>%
str_squish()
}))
df <-
df %>%
filter(item != "Total" | is.na(item)) %>%
mutate(amount = currency(amount, digits = 0))
df
}
.parse_5 <-
function(data,
column_names = c("parent", "item"),
metric = "amountNominalMillions") {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = 5,
header_rows = 3:4,
column_names = column_names
) %>%
mutate(metric = metric) %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.munge_metrics() %>%
mutate_all(str_squish) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_metrics() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <- df %>%
mutate_at(c("parent", "item"),
list(function(x) {
x %>% str_remove_all("\\(") %>% str_remove_all("\\)") %>%
str_replace_all("\\ - ", " ") %>% str_remove_all("[0-9]") %>%
str_squish()
}))
df <-
df %>%
filter(item != "Total" | is.na(item)) %>%
filter(parent != "Total Receipts") %>%
mutate(amount = currency(amount, digits = 0))
df
}
.parse_7 <-
function(data,
column_names = c("parent", "item"),
metric = "pctGDP") {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = 4,
header_rows = 2:3,
column_names = column_names
) %>%
mutate(metric = metric) %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.munge_metrics() %>%
mutate_all(str_squish) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_metrics() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <- df %>%
mutate_at(c("parent", "item"),
list(function(x) {
x %>% str_remove_all("\\(") %>% str_remove_all("\\)") %>%
str_replace_all("\\ - ", " ")
}))
df <- df %>%
filter(item != "Total" | is.na(item)) %>%
filter(parent != "Total Receipts") %>%
rename(pct = amount)
df
}
.parse_8 <-
function(data) {
table_name <-
.add_table_name(data)
df <-
data %>%
.munge_omb_wide(year_row = 3, gather_columns = "item") %>%
.remove_na() %>%
filter(!is.na(amount)) %>%
.munge_metrics() %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
filter(!is.na(amount)) %>%
mutate_at(c("parent", "item"),
list(function(x) {
x %>% str_replace_all("\\-", " ") %>% str_squish()
}))
df <- df %>%
mutate(amount = currency(amount, digits = 0))
df
}
.parse_9 <-
function(data,
metric = "amountNominalMillions",
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = 5,
column_names = column_names,
header_rows = 3:4
) %>%
gather(yearBudget, amount, -c(column_names)) %>%
mutate(metric = metric) %>%
.munge_metrics() %>%
mutate_all(str_squish) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
.munge_metrics() %>%
.fix_omb_amount() %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
filter(!is.na(amount))
df <-
df %>%
mutate(parent = parent %>% str_remove_all('"')) %>%
filter(item != "Total" | is.na(item)) %>%
filter(parent != "Total Other Receipts")
df <-
df %>%
.calc_omb_summary() %>%
mutate(amount = currency(amount, digits = 0)) %>%
mutate_at(column_names,
list(function(x) {
x %>% str_remove_all("[0-9]") %>%
str_remove_all("\\(") %>%
str_remove_all("\\)") %>%
str_remove_all("\\Memorandum: ") %>%
str_squish()
}))
df
}
.parse_10 <-
function(data,
metric = "amountNominalMillions",
exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(
data = data,
year_row = 2,
gather_columns = "item",
year_offset = 0,
metric = metric
) %>%
filter(
!item %in% c(
"Department or other unit",
"Function and Subfunction",
"Superfunction and Function"
)
) %>%
.remove_na() %>%
mutate_if(is.character, str_squish)
df <-
df %>%
mutate(
metric = case_when(
parent == "As percentages of outlays" ~ "pctOutlays",
parent == "As percentages of GDP" ~ "pctGDP",
TRUE ~ metric
)
)
df <-
df %>%
select(-parent)
df <-
df %>%
mutate(amount = case_when(metric %>% str_detect("pct") ~ amount / 100000000,
TRUE ~ amount))
df <-
df %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
if (exclude_total) {
df <-
df %>%
filter(!item %in% c("On-budget", "Off-budget"))
}
df
}
.parse_11 <-
function(data,
metric = "amountNominalMillions",
exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(
data = data,
year_row = 3,
gather_columns = "item",
year_offset = 0,
metric = metric
) %>%
filter(!item %in% c("Department or other unit", "Function and Subfunction")) %>%
.remove_na() %>%
mutate_if(is.character, str_squish)
df <-
df %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
if (exclude_total) {
df <- df %>%
filter(!item %in% c("On-budget", "Off-budget"))
}
df
}
.parse_12 <-
function(data) {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(data = data,
year_row = 3,
gather_columns = "item") %>%
.remove_na() %>%
mutate_if(is.character, str_squish)
df <-
df %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
df
}
.parse_13 <-
function(data, metric = "pctOutlay") {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(
data = data,
year_row = 2,
gather_columns = "item",
year_offset = 0,
metric = metric
) %>%
filter(item != "Department or other unit") %>%
.remove_na() %>%
mutate_if(is.character, str_squish)
df <-
df %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(pct = amount)
df
}
.parse_18 <-
function(data) {
table_name <-
.add_table_name(data)
col_names <- data %>% dplyr::slice(2) %>% as.character()
col_names[[1]] <- "nameDepartment"
data <-
data %>% dplyr::slice(3:nrow(data)) %>%
setNames(col_names) %>%
gather(yearBudget, amount, -c(nameDepartment)) %>%
filter(!is.na(amount)) %>%
.convert_amount() %>%
filter(!is.na(yearBudget)) %>%
filter(!nameDepartment %>% str_detect("Total")) %>%
select(yearBudget, everything())
return(data)
}
.parse_20 <-
function(data, base_row = 2) {
table_name <-
.add_table_name(data)
data <-
data %>%
.munge_general(
base_row = base_row,
table_name = table_name,
is_wide = T,
year_plus_long = 0
)
data <- data %>%
mutate(
measurement = case_when(
is.na(parent) ~ "amountNominal",
parent == "As percentages of GDP" ~ "pctGDP",
parent == "As percentages of outlays" ~ "pctOutlays",
parent == "In billions of constant FY 2012 dollars" ~ "amount2012",
TRUE ~ NA_character_
)
) %>%
fill(measurement) %>%
select(nameTable:yearBudget, measurement, metric, amount) %>%
mutate(
amount = case_when(
measurement %in% c("pctGDP", "pctOutlays") ~ amount / 100,
measurement == "amountNominal" ~ amount * 1000000,
measurement == "amount2012" ~ amount * 1000000000,
TRUE ~ amount
)
)
data <-
data %>%
.fix_year() %>%
mutate(metric = metric %>% str_remove_all("[0-9]$") %>% str_squish()) %>%
select(nameTable, everything())
data <-
data %>%
rename(item = metric,
metric = measurement) %>%
.calc_omb_summary()
data
}
.parse_21 <-
function(data,
column_names = c("metric", "parent", "item"),
exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
data %>%
.unpivot_omb(
data_start_row = 5,
header_rows = 2:4,
column_names = column_names
) %>%
mutate_if(is.character, str_squish) %>%
select(metric, everything()) %>%
.remove_na()
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.fix_year() %>%
filter(!is.na(amount)) %>%
mutate_all(str_squish) %>%
.munge_metrics() %>%
.fix_omb_amount() %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
if (exclude_total) {
df <-
df %>%
filter(item != "Total" | is.na(item))
}
df
}
.parse_22 <-
function(data) {
table_name <-
.add_table_name(data)
d <-
data %>%
slice(4:nrow(data))
df <-
d %>% select(1, 3, 5, 7) %>%
gather(col, year) %>%
select(year)
df <- df %>%
bind_cols(
d %>% select(2, 4, 6, 8) %>% gather(column, amount) %>%
select(amountDebtSubjectToLimit = amount) %>%
mutate(amountDebtSubjectToLimit = amountDebtSubjectToLimit %>% parse_number() * 1000000)
) %>%
rename(yearBudget = year) %>%
mutate(yearBudget = case_when(yearBudget == "TQ" ~ "1977",
TRUE ~ yearBudget)) %>%
mutate(
nameTable = table_name,
isEstimate = yearBudget %>% str_detect("estimate"),
yearBudget = yearBudget %>% str_remove_all("estimate")
) %>%
mutate(yearBudget = as.numeric(yearBudget)) %>%
select(nameTable, isEstimate, yearBudget, everything()) %>%
filter(!is.na(amountDebtSubjectToLimit))
df <-
df %>%
.summarise_mean(
columns = c("nameTable", "isEstimate", "yearBudget"),
amount_col = "amountDebtSubjectToLimit"
) %>%
mutate(amountDebtSubjectToLimit = currency(amountDebtSubjectToLimit, digits = 0))
}
.parse_23 <-
function(data) {
table_name <-
.add_table_name(data)
d <-
data %>%
slice(4:nrow(data)) %>%
setNames(c(
"detailsStatute",
"dateStatute",
"descriptionStatute",
"amountDebtCeiling"
)) %>%
mutate(dateStatute = mdy(dateStatute)) %>%
fill(detailsStatute, dateStatute) %>%
mutate(amountDebtCeiling =
amountDebtCeiling %>% parse_number() * 1000000000 %>% currency(digits = 0)) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
d
}
.parse_24 <-
function(data,
column_names = c("type", "roleBudget", "parent", "item"),
exclude_total = T,
metric = "amountBillionsNominal") {
table_name <-
.add_table_name(data)
df <-
data %>%
.unpivot_omb(
data_start_row = 7,
header_rows = 3:6,
column_names = column_names
) %>%
mutate_if(is.character, str_squish) %>%
mutate(metric = metric) %>%
select(metric, everything()) %>%
.remove_na()
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.fix_year() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <-
df %>%
filter(!is.na(roleBudget)) %>%
mutate(
roleBudget = case_when(
roleBudget == "Non- defense" ~ "Non Defense",
roleBudget == "National Defense" ~ "Defense",
TRUE ~ roleBudget
)
) %>%
mutate(amount = amount %>% currency(digits = 0))
df <- df %>%
mutate_at(column_names,
list(function(x) {
x %>% str_remove_all("[0-9]") %>%
str_remove_all("\\(") %>%
str_remove_all("\\)") %>%
str_squish()
}))
if (exclude_total) {
df <-
df %>%
filter(roleBudget != "Total")
}
df
}
.parse_25 <-
function(data,
column_names = c("roleBudget", "parent", "item"),
exclude_total = T,
metric = "amountBillionsNominal") {
table_name <-
.add_table_name(data)
df <-
data %>%
.unpivot_omb(
data_start_row = 7,
header_rows = 3:5,
column_names = column_names
) %>%
mutate_if(is.character, str_squish) %>%
mutate(metric = metric) %>%
select(metric, everything()) %>%
.remove_na()
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.fix_year() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <-
df %>%
filter(!is.na(parent)) %>%
mutate(
parent = case_when(
parent == "Non- defense" ~ "Non Defense",
parent == "National Defense" ~ "Defense",
TRUE ~ parent
)
)
df <-
df %>%
mutate(item = case_when(
item %>% str_detect("Undistributed Offsetting Receipts") ~ "Undistributed Offsetting Receipts",
TRUE ~ item
)) %>%
mutate(amount = amount %>% currency(digits = 0))
if (exclude_total) {
df <- df %>%
filter(parent != "Total")
}
df
}
.parse_26 <-
function(data,
column_names = c("type", "roleBudget", "parent", "item"),
exclude_total = F,
metric = "pctOutlay") {
table_name <-
.add_table_name(data)
df <-
data %>%
.unpivot_omb(
data_start_row = 6,
header_rows = 2:5,
column_names = column_names
) %>%
mutate_if(is.character, str_squish) %>%
mutate(metric = metric) %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
.fix_year() %>%
.fix_omb_amount() %>%
filter(!is.na(amount)) %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <- df %>%
filter(!is.na(roleBudget)) %>%
mutate(
roleBudget = case_when(
roleBudget == "Non- defense" ~ "Non Defense",
roleBudget == "National Defense" ~ "Defense",
TRUE ~ roleBudget
)
) %>%
rename(pct = amount)
df <-
df %>%
mutate(item = item %>% gsub("\\(1)", "", .) %>% str_squish())
if (exclude_total) {
df <- df %>%
filter(roleBudget != "Total")
}
df
}
.parse_28 <-
function(data, metric = "amount") {
table_name <-
.add_table_name(data)
df <-
data %>%
.munge_omb_wide(year_row = 3,
column_start = 2,
year_offset = 1) %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = amount %>% currency(digits = 0))
df <- df %>%
mutate(item = item %>% str_remove_all("\\)|\\(|\\,|\\[0-9]") %>% str_replace_all("\\-", " "))
df
}
.parse_33 <-
function(data,
data_start_row = 5,
column_names = c("metric", "parent", "item"),
header_rows = 2:4,
column_start = 2) {
table_name <- .add_table_name(data)
df <- .unpivot_omb(
data = data,
data_start_row = data_start_row,
column_names = column_names,
header_rows = header_rows,
column_start = column_start
)
data <-
df %>%
gather(yearBudget, amount, -column_names)
data <-
data %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items()
data <-
data %>%
.fix_omb_amount() %>%
mutate(amount = currency(amount, digits = 0))
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = amount %>% currency(digits = 0))
data
}
.parse_34 <-
function(data,
data_start_row = 5,
column_names = c("metric", "parent", "item"),
header_rows = 2:4,
column_start = 2) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = data_start_row,
column_names = column_names,
header_rows = header_rows,
column_start = column_start
)
data <-
df %>%
gather(yearBudget, amount, -column_names)
data <-
data %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items()
data <-
data %>%
.fix_omb_amount()
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(pct = amount)
data
}
.parse_35 <-
function(data,
data_start_row = 5,
column_names = c("parent", "item"),
header_rows = 3:4,
column_start = 2) {
table_name <- .add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = data_start_row,
column_names = column_names,
header_rows = header_rows,
column_start = column_start
)
data <-
df %>%
gather(yearBudget, amount, -column_names) %>%
filter(!is.na(amount)) %>%
mutate(metric = "amountMillionsNominal") %>%
select(metric, everything())
data <-
data %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items()
data <-
data %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
data <-
data %>%
.summarise_sum(amount_col = "amount",
columns =
data %>% select(-amount) %>% names()) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_36 <-
function(data,
data_start_row = 7,
column_names = c("parent", "item", "groupAgency"),
header_rows = 4:6,
column_start = 2) {
table_name <- .add_table_name(data)
df <-
.unpivot_omb(
data = data,
data_start_row = data_start_row,
column_names = column_names,
header_rows = header_rows,
column_start = column_start
)
data <-
df %>%
gather(yearBudget, amount, -column_names) %>%
filter(!is.na(amount)) %>%
mutate(metric = "amountMillionsNominal") %>%
select(metric, everything())
data <-
data %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
filter(!is.na(groupAgency))
data <-
data %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
data
}
.parse_37 <-
function(data,
year_row = 3,
column_start = 2,
gather_columns = c("parent", "item"),
metric = "amountMillionsNominal",
filter_total = T) {
table_name <- .add_table_name(data)
data <-
.munge_omb_wide(
data = data,
year_row = year_row,
gather_columns = gather_columns,
metric = metric,
filter_total = filter_total
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
data
}
.parse_39 <-
function(data,
year_row = 3,
column_start = 2,
gather_columns = c("parent", "item"),
metric = "amountMillionsNominal",
filter_total = T) {
table_name <- .add_table_name(data)
data <-
.munge_omb_wide(
data = data,
year_row = year_row,
gather_columns = gather_columns,
metric = metric,
filter_total = filter_total
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(groupAgency = item) %>%
.remove_na() %>%
mutate(amount = currency(amount, digits = 0))
data
}
.parse_38 <-
function(data,
exclude_total = T,
column_names = c("metric", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 4,
column_names = column_names,
header_rows = 2:3
)
data <-
df %>%
gather(yearBudget, amount, -column_names)
data <-
data %>%
.fix_year() %>%
filter(!is.na(metric))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total"))
}
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_40 <- function(data, exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(data = data)
if (exclude_total) {
df <-
df %>% filter(!item %>% str_detect("Total"))
}
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
.remove_na()
df
}
.parse_42 <-
function(data,
exclude_total = T,
column_names = c("metric", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 4,
column_names = column_names,
header_rows = 2:3
)
data <-
df %>%
gather(yearBudget, amount, -column_names)
data <-
data %>%
.fix_year() %>%
filter(!is.na(metric))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total"))
}
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
.remove_na()
data
}
.parse_43 <- function(data, exclude_total = T) {
table_name <-
.add_table_name(data)
df <-
.munge_omb_wide(data = data)
df <-
df %>%
filter(!is.na(metric))
df <-
df %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
filter(!is.na(amount))
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
arrange(yearBudget) %>%
mutate(amount = currency(amount, digits = 0))
df <-
df %>%
filter(!is.na(yearBudget)) %>%
filter(parent != "Total Payments for Individuals")
df
}
.parse_46 <-
function(data,
exclude_total = T,
year_offset = 1) {
table_name <-
.add_table_name(data)
df <-
data %>%
.munge_omb_wide(year_offset = year_offset)
df <-
df %>%
mutate_if(is.character, str_squish) %>%
filter(!item %in% c("Federal funds", "Trust funds")) %>%
.remove_na()
df <-
df %>%
filter(!is.na(amount)) %>%
.munge_omb_items()
if (exclude_total) {
df <-
df %>%
filter(!item %>% str_detect("Total"))
}
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
df <-
df %>%
filter(!is.na(yearBudget))
df
}
.parse_47 <-
function(data,
exclude_total = T,
year_offset = 1,
column_start = 3,
remove_column = 2) {
table_name <-
.add_table_name(data)
df <-
data %>%
.munge_omb_wide(
year_offset = year_offset,
column_start = column_start,
filter_total = exclude_total,
remove_column = remove_column
)
df <-
df %>%
mutate_if(is.character, str_squish) %>%
filter(!item %in% c("Federal funds", "Trust funds")) %>%
.remove_na()
df <-
df %>%
filter(!is.na(amount)) %>%
.munge_omb_items()
if (exclude_total) {
df <-
df %>%
filter(!item %>% str_detect("Total"))
}
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
df <-
df %>%
filter(!is.na(yearBudget)) %>%
mutate(amount = currency(amount, digits = 0))
df
}
.parse_48 <-
function(data,
exclude_total = T,
year_offset = 1,
column_start = 3,
remove_column = 2) {
table_name <-
.add_table_name(data)
df <-
data %>%
.munge_omb_wide(
year_offset = year_offset,
column_start = column_start,
filter_total = exclude_total,
remove_column = remove_column
)
df <-
df %>%
mutate_if(is.character, str_squish) %>%
.remove_na()
df <-
df %>%
filter(!is.na(amount)) %>%
.munge_omb_items()
if (exclude_total) {
df <-
df %>%
filter(!item %>% str_detect("Total"))
}
df <-
df %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
df <-
df %>%
filter(!is.na(yearBudget))
df
}
.parse_49 <-
function(data,
exclude_total = T,
column_names = c("metric", "parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 6,
column_names = column_names,
header_rows = 3:5
)
df <-
df %>%
gather(yearBudget, amount, -column_names) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year() %>%
filter(!is.na(item))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total"))
}
data <- data %>%
mutate(
parent = case_when(
parent == "State and Local Government Non-Interest Receipts (NIPA Basis) (1)" ~ "State and Local Government Non-Interest Receipts",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_50 <-
function(data,
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 6,
column_names = column_names,
header_rows = 3:4
) %>%
mutate(metric = "amountNominalMillions") %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year() %>%
filter(!is.na(item))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total")) %>%
filter(!is.na(item))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "State and Local Government Expenditures From Own Sources (NIPA Basis) (1)" ~ "State and Local Government Expenditures From Own Sources",
parent == "Addendum: Federal Grants (NIPA Basis)" ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
mutate(amount = currency(amount, digits = 0))
data
}
.parse_51 <-
function(data,
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 4,
column_names = column_names,
header_rows = 2:3
) %>%
mutate(metric = "pctGDP") %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year() %>%
filter(!is.na(item))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total")) %>%
filter(!is.na(item))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "State and Local Government Expenditures From Own Sources (NIPA Basis) (1)" ~ "State and Local Government Expenditures From Own Sources",
parent == "Addendum: Federal Grants (NIPA Basis)" ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary() %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data <- data %>%
rename(pct = amount)
data
}
.parse_52 <-
function(data,
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 5,
column_names = column_names,
header_rows = 3:4
) %>%
mutate(metric = "amountBillionsNominal") %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year() %>%
filter(!is.na(item))
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total")) %>%
filter(!is.na(item))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "State and Local Government Expenditures From Own Sources (NIPA Basis) (1)" ~ "State and Local Government Expenditures From Own Sources",
parent == "Addendum: Federal Grants (NIPA Basis)" ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_53 <-
function(data,
exclude_total = F,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 4,
column_names = column_names,
header_rows = 2:3
) %>%
mutate(metric = "pctGDP") %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
data <-
data %>%
mutate(item = case_when(is.na(item) ~ parent,
TRUE ~ item))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total"))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "State and Local Government Expenditures From Own Sources (NIPA Basis) (1)" ~ "State and Local Government Expenditures From Own Sources",
parent == "Addendum: Federal Grants (NIPA Basis)" ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
arrange(yearBudget) %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(pct = amount)
data
}
.parse_54 <-
function(data,
exclude_total = T,
column_names = c("metric", "parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 5,
column_names = column_names,
header_rows = 2:4
) %>%
select(metric, everything())
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
data <- data %>%
mutate(item = case_when(is.na(item) ~ parent,
TRUE ~ item))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total")) %>%
filter(!is.na(item))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "State and Local Government Expenditures From Own Sources (NIPA Basis) (1)" ~ "State and Local Government Expenditures From Own Sources",
parent %in% c(
"Addendum: Federal Grants (NIPA Basis)",
"State and Local (NIPA Basis)"
) ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_55 <-
function(data,
exclude_total = T,
column_names = c("metric", "parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 5,
column_names = column_names,
header_rows = 2:4
) %>%
select(metric, everything())
df <- df %>%
mutate(metric = case_when(
metric == "As Percentages of" ~ str_c(metric, parent, sep = " "),
TRUE ~ metric
))
df <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish)
data <-
df %>%
.fix_year()
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
.fix_omb_amount() %>%
filter(!is.na(amount))
data <- data %>%
mutate(item = case_when(is.na(item) ~ parent,
TRUE ~ item))
if (exclude_total) {
data <-
data %>% filter(!item %>% str_detect("Total"))
}
data <-
data %>%
mutate(
parent = case_when(
parent == "Medicare (Excluding Premiums)" ~ "Medicare Excluding Premiums",
parent == "Other Health (1)" ~ "Other Health",
parent == "Federal Employees Health Benefits (FEHB)" ~ "Federal Employees Health Benefits",
parent %in% c(
"Addendum: Federal Grants (NIPA Basis)",
"State and Local (NIPA Basis)"
) ~ "Federal Grants",
TRUE ~ parent
)
)
data <-
data %>%
.calc_omb_summary() %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything())
data
}
.parse_56 <-
function(data,
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 5,
column_names = column_names,
header_rows = 3:4
) %>%
mutate(metric = "countEmployees") %>%
select(metric, everything()) %>%
.remove_na()
data <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish) %>%
.fix_year() %>%
mutate(amount = parse_number(amount) * 1000)
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
filter(!is.na(amount))
data <- data %>%
mutate(item = case_when(is.na(item) ~ parent,
TRUE ~ item))
if (exclude_total) {
data <-
data %>% filter(!parent %>% str_detect("Total")) %>%
filter(item != "Total")
}
data <-
data %>%
mutate(
item = case_when(
item == "Transpor- tation" ~ "Transportation",
item == "Treasure" ~ "Treasury",
TRUE ~ item
)
)
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(count = amount)
data
}
.parse_57 <-
function(data,
exclude_total = T,
column_names = c("parent", "item")) {
table_name <-
.add_table_name(data)
df <-
.unpivot_omb(
data = data,
column_start = 2,
data_start_row = 6,
column_names = column_names,
header_rows = 3:5
) %>%
mutate(metric = "pctEmployees") %>%
select(metric, everything()) %>%
.remove_na()
data <-
df %>%
gather(yearBudget, amount, -c("metric", column_names)) %>%
mutate_if(is.character, str_squish) %>%
.fix_year() %>%
mutate(amount = parse_number(amount) / 100)
data <-
data %>%
.munge_metrics() %>%
.munge_parents() %>%
.munge_omb_items() %>%
filter(!is.na(amount))
data <- data %>%
mutate(item = case_when(is.na(item) ~ parent,
TRUE ~ item))
if (exclude_total) {
data <-
data %>% filter(!parent %>% str_detect("Total")) %>%
filter(item != "Total")
}
data <-
data %>%
mutate(
item = case_when(
item == "Transpor- tation" ~ "Transportation",
item == "Treasure" ~ "Treasury",
TRUE ~ item
)
)
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
mutate(nameTable = table_name) %>%
select(nameTable, everything()) %>%
rename(pct = amount)
data
}
.dl_omb <-
function(url = "https://www.whitehouse.gov/wp-content/uploads/2019/03/hist-fy2020.zip") {
tmp <-
tempfile()
file <- curl_download(url, tmp)
unz_files <- unzip(file, exdir = "files")
file_nos <-
seq_along(unz_files)
all_data <-
file_nos %>%
map_dfr(function(x) {
options(scipen = 999999)
glue("Table: {x}") %>% message()
data <-
unz_files[[x]] %>%
read_excel(col_names = F)
file <-
unz_files[[x]] %>% str_split("/") %>% flatten_chr() %>%
.[[2]] %>% str_remove_all(".xlsx")
if (x %in% c(1)) {
df <-
.parse_1(data = data)
df <-
df %>% mutate(metric = "amount")
df <-
df %>%
filter(yearBudget %>% str_detect("-")) %>%
bind_rows(
df %>%
filter(!yearBudget %>% str_detect("-")) %>%
.fix_year() %>%
.calc_omb_summary() %>%
mutate(amount = amount %>% currency(digits = 0)) %>%
mutate(yearBudget = as.character(yearBudget))
) %>%
rename(yearGroupBudget = yearBudget) %>%
mutate(yearBudget = year(dateFiscalYearEnd) %>% as.numeric()) %>%
select(nameTable,
isEstimate,
yearGroupBudget,
yearBudget,
everything()) %>%
mutate(amount = currency(amount, digits = 0)) %>%
rename(parent = typeBudget,
item = typeCashFlow) %>%
select(nameTable, metric, everything())
data <- df %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(2)) {
data <-
.parse_2(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(3)) {
data <-
.parse_3(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(4)) {
data <-
.parse_4(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(5)) {
data <-
.parse_5(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(6)) {
data <-
.parse_7(data = data, metric = "pctOutlays") %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(7)) {
data <-
.parse_7(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(8)) {
data <-
.parse_8(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(9)) {
data <-
.parse_9(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(10)) {
data <-
.parse_10(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(11, 14)) {
data <-
.parse_11(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(12, 15, 17, 19)) {
data <-
.parse_12(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(13, 16, 18)) {
data <-
.parse_13(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(20)) {
data <-
.parse_20(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(21)) {
data <-
.parse_21(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(22)) {
data <-
.parse_22(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(23)) {
data <-
.parse_23(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(24)) {
data <-
.parse_24(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(25)) {
data <-
.parse_25(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(26)) {
data <-
.parse_26(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(27)) {
data <-
.parse_26(data = data, metric = "pctGDP") %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(28, 30)) {
data <-
.parse_28(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(29)) {
data <-
.parse_28(data = data) %>%
mutate(amount = amount * 10000) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(31)) {
data <-
.parse_28(data = data) %>%
mutate(amount = amount * 10000) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(32)) {
data <-
.parse_32(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% 33) {
data <-
.parse_33(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% 34) {
data <-
.parse_34(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% 35) {
data <-
.parse_35(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% 36) {
data <-
.parse_36(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(37)) {
data <-
.parse_37(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(38)) {
data <-
.parse_38(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(39)) {
data <-
.parse_39(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(40)) {
data <-
.parse_40(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(41)) {
data <-
.parse_41(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(42)) {
data <-
.parse_42(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(43)) {
data <-
.parse_43(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(44)) {
data <-
.parse_44(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(45)) {
data <-
.parse_45(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(46)) {
data <-
.parse_46(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(47)) {
data <-
.parse_47(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(48)) {
data <-
.parse_48(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(49)) {
data <-
.parse_49(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(50)) {
data <-
.parse_50(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(51)) {
data <-
.parse_51(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(52)) {
data <-
.parse_52(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(53)) {
data <-
.parse_53(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(54)) {
data <-
.parse_54(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(55)) {
data <-
.parse_55(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(56)) {
data <-
.parse_56(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
if (x %in% c(57)) {
data <-
.parse_57(data = data) %>%
.add_table_no(table_number = x) %>%
mutate(slugTable = file) %>%
.nest_omb()
return(data)
}
data
})
file %>% unlink()
unlink("xml", recursive = T)
unz_files %>% unlink()
all_data <- all_data %>%
mutate(nameTable = nameTable %>% sub("\\ - ", "|", .)) %>%
separate(
nameTable,
into = c("idTable", "nameTable"),
sep = "\\|",
extra = "merge"
) %>%
mutate(idTable = idTable %>% str_remove_all("\\Table ") %>% as.numeric()) %>%
separate(
nameTable,
into = c("typeTable", "periodTable"),
sep = "\\: ",
remove = F
) %>%
mutate(periodTable = periodTable %>% str_replace_all("\\ - ", "\\-"))
all_data
}
#' Office of Budget Management Tables
#'
#' @param assign_to_environment if \code{TRUE} assigns each table
#' to environment
#'
#' @return \code{nested_tibble}
#' @export
#'
#' @examples
omb_tables <-
function(assign_to_environment = T) {
df_tables <- dictionary_omb_tables()
url <-
df_tables %>%
filter(numberTable == 1) %>%
pull(urlBudgetZIP)
all_data <- .dl_omb(url = url)
if (assign_to_environment) {
dict_tables <-
all_data %>%
select(-matches("data"))
assign("tbl_000_omb_data", dict_tables, envir = .GlobalEnv)
1:nrow(all_data) %>%
walk(function(x) {
zeros <- 3 - nchar(x)
zeros <- rep(0, zeros) %>% str_c(collapse = "")
zeros <- str_c(zeros, x, sep = "")
df_row <- all_data %>% dplyr::slice(x)
table_slug <- df_row$slugTable
tn <- glue("tbl_{zeros}_{table_slug}")
df <-
df_row %>%
select(slugTable, nameTable, data) %>%
unnest_legacy()
if (df %>% hasName("amount")) {
df <- df %>%
mutate(amount = currency(amount, digits = 0))
}
assign(x = tn, df, envir = .GlobalEnv)
})
}
all_data
}
# other -------------------------------------------------------------------
.parse_omb_budget_authority <-
memoise::memoise(function(url = "https://www.whitehouse.gov/wp-content/uploads/2019/03/budauth-fy2020.xlsx",
filter_zero_balance = T) {
data <-
url %>%
rio::import()
data <-
data %>%
as_tibble()
end_col <- data %>% names() %>% grep("On-", .)
gather_cols <- data %>% select(1:(end_col)) %>% names()
data <-
data %>%
gather(yearBudget, amount, -c(gather_cols))
data <-
data %>%
.munge_omb_names()
data <-
data %>%
mutate(isOnBudget = isOnBudget %>% str_detect("On-budget")) %>%
mutate(amount = 1000 * amount,
amount = currency(amount, digits = 0))
data <-
data %>%
.fix_year() %>%
mutate(metric = "amountNominal")
if (filter_zero_balance) {
data <-
data %>%
filter(amount != 0)
}
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
select(-metric)
data <- data %>%
mutate(amount = currency(amount, digits = 0))
data <-
data %>%
mutate(slugCGAC =
case_when(
!is.na(idCGAC) ~ glue("0{idCGAC}") %>% as.character(),
TRUE ~ NA_character_
)) %>%
.munge_data()
data
})
.parse_omb_outlays <-
memoise::memoise(function(url = "https://www.whitehouse.gov/wp-content/uploads/2019/03/outlays-fy2020.xlsx",
filter_zero_balance = T) {
data <- rio::import(url) %>% as_tibble()
data <-
data %>%
as_tibble()
end_col <- data %>% names() %>% grep("On-", .)
gather_cols <- data %>% select(1:(end_col)) %>% names()
data <-
data %>%
gather(yearBudget, amount, -c(gather_cols))
data <-
data %>%
.munge_omb_names()
data <-
data %>%
mutate(isOnBudget = isOnBudget %>% str_detect("On-budget")) %>%
mutate(amount = 1000 * amount,
amount = currency(amount, digits = 0))
data <-
data %>%
.fix_year() %>%
mutate(metric = "amountNominal")
if (filter_zero_balance) {
data <-
data %>%
filter(amount != 0)
}
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
mutate(slugCGAC =
case_when(
!is.na(idCGAC) ~ glue("0{idCGAC}") %>% as.character(),
TRUE ~ NA_character_
)) %>%
.munge_data() %>%
select(-metric)
data
})
.parse_omb_receipts <-
memoise::memoise(function(url = "https://www.whitehouse.gov/wp-content/uploads/2019/03/receipts-fy2020.xlsx",
filter_zero_balance = T) {
data <-
rio::import(url) %>% as_tibble()
data <-
data %>%
as_tibble()
end_col <- data %>% names() %>% grep("On-", .)
gather_cols <- data %>% select(1:(end_col)) %>% names()
data <-
data %>%
gather(yearBudget, amount, -c(gather_cols))
data <-
data %>%
.munge_omb_names()
data <-
data %>%
mutate(isOnBudget = isOnBudget %>% str_detect("On-budget")) %>%
mutate(amount = 1000 * amount,
amount = currency(amount, digits = 0))
data <-
data %>%
.fix_year() %>%
mutate(metric = "amountNominal")
if (filter_zero_balance) {
data <-
data %>%
filter(amount != 0)
}
data <-
data %>%
.calc_omb_summary()
data <-
data %>%
mutate(slugCGAC =
case_when(
!is.na(idCGAC) ~ glue("0{idCGAC}") %>% as.character(),
TRUE ~ NA_character_
)) %>%
.munge_data() %>%
select(-metric)
data
})
#' OMB Historic Budget Authority
#'
#' Returns data containing information
#' about the historic OMB budget authority
#'
#' @param filter_zero_balance if \code{TRUE} excludes
#' accounts with zero authority
#' @param research_terms
#' @param research_columns
#' @param join_tas if \code{TRUE} join extra treasury symbol data
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
#' omb_budget_authority()
omb_budget_authority <-
function(filter_zero_balance = T,
join_tas = T,
research_terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
research_columns = c("nameAgencyOMB",
"nameBureauOMB", "nameAccountOMB")) {
url <-
dictionary_omb_supplements() %>%
filter(typeFile %>% str_detect("BUDGET AUTHORITY XLS")) %>%
pull(urlData)
data <-
.parse_omb_budget_authority(url = url, filter_zero_balance = filter_zero_balance)
data <-
data %>% .generate_federal_account_ids(cgac_column = "idCGAC", account_column = "codeAccountOMB")
data <-
data %>%
munge_account_symbols(research_columns = research_columns,
research_terms = research_terms) %>%
.add_omb_account_group()
data <-
data %>%
mutate(idCGAC = as.numeric(idCGAC)) %>%
select(-one_of("slugCGAC")) %>%
left_join(dictionary_omb_cgac_accounts(), by = "idCGAC") %>%
distinct()
if (join_tas) {
data <-
data %>%
.join_tas_symbols()
}
data <-
data %>% mutate_if(is.numeric, as.numeric) %>%
add_dod_omb_group()
data
}
#' OMB Budget Outlays
#'
#' Returns information about all outflows
#' as since 1962 as recorded by the OMB
#'
#' @param filter_zero_balance if \code{TRUE} excludes
#' accounts with zero authority
#' @param research_terms
#' @param research_columns
#' @param join_tas
#'
#' @return
#' @export
#'
#' @examples
#' omb_outlays()
omb_outlays <- function(filter_zero_balance = T,
research_terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
join_tas = T,
research_columns = c("nameAgencyOMB",
"nameBureauOMB", "nameAccountOMB")) {
url <-
dictionary_omb_supplements() %>%
filter(typeFile %>% str_detect("OUTLAYS XLS")) %>%
pull(urlData)
data <-
.parse_omb_outlays(url = url, filter_zero_balance = filter_zero_balance)
data <-
data %>% .generate_federal_account_ids(cgac_column = "idCGAC", account_column = "codeAccountOMB")
data <-
data %>%
munge_account_symbols(research_columns = research_columns,
research_terms = research_terms) %>%
.add_omb_account_group()
data <-
data %>%
mutate(idCGAC = as.numeric(idCGAC)) %>%
select(-one_of("slugCGAC")) %>%
left_join(dictionary_omb_cgac_accounts(), by = "idCGAC") %>%
distinct()
if (join_tas) {
data <-
data %>%
.join_tas_symbols()
}
data <-
data %>% mutate_if(is.numeric, as.numeric) %>%
add_dod_omb_group()
data
}
#' OMB Budget Receipts
#'
#' Returns data about all monies received by
#' the United States Government since 1962 as recorded by the OMB
#'
#' @param filter_zero_balance if \code{TRUE} excludes
#' accounts with zero authority
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' omb_receipts()
omb_receipts <-
function(filter_zero_balance = T,
join_tas = T,
research_terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
research_columns = c("nameAgencyOMB",
"nameBureauOMB", "nameAccountOMB")) {
url <-
dictionary_omb_supplements() %>%
filter(typeFile %>% str_detect("RECEIPTS XLS")) %>%
pull(urlData)
data <-
data %>% .generate_federal_account_ids(cgac_column = "idCGAC", account_column = "codeAccountOMB")
data <-
data %>%
munge_account_symbols(research_columns = research_columns,
research_terms = research_terms) %>%
.add_omb_account_group()
data <-
data %>%
mutate(idCGAC = as.numeric(idCGAC)) %>%
select(-one_of("slugCGAC")) %>%
left_join(dictionary_omb_cgac_accounts(), by = "idCGAC") %>%
distinct()
if (join_tas) {
data <-
data %>%
.join_tas_symbols()
}
data <- add_dod_omb_group(data = data)
data
}
#' All OMB Data
#'
#' @param types
#' @param filter_zero_balance
#' @param research_terms
#' @param research_columns
#' @param widen_data
#'
#' @return
#' @export
#'
#' @examples
omb_data <-
function(types = c("budget", "receipts", "outlays"),
filter_zero_balance = T,
join_tas = F,
research_terms = c("research",
"science",
"technology",
"ARTIFICIAL INTELLIGENCE",
"technical"),
research_columns = c("nameAgencyOMB",
"nameBureauOMB", "nameAccountOMB"),
widen_data = F
) {
slug_types <- str_to_lower(types)
all_data <-
tibble()
if (types %>% str_to_lower() %>% str_detect("budget") %>% sum(na.rm = T) > 0) {
all_data <-
all_data %>% bind_rows(
omb_budget_authority(
filter_zero_balance = filter_zero_balance,
research_terms = research_terms,
research_columns = research_columns
) %>%
mutate(typeBudget = "Budget")
)
}
if (types %>% str_to_lower() %>% str_detect("receipts") %>% sum(na.rm = T) > 0) {
all_data <-
all_data %>% bind_rows(
omb_receipts(
filter_zero_balance = filter_zero_balance,
research_terms = research_terms,
research_columns = research_columns,
) %>%
mutate(typeBudget = "Receipts")
)
}
if (types %>% str_to_lower() %>% str_detect("outlays") %>% sum(na.rm = T) > 0) {
all_data <-
all_data %>% bind_rows(
omb_outlays(
filter_zero_balance = filter_zero_balance,
research_terms = research_terms,
research_columns = research_columns
) %>%
mutate(typeBudget = "Outlays")
)
}
all_data <- all_data %>%
select(-typeFunding)
all_data <- all_data %>%
group_by(!!!syms(all_data %>% select(-amount) %>% names())) %>%
summarise(amount = sum(amount, na.rm = T)) %>%
ungroup()
if (widen_data) {
all_data %>%
mutate(typeBudget = str_c("amount", typeBudget)) %>%
spread(typeBudget, amount)
}
if (join_tas) {
data <-
data %>%
.join_tas_symbols()
}
all_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.