.add_cgac <-
function(data) {
if (!data %>% hasName("slugTreasuryAgency")) {
return(data)
}
df_cgac <- data %>%
distinct(slugTreasuryAgency) %>%
mutate(
nameAgencyCGAC = case_when(
slugTreasuryAgency == "A" ~ "DEPARTMENT OF THE ARMY",
slugTreasuryAgency == "N" ~ "DEPARTMENT OF THE NAVY",
slugTreasuryAgency == "F" ~ "DEPARTMENT OF THE AIR FORCE",
slugTreasuryAgency == "D" ~ "DEPARTMENT OF DEFENSE",
),
idCGAC = case_when(
slugTreasuryAgency == "A" ~ 21,
slugTreasuryAgency == "N" ~ 17,
slugTreasuryAgency == "F" ~ 57,
slugTreasuryAgency == "D" ~ 97,
)
)
data <- data %>%
left_join(df_cgac, by = "slugTreasuryAgency") %>%
select(yearBudget, names(df_cgac), everything())
data
}
# budget_urls -------------------------------------------------------------
#' DOD Budget name dictionaryd
#'
#' @return
#' @export
#'
#' @examples
dictionary_dod_budget_names <-
function() {
tibble(
nameDOD = c(
"idRow",
"Account",
"Account Title",
"Organization",
"Budget Activity",
"Budget Activity Title",
"Budget Sub Activity",
"Budget Sub Activity Title",
"Add/ Non-Add",
"Include in TOA",
"Classification",
"item",
"value",
"AG / BSA",
"AG / Budget SubActivity (BSA) Title",
"Line Number",
"SAG / BLI",
"SAG / Budget Line Item (BLI) Title",
"BSA",
"Budget Sub Activity (BSA) Title",
"Line Item",
"Line Item Title",
"Cost Type",
"Cost Type Title",
"PE / BLI",
"Program Element / Budget Line Item (BLI) Title",
"Treasury Agency",
"Account Short Title",
"Budget Activity Short Title",
"State Country",
"State Country Title",
"Fiscal Year",
"Facility Category Title",
"Location Title",
"Construction Project",
"Construction Project Title",
"Appropriation",
"BSA Title",
"Mil Dept DW",
"AG Title",
"SAG", "SAG Title", "PE", "Program Element Title",
"PL Title Name",
"Location",
"PE Title",
"Organization Title",
"Appn",
"Comp",
"Appn Name",
"Org",
"Org Name",
"Line Num",
"BA",
"BA Name",
"PE Name",
"Sec",
"TreasuryCode",
"Treasury",
"TreasuryAccount Title",
"R1 LineNumber",
"BudgetActivity",
"Budget",
"ProgramElement",
"Program",
"SecurityClass."
),
nameActual =
c(
"idRow",
"codeAccountOMB",
"nameAccountOMB",
"slugOrganization",
"slugBudgetParent",
"nameBudgetParent",
"slugBudgetActivity",
"nameBudgetActivity",
"isAdded",
"isTOA",
"slugClassification",
"item",
"value",
"slugBudgetActivity",
"nameBudgetActivity",
"idLineNumber",
"codeProgramElement",
"nameProgramElement",
"slugBudgetActivity",
"nameBudgetActivity",
"codeProgramElement",
"nameProgramElement",
"slugCost",
"typeCost",
"codeProgramElement",
"nameProgramElement",
"slugTreasuryAgency",
"nameAccountOMB",
"nameBudgetParent",
"slugStateCountry",
"nameStateCountry",
"yearBudget",
"nameBudgetActivity",
"nameLocation",
"nameConstructionProject",
"nameProgramElement",
"nameAccountOMB",
"nameBudgetActivity",
"remove",
"nameBudgetActivity",
"codeProgramElement", "nameProgramElement", "codeProgramElement", "nameProgramElement",
"nameDODBudgetGroup",
"idLocation",
"nameProgramElement",
"nameOrganization",
"codeAccountOMB",
"slugTreasuryAgency",
"nameAccountOMB",
"slugOrganization",
"nameOrganization",
"idLineNumber",
"slugBudgetParent",
"nameBudgetParent",
"nameProgramElement",
"slugClassification",
"codeAccountOMB",
"slugTreasuryAgency",
"nameAccountOMB",
"idLineNumber",
"slugBudgetActivity",
"nameBudgetActivity",
"codeProgramElement",
"nameProgramElement",
"slugClassification"
)
)
}
.munge_dod_names <-
function(data) {
dict_names <- dictionary_dod_budget_names()
fdps_names <-
names(data)
actual_names <-
fdps_names %>%
map_chr(function(name) {
df_row <-
dict_names %>% filter(nameDOD == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
#' DOD Budgets Codes
#'
#' @return
#' @export
#'
#' @examples
dictionary_dod_budget_codes <-
function() {
tibble(
slugDODBudgetGroup = c("m1", "o1", "rf1", "p1", "p1r", "r1", "c1", "OGSI") %>% str_to_upper(),
nameDODBudgetGroup = c(
"Military Personnel",
"Operation and Maintenance",
"Revolving and Management Fund",
"Procurement",
"Procurement Reserve",
"RDTE",
"Military Construction",
"Opportunity, Growth, and Security Initiative"
)
)
}
.dod_budget_year_urls <-
function() {
page <-
"https://comptroller.defense.gov/Budget-Materials/" %>% read_html()
tibble(
yearBudget = page %>% html_nodes("#sitetitle p a") %>% html_text() %>% as.numeric(),
urlBudgetDODYear = page %>% html_nodes("#sitetitle p a") %>% html_attr("href") %>% str_c("https://comptroller.defense.gov", .)
) %>%
mutate(isBudgetCurrentYear = yearBudget == max(yearBudget))
}
.parse_dod_buget_url <-
function(url = "https://comptroller.defense.gov/Budget-Materials/Budget2020/",
include_all_page_files = F,
return_message = T) {
if (return_message) {
glue("Parsing {url} for budget files") %>% message()
}
page <- url %>% read_html()
excel_files <-
page %>% html_nodes("a") %>% html_attr("href") %>% discard(function(x) {
is.na(x)
}) %>%
keep(function(x) {
x %>% str_detect("xls")
}) %>%
str_c("https://comptroller.defense.gov", .) %>%
discard(function(x) {
x %>% str_detect("display")
})
parts <-
excel_files %>%
str_split("/") %>% flatten_chr() %>% keep(function(x) {
x %>% str_detect("xls")
}) %>%
str_split("\\.")
slugDODBudgetGroup <-
seq_along(parts) %>%
map_chr(function(x) {
z <- parts[[x]][[1]] %>% str_to_upper()
z <-
z %>% str_remove_all("FY_")
if (z %>% str_count("\\_") == 1) {
z <- z %>% str_split("\\_") %>% flatten_chr() %>% .[[2]]
}
z
})
parts <-
url %>% str_split("/") %>% flatten_chr() %>%
discard(function(x) {
x == ""
})
yearBudget <-
parts[length(parts)] %>% str_remove_all("\\.aspx") %>% str_remove_all("budget") %>%
parse_number()
data <-
tibble(
yearBudget,
slugDODBudgetGroup,
urlBudgetDODFile = excel_files,
urlBudgetDODYear = url
)
data <-
data %>%
mutate(isAmendedBudget = slugDODBudgetGroup %>% endsWith("A")) %>%
select(yearBudget, isAmendedBudget, everything()) %>%
mutate(
slugDODBudgetGroup = case_when(
isAmendedBudget ~ slugDODBudgetGroup %>% substr(1, nchar(slugDODBudgetGroup) - 1),
TRUE ~ slugDODBudgetGroup
)
) %>%
left_join(dictionary_dod_budget_codes(), by = "slugDODBudgetGroup") %>%
select(yearBudget, nameDODBudgetGroup, everything())
if (!include_all_page_files) {
return(data)
}
urls <-
page %>% html_nodes("strong a") %>% html_attr("href")
file_names <-
page %>% html_nodes("strong a") %>% html_text()
data <-
tibble(urlBudgetDODFile = urls, nameFile = file_names) %>% distinct() %>%
mutate(
nameFile = nameFile %>% str_trim(),
nameFile = case_when(nameFile == "" ~ NA_character_,
TRUE ~ nameFile),
urlBudgetDODFile = case_when(
urlBudgetDODFile %>% str_detect("http") ~ urlDODFile,
TRUE ~ str_c("https://comptroller.defense.gov", urlBudgetDODFile)
),
yearBudget,
typeFile = case_when(
urlBudgetDODFile %>% str_detect(".xls") ~ "excel",
urlBudgetDODFile %>% str_detect(".pdf") ~ "pdf",
urlBudgetDODFile %>% str_detect(".zip") ~ "zip",
TRUE ~ "html"
),
urlBudgetDODYear = url
) %>%
fill(nameFile) %>%
mutate(
slugDODBudgetGroup = case_when(
nameFile %>% str_detect("M-1|M1") ~ "M1",
nameFile %>% str_detect("O1|O-1") ~ "O1",
nameFile %>% str_detect("P1|P-1") ~ "P1",
nameFile %>% str_detect("P1-R|P1R") ~ "P1R",
nameFile %>% str_detect("R-1|R1") ~ "R1",
nameFile %>% str_detect("RF-1|RF1") ~ "RF1",
nameFile %>% str_detect("C-1|C1") ~ "C1"
),
isGreenBook = nameFile %>% str_detect("Green Book")
) %>%
group_by(urlBudgetDODFile) %>%
dplyr::slice(1) %>%
ungroup() %>%
filter(!urlBudgetDODFile %>% str_detect(".aspx")) %>%
select(yearBudget, everything()) %>%
left_join(dictionary_dod_budget_codes(), by = "slugDODBudgetGroup")
data
}
#' Department of Defense Budget Data dictionary
#'
#' @param filter_years if not \code{NULL} filters the years to specification
#' @param include_all_page_files if \code{TRUE} returns all data urls otherwise
#' returns just excel links
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
dictionary_dod_budget_urls <-
function(filter_years = NULL,
include_all_page_files = F,
return_message = T) {
df_urls <- .dod_budget_year_urls()
if (length(filter_years) > 0) {
df_urls <-
df_urls %>%
filter(yearBudget %in% filter_years)
}
all_data <-
df_urls$urlBudgetDODYear %>%
map_dfr(memoise::memoise(function(url) {
.parse_dod_buget_url(
url = url,
include_all_page_files = include_all_page_files,
return_message = return_message
)
}))
all_data <-
all_data %>%
mutate(
isBudgetGroupNormal = !is.na(nameDODBudgetGroup),
nameDODBudgetGroup = case_when(
is.na(nameDODBudgetGroup) ~ slugDODBudgetGroup,
TRUE ~ nameDODBudgetGroup
) %>% str_to_upper(),
nameDODBudgetGroup = nameDODBudgetGroup %>% str_replace_all("\\_|\\-", " "),
isAmendedBudget = case_when(
isAmendedBudget == F ~ nameDODBudgetGroup %>% str_detect("AMENDMENT|SUPPLA"),
TRUE ~ isAmendedBudget
)
)
all_data
}
# parser ------------------------------------------------------------------
.parse_normal_excel <-
function(tmp, sheet_no = 1) {
data <-
tmp %>% read_excel(sheet = sheet_no, col_names = F)
is_na_1 <-
data[, 1] %>% pull() %>% .[[1]] %>% is.na()
is_appropriation <-
data[,1] %>% pull() %>% str_detect("Appropriation") %>% sum(na.rm = T) >= 1
only_4 <- ncol(data) == 4 &&
data %>% pull(1) %>% str_to_upper() %>% str_detect("PL TITLE") %>% sum(na.rm = T) > 0
early_budget <- data %>% pull(1) %>% str_to_upper() %>% str_detect("CODE") %>% sum(na.rm = T) > 0 && ncol(data) == 12
if (is_na_1) {
df_cols <-
data %>% dplyr::slice(1:2) %>% t() %>% as_tibble()
df_cols <-
df_cols %>%
mutate(
V1 = V1 %>% str_remove_all("[0-9]|Total of Displayed Rows"),
V1 = case_when(V1 == "" ~ NA_character_,
TRUE ~ V1)
)
column_names <-
df_cols %>%
mutate(
V1 = case_when(V1 == "\\-" ~ NA_character_,
TRUE ~ V1),
item = case_when(is.na(V1) ~ V2,
TRUE ~ str_c(V1, V2))
) %>%
pull(item) %>%
str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
if (unique(column_names) %>% length() != ncol(data)) {
df_cols <-
tibble(col = column_names) %>% mutate(id = 1:n()) %>% group_by(col) %>%
mutate(count = 1:n()) %>%
filter(count == 1) %>%
ungroup()
data <- data[, df_cols$id]
column_names <- df_cols$col
}
data <-
data %>% dplyr::slice(3:nrow(data)) %>%
set_names(column_names)
} else if (is_appropriation) {
val_rows <-
data[, 1] %>%
setNames("value") %>%
mutate(idRow = 1:n()) %>%
filter(value == "Appropriation") %>% pull(idRow)
data <-
data %>% dplyr::slice(val_rows + 1:nrow(data)) %>%
set_names(
data %>% dplyr::slice(val_rows) %>% t() %>% as.character() %>% str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
) %>%
filter(!Appropriation %>% str_detect("^Total"))
} else if (only_4) {
val_rows <-
data[, 1] %>%
setNames("value") %>%
mutate(idRow = 1:n()) %>%
filter(value == "PL Title Name") %>% pull(idRow)
data <-
data %>% dplyr::slice(val_rows + 1:nrow(data)) %>%
set_names(
data %>% dplyr::slice(val_rows) %>% t() %>% as.character() %>% str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
) %>%
filter(!`PL Title Name` %>% str_detect("^Total"))
} else if (early_budget){
df_cols <-
data %>% slice(4:5)
df_cols <- df_cols %>% t() %>% as_tibble() %>% fill(V1)
cols <- df_cols %>%
mutate_if(is.character,
list(function(x) {
x %>% coalesce("")
})) %>%
unite(item, V1, V2, sep = "") %>%
pull()
data <- data %>% slice(6:nrow(data))
data <- data %>%
setNames(cols)
} else {
df_cols <-
data %>% dplyr::slice(1) %>% t() %>% as_tibble()
column_names <-
df_cols$V1 %>%
str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
data <-
data %>% dplyr::slice(2:nrow(data)) %>%
set_names(column_names)
}
data <-
data %>%
mutate(idRow = 1:n()) %>%
select(idRow, everything())
remove_cols <-
data %>% select(matches("FY")) %>%
select(matches("TOTAL|Total|Remaining Req|remove")) %>%
names()
if (length(remove_cols) > 0) {
data <- data %>%
select(-one_of(remove_cols))
}
gather_cols <-
data %>% select(matches("FY|Emergency|Amount|Amt")) %>% names()
key_cols <- data %>% select(-one_of(gather_cols)) %>% names()
data <-
data %>% gather(item, value, -key_cols) %>%
filter(!is.na(value))
data <-
.munge_dod_names(data = data)
if (data %>% hasName("isAdded")) {
data <- data %>%
mutate(isAdded = isAdded == "Addd")
}
if (data %>% hasName("isTOA")) {
data <-
data %>%
mutate(isTOA = case_when(isTOA == "Y" ~ TRUE,
isTOA == "N" ~ F))
}
data <-
data %>%
mutate(
value = parse_number(value) * 1000,
item = item %>% str_replace_all("\\-FY", "FY"),
yearBudget = item %>% str_remove_all("FY |Emergency Disaster Relief Act of") %>% substr(1, 5) %>% parse_number(),
item = item %>% str_remove_all("^FY |\\(|\\)|[0-9]") %>% str_trim(),
isBudgetCurrentYear = yearBudget == max(yearBudget)
) %>%
filter(value != 0) %>%
select(idRow, yearBudget, everything())
data <-
data %>%
filter(!item %in% c("Total Enacted", "Total OCO", "Total Base + OCO", "Total"))
data
}
.parse_fy_excel <-
function(tmp) {
sheet_names <-
tmp %>% excel_sheets() %>% str_to_upper()
has_cover <-sheet_names %>% str_detect("COVER") %>% sum(na.rm = T) > 0
data <-
seq_along(sheet_names) %>%
map_dfr(function(x) {
item <- sheet_names[[x]]
if (item == "COVER") {
return(invisible())
}
data <- tmp %>% read_excel(sheet = x, col_names = F)
new_item <-
sheet_names[[x]] %>% str_remove_all("^FY ") %>%
str_remove_all("[0-9]") %>%
str_trim()
if (new_item == "") {
new_item <- "Base"
}
if (has_cover) {
df_cols <-
data %>% dplyr::slice(5) %>% t() %>% as_tibble()
column_names <- df_cols$V1 %>%
str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
data <-
data %>%
dplyr::slice(6:nrow(data)) %>%
set_names(column_names)
yearBudget <-
column_names[column_names %>% str_detect("FY")][[1]] %>% parse_number()
}
is_na_1 <- data[, 1] %>% pull() %>% .[[1]] %>% is.na()
if (is_na_1) {
yearBudget <- item %>% parse_number()
df_cols <- data %>% dplyr::slice(1:2) %>% t() %>% as_tibble()
df_cols <- df_cols %>%
mutate(
V1 = V1 %>% str_remove_all("[0-9]|Total of Displayed Rows"),
V1 = case_when(V1 == "" ~ NA_character_,
TRUE ~ V1)
)
column_names <-
df_cols %>%
mutate(item = case_when(is.na(V1) ~ V2,
TRUE ~ str_c(V1, V2))) %>%
pull(item) %>%
str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
data <-
data %>% dplyr::slice(3:nrow(data)) %>%
set_names(column_names)
}
if (!has_cover & !is_na_1) {
yearBudget <- item %>% parse_number()
df_cols <-
data %>% dplyr::slice(1) %>% t() %>% as_tibble()
column_names <-
df_cols$V1 %>%
str_replace_all("\r\n|\n", " ") %>%
str_replace_all("\\-FY", "\\FY")
data <-
data %>% dplyr::slice(2:nrow(data)) %>%
set_names(column_names)
}
data <-
data %>%
mutate(idRow = 1:n()) %>%
select(idRow, everything())
remove_cols <-
data %>% select(matches("FY")) %>% select(matches("TOTAL|Total|Remaining Req|^remove")) %>%
names()
if (length(remove_cols) > 0) {
data <- data %>%
select(-one_of(remove_cols))
}
gather_cols <-
data %>% select(matches("FY|Amount")) %>% names()
key_cols <-
data %>% select(-one_of(gather_cols)) %>% names()
data <-
data %>% gather(item, value, -key_cols) %>%
filter(!is.na(value))
data <-
data %>%
mutate(item = item %>% gsub("\\s+", " ", .))
data <-
.munge_dod_names(data = data)
if (data %>% hasName("isAdded")) {
data <- data %>%
mutate(isAdded = isAdded == "Addd")
}
if (data %>% hasName("isTOA")) {
data <-
data %>%
mutate(isTOA = case_when(isTOA == "Y" ~ TRUE,
isTOA == "N" ~ F))
}
if (data %>%
filter(item %>% str_detect("TOA")) %>% nrow() > 0) {
data <-
data %>%
filter(item %>% str_detect("TOA"))
}
if (data %>% hasName("nameAccountOMB")) {
data <- data %>%
filter(!nameAccountOMB %>% str_detect("Total")) %>%
filter(!is.na(nameAccountOMB))
}
if (!has_cover) {
data <- data %>%
mutate(item = new_item)
}
data <-
data %>%
mutate(
value = as.numeric(value) * 1000,
yearBudget,
item = item %>% str_remove_all("^FY |\\(|\\)|[0-9]") %>% str_trim(),
isBudgetCurrentYear = yearBudget == max(yearBudget)
) %>%
filter(value != 0) %>%
select(idRow, yearBudget, everything())
data <-
data %>%
filter(!item %in% c("Total Enacted", "Total OCO", "Total Base + OCO", "Total"))
data
})
if (data %>% hasName("yearBudget")) {
data <- data %>%
mutate(yearBudget = as.numeric(yearBudget))
}
data <- data %>%
mutate(isBudgetCurrentYear = yearBudget == max(yearBudget, na.rm = T))
data
}
.dl_dod_excel <-
function(url = "https://comptroller.defense.gov/Portals/45/Documents/defbudget/fy2020/m1.xlsx",
only_current_year = T,
return_message = T) {
if (return_message) {
glue("Downloading {url}") %>% message()
}
tmp <-
tempfile()
curl_download(url, tmp)
fileparts <- url %>% str_split("/") %>% flatten_chr()
budget_group <-
fileparts[length(fileparts)] %>%
str_remove_all("\\.xlsx") %>%
str_remove_all("\\.xls") %>%
str_to_upper()
if (budget_group %>% endsWith("A")) {
budget_group <- budget_group %>% str_remove_all("A")
}
sheet_names <- tmp %>% excel_sheets() %>% str_to_upper() %>% str_trim()
sheet_names %>% print()
starts_fy <-
sheet_names[[1]] %>% str_trim() %>%
startsWith("FY")
is_cover <-
sheet_names[[1]] %>% str_to_upper() == "COVER"
is_ogsi <- url == "https://comptroller.defense.gov/portals/45/documents/defbudget/fy2015/ogsi.xlsx"
use_fy <- starts_fy | is_cover
if (use_fy) {
data <-
.parse_fy_excel(tmp = tmp)
} else if (is_ogsi) {
data <-
.parse_normal_excel(tmp = tmp, sheet_no = 2)
} else {
data <-
.parse_normal_excel(tmp = tmp, sheet_no = 1)
}
data <-
data %>%
mutate(item = case_when(
item == "" ~ "Base",
TRUE ~ item
))
data <-
data %>%
mutate(slugDODBudgetGroup = budget_group) %>%
select(idRow, slugDODBudgetGroup, everything())
if (only_current_year) {
data <-
data %>%
filter(isBudgetCurrentYear)
}
data <-
data %>%
mutate(urlBudgetDODFile = url)
data <- data %>%
mutate_if(is.character,
list(function(x) {
gsub("\\s+", " ", x) %>% stri_enc_toascii() %>% str_remove_all(" \032 ")
})) %>%
dplyr::select(which(colMeans(is.na(.)) < 1))
tmp %>%
unlink()
data
}
.dl_dod_urls <-
function(urls = "https://comptroller.defense.gov/Portals/45/Documents/defbudget/fy2020/m1.xlsx",
only_current_year = T,
return_message = T) {
.dl_dod_excel_safe <- possibly(.dl_dod_excel, tibble())
all_data <-
urls %>%
map_dfr(function(url) {
.dl_dod_excel(
url = url,
only_current_year = only_current_year,
return_message = return_message
)
})
all_data <-
all_data %>%
left_join(dictionary_dod_budget_codes(), by = "slugDODBudgetGroup") %>%
select(one_of(
c(
"yearBudget",
"isBudgetCurrentYear",
"slugDODBudgetGroup",
"nameDODBudgetGroup"
)
),
everything())
all_data <-
all_data %>%
select(one_of(all_data %>% select(-one_of(c(
"item", "value"
))) %>% names()), everything()) %>%
mutate(item = str_to_upper(item)) %>%
rename(amountItem = value,
typeBudget = item)
if (all_data %>% filter(typeBudget %>% str_detect("QUANTITY")) %>% nrow() > 0) {
df_counts <-
all_data %>% filter(typeBudget %>% str_detect("QUANTITY")) %>%
select(
yearBudget,
codeProgramElement,
isBudgetCurrentYear,
slugOrganization,
slugDODBudgetGroup,
codeAccountOMB,
matches("nameBudget"),
typeBudget,
countItem = amountItem
) %>%
mutate(countItem = countItem / 1000)
df_counts <-
df_counts %>%
mutate(typeBudget = typeBudget %>% str_remove_all(" QUANTITY") %>% str_trim())
all_data <-
all_data %>%
filter(!typeBudget %>% str_detect("QUANTITY"))
all_data <-
all_data %>%
mutate(
typeBudget = case_when(
typeBudget %>% endsWith(" AMOUNT") ~ typeBudget %>% str_remove_all(" AMOUNT"),
TRUE ~ typeBudget
)
)
all_data <-
all_data %>%
left_join(df_counts,
by = names(df_counts)[names(df_counts) %in% names(all_data)])
all_data <-
all_data %>%
mutate(amountUnitCost = case_when(is.na(countItem) ~ NA_real_,
TRUE ~ amountItem / countItem))
}
all_data <-
all_data %>%
.munge_data(clean_address = F)
ignore_types <-
c(
"TOTAL BASE + OCO AMOUNT",
"Total Base + OCO Quantity",
"TOTAL OCO QUANTITY",
"TOTAL BASE + OCO AMOUNT",
"TOTAL OCO",
"TOTAL BASE + OCO"
) %>% str_to_upper() %>% unique()
all_data <-
all_data %>%
filter(!typeBudget %in% ignore_types)
has_parent_line <-
all_data %>% hasName("nameBudgetActivity") &
all_data %>% hasName("nameProgramElement")
if (has_parent_line) {
all_data <-
all_data %>%
mutate(
nameBudgetActivity = case_when(
is.na(nameBudgetActivity) ~ nameProgramElement,
TRUE ~ nameBudgetActivity
),
nameProgramElement = case_when(
nameProgramElement == nameBudgetActivity ~ NA_character_,
TRUE ~ nameProgramElement
)
)
}
all_data <-
all_data %>%
mutate(
typeBudgetActual = typeBudget,
typeBudgetActual = case_when(typeBudgetActual %>% str_detect("OCO") ~ "OCO",
TRUE ~ "BASE"),
typeBudgetSub = typeBudget %>% str_remove_all("\\+| FOR | BASE |OCO") %>% str_trim(),
typeBudgetSub = case_when(
typeBudgetSub == typeBudgetActual ~ NA_character_,
TRUE ~ typeBudgetSub
)
) %>%
select(-typeBudget) %>%
rename(typeBudget = typeBudgetActual) %>%
select(
yearBudget,
slugDODBudgetGroup,
nameDODBudgetGroup,
typeBudget,
typeBudgetSub,
everything()
)
if (all_data %>% hasName("nameProgramElement") & all_data %>% hasName("nameBudgetActivity")) {
all_data <-
all_data %>%
mutate(nameProgramElement = case_when(
is.na(nameProgramElement) ~ nameBudgetActivity,
TRUE ~ nameProgramElement
))
}
if (all_data %>% hasName("nameAccountOMB")) {
all_data <-
all_data %>%
separate(
nameAccountOMB,
into = c("nameAccountOMB", "slugOrganizationAccount"),
sep = ",\\s*(?=[^,]+$)",
fill = "right",
extra = "merge"
)
if (data %>% hasName("slugOrganization")) {
all_data <-
all_data %>%
mutate(
slugOrganizationAccount = case_when(
slugOrganizationAccount %in% c("AIR FORCE") ~ "AF",
slugOrganizationAccount %in% c("ANG") ~ "AFNG",
slugOrganizationAccount %in% c("AF RES") ~ "AF RESERVE",
slugOrganizationAccount %in% c("MARINE CORPS") ~ "MC",
slugOrganizationAccount %in% c("MC RES") ~ "MC RESERVE",
slugOrganizationAccount %in% c("NAVY & MC") ~ "N/MC",
slugOrganizationAccount %in% c("N") ~ "NAVY",
slugOrganizationAccount %in% c("A") ~ "ARMY",
slugOrganizationAccount %in% c("A RES", "ARMY R", "ARMY RES") ~ "ARMY RESERVE",
slugOrganizationAccount %in% c("A GUARD") ~ "ARNG",
slugOrganizationAccount %in% c("N RES", "NAVY RES") ~ "NAVY RESERVE",
slugOrganizationAccount %in% c("DEFENSE-WIDE", "DEF-WIDE", "DEFENSE", "DEF") ~ "DW",
slugOrganizationAccount %>% str_detect("DEF COUNTERINTELLIGENCE|AND CIVIC AID") ~ "DW",
TRUE ~ slugOrganizationAccount
)
) %>%
mutate(slugOrganization = case_when(
is.na(slugOrganization) ~ slugOrganizationAccount,
TRUE ~ slugOrganization
)) %>%
mutate(
slugOrganizationAccount = case_when(
is.na(slugOrganizationAccount) ~ slugOrganization,
TRUE ~ slugOrganizationAccount
)
)
}
}
if (all_data %>% hasName("nameBudgetActivity")) {
all_data <-
all_data %>%
mutate(nameBudgetActivityActual = nameBudgetActivity)
activities <-
all_data %>%
filter(!is.na(nameBudgetActivityActual)) %>%
distinct(nameBudgetActivityActual) %>%
pull()
df_activities <-
activities %>%
map_dfr(function(x) {
x %>% message()
parts <-
x %>%
str_split('\\(') %>%
flatten_chr() %>%
str_split("\\)") %>%
flatten_chr() %>%
str_trim() %>%
discard(function(x) {
x == ""
})
if (length(parts) == 1) {
d <- tibble(nameBudgetActivityActual = x,
nameBudgetActivity = x)
return(d)
}
if (length(parts) == 3) {
d <- tibble(
nameBudgetActivityActual = x ,
nameBudgetActivity = parts[c(1, 3)] %>% str_c(collapse = " "),
slugBudgetActivity = parts[2]
)
return(d)
}
d <- tibble(
nameBudgetActivityActual = x ,
nameBudgetActivity = parts[1],
slugBudgetActivity = parts[2]
)
d
})
all_data <-
all_data %>%
select(-one_of("nameBudgetActivityActual")) %>%
left_join(df_activities, by = "nameBudgetActivity") %>%
mutate(
nameBudgetActivity = nameBudgetActivity %>% str_trim(),
nameBudgetActivity = nameBudgetActivity %>% gsub("\\)|\\(", "" , .) %>% str_trim()
) %>%
separate(
nameBudgetActivity,
into = c("nameBudgetActivity", "nameBudgetActivityDetail"),
sep = "\\ - |\\- ",
fill = "right",
extra = "merge"
)
}
if (all_data %>% hasName("nameBudgetParent")) {
all_data <-
all_data %>%
mutate(
nameBudgetParent = case_when(
nameBudgetParent %in% c("ADVANCED COMPONENT DEVELOPMENT & PROTOTYPES") ~ "ADVANCED COMPONENT DEVELOPMENT AND PROTOTYPES",
nameBudgetParent %in% c("OPERATIONAL SYSTEM DEVELOPMENT") ~ "OPERATIONAL SYSTEMS DEVELOPMENT",
nameBudgetParent %in% c("SYSTEM DEVELOPMENT & DEMONSTRATION") ~ "SYSTEM DEVELOPMENT AND DEMONSTRATION",
nameBudgetParent %in% c("RESEARCH, DEVELOPMENT, TEST, AND EVALUATION", "RDT&E") ~ "RDT&E",
nameBudgetParent %in% c("RDT&E MANAGEMENT SUPPORT") ~ "RDTE SUPPORT",
nameBudgetParent %in% c(
"ADMIN & SRVWD ACTIVITIES",
"ADMIN & SRVWIDE ACTIVITIES",
"ADMINISTRATION AND SERVICE-WIDE ACTIVITIES",
"ADMINISTRATION AND SERVICEWIDE ACTIVITIES"
) ~ "ADMINISTRATION AND SERVICEWIDE ACTIVITIES",
nameBudgetParent %in% c("ADMIN EXPNS") ~ "ADMINISTRATIVE EXPENSES",
nameBudgetParent %in% c(
"AIRCRAFT SUPPORT EQUIP & FACILITIES",
"AIRCRAFT SUPT EQUIPMENT & FACILITIES"
) ~ "AIRCRAFT SUPPORT EQUIPMENT AND FACILITIES",
nameBudgetParent %in% c("COMMUNICATIONS & ELECTRONICS EQUIP") ~ "COMMUNICATIONS AND ELECTRONICS EQUIPMENT",
nameBudgetParent %in% c("MAJOR CONST") ~ "MAJOR CONSTRUCTION",
nameBudgetParent %in% c("MINOR CONST") ~ "MINOR CONSTRUCTION",
nameBudgetParent %in% c("OPERATION & MAINTENANCE") ~ "OPERATIONS AND MAINTENANCE",
nameBudgetParent %in% c("OPERATIONAL SYSTEM DEVELOPMENT") ~ "OPERATIONAL SYSTEMS DEVELOPMENT",
nameBudgetParent %in% c("SPARES", "SPARE AND REPAIR PARTS") ~ "SPARES AND REPAIR PARTS",
nameBudgetParent %in% c("SYSTEM DEVELOPMENT & DEMONSTRATION") ~ "SYSTEM DEVELOPMENT AND DEMONSTRATION",
nameBudgetParent %in% c(
"PAY AND ALLOWANCES OF ENLISTED",
"PAY AND ALLOWANCES OF ENLISTED PERSONNEL"
) ~
"PAY AND ALLOWANCES - ENLISTED PERSONNEL",
nameBudgetParent %in% c("PAY AND ALLOWANCES OF CADETS") ~ "PAY AND ALLOWANCES - CADETS",
nameBudgetParent %in% c("PAY AND ALLOWANCES OF OFFICERS") ~ "PAY AND ALLOWANCES - OFFICERS",
TRUE ~ nameBudgetParent
)
) %>%
mutate(
nameBudgetParent = nameBudgetParent %>% str_replace_all("SUPPLY CHAIN MANAGEMENT", replacement = "SUPPLY MANAGEMENT") %>% str_replace_all("\\, MC", replacement = "\\ - MC") %>% str_replace_all("\\, NAVY", "\\ - NAVY")
) %>%
rename(nameBudgetParentActual = nameBudgetParent) %>%
separate(
nameBudgetParentActual,
into = c("nameBudgetParent", "nameBudgetParentDetail"),
sep = "\\ - ",
fill = "right",
remove = F,
extra = "merge"
) %>%
mutate_if(is.character, str_trim)
}
if (all_data %>% hasName("nameProgramElement")) {
all_data <-
all_data %>%
mutate(nameProgramElementActual = nameProgramElement)
budget_items <-
all_data %>%
distinct(nameProgramElement) %>%
filter(!is.na(nameProgramElement)) %>%
pull()
df_items <-
budget_items %>%
map_dfr(function(x) {
x %>% message()
is_semi <-
x %>% str_detect("\\:|^EDI-|^SOF |^OCO-")
if (is_semi) {
parts <-
x %>%
str_replace_all("^SOF ", "SOF:") %>%
str_split('\\:|\\-') %>%
flatten_chr() %>%
str_split("\\)") %>%
flatten_chr() %>%
str_trim() %>%
discard(function(x) {
x == ""
})
d <-
tibble(
nameProgramElementActual = x ,
nameProgramElement = parts[2],
codeProgramElement = parts[1]
)
return(d)
}
parts <-
x %>%
str_split('\\(') %>%
flatten_chr() %>%
str_split("\\)") %>%
flatten_chr() %>%
str_trim() %>%
discard(function(x) {
x == ""
})
if (length(parts) == 1) {
d <-
tibble(nameProgramElementActual = x,
nameProgramElement = x)
return(d)
}
if (length(parts) == 3) {
d <- tibble(
nameProgramElementActual = x ,
nameProgramElement = parts[c(1, 3)] %>% str_c(collapse = " "),
codeProgramElement = parts[2]
)
return(d)
}
d <- tibble(
nameProgramElementActual = x ,
nameProgramElement = parts[1],
codeProgramElement = parts[2]
)
d
}) %>%
select(-matches("slug"))
if (df_items %>% hasName("codeProgramElement")) {
df_items <- df_items %>%
rename(detailProgramElement = codeProgramElement)
}
all_data <-
all_data %>%
select(-one_of("nameProgramElementActual")) %>%
left_join(df_items, by = "nameProgramElement") %>%
mutate(
nameProgramElement = nameProgramElement %>% str_trim(),
nameProgramElement = nameProgramElement %>% gsub("\\)|\\(", "" , .) %>% str_trim()
)
}
if (all_data %>% hasName("slugOrganization")) {
all_data <-
all_data %>%
mutate(
slugOrganization = case_when(
slugOrganization %in% c("AIR FORCE", "F") ~ "AF",
slugOrganization %in% c("A") ~ "ARMY",
slugOrganization %in% c("N") ~ "NAVY",
slugOrganization %in% c("MARINE CORPS") ~ "MC",
slugOrganization %in% c("DEFENSE-WIDE", "DEF-WIDE", "DEFENSE",
"DEF", "D") ~ "DW",
TRUE ~ slugOrganization
)
)
}
all_data <-
all_data %>%
select(one_of(all_data %>%
select(-matches("amount|count")) %>% names()),
everything()) %>%
select(yearBudget:typeBudget, matches("name"), everything()) %>%
arrange(slugDODBudgetGroup, idRow) %>%
select(-idRow)
all_data
}
# download ----------------------------------------------------------------
#' Department of Defense Budgets
#'
#' Acquires DOD budgets for specified
#' years
#'
#' @param budget_years vector of budgegt years
#' @param use_ammendments if \code{TRUE} uses
#' ammended budgets
#' @param only_current_year if \code{TRUE} includes only
#' current year budget
#' @param return_message if \code{TRUE} returns message
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#' df_2020 <-
#' dod_years_budgets(budget_years = 2020)
#' }
dod_years_budgets <-
function(budget_years = 2020,
use_ammendments = F,
only_current_year = T,
only_normal_budgets = F,
return_message = T) {
if (length(budget_years) == 0) {
stop("Enter a budget year")
}
df_urls <-
dictionary_dod_budget_urls(
include_all_page_files = F,
filter_years = budget_years,
return_message = return_message
) %>%
rename(yearBudgetData = yearBudget)
if (df_urls$isAmendedBudget %>% sum(na.rm = T) > 0 &
use_ammendments) {
df_urls <- df_urls %>% filter(isAmendedBudget)
} else {
df_urls <- df_urls %>% filter(!isAmendedBudget)
}
if (only_normal_budgets) {
df_urls <- df_urls %>% filter(isBudgetGroupNormal)
}
df_urls <- df_urls %>%
group_by(yearBudgetData, nameDODBudgetGroup, isAmendedBudget) %>%
dplyr::slice(1) %>%
ungroup()
urls <- df_urls$urlBudgetDODFile
all_data <-
.dl_dod_urls(
urls = urls,
only_current_year = only_current_year,
return_message = return_message
) %>%
select(-matches("remove"))
if (all_data %>% hasName("slugTreasuryAgency")) {
all_data <- all_data %>%
mutate(slugTreasuryAgency = case_when(
is.na(slugTreasuryAgency) ~ codeAccountOMB %>% str_remove_all("[0-9]"),
TRUE ~ slugTreasuryAgency
),
codeAccountOMB = codeAccountOMB %>% str_remove_all("[A-Z]")
)
}
column_order <-
c(
"yearBudget",
"isBudgetCurrentYear",
"slugDODBudgetGroup",
"nameDODBudgetGroup",
"slugClassification",
"typeBudget",
"typeBudgetSub",
"slugBudgetParent",
"codeAccountOMB",
"nameAccountOMB",
"slugOrganization",
"slugOrganizationAccount",
"slugBudgetParent",
"nameBudgetParentActual",
"nameBudgetParent",
"nameBudgetParentDetail",
"slugBudgetActivity",
"nameBudgetActivityActual",
"nameBudgetActivity",
"nameBudgetActivityDetail",
"codeProgramElement",
"nameProgramElementActual",
"nameProgramElement",
"nameProgramElementDetail",
"nameLocation",
"nameConstructionProject",
"nameStateCountry",
"isAdded",
"isTOA",
"urlBudgetDODFile",
"idLineNumber",
"slugCost",
"typeCost",
"slugTreasuryAgency",
"slugStateCountry",
"amountItem",
"countItem",
"amountUnitCost"
)
all_data <-
all_data %>%
mutate(
nameProgramElement = case_when(
is.na(nameProgramElement) ~ nameProgramElementActual,
TRUE ~ nameProgramElement
),
nameProgramElementActual = case_when(
is.na(nameProgramElementActual) ~ nameProgramElement,
TRUE ~ nameProgramElementActual
)
)
all_data <- all_data %>%
mutate(codeProgramElement = case_when(
is.na(codeProgramElement) ~ nameConstructionProject,
TRUE ~ codeProgramElement
))
all_data <- all_data %>%
select(one_of(column_order), everything())
all_data <- all_data %>%
fill(slugClassification)
all_data <-
all_data %>%
.add_cgac() %>%
.generate_federal_account_ids(cgac_column = "idCGAC", account_column = "codeAccountOMB")
all_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.