.tbl_col_class <- function(data) {
map_df(data, class) %>% gather(column,value) %>%
mutate(idColumn = 1:n()) %>%
mutate(isExcluded = value == "NULL",
isNested = value %>% str_detect("list|data"),
isBase = !value %>% str_detect("NULL|list|data"))
}
#' SAM Financial Assistance Dictionaries
#'
#' Returns the 23 dictionaries related to
#' Federal Government Financial Assistance.
#'
#' @param assign_data if \code{TRUE} assigns each
#' table to a dictionary a tbl in the global environment
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_financial_assistance()
dictionary_sam_financial_assistance <-
memoise::memoise(function(assign_data = T) {
url <-
"https://beta.sam.gov/api/prod/fac/v1/programs/dictionaries"
data <- fromJSON(url, simplifyDataFrame = T)
data <- data[["_embedded"]][["jSONObjectList"]]
all_data <-
1:nrow(data) %>%
map_dfr(function(x) {
df_row <-
data %>%
dplyr::slice(x)
table <- df_row$content$id
table %>% message()
d <-
df_row$content$elements %>% flatten_df() %>%
as_tibble() %>%
.remove_na() %>%
mutate(table,
numberTable = x) %>%
select(numberTable, table, everything()) %>%
.munge_psc_names()
if (d %>% hasName("elements")) {
d <-
d %>% unnest(cols = "elements") %>% .remove_na() %>%
rename(
codeElementChild = code,
idElementChild = element_id,
nameChild = value
) %>%
select(one_of(names(d)), everything())
}
d
})
all_data <-
all_data %>%
.munge_data() %>%
mutate_at(c("slugTable", "codeElement", "idElement", "idElementChild"),
str_to_lower) %>%
separate(
descriptionElement,
into = c('typeElementParent', "typeElementParentDetails"),
remove = F,
fill = "right",
extra = "merge",
sep = "\\ - "
) %>%
separate(
nameChild,
into = c('typeChild', "typeChildDetails"),
remove = F,
extra = "merge",
fill = "right",
sep = "\\ - |\\("
) %>%
mutate_if(is.character, str_squish)
all_data <-
all_data %>%
select(-numberTable)
if (assign_data) {
tables <- unique(all_data$slugTable)
tables %>%
walk(function(table) {
glue("Assigning {table}") %>% message()
d <-
all_data %>% filter(slugTable == table) %>%
.remove_na()
assign(x = glue("dict_{table}"), d, envir = .GlobalEnv)
})
}
all_data
})
.dictionary_assistance_names <-
function() {
tibble(nameFieldSAM = c("ACCOUNT IDENTIFICATION (121)", "APPEALS (096)", "APPLICANT ELIGIBILITY (081)",
"APPLICATION PROCEDURES (092)", "ARCHIVED DATE", "AUDITS (112)",
"AUTHORIZATION (040)", "AWARD PROCEDURE (093)", "BENEFICIARY ELIGIBILITY (082)",
"CREDENTIALS/DOCUMENTATION (083)", "CRITERIA FOR SELECTING PROPOSALS (180)",
"DEADLINES (094)", "EXAMPLES OF FUNDED PROJECTS (170)", "FEDERAL AGENCY (030)",
"FORMULA AND MATCHING REQUIREMENTS (101)", "HEADQUARTERS OFFICE (152)",
"LENGTH AND TIME PHASING OF ASSISTANCE (102)", "OBJECTIVES (050)",
"OBLIGATIONS (122)", "OMB AGENCY CODE", "OMB BUREAU CODE", "PARENT SHORTNAME",
"POPULAR NAME (020)", "PREAPPLICATION COORDINATION (091)", "PROGRAM ACCOMPLISHMENTS (130)",
"PROGRAM NUMBER", "PROGRAM TITLE", "PUBLISHED DATE", "RANGE AND AVERAGE OF FINANCIAL ASSISTANCE (123)",
"RANGE OF APPROVAL/DISAPPROVAL TIME (095)", "RECORDS (113)",
"REGIONAL OR LOCAL OFFICE (151)", "REGULATIONS, GUIDELINES, AND LITERATURE (140)",
"RELATED PROGRAMS (160)", "RENEWALS (097)", "REPORTS (111)",
"TYPES OF ASSISTANCE (060)", "URL", "USES AND USE RESTRICTIONS (070)",
"WEBSITE ADDRESS (153)",
"RECOVERY"),
nameActual =
c("idAccount", "descriptionAppeals", "typesApplicants",
"jsonApplicationProcedures", "dateArchived", "jsonAuditProcedures",
"jsonAuthorizationProcedures", "descriptionAwardProcedure", "descriptionBeneficiaryEligibility",
"jsonCredentials", "descriptionSelectionCriteria",
"jsonDeadlines", "descriptionProjectsFunded", "nameAgency",
"jsonMatchingFormula", "nameAddressOfficeFunding",
"jsonPhasingAssistance", "descriptionProgramObjective",
"descriptionObligationBudget", "codeOMBAgency", "codeOMBBureau", "slugDepartment",
"nameProgramPopular", "jsonPreApplicationCoordination", "jsonProgramAccomplishments",
"idCFDA", "nameProgram", "dateProgramStarted", "descriptionAverageAward",
"descriptionRangeResponseTime", "descriptionRecordKeeping",
"jsonLocalOfficeFunding", "descriptionRegulations",
"typesRelatedPrograms", "descriptionRenewals",
"jsonReports",
"typeAssistance", "urlSAM", "descriptionUseRestrictions",
"urlProgram",
"isRecoveryAct")
)
}
#' Financial Assistance Field Dictionary
#'
#' Returns information about the fields contained in
#' any government financial assistance data.
#'
#' @param url default link
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_financial_assistance_fields()
dictionary_financial_assistance_fields <-
memoise::memoise(function(url = "https://s3.amazonaws.com/falextracts/Data%20Dictionary/Assistance%20Listings/FAL_Data_Dictionary.xlsx") {
data <- rio::import(url) %>% as_tibble()
data <-
data %>%
setNames(c("nameFieldSAM", "typeField", "lengthField", "descriptionField")) %>%
filter(!is.na(descriptionField)) %>%
mutate(descriptionField = descriptionField %>% str_squish() %>% str_to_upper()) %>%
filter(!descriptionField == "DEFINITION") %>%
mutate_if(is.character, str_squish) %>%
mutate_if(is.character,
list(function(x) {
ifelse(x == "", NA, x)
})) %>%
mutate_at(c("nameFieldSAM", "typeField"),
str_to_upper) %>%
group_by(nameFieldSAM) %>%
dplyr::slice(1) %>%
ungroup() %>%
separate(
nameFieldSAM,
into = c("nameField", "idField"),
sep = "\\(",
fill = "right",
remove = F
)
data <- data %>%
mutate_if(is.character,
list(function(x) {
x %>% str_squish()
}))
data <-
data %>%
mutate(idField = idField %>% str_remove_all("\\)")) %>%
left_join(
.dictionary_assistance_names(), by = "nameFieldSAM"
) %>%
select(nameActual, everything())
data
})
#' All Active Financial Assistance Programs
#'
#' Returns information about the types
#' of financial assistance and grant programs
#' of the United States Governments
#'
#' @return
#' @export
#'
#' @examples
#' us_financial_assistance_programs()
us_financial_assistance_programs <-
function() {
data <-
"https://s3.amazonaws.com/falextracts/Assistance%20Listings/datagov/AssistanceListings_DataGov_PUBLIC_CURRENT.csv" %>%
fread(verbose = F,showProgress = FALSE) %>%
as_tibble()
actual_names <-
tibble(nameFieldSAM = names(data) %>% str_to_upper()) %>%
left_join(.dictionary_assistance_names(), by = "nameFieldSAM") %>%
pull(nameActual)
data <- data %>% setNames(actual_names)
data <- data %>%
mutate(dateProgramStarted = mdy(dateProgramStarted))
data <- data %>%
separate(
idCFDA,
sep = "\\.",
remove = F,
into = c("bureauCFDA", "numberProgram"),
fill = "right",
convert = T
)
data <-
data %>%
.munge_data() %>%
mutate_if(is.character, list(function(x) {
ifelse(
x %in% c(
"",
"N/A",
"{}",
"NONE",
"NOT APPLICABLE.",
"NO DATA AVAILABLE.",
"NO DATA AVAILABLE",
"\\.",
"NOT APPLICABLE"
),
NA_character_,
x
)
}))
df_related_programs <- data %>%
filter(!is.na(typesRelatedPrograms)) %>%
distinct(idCFDA, typesRelatedPrograms, nameProgram) %>%
separate_rows(typesRelatedPrograms, sep = "\\;") %>%
mutate_if(is.character, str_squish) %>%
filter(typesRelatedPrograms != "") %>%
group_by(idCFDA, nameProgram) %>%
summarise(
typesRelatedPrograms = typesRelatedPrograms %>% str_c(collapse = " | "),
countRelatedPrograms = n()
) %>%
ungroup()
data <- data %>%
select(-typesRelatedPrograms) %>%
left_join(df_related_programs, by = c("idCFDA", "nameProgram")) %>%
select(one_of(names(data)), everything())
df_accounts <- data %>%
select(nameProgram, idCFDA, idAccount) %>%
separate_rows(idAccount, sep = "\\;") %>%
mutate_if(is.character, str_squish) %>%
filter(idAccount != "") %>%
group_by(idCFDA, nameProgram) %>%
summarise(idAccount = idAccount %>% str_c(collapse = " | "),
countAccounts = n()) %>%
ungroup()
data <-
data %>% select(-idAccount) %>%
left_join(df_accounts, by = c("idCFDA", "nameProgram")) %>%
select(one_of(names(data)), everything()) %>%
rename(idAccountsTreasury = idAccount)
df_accounts <-
data %>%
select(nameProgram, idCFDA, typeAssistance) %>%
separate_rows(typeAssistance, sep = "\\;") %>%
mutate_if(is.character, str_squish) %>%
filter(typeAssistance != "") %>%
group_by(idCFDA, nameProgram) %>%
arrange(typeAssistance) %>%
summarise(
typeAssistance = typeAssistance %>% str_c(collapse = " | "),
countTypesAssistance = n()
) %>%
ungroup()
data <-
data %>% select(-typeAssistance) %>%
left_join(df_accounts, by = c("idCFDA", "nameProgram")) %>%
select(one_of(names(data)), everything()) %>%
rename(typesAssistance = typeAssistance)
df_budgets <-
data %>%
select(nameProgram, idCFDA, descriptionObligationBudget) %>%
separate_rows(descriptionObligationBudget, sep = "\\;") %>%
mutate_if(is.character, str_squish) %>%
filter(descriptionObligationBudget != "-") %>%
separate(
descriptionObligationBudget,
sep = "\\$",
fill = "right",
into = c("yearBudget", "amountBudget")
) %>%
mutate_if(is.character, str_squish) %>%
filter(!is.na(amountBudget)) %>%
mutate(
amountBudget = parse_number(amountBudget),
isBudgetEstimate = yearBudget %>% str_detect("EST"),
slugBudgetYear = parse_number(yearBudget)
) %>%
filter(yearBudget %>% str_detect("FY")) %>%
mutate(
yearBudget = case_when(
slugBudgetYear <= 9 ~ glue("200{slugBudgetYear}") %>% as.character(),
slugBudgetYear %>% between(10, 99) ~ glue("20{slugBudgetYear}") %>% as.character(),
TRUE ~ as.character(slugBudgetYear)
)
) %>%
select(-slugBudgetYear) %>%
mutate(dateBudget = glue("{yearBudget}-10-01") %>% ymd()) %>%
select(idCFDA,
nameProgram,
isBudgetEstimate,
dateBudget,
everything()) %>%
filter(!is.na(dateBudget)) %>%
group_by(idCFDA, nameProgram) %>%
nest(.key = "dataBudgets") %>%
mutate(hasBudgets = T,
countYears = dataBudgets %>% map_dbl(nrow)) %>%
ungroup()
data <- data %>%
left_join(df_budgets, by = c("idCFDA", "nameProgram")) %>%
select(one_of(names(data)), everything())
data <- data %>%
mutate(
slugKey = urlSAM %>% str_remove_all("https://beta.sam.gov/fal/|/view"),
urlAssistanceAPI = glue("https://beta.sam.gov/api/prod/fac/v1/programs/{slugKey}") %>% as.character()
)
data <-
data %>%
mutate(nameAgency = nameAgency %>% str_replace_all("\\ , ", "\\, "))
data <- data %>%
mutate_if(is.character,
list(function(x) {
case_when(x == "N/A" ~ NA_character_,
TRUE ~ x)
}))
cols <-
data %>% select_if(is.character) %>% names()
sbir_match_cols <-
cols %>%
map_dfr(function(x){
df_matches <-
data %>% select(x) %>%
filter(!!sym(x) %>% str_detect("SBIR|STTR|SMALL BUSINESS INNOVAT|SMALL BUSINESS TECHNOLOGY TRANSFER"))
matches <- nrow(df_matches)
tibble(column = x, matches)
})
match_cols <- sbir_match_cols %>% filter(matches >0)
df_sbirs <-
match_cols$column %>%
map_dfr(function(x) {
df_matches <-
data %>% select(x, idCFDA) %>%
filter(
!!sym(x) %>% str_detect(
"SBIR|STTR|SMALL BUSINESS INNOVAT|SMALL BUSINESS TECHNOLOGY TRANSFER"
)
) %>%
select(idCFDA)
df_matches
}) %>%
distinct() %>%
mutate(isSBIR = T)
data <-
data %>%
left_join(df_sbirs, by = "idCFDA")
data <-
data %>%
separate(
idCFDA,
sep = "\\.",
remove = F,
into = c("bureauCFDA", "numberProgram"),
fill = "right",
convert = T
) %>%
mutate_at(c("bureauCFDA", "numberProgram"),
as.integer)
## Clean Agency
### TO DO -- JSON PARSER and Amount Parsers
data
}
# programs ----------------------------------------------------------------
.parse_financial_assistance_program_json <-
function(url = "https://beta.sam.gov/api/prod/fac/v1/programs/1fd5888d80b24c42b6d95838ca265293") {
json <-
url %>% fromJSON()
data <-
tibble(item = unlist(json) %>% names(),
value = unlist(json) %>% as.character()) %>%
mutate(slugKey = url %>% str_remove_all("https://beta.sam.gov/api/prod/fac/v1/programs/")) %>%
filter(!is.na(value)) %>%
filter(value != "") %>%
filter(!item == "_links.self.href") %>%
select(slugKey, everything()) %>%
mutate(item = item %>% str_remove_all("data."),
value = str_squish(value)) %>%
filter(!item %>% str_detect("^status.|^additionalInfo."))
data
}
#' Parse financial assistance program urls
#'
#' Parses JSON from financial assistance programs
#' listed in SAM Beta.
#'
#' @param urls
#'
#' @return
#' @export
#'
#' @examples
parse_financial_assistance_program_urls <-
function(urls = c("https://beta.sam.gov/api/prod/fac/v1/programs/0cb9d7d9f3e7420ca4730a01da4f9e3f",
"https://beta.sam.gov/api/prod/fac/v1/programs/183a5de92e964588b7252e951d99e7af",
"https://beta.sam.gov/api/prod/fac/v1/programs/20894235cc6649789feaa14b71406638",
"https://beta.sam.gov/api/prod/fac/v1/programs/e6a08f2eaa8446ee86d91c77b133d431",
"https://beta.sam.gov/api/prod/fac/v1/programs/cd9013559fdf35befc9a3641aec94af7",
"https://beta.sam.gov/api/prod/fac/v1/programs/a5d50bfa60fe4b0695d0e0371f220523",
"https://beta.sam.gov/api/prod/fac/v1/programs/8f56239bcdbb5245e64be7e84d75a2f3",
"https://beta.sam.gov/api/prod/fac/v1/programs/ca81d5c9137b478087ea73f825097e19",
"https://beta.sam.gov/api/prod/fac/v1/programs/5aeb34c3ffc34a2799d5322bde5b5098",
"https://beta.sam.gov/api/prod/fac/v1/programs/0f138edbb8c54e1b91f4ed434b324316"
)
) {
.parse_financial_assistance_program_json_safe <-
possibly(.parse_financial_assistance_program_json, tibble())
data <-
urls %>%
future_map_dfr(function(url) {
.parse_financial_assistance_program_json_safe(url = url)
})
df_cols <-
distinct(data, item) %>%
mutate(fields = str_count(item, "\\."))
df_base <-
data %>%
filter(item %in% c(df_cols %>% filter(fields == 0) %>% pull(item)))
df_base <- df_base %>% spread(item, value)
df_assistance <-
df_base %>% select(slugKey, matches("assistanceTypes")) %>%
gather(typeAssitance, value, -slugKey, na.rm = T) %>%
select(-typeAssitance) %>%
rename(codeAssistance = value) %>%
nest(dataAssistance = c(codeAssistance)) %>%
mutate(countAssistanceTypes = dataAssistance %>% map_dbl(nrow),
hasAssistance = T)
df_related_programs <-
df_base %>% select(slugKey, matches("relatedPrograms")) %>%
gather(typeAssitance, value, -slugKey, na.rm = T) %>%
rename(slugKeyRelatedProgram = value) %>%
select(-typeAssitance) %>%
distinct() %>%
nest(dataRelatedPrograms = c(slugKeyRelatedProgram)) %>%
mutate(countRelatedPrograms = dataRelatedPrograms %>% map_dbl(nrow),
hasRelatedPrograms = T)
df_base <-
df_base %>% select(-matches("relatedPrograms|assistanceTypes"))
df_base <-
df_base %>%
.munge_grant_names() %>%
mutate(nameProgramPopular = nameProgramPopular %>% str_remove_all("\\)|\\(")) %>%
.munge_data() %>%
mutate(yearFiscal = as.integer(yearFiscal))
df_nested_cols <-
df_cols %>% filter(fields > 0) %>%
separate(
item,
into = c("table", "fields"),
sep = "\\.",
extra = "merge",
fill = "right",
remove = F
)
tables <- df_nested_cols$table %>% unique()
data_nested <-
tables %>%
map(function(table_name) {
items <-
df_nested_cols %>%
filter(table == table_name) %>%
pull(item)
df <- data %>%
filter(item %in% items)
if (table_name == "authorizations") {
df <-
df %>%
mutate(item = item %>% str_remove_all("authorizations."))
df <-
df %>%
filter(!value %in% c("\\.", "", "N/A", "n/a")) %>%
mutate(
isList = item %>% str_detect("list"),
item = item %>% str_remove_all("list.")
) %>%
filter(value != ".")
df <-
df %>%
separate(
item,
into = c("table", "item"),
sep = "\\.",
fill = "left"
) %>%
mutate(
numberItem = item %>% str_to_lower() %>% str_remove_all("[a-z]") %>% as.integer() %>% coalesce(0L),
item = item %>% str_remove_all("[0-9]")
)
df <-
df %>%
mutate(table = case_when(is.na(table) ~ "act",
TRUE ~ table)) %>%
unite(item, table, item, sep = "") %>%
select(slugKey, numberItem, item, value) %>%
distinct()
df <-
df %>%
group_by(slugKey, numberItem, item) %>%
summarise(value = value %>% str_c(collapse = " ")) %>%
ungroup() %>%
spread(item, value)
df <-
df %>%
.munge_grant_names() %>%
.munge_data()
df <-
df %>%
nest(
-slugKey,
) %>%
rename(dataAuthorizations = data)
return(df)
}
if (table_name == "eligibility") {
df <-
df %>%
mutate(item = item %>% str_remove_all("eligibility.")) %>%
mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
mutate(id = id %>% str_remove_all("\\.")) %>%
separate(
item,
into = c("table", "item"),
sep = "\\.",
fill = "right",
extra = "merge"
) %>%
mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]") %>% str_remove_all("\\."))
df_base_r <-
df %>% filter(id == "") %>%
unite(item, item, table, sep = "") %>%
select(-id) %>% spread(item, value)
df_nested_r <-
df %>%
filter(id != "") %>%
mutate(field = item %>% str_remove_all("[0-9]"))
df_applicant_types <-
df_nested_r %>% filter(table == "applicant") %>%
filter(field == "types") %>%
select(slugKey, table, idApplicantType = value, id) %>%
mutate(idApplicantType = as.numeric(idApplicantType)) %>%
select(slugKey, idApplicantType) %>%
nest(dataApplicantTypes = c(idApplicantType)) %>%
mutate(
hasApplicantTypes = TRUE,
countApplicantTypes = dataApplicantTypes %>% map_dbl(nrow)
)
df_assistance_types <-
df_nested_r %>% filter(table == "assistanceUsage") %>%
filter(field == "types") %>%
select(slugKey, table, idAssistanceUsage = value, id) %>%
mutate(idAssistanceUsage = as.numeric(idAssistanceUsage)) %>%
select(slugKey, idAssistanceUsage) %>%
nest(dataAssistanceUsage = c(idAssistanceUsage)) %>%
mutate(
hasAssistanceUsage = TRUE,
countAssistanceUsage = dataAssistanceUsage %>% map_dbl(nrow)
)
df_beneficiary_types <-
df_nested_r %>% filter(table == "beneficiary") %>%
filter(field == "types") %>%
select(slugKey, table, beneficiary = value, id) %>%
mutate(idBeneficiary = as.numeric(beneficiary)) %>%
select(slugKey, idBeneficiary) %>%
nest(dataBeneficiary = c(idBeneficiary)) %>%
mutate(hasBeneficiary = TRUE,
countBeneficiary = dataBeneficiary %>% map_dbl(nrow))
df <-
list(df_base_r,
df_beneficiary_types,
df_assistance_types,
df_applicant_types) %>%
reduce(left_join)
df <- .munge_grant_names(df)
df <-
df %>%
.munge_data()
return(df)
}
if (table_name == "compliance") {
df <-
df %>%
mutate(item = item %>% str_remove_all("compliance.|questions.|formula.|types.")) %>%
mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
mutate(id = id %>% str_remove_all("\\.")) %>%
separate(
item,
into = c("table", "item"),
sep = "\\.",
fill = "right",
extra = "merge"
) %>%
mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
mutate(
table = case_when(
table == "audit" ~ "Audit",
table == "records" ~ "Records",
table == "documents" ~ "Documents",
table == "ndMatching" ~ "Matching",
table == "CFR200Requirements" ~ "CFR200",
TRUE ~ table
)
)
df_base_r <-
df %>% filter(id == "") %>%
unite(item, item, table, sep = "") %>%
select(-id) %>% spread(item, value)
df_nested_r <-
df %>%
filter(id != "") %>%
mutate(field = item %>% str_remove_all("[0-9]"))
df_reports <-
df_nested_r %>% filter(table == "reports") %>%
filter(field == "code") %>%
select(slugKey, table, value, id) %>%
left_join(
df_nested_r %>% filter(table == "reports") %>%
filter(field == "isSelected") %>%
select(slugKey, table, result = value, id) %>%
mutate(result = as.logical(result)),
Joining,
by = c("slugKey", "table", "id")
) %>%
select(slugKey, value, result) %>%
spread(value, result)
df_cfar <-
df_nested_r %>% filter(table == "CFR200") %>%
filter(field == "code") %>%
select(slugKey, table, value, id) %>%
left_join(
df_nested_r %>% filter(table == "CFR200") %>%
filter(field == "isSelected") %>%
select(slugKey, table, result = value, id) %>%
mutate(result = as.logical(result)),
Joining,
by = c("slugKey", "table", "id")
) %>%
select(slugKey, value, result) %>%
spread(value, result)
df_matching <-
df_nested_r %>% filter(table == "Matching") %>%
select(slugKey, item, value) %>%
spread(item, value)
df <-
list(df_base_r, df_cfar, df_matching, df_reports) %>%
reduce(left_join)
df <- .munge_grant_names(df)
df <- df %>%
.munge_data()
df <- df %>%
mutate(pctMatching = as.integer(pctMatching) / 100)
return(df)
}
if (table_name == "assistance") {
df <-
df %>%
mutate(item = item %>% str_remove_all("assistance.")) %>%
mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
mutate(id = id %>% str_remove_all("\\.")) %>%
filter(id == "") %>%
select(-id) %>%
spread(item, value) %>%
.munge_grant_names() %>%
mutate_if(is.character, list(function(x) {
x %>% str_remove_all("\\(|\\)")
})) %>%
.munge_data()
return(df)
}
if (table_name == "financial") {
df <-
df %>%
mutate(item = item %>% str_remove_all("financial."))
df <-
df %>%
filter(!value %in% c("\\.", "", "N/A", "n/a")) %>%
mutate(
isList = item %>% str_detect("list"),
item = item %>% str_remove_all("list.")
) %>%
filter(value != ".") %>%
separate(
item,
into = c("table", "item"),
sep = "\\.",
extra = "merge",
fill = "left"
) %>%
mutate(
numberItem = item %>% str_to_lower() %>% str_remove_all("[a-z]") %>% as.integer() %>% coalesce(0L),
item = item %>% str_remove_all("[0-9]")
) %>%
mutate(table = case_when(table %>% is.na() ~ "base",
TRUE ~ table))
df_base <-
df_base %>%
.munge_grant_names() %>%
.munge_data()
df_accounts <-
df %>%
filter(table == "accounts") %>%
unite(item, table, item, sep = ".") %>%
select(-isList) %>%
spread(item, value) %>%
.munge_grant_names() %>%
.munge_data() %>%
mutate(numberItem = numberItem + 1)
df_acc <-
df %>%
filter(table == "accomplishments") %>%
filter(item != "isApplicable") %>%
mutate(item = case_when(
item == "fiscalYear" ~ "year",
TRUE ~ "descriptionAccomplishments"
))
df_years_acc <-
df_acc %>%
filter(item == "year") %>%
select(slugKey, year = value) %>%
mutate(year = as.integer(year)) %>%
group_by(slugKey, year) %>%
mutate(numberItem = 1:n() - 1) %>%
ungroup() %>%
select(slugKey, numberItem, year) %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_des <- df_acc %>%
filter(item != "year") %>%
select(slugKey, descriptionAccomplishments = value) %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_acc <-
df_years_acc %>%
left_join(df_des, by = c("slugKey", "idRow")) %>%
select(slugKey, numberItem, year, descriptionAccomplishments)
df_acc <- df_acc %>%
.munge_data()
rm(df_des)
rm(df_years_acc)
gc()
df_values <-
df %>%
filter(table == "obligations") %>%
mutate(item = item %>% str_remove_all("values."))
df_year <-
df_values %>% filter(item == "year") %>%
select(slugKey, year = value) %>%
mutate(year = as.integer(year)) %>%
group_by(slugKey, year) %>%
arrange(year) %>%
mutate(numberItem = 1:n() - 1) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_estimate <-
df_values %>%
filter(item == "estimate") %>%
select(slugKey, estimate = value) %>%
mutate(estimate = readr::parse_number(estimate)) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup() %>%
filter(!is.na(estimate)) %>%
rename(amountEstimate = estimate)
df_obligation_ids <- df_values %>%
filter(item == "obligationId") %>%
select(slugKey, idObligation = value) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_assistance_types <-
df_values %>%
filter(item == "assistanceType") %>%
select(slugKey, codeAssistance = value) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_recovery <-
df_values %>%
filter(item == "isRecoveryAct") %>%
select(slugKey, isRecoveryAct = value) %>%
mutate(isRecoveryAct = as.logical(isRecoveryAct)) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
df_descriptions <- df_values %>%
filter(item %in% c("explanation", "description")) %>%
select(slugKey, value) %>%
distinct() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup()
if (nrow(df_values %>%
filter(item == "actual")) > 0 ) {
df_actual <- df_values %>%
filter(item == "actual") %>%
select(slugKey, actual = value) %>%
mutate(actual = readr::parse_number(actual)) %>%
ungroup() %>%
group_by(slugKey) %>%
mutate(idRow = 1:n()) %>%
ungroup() %>%
filter(!is.na(actual)) %>%
rename(amountActual = actual)
}
df <- list(
df_year,
df_acc,
df_obligation_ids,
df_assistance_types,
df_recovery,
df_estimate
) %>% reduce(left_join)
if ('df_actual' %>% exists()) {
df <- df %>% left_join(df_actual)
}
df <-
df %>%
select(-idRow) %>%
nest(
-slugKey
) %>%
rename(dataBudgets = data) %>%
mutate(
hasFinancialData = T,
countYearsFinancialdata = dataBudgets %>% map_dbl(nrow)
)
df
return(df)
}
if (table_name == "projects") {
df <-
df %>%
mutate(item = item %>% str_remove_all("projects.|list."))
df_base_r <-
df %>% filter(item == "isApplicable") %>% spread(item, value) %>% mutate(isApplicable = as.logical(isApplicable))
df_nest_r <- df %>% filter(item != "isApplicable") %>%
mutate(item = item %>% str_remove_all("[0-9]")) %>%
filter(item == "fiscalYear") %>%
select(slugKey, yearFiscal = value) %>%
left_join(
df %>% filter(item != "isApplicable") %>%
mutate(item = item %>% str_remove_all("[0-9]")) %>%
filter(item != "fiscalYear") %>%
select(slugKey, descriptionProjects = value),
by = "slugKey"
) %>%
mutate(yearFiscal = as.integer(yearFiscal)) %>%
.munge_data()
df_nest_r <- df_nest_r %>%
nest(dataProjectsFunded = c(yearFiscal, descriptionProjects)) %>%
mutate(countFundedProjectDescriptions = dataProjectsFunded %>% map_dbl(nrow)) %>%
select(slugKey,
countFundedProjectDescriptions,
dataProjectsFunded)
df <- df_base_r %>%
left_join(df_nest_r, by = "slugKey") %>%
rename(hasFundedProjectDescriptions = isApplicable)
return(df)
}
if (table_name == "contacts") {
df <- df %>%
mutate(item = item %>% str_remove_all("contacts.local.|contacts.headquarters."))
df <- df %>%
mutate(item = item %>% str_remove_all("[0-9]"))
df <- df %>%
group_by(slugKey, item) %>% dplyr::slice(1) %>%
ungroup() %>%
spread(item, value) %>%
.munge_grant_names() %>%
rename(descriptionContact = descriptionProgram,
titleContact = nameProgram) %>%
.munge_data(clean_address = F) %>%
mutate(countryContact = case_when(countryContact == "US" ~ "USA",
TRUE ~ countryContact))
return(df)
}
})
data_nested <- data_nested %>% reduce(left_join)
data <-
df_base %>%
left_join(data_nested, by = "slugKey") %>%
left_join(df_assistance, by = "slugKey") %>%
left_join(df_related_programs, by = "slugKey") %>%
select(names(df_base), everything()) %>%
separate(
idCFDA,
sep = "\\.",
remove = F,
into = c("bureauCFDA", "numberProgram"),
fill = "right",
convert = T
) %>%
mutate_at(c("bureauCFDA", "numberProgram"),
as.integer)
data
}
#' Bulk download of all CFDA programs
#'
#' Returns all active U.S. financial assistance
#' programs and all of the detailed information
#' about the program from the SAM api
#'
#' @param return_message if \code{TRUE} returns a message
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' \dontrun{
#' bulk_us_financial_assistance_programs()
#' }
bulk_us_financial_assistance_programs <-
function(return_message = T) {
df_grants <- us_financial_assistance_programs()
if (return_message) {
glue("Acquiring detailed data for all {nrow(df_grants)} active U.S. financial assistance programs") %>% message()
}
data <-
parse_financial_assistance_program_urls(urls = df_grants$urlAssistanceAPI)
data <- df_grants %>%
select(
slugKey,
nameAgency,
slugDepartment,
idAccountsTreasury,
bureauCFDA,
idCFDA,
isRecoveryAct,
nameAddressOfficeFunding,
dateProgramStarted,
descriptionRegulations
) %>%
left_join(data %>% select(-one_of(c(
"idCFDA", "bureauCFDA"
))), by = "slugKey")
if (return_message) {
actual <-
data %>% select(slugKey, dataBudgets) %>%
unnest_legacy() %>%
pull(amountActual) %>%
sum(na.rm = T) %>%
currency(digits = 0)
glue("Acquired {crayon::green(actual)} in actual financial assistance spending") %>%
message()
}
data
}
.generate_ast_url <-
function() {
current_year <- Sys.Date() %>% year()
month_no <- month(Sys.Date())
month_name_slug <- month.name[month_no] %>% substr(1,3)
month_slug <-
case_when(
month_no %>% nchar() == 1 ~ glue("0{month_no}") %>% as.character(),
TRUE ~ as.character(month_no)
)
url <- glue(
"https://beta.sam.gov/api/prod/fileextractservices/v1/api/listfiles?random=1580754033930&domain=Assistance%20Listings/grantsgov/{current_year}/{month_slug}-{month_name_slug}"
) %>%
as.character()
json_data <- fromJSON(url, simplifyDataFrame = T)
data <- json_data[[1]][[1]][1:4] %>% as_tibble()
urls <- json_data[[1]]$customS3ObjectSummaryList[["_links"]]$self$href
data <-
data %>%
select(2,4) %>%
setNames(c("dateData", "slugAssistance")) %>%
mutate(dateData = mdy(dateData),
urlData = urls %>% map_chr(URLencode))
data
}
.parse_summary_assistance <-
function(url = "https://s3.amazonaws.com/falextracts/Assistance%20Listings/grantsgov/2020/02-Feb/AssistanceListings_GrantsGov_PUBLIC_DAILY_20200202.csv") {
data <-
fread(url) %>%
as_tibble()
data <- data %>%
set_names(
c(
"nameProgram",
"idCFDA",
"nameAgency",
"dateProgramStarted",
"slugDepartment",
"urlAssistanceAPI"
)
) %>%
mutate(dateProgramStarted = mdy(dateProgramStarted))
data <- data %>%
separate(
idCFDA,
sep = "\\.",
remove = F,
into = c("bureauCFDA", "numberProgram"),
fill = "right",
convert = T
)
data <-
data %>%
.munge_data() %>%
mutate_if(is.character, list(function(x) {
ifelse(
x %in% c(
"",
"N/A",
"{}",
"NONE",
"NOT APPLICABLE.",
"NO DATA AVAILABLE.",
"NO DATA AVAILABLE",
"\\.",
"NOT APPLICABLE"
),
NA_character_,
x
)
}))
data
}
#' US Financial Assistance Programs
#'
#' Brief list of the most recent CFDA
#' programs
#'
#' @param include_program_details if \code{TRUE} includes program
#' details
#'
#' @return
#' @export
#'
#' @examples
us_financial_assistance_programs_summary <-
function(include_program_details = F) {
url <- .generate_ast_url() %>%
filter(dateData == max(dateData)) %>%
pull(urlData)
data <-
.parse_summary_assistance(url = url)
data <-
data %>%
mutate(
urlSAM = urlAssistanceAPI,
slugKey = urlAssistanceAPI %>% str_remove_all("https://beta.sam.gov/fal/|/view"),
urlAssistanceAPI = glue("https://beta.sam.gov/api/prod/fac/v1/programs/{slugKey}") %>% as.character()
) %>%
select(-slugKey)
if (include_program_details) {
df_details <-
data$urlAssistanceAPI %>%
parse_financial_assistance_program_urls()
data <- data %>%
left_join(
df_details %>%
mutate(idCFDA = as.numeric(idCFDA),)
,
by = c("nameProgram", "idCFDA", "bureauCFDA", "numberProgram")
)
}
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.