R/financial_assistance.R

Defines functions us_financial_assistance_programs_summary .parse_summary_assistance .generate_ast_url bulk_us_financial_assistance_programs parse_financial_assistance_program_urls .parse_financial_assistance_program_json us_financial_assistance_programs .dictionary_assistance_names .tbl_col_class

Documented in bulk_us_financial_assistance_programs parse_financial_assistance_program_urls us_financial_assistance_programs us_financial_assistance_programs_summary

.tbl_col_class <- function(data) {
  map_df(data, class) %>% gather(column,value) %>%
    mutate(idColumn = 1:n()) %>%
    mutate(isExcluded = value == "NULL",
           isNested = value %>% str_detect("list|data"),
           isBase = !value %>% str_detect("NULL|list|data"))
}

#' SAM Financial Assistance Dictionaries
#'
#' Returns the 23 dictionaries related to
#' Federal Government Financial Assistance.
#'
#' @param assign_data if \code{TRUE} assigns each
#' table to a dictionary a tbl in the global environment
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_financial_assistance()
dictionary_sam_financial_assistance <-
  memoise::memoise(function(assign_data = T) {
    url <-
      "https://beta.sam.gov/api/prod/fac/v1/programs/dictionaries"
    data <- fromJSON(url, simplifyDataFrame = T)
    data <- data[["_embedded"]][["jSONObjectList"]]

    all_data <-
      1:nrow(data) %>%
      map_dfr(function(x) {
        df_row <-
          data %>%
          dplyr::slice(x)
        table <- df_row$content$id
        table %>% message()

        d <-
          df_row$content$elements %>% flatten_df() %>%
          as_tibble() %>%
          .remove_na() %>%
          mutate(table,
                 numberTable = x) %>%
          select(numberTable, table, everything()) %>%
          .munge_psc_names()

        if (d %>% hasName("elements")) {
          d <-
            d %>% unnest(cols = "elements") %>% .remove_na() %>%
            rename(
              codeElementChild = code,
              idElementChild = element_id,
              nameChild = value
            ) %>%
            select(one_of(names(d)), everything())
        }

        d
      })

    all_data <-
      all_data %>%
      .munge_data() %>%
      mutate_at(c("slugTable", "codeElement", "idElement", "idElementChild"),
                str_to_lower) %>%
      separate(
        descriptionElement,
        into = c('typeElementParent', "typeElementParentDetails"),
        remove = F,
        fill = "right",
        extra = "merge",
        sep = "\\ - "
      ) %>%
      separate(
        nameChild,
        into = c('typeChild', "typeChildDetails"),
        remove = F,
        extra = "merge",
        fill = "right",
        sep = "\\ - |\\("
      ) %>%
      mutate_if(is.character, str_squish)

    all_data <-
      all_data %>%
      select(-numberTable)

    if (assign_data) {
      tables <- unique(all_data$slugTable)

      tables %>%
        walk(function(table) {
          glue("Assigning {table}") %>% message()
          d <-
            all_data %>% filter(slugTable == table) %>%
            .remove_na()
          assign(x = glue("dict_{table}"), d, envir = .GlobalEnv)
        })

    }

    all_data

  })


.dictionary_assistance_names <-
  function() {
    tibble(nameFieldSAM = c("ACCOUNT IDENTIFICATION (121)", "APPEALS (096)", "APPLICANT ELIGIBILITY (081)",
                "APPLICATION PROCEDURES (092)", "ARCHIVED DATE", "AUDITS (112)",
                "AUTHORIZATION (040)", "AWARD PROCEDURE (093)", "BENEFICIARY ELIGIBILITY (082)",
                "CREDENTIALS/DOCUMENTATION (083)", "CRITERIA FOR SELECTING PROPOSALS (180)",
                "DEADLINES (094)", "EXAMPLES OF FUNDED PROJECTS (170)", "FEDERAL AGENCY (030)",
                "FORMULA AND MATCHING REQUIREMENTS (101)", "HEADQUARTERS OFFICE (152)",
                "LENGTH AND TIME PHASING OF ASSISTANCE (102)", "OBJECTIVES (050)",
                "OBLIGATIONS (122)", "OMB AGENCY CODE", "OMB BUREAU CODE", "PARENT SHORTNAME",
                "POPULAR NAME (020)", "PREAPPLICATION COORDINATION (091)", "PROGRAM ACCOMPLISHMENTS (130)",
                "PROGRAM NUMBER", "PROGRAM TITLE", "PUBLISHED DATE", "RANGE AND AVERAGE OF FINANCIAL ASSISTANCE (123)",
                "RANGE OF APPROVAL/DISAPPROVAL TIME (095)", "RECORDS (113)",
                "REGIONAL OR LOCAL OFFICE (151)", "REGULATIONS, GUIDELINES, AND LITERATURE (140)",
                "RELATED PROGRAMS (160)", "RENEWALS (097)", "REPORTS (111)",
                "TYPES OF ASSISTANCE (060)", "URL", "USES AND USE RESTRICTIONS (070)",
                "WEBSITE ADDRESS (153)",
                "RECOVERY"),
           nameActual =
             c("idAccount", "descriptionAppeals", "typesApplicants",
               "jsonApplicationProcedures", "dateArchived", "jsonAuditProcedures",
               "jsonAuthorizationProcedures", "descriptionAwardProcedure", "descriptionBeneficiaryEligibility",
               "jsonCredentials", "descriptionSelectionCriteria",
               "jsonDeadlines", "descriptionProjectsFunded", "nameAgency",
               "jsonMatchingFormula", "nameAddressOfficeFunding",
               "jsonPhasingAssistance", "descriptionProgramObjective",
               "descriptionObligationBudget", "codeOMBAgency", "codeOMBBureau", "slugDepartment",
               "nameProgramPopular", "jsonPreApplicationCoordination", "jsonProgramAccomplishments",
               "idCFDA", "nameProgram", "dateProgramStarted", "descriptionAverageAward",
               "descriptionRangeResponseTime", "descriptionRecordKeeping",
               "jsonLocalOfficeFunding", "descriptionRegulations",
               "typesRelatedPrograms", "descriptionRenewals",
               "jsonReports",
               "typeAssistance", "urlSAM", "descriptionUseRestrictions",
               "urlProgram",
               "isRecoveryAct")
    )
  }

#' Financial Assistance Field Dictionary
#'
#' Returns information about the fields contained in
#' any government financial assistance data.
#'
#' @param url default link
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_financial_assistance_fields()
dictionary_financial_assistance_fields <-
  memoise::memoise(function(url = "https://s3.amazonaws.com/falextracts/Data%20Dictionary/Assistance%20Listings/FAL_Data_Dictionary.xlsx") {
    data <- rio::import(url) %>% as_tibble()
    data <-
      data %>%
      setNames(c("nameFieldSAM", "typeField", "lengthField", "descriptionField")) %>%
      filter(!is.na(descriptionField)) %>%
      mutate(descriptionField = descriptionField %>% str_squish() %>% str_to_upper()) %>%
      filter(!descriptionField == "DEFINITION") %>%
      mutate_if(is.character, str_squish) %>%
      mutate_if(is.character,
                list(function(x) {
                  ifelse(x == "", NA, x)
                })) %>%
      mutate_at(c("nameFieldSAM", "typeField"),
                str_to_upper) %>%
      group_by(nameFieldSAM) %>%
      dplyr::slice(1) %>%
      ungroup() %>%
      separate(
        nameFieldSAM,
        into = c("nameField", "idField"),
        sep = "\\(",
        fill = "right",
        remove = F
      )

    data <- data %>%
      mutate_if(is.character,
                list(function(x) {
                  x %>% str_squish()
                }))
    data <-
      data %>%
      mutate(idField = idField %>% str_remove_all("\\)")) %>%
      left_join(
        .dictionary_assistance_names(), by = "nameFieldSAM"
      ) %>%
      select(nameActual, everything())


    data
  })

#' All Active Financial Assistance Programs
#'
#' Returns information about the types
#' of financial assistance and grant programs
#' of the United States Governments
#'
#' @return
#' @export
#'
#' @examples
#' us_financial_assistance_programs()
us_financial_assistance_programs <-
  function() {
    data <-
      "https://s3.amazonaws.com/falextracts/Assistance%20Listings/datagov/AssistanceListings_DataGov_PUBLIC_CURRENT.csv" %>%
      fread(verbose = F,showProgress = FALSE) %>%
      as_tibble()

    actual_names <-
      tibble(nameFieldSAM = names(data) %>% str_to_upper()) %>%
      left_join(.dictionary_assistance_names(), by = "nameFieldSAM") %>%
      pull(nameActual)


    data <- data %>% setNames(actual_names)

    data <- data %>%
      mutate(dateProgramStarted = mdy(dateProgramStarted))

    data <- data %>%
      separate(
        idCFDA,
        sep = "\\.",
        remove = F,
        into = c("bureauCFDA", "numberProgram"),
        fill = "right",
        convert = T
      )

    data <-
      data %>%
      .munge_data() %>%
      mutate_if(is.character, list(function(x) {
        ifelse(
          x %in% c(
            "",
            "N/A",
            "{}",
            "NONE",
            "NOT APPLICABLE.",
            "NO DATA AVAILABLE.",
            "NO DATA AVAILABLE",
            "\\.",
            "NOT APPLICABLE"
          ),
          NA_character_,
          x
        )
      }))


    df_related_programs <- data %>%
      filter(!is.na(typesRelatedPrograms)) %>%
      distinct(idCFDA, typesRelatedPrograms, nameProgram) %>%
      separate_rows(typesRelatedPrograms, sep = "\\;") %>%
      mutate_if(is.character, str_squish) %>%
      filter(typesRelatedPrograms != "") %>%
      group_by(idCFDA, nameProgram) %>%
      summarise(
        typesRelatedPrograms = typesRelatedPrograms %>% str_c(collapse =  " | "),
        countRelatedPrograms = n()
      ) %>%
      ungroup()


    data <- data %>%
      select(-typesRelatedPrograms) %>%
      left_join(df_related_programs, by = c("idCFDA", "nameProgram")) %>%
      select(one_of(names(data)), everything())

    df_accounts <- data %>%
      select(nameProgram, idCFDA, idAccount) %>%
      separate_rows(idAccount, sep = "\\;") %>%
      mutate_if(is.character, str_squish) %>%
      filter(idAccount != "") %>%
      group_by(idCFDA, nameProgram) %>%
      summarise(idAccount = idAccount %>% str_c(collapse =  " | "),
                countAccounts = n()) %>%
      ungroup()

    data <-
      data %>% select(-idAccount) %>%
      left_join(df_accounts,  by = c("idCFDA", "nameProgram")) %>%
      select(one_of(names(data)), everything()) %>%
      rename(idAccountsTreasury = idAccount)

    df_accounts <-
      data %>%
      select(nameProgram, idCFDA, typeAssistance) %>%
      separate_rows(typeAssistance, sep = "\\;") %>%
      mutate_if(is.character, str_squish) %>%
      filter(typeAssistance != "") %>%
      group_by(idCFDA, nameProgram) %>%
      arrange(typeAssistance) %>%
      summarise(
        typeAssistance = typeAssistance %>% str_c(collapse =  " | "),
        countTypesAssistance = n()
      ) %>%
      ungroup()

    data <-
      data %>% select(-typeAssistance) %>%
      left_join(df_accounts,  by = c("idCFDA", "nameProgram")) %>%
      select(one_of(names(data)), everything()) %>%
      rename(typesAssistance = typeAssistance)


    df_budgets <-
      data %>%
      select(nameProgram, idCFDA, descriptionObligationBudget) %>%
      separate_rows(descriptionObligationBudget, sep = "\\;") %>%
      mutate_if(is.character, str_squish) %>%
      filter(descriptionObligationBudget != "-") %>%
      separate(
        descriptionObligationBudget,
        sep = "\\$",
        fill = "right",
        into = c("yearBudget", "amountBudget")
      ) %>%
      mutate_if(is.character, str_squish) %>%
      filter(!is.na(amountBudget)) %>%
      mutate(
        amountBudget = parse_number(amountBudget),
        isBudgetEstimate = yearBudget %>% str_detect("EST"),
        slugBudgetYear = parse_number(yearBudget)
      ) %>%
      filter(yearBudget %>% str_detect("FY")) %>%
      mutate(
        yearBudget = case_when(
          slugBudgetYear <= 9 ~ glue("200{slugBudgetYear}") %>% as.character(),
          slugBudgetYear %>% between(10, 99) ~ glue("20{slugBudgetYear}") %>% as.character(),
          TRUE ~ as.character(slugBudgetYear)
        )
      ) %>%
      select(-slugBudgetYear) %>%
      mutate(dateBudget = glue("{yearBudget}-10-01") %>% ymd()) %>%
      select(idCFDA,
             nameProgram,
             isBudgetEstimate,
             dateBudget,
             everything()) %>%
      filter(!is.na(dateBudget)) %>%
      group_by(idCFDA, nameProgram) %>%
      nest(.key = "dataBudgets") %>%
      mutate(hasBudgets = T,
             countYears = dataBudgets %>% map_dbl(nrow)) %>%
      ungroup()

    data <- data %>%
      left_join(df_budgets, by = c("idCFDA", "nameProgram")) %>%
      select(one_of(names(data)), everything())

    data <- data %>%
      mutate(
        slugKey = urlSAM %>% str_remove_all("https://beta.sam.gov/fal/|/view"),
        urlAssistanceAPI = glue("https://beta.sam.gov/api/prod/fac/v1/programs/{slugKey}") %>% as.character()
      )

    data <-
      data %>%
      mutate(nameAgency = nameAgency %>% str_replace_all("\\ , ", "\\, "))

    data <- data %>%
      mutate_if(is.character,
                list(function(x) {
                  case_when(x == "N/A" ~ NA_character_,
                            TRUE ~ x)
                }))

    cols <-
      data %>% select_if(is.character) %>% names()

    sbir_match_cols <-
      cols %>%
      map_dfr(function(x){
        df_matches <-
          data %>% select(x) %>%
          filter(!!sym(x) %>% str_detect("SBIR|STTR|SMALL BUSINESS INNOVAT|SMALL BUSINESS TECHNOLOGY TRANSFER"))
        matches <- nrow(df_matches)
        tibble(column = x, matches)

      })

    match_cols <- sbir_match_cols %>% filter(matches >0)

    df_sbirs <-
      match_cols$column %>%
      map_dfr(function(x) {
        df_matches <-
          data %>% select(x, idCFDA) %>%
          filter(
            !!sym(x) %>% str_detect(
              "SBIR|STTR|SMALL BUSINESS INNOVAT|SMALL BUSINESS TECHNOLOGY TRANSFER"
            )
          ) %>%
          select(idCFDA)
        df_matches
      }) %>%
      distinct() %>%
      mutate(isSBIR = T)

    data <-
      data %>%
      left_join(df_sbirs, by = "idCFDA")

    data <-
      data %>%
      separate(
        idCFDA,
        sep = "\\.",
        remove = F,
        into = c("bureauCFDA", "numberProgram"),
        fill = "right",
        convert = T
      ) %>%
      mutate_at(c("bureauCFDA", "numberProgram"),
                as.integer)

    ## Clean Agency

    ### TO DO -- JSON PARSER and Amount Parsers


    data
  }



# programs ----------------------------------------------------------------

.parse_financial_assistance_program_json <-
  function(url = "https://beta.sam.gov/api/prod/fac/v1/programs/1fd5888d80b24c42b6d95838ca265293") {
    json <-
      url %>% fromJSON()

    data <-
      tibble(item  = unlist(json) %>% names(),
             value = unlist(json) %>% as.character()) %>%
      mutate(slugKey = url %>% str_remove_all("https://beta.sam.gov/api/prod/fac/v1/programs/")) %>%
      filter(!is.na(value)) %>%
      filter(value != "") %>%
      filter(!item == "_links.self.href") %>%
      select(slugKey, everything()) %>%
      mutate(item = item %>% str_remove_all("data."),
             value = str_squish(value)) %>%
      filter(!item %>% str_detect("^status.|^additionalInfo."))

    data
  }

#' Parse financial assistance program urls
#'
#' Parses JSON from financial assistance programs
#' listed in SAM Beta.
#'
#' @param urls
#'
#' @return
#' @export
#'
#' @examples
parse_financial_assistance_program_urls <-
  function(urls = c("https://beta.sam.gov/api/prod/fac/v1/programs/0cb9d7d9f3e7420ca4730a01da4f9e3f",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/183a5de92e964588b7252e951d99e7af",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/20894235cc6649789feaa14b71406638",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/e6a08f2eaa8446ee86d91c77b133d431",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/cd9013559fdf35befc9a3641aec94af7",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/a5d50bfa60fe4b0695d0e0371f220523",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/8f56239bcdbb5245e64be7e84d75a2f3",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/ca81d5c9137b478087ea73f825097e19",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/5aeb34c3ffc34a2799d5322bde5b5098",
                    "https://beta.sam.gov/api/prod/fac/v1/programs/0f138edbb8c54e1b91f4ed434b324316"
  )
  ) {

    .parse_financial_assistance_program_json_safe <-
      possibly(.parse_financial_assistance_program_json, tibble())

    data <-
      urls %>%
      future_map_dfr(function(url) {
        .parse_financial_assistance_program_json_safe(url = url)
      })

    df_cols <-
      distinct(data, item) %>%
      mutate(fields = str_count(item, "\\."))

    df_base <-
      data %>%
      filter(item %in% c(df_cols %>% filter(fields == 0) %>% pull(item)))

    df_base <- df_base %>% spread(item, value)

    df_assistance <-
      df_base %>% select(slugKey, matches("assistanceTypes")) %>%
      gather(typeAssitance, value, -slugKey, na.rm = T) %>%
      select(-typeAssitance) %>%
      rename(codeAssistance = value) %>%
      nest(dataAssistance = c(codeAssistance)) %>%
      mutate(countAssistanceTypes = dataAssistance %>% map_dbl(nrow),
             hasAssistance = T)

    df_related_programs <-
      df_base %>% select(slugKey, matches("relatedPrograms")) %>%
      gather(typeAssitance, value, -slugKey, na.rm = T) %>%
      rename(slugKeyRelatedProgram = value) %>%
      select(-typeAssitance) %>%
      distinct() %>%
      nest(dataRelatedPrograms = c(slugKeyRelatedProgram)) %>%
      mutate(countRelatedPrograms = dataRelatedPrograms %>% map_dbl(nrow),
             hasRelatedPrograms = T)

    df_base <-
      df_base %>% select(-matches("relatedPrograms|assistanceTypes"))

    df_base <-
      df_base %>%
      .munge_grant_names() %>%
      mutate(nameProgramPopular = nameProgramPopular %>% str_remove_all("\\)|\\(")) %>%
      .munge_data() %>%
      mutate(yearFiscal = as.integer(yearFiscal))

    df_nested_cols <-
      df_cols %>% filter(fields > 0) %>%
      separate(
        item,
        into = c("table", "fields"),
        sep = "\\.",
        extra = "merge",
        fill = "right",
        remove = F
      )

    tables <- df_nested_cols$table %>% unique()


    data_nested <-
      tables %>%
      map(function(table_name) {
        items <-
          df_nested_cols %>%
          filter(table == table_name) %>%
          pull(item)
        df <- data %>%
          filter(item %in% items)

        if (table_name  == "authorizations") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("authorizations."))

          df <-
            df %>%
            filter(!value %in% c("\\.", "", "N/A", "n/a")) %>%
            mutate(
              isList = item %>% str_detect("list"),
              item = item %>% str_remove_all("list.")
            ) %>%
            filter(value != ".")

          df <-
            df %>%
            separate(
              item,
              into = c("table", "item"),
              sep = "\\.",
              fill = "left"
            ) %>%
            mutate(
              numberItem = item %>% str_to_lower() %>% str_remove_all("[a-z]") %>% as.integer() %>% coalesce(0L),
              item = item %>% str_remove_all("[0-9]")
            )

          df <-
            df %>%
            mutate(table = case_when(is.na(table)  ~ "act",
                                     TRUE ~ table)) %>%
            unite(item, table, item, sep = "") %>%
            select(slugKey, numberItem, item, value) %>%
            distinct()

          df <-
            df %>%
            group_by(slugKey, numberItem, item) %>%
            summarise(value = value %>% str_c(collapse = " ")) %>%
            ungroup() %>%
            spread(item, value)

          df <-
            df %>%
            .munge_grant_names() %>%
            .munge_data()

          df <-
            df %>%
            nest(
              -slugKey,
            ) %>%
            rename(dataAuthorizations = data)

          return(df)
        }

        if (table_name  == "eligibility") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("eligibility.")) %>%
            mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
            mutate(id = id %>% str_remove_all("\\.")) %>%
            separate(
              item,
              into = c("table", "item"),
              sep  = "\\.",
              fill = "right",
              extra = "merge"
            ) %>%
            mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]") %>% str_remove_all("\\."))

          df_base_r <-
            df %>% filter(id == "") %>%
            unite(item, item, table, sep = "") %>%
            select(-id) %>% spread(item, value)

          df_nested_r <-
            df %>%
            filter(id != "") %>%
            mutate(field = item %>% str_remove_all("[0-9]"))


          df_applicant_types <-
            df_nested_r %>% filter(table == "applicant") %>%
            filter(field == "types") %>%
            select(slugKey, table, idApplicantType = value, id) %>%
            mutate(idApplicantType = as.numeric(idApplicantType)) %>%
            select(slugKey, idApplicantType) %>%
            nest(dataApplicantTypes = c(idApplicantType)) %>%
            mutate(
              hasApplicantTypes = TRUE,
              countApplicantTypes = dataApplicantTypes %>% map_dbl(nrow)
            )
          df_assistance_types <-
            df_nested_r %>% filter(table == "assistanceUsage") %>%
            filter(field == "types") %>%
            select(slugKey, table, idAssistanceUsage = value, id) %>%
            mutate(idAssistanceUsage = as.numeric(idAssistanceUsage)) %>%
            select(slugKey, idAssistanceUsage) %>%
            nest(dataAssistanceUsage = c(idAssistanceUsage)) %>%
            mutate(
              hasAssistanceUsage = TRUE,
              countAssistanceUsage = dataAssistanceUsage %>% map_dbl(nrow)
            )

          df_beneficiary_types <-
            df_nested_r %>% filter(table == "beneficiary") %>%
            filter(field == "types") %>%
            select(slugKey, table, beneficiary = value, id) %>%
            mutate(idBeneficiary = as.numeric(beneficiary)) %>%
            select(slugKey, idBeneficiary) %>%
            nest(dataBeneficiary = c(idBeneficiary)) %>%
            mutate(hasBeneficiary = TRUE,
                   countBeneficiary = dataBeneficiary %>% map_dbl(nrow))



          df <-
            list(df_base_r,
                 df_beneficiary_types,
                 df_assistance_types,
                 df_applicant_types) %>%
            reduce(left_join)

          df <- .munge_grant_names(df)

          df <-
            df %>%
            .munge_data()


          return(df)
        }

        if (table_name == "compliance") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("compliance.|questions.|formula.|types.")) %>%
            mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
            mutate(id = id %>% str_remove_all("\\.")) %>%
            separate(
              item,
              into = c("table", "item"),
              sep  = "\\.",
              fill = "right",
              extra = "merge"
            ) %>%
            mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
            mutate(
              table = case_when(
                table == "audit" ~ "Audit",
                table == "records" ~ "Records",
                table == "documents" ~ "Documents",
                table == "ndMatching" ~ "Matching",
                table == "CFR200Requirements" ~ "CFR200",
                TRUE ~ table
              )
            )

          df_base_r <-
            df %>% filter(id == "") %>%
            unite(item, item, table, sep = "") %>%
            select(-id) %>% spread(item, value)

          df_nested_r <-
            df %>%
            filter(id != "") %>%
            mutate(field = item %>% str_remove_all("[0-9]"))

          df_reports <-
            df_nested_r %>% filter(table == "reports") %>%
            filter(field == "code") %>%
            select(slugKey, table, value, id) %>%
            left_join(
              df_nested_r %>% filter(table == "reports") %>%
                filter(field == "isSelected") %>%
                select(slugKey, table, result = value, id) %>%
                mutate(result = as.logical(result)),
              Joining,
              by = c("slugKey", "table", "id")
            ) %>%
            select(slugKey, value, result) %>%
            spread(value, result)

          df_cfar <-
            df_nested_r %>% filter(table == "CFR200") %>%
            filter(field == "code") %>%
            select(slugKey, table, value, id) %>%
            left_join(
              df_nested_r %>% filter(table == "CFR200") %>%
                filter(field == "isSelected") %>%
                select(slugKey, table, result = value, id) %>%
                mutate(result = as.logical(result)),
              Joining,
              by = c("slugKey", "table", "id")
            ) %>%
            select(slugKey, value, result) %>%
            spread(value, result)


          df_matching <-
            df_nested_r %>% filter(table == "Matching") %>%
            select(slugKey, item, value) %>%
            spread(item, value)

          df <-
            list(df_base_r, df_cfar, df_matching, df_reports) %>%
            reduce(left_join)

          df <- .munge_grant_names(df)

          df <- df %>%
            .munge_data()

          df <- df %>%
            mutate(pctMatching = as.integer(pctMatching) / 100)

          return(df)

        }

        if (table_name == "assistance") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("assistance.")) %>%
            mutate(id = str_to_lower(item) %>% str_remove_all("[a-z]")) %>%
            mutate(id = id %>% str_remove_all("\\.")) %>%
            filter(id == "") %>%
            select(-id) %>%
            spread(item, value) %>%
            .munge_grant_names() %>%
            mutate_if(is.character, list(function(x) {
              x %>% str_remove_all("\\(|\\)")
            })) %>%
            .munge_data()

          return(df)

        }

        if (table_name  == "financial") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("financial."))

          df <-
            df %>%
            filter(!value %in% c("\\.", "", "N/A", "n/a")) %>%
            mutate(
              isList = item %>% str_detect("list"),
              item = item %>% str_remove_all("list.")
            ) %>%
            filter(value != ".") %>%
            separate(
              item,
              into = c("table", "item"),
              sep = "\\.",
              extra = "merge",
              fill = "left"
            ) %>%
            mutate(
              numberItem = item %>% str_to_lower() %>% str_remove_all("[a-z]") %>% as.integer() %>% coalesce(0L),
              item = item %>% str_remove_all("[0-9]")
            ) %>%
            mutate(table = case_when(table %>% is.na() ~ "base",
                                     TRUE ~ table))

          df_base <-
            df_base %>%
            .munge_grant_names() %>%
            .munge_data()

          df_accounts <-
            df %>%
            filter(table == "accounts") %>%
            unite(item, table, item, sep = ".") %>%
            select(-isList) %>%
            spread(item, value) %>%
            .munge_grant_names() %>%
            .munge_data() %>%
            mutate(numberItem = numberItem + 1)


          df_acc <-
            df %>%
            filter(table == "accomplishments") %>%
            filter(item != "isApplicable") %>%
            mutate(item = case_when(
              item == "fiscalYear" ~ "year",
              TRUE ~ "descriptionAccomplishments"
            ))

          df_years_acc <-
            df_acc %>%
            filter(item == "year") %>%
            select(slugKey, year = value) %>%
            mutate(year = as.integer(year)) %>%
            group_by(slugKey, year) %>%
            mutate(numberItem = 1:n() - 1) %>%
            ungroup() %>%
            select(slugKey, numberItem, year) %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_des <- df_acc %>%
            filter(item != "year") %>%
            select(slugKey,  descriptionAccomplishments = value) %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_acc <-
            df_years_acc %>%
            left_join(df_des, by = c("slugKey", "idRow")) %>%
            select(slugKey, numberItem, year, descriptionAccomplishments)

          df_acc <- df_acc %>%
            .munge_data()

          rm(df_des)
          rm(df_years_acc)
          gc()

          df_values <-
            df %>%
            filter(table == "obligations") %>%
            mutate(item = item %>% str_remove_all("values."))

          df_year <-
            df_values %>% filter(item == "year") %>%
            select(slugKey, year = value) %>%
            mutate(year = as.integer(year)) %>%
            group_by(slugKey, year) %>%
            arrange(year) %>%
            mutate(numberItem = 1:n() - 1) %>%
            ungroup() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_estimate <-
            df_values %>%
            filter(item == "estimate") %>%
            select(slugKey, estimate = value) %>%
            mutate(estimate = readr::parse_number(estimate)) %>%
            ungroup() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup() %>%
            filter(!is.na(estimate)) %>%
            rename(amountEstimate = estimate)

          df_obligation_ids <- df_values %>%
            filter(item == "obligationId") %>%
            select(slugKey, idObligation = value) %>%
            ungroup() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_assistance_types <-
            df_values %>%
            filter(item == "assistanceType") %>%
            select(slugKey, codeAssistance = value) %>%
            ungroup() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_recovery <-
            df_values %>%
            filter(item == "isRecoveryAct") %>%
            select(slugKey, isRecoveryAct = value) %>%
            mutate(isRecoveryAct = as.logical(isRecoveryAct)) %>%
            ungroup() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          df_descriptions <- df_values %>%
            filter(item %in% c("explanation", "description")) %>%
            select(slugKey, value) %>%
            distinct() %>%
            group_by(slugKey) %>%
            mutate(idRow = 1:n()) %>%
            ungroup()

          if (nrow(df_values %>%
                   filter(item == "actual")) > 0 ) {
            df_actual <- df_values %>%
              filter(item == "actual") %>%
              select(slugKey, actual = value) %>%
              mutate(actual = readr::parse_number(actual)) %>%
              ungroup() %>%
              group_by(slugKey) %>%
              mutate(idRow = 1:n()) %>%
              ungroup() %>%
              filter(!is.na(actual)) %>%
              rename(amountActual = actual)

          }

          df <- list(
            df_year,
            df_acc,
            df_obligation_ids,
            df_assistance_types,
            df_recovery,
            df_estimate
          ) %>% reduce(left_join)

          if ('df_actual' %>% exists()) {
            df <- df %>% left_join(df_actual)
          }

          df <-
            df %>%
            select(-idRow) %>%
            nest(
              -slugKey
            ) %>%
            rename(dataBudgets = data) %>%
            mutate(
              hasFinancialData = T,
              countYearsFinancialdata = dataBudgets %>% map_dbl(nrow)
            )


          df

          return(df)
        }

        if (table_name == "projects") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("projects.|list."))

          df_base_r <-
            df %>% filter(item == "isApplicable") %>% spread(item, value) %>% mutate(isApplicable = as.logical(isApplicable))

          df_nest_r <- df %>% filter(item != "isApplicable") %>%
            mutate(item = item %>% str_remove_all("[0-9]")) %>%
            filter(item == "fiscalYear") %>%
            select(slugKey, yearFiscal = value) %>%
            left_join(
              df %>% filter(item != "isApplicable") %>%
                mutate(item = item %>% str_remove_all("[0-9]")) %>%
                filter(item != "fiscalYear") %>%
                select(slugKey, descriptionProjects = value),
              by = "slugKey"
            ) %>%
            mutate(yearFiscal = as.integer(yearFiscal)) %>%
            .munge_data()

          df_nest_r <- df_nest_r %>%
            nest(dataProjectsFunded = c(yearFiscal, descriptionProjects)) %>%
            mutate(countFundedProjectDescriptions = dataProjectsFunded %>% map_dbl(nrow)) %>%
            select(slugKey,
                   countFundedProjectDescriptions,
                   dataProjectsFunded)

          df <- df_base_r %>%
            left_join(df_nest_r, by = "slugKey") %>%
            rename(hasFundedProjectDescriptions = isApplicable)
          return(df)
        }

        if (table_name == "contacts") {
          df <- df %>%
            mutate(item = item %>% str_remove_all("contacts.local.|contacts.headquarters."))

          df <- df %>%
            mutate(item = item %>% str_remove_all("[0-9]"))

          df <- df %>%
            group_by(slugKey, item) %>% dplyr::slice(1) %>%
            ungroup() %>%
            spread(item, value) %>%
            .munge_grant_names() %>%
            rename(descriptionContact = descriptionProgram,
                   titleContact = nameProgram) %>%
            .munge_data(clean_address = F) %>%
            mutate(countryContact = case_when(countryContact == "US" ~ "USA",
                                              TRUE ~ countryContact))

          return(df)

        }
      })

    data_nested <- data_nested %>% reduce(left_join)

    data <-
      df_base %>%
      left_join(data_nested, by = "slugKey") %>%
      left_join(df_assistance, by = "slugKey") %>%
      left_join(df_related_programs, by = "slugKey") %>%
      select(names(df_base), everything()) %>%
      separate(
        idCFDA,
        sep = "\\.",
        remove = F,
        into = c("bureauCFDA", "numberProgram"),
        fill = "right",
        convert = T
      ) %>%
      mutate_at(c("bureauCFDA", "numberProgram"),
                as.integer)

    data

  }



#' Bulk download of all CFDA programs
#'
#' Returns all active U.S. financial assistance
#' programs and all of the detailed information
#' about the program from the SAM api
#'
#' @param return_message if \code{TRUE} returns a message
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' \dontrun{
#' bulk_us_financial_assistance_programs()
#' }
bulk_us_financial_assistance_programs <-
  function(return_message = T) {
    df_grants <- us_financial_assistance_programs()

    if (return_message) {
      glue("Acquiring detailed data for all {nrow(df_grants)} active U.S. financial assistance programs") %>% message()
    }

    data <-
      parse_financial_assistance_program_urls(urls = df_grants$urlAssistanceAPI)

    data <- df_grants %>%
      select(
        slugKey,
        nameAgency,
        slugDepartment,
        idAccountsTreasury,
        bureauCFDA,
        idCFDA,
        isRecoveryAct,
        nameAddressOfficeFunding,
        dateProgramStarted,
        descriptionRegulations
      ) %>%
      left_join(data %>%       select(-one_of(c(
        "idCFDA", "bureauCFDA"
      ))), by = "slugKey")

    if (return_message) {
      actual <-
        data %>% select(slugKey, dataBudgets) %>%
        unnest_legacy() %>%
        pull(amountActual) %>%
        sum(na.rm = T) %>%
        currency(digits = 0)

      glue("Acquired {crayon::green(actual)} in actual financial assistance spending") %>%
        message()

    }

    data
  }

.generate_ast_url <-
  function() {
    current_year <- Sys.Date() %>% year()
    month_no <- month(Sys.Date())

    month_name_slug <- month.name[month_no] %>% substr(1,3)
    month_slug <-
      case_when(
        month_no %>% nchar() == 1 ~ glue("0{month_no}") %>% as.character(),
        TRUE ~ as.character(month_no)
      )

    url <- glue(
      "https://beta.sam.gov/api/prod/fileextractservices/v1/api/listfiles?random=1580754033930&domain=Assistance%20Listings/grantsgov/{current_year}/{month_slug}-{month_name_slug}"
    ) %>%
      as.character()

    json_data <- fromJSON(url, simplifyDataFrame = T)

    data <- json_data[[1]][[1]][1:4] %>% as_tibble()
    urls <- json_data[[1]]$customS3ObjectSummaryList[["_links"]]$self$href
    data <-
      data %>%
      select(2,4) %>%
      setNames(c("dateData", "slugAssistance")) %>%
      mutate(dateData = mdy(dateData),
             urlData = urls %>% map_chr(URLencode))

    data
  }

.parse_summary_assistance <-
  function(url = "https://s3.amazonaws.com/falextracts/Assistance%20Listings/grantsgov/2020/02-Feb/AssistanceListings_GrantsGov_PUBLIC_DAILY_20200202.csv") {
    data <-
      fread(url) %>%
      as_tibble()

    data <- data %>%
      set_names(
        c(
          "nameProgram",
          "idCFDA",
          "nameAgency",
          "dateProgramStarted",
          "slugDepartment",
          "urlAssistanceAPI"
        )
      ) %>%
      mutate(dateProgramStarted = mdy(dateProgramStarted))

    data <- data %>%
      separate(
        idCFDA,
        sep = "\\.",
        remove = F,
        into = c("bureauCFDA", "numberProgram"),
        fill = "right",
        convert = T
      )

    data <-
      data %>%
      .munge_data() %>%
      mutate_if(is.character, list(function(x) {
        ifelse(
          x %in% c(
            "",
            "N/A",
            "{}",
            "NONE",
            "NOT APPLICABLE.",
            "NO DATA AVAILABLE.",
            "NO DATA AVAILABLE",
            "\\.",
            "NOT APPLICABLE"
          ),
          NA_character_,
          x
        )
      }))

    data
  }

#' US Financial Assistance Programs
#'
#' Brief list of the most recent CFDA
#' programs
#'
#' @param include_program_details if \code{TRUE} includes program
#' details
#'
#' @return
#' @export
#'
#' @examples
us_financial_assistance_programs_summary <-
  function(include_program_details = F) {
    url <- .generate_ast_url() %>%
      filter(dateData == max(dateData)) %>%
      pull(urlData)

    data <-
      .parse_summary_assistance(url = url)

    data <-
      data %>%
      mutate(
        urlSAM = urlAssistanceAPI,
        slugKey = urlAssistanceAPI %>% str_remove_all("https://beta.sam.gov/fal/|/view"),
        urlAssistanceAPI = glue("https://beta.sam.gov/api/prod/fac/v1/programs/{slugKey}") %>% as.character()
      ) %>%
      select(-slugKey)

    if (include_program_details) {
      df_details <-
        data$urlAssistanceAPI %>%
        parse_financial_assistance_program_urls()

      data <- data %>%
        left_join(
          df_details %>%
            mutate(idCFDA = as.numeric(idCFDA),)
          ,
          by = c("nameProgram", "idCFDA", "bureauCFDA", "numberProgram")
        )
    }


    data
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.