R/treasury.R

Defines functions dictionary_treasury_fiscal_schema dictionary_treasury_fiscal_schema_49 .dl_fast .fast_url .parse_page

Documented in dictionary_treasury_fiscal_schema dictionary_treasury_fiscal_schema_49

# https://fiscal.treasury.gov/reference-guidance/fast-book/
# Split raw FAST Book page text into a character vector of cleaned lines.
#
# `page_text` is a character vector of text (presumably one string per PDF
# page, as returned by `pdftools::pdf_text()` -- confirm against the caller).
# Known header/footer boilerplate is removed, the text is split on newlines,
# every line is trimmed, and empty lines are dropped. Returns a character
# vector with one element per remaining line.
.parse_page <-
  function(page_text) {
    # The `.` after "No" is escaped so it only matches a literal period
    # instead of acting as the regex "any character" wildcard.
    boilerplate <-
      "FAST Book — Agency Identifier Codes|Bulletin No\\. 2019-07"

    page_text %>%
      str_remove_all(boilerplate) %>%
      str_split("\n") %>%
      flatten_chr() %>%
      str_trim() %>%
      # After trimming, whitespace-only lines are already "", so a single
      # comparison is enough; `%in%` keeps NA entries instead of erroring.
      discard(function(x) x %in% "")
  }
# Locate the FAST Book PDF linked from the Fiscal Service FAST Book page.
#
# Reads the landing page, collects the `href` of every node matched by the
# `.secondary a` CSS selector, keeps the hrefs containing "pdf", and returns
# the first one prefixed with the site root as a length-one character vector.
# Stops with an error when no such link is present.
.fast_url <-
  function() {
    site_root <- "https://fiscal.treasury.gov"

    landing_page <-
      "https://fiscal.treasury.gov/reference-guidance/fast-book/" %>%
      read_html()

    hrefs <- landing_page %>%
      html_nodes(".secondary a") %>%
      html_attr("href")

    # `which()` drops the NA that str_detect() yields for anchors without an
    # href; plain logical subsetting would carry those through as NA entries.
    pdf_hrefs <- hrefs[which(str_detect(hrefs, "pdf"))]

    if (length(pdf_hrefs) == 0L) {
      stop("No PDF link found on the FAST Book page.", call. = FALSE)
    }

    # Return exactly one URL: concatenating several hrefs into one string
    # would not produce a usable address.
    # NOTE(review): assumes the hrefs are site-relative paths -- confirm.
    str_c(site_root, pdf_hrefs[[1]])
  }

# Read the text of the FAST Book PDF.
#
# Resolves the PDF location with `.fast_url()` and hands it to
# `pdftools::pdf_text()`. The extracted text is returned invisibly.
.dl_fast <-
  function() {
    fast_book_url <- .fast_url()
    invisible(pdftools::pdf_text(pdf = fast_book_url))
  }


#' Bureau of the Fiscal Service Data Dictionary from V49
#'
#' Downloads the V49 (`FSv49/TOPv10`) full data dictionary page from
#' fiscal.treasury.gov and parses the third HTML table on that page.
#'
#' @return A `tibble` with the columns `typeSchema`, `nameField`, `role` and
#'   `description`. Empty strings in character columns are converted to `NA`.
#' @export
#'
#' @examples
#' \dontrun{
#' # Requires network access
#' dictionary_treasury_fiscal_schema_49()
#' }
dictionary_treasury_fiscal_schema_49 <-
  function() {
    dictionary_url <-
      "https://fiscal.treasury.gov/data/FSv49/TOPv10/Library/FullDataDictionary.html"

    # `fill` is deliberately not passed to html_table(): rvest >= 1.0.0
    # deprecates the argument, and earlier versions already default to FALSE.
    tables <- dictionary_url %>%
      read_html() %>%
      html_table()

    # Fail with a readable message if the page layout changed, rather than a
    # bare "subscript out of bounds".
    if (length(tables) < 3L) {
      stop(
        "Expected at least 3 tables at ", dictionary_url,
        " but found ", length(tables), ".",
        call. = FALSE
      )
    }

    # Turn "" into NA while leaving existing NA values untouched.
    blank_to_na <- function(x) {
      x[x %in% ""] <- NA_character_
      x
    }

    tables[[3]] %>%
      as_tibble() %>%
      # NOTE(review): `.remove_na()` is defined outside this block; it
      # presumably drops all-NA columns so exactly four remain -- confirm.
      .remove_na() %>%
      setNames(c("typeSchema", "nameField", "role", "description")) %>%
      mutate_if(is.character, blank_to_na)
  }



#' Treasury Fiscal Book Data Dictionary
#'
#' Data architecture dictionary for the Bureau of the Fiscal Service
#' Enterprise Data Architecture, read from the third HTML table of the
#' `FSv501/CIRv201` full data dictionary page on fiscal.treasury.gov.
#'
#' @return A `tibble` with one row per dictionary entry and the columns
#'   `status`, `typeSchema`, `nameField`, `description`, `dataElements`
#'   (a list column of tibbles holding each comma-separated `usedIn` value
#'   and its `urlSchema`) and `countElements` (the number of rows in
#'   `dataElements`).
#' @export
#'
#' @examples
#' \dontrun{
#' # Requires network access
#' dictionary_treasury_fiscal_schema()
#' }
dictionary_treasury_fiscal_schema <-
  function() {
    base_url <- "https://fiscal.treasury.gov/data/FSv501/CIRv201"
    dictionary_url <- paste0(base_url, "/Library/FullDataDictionary.html")

    # `fill` is deliberately not passed to html_table(): rvest >= 1.0.0
    # deprecates the argument, and earlier versions already default to FALSE.
    tables <- dictionary_url %>%
      read_html() %>%
      html_table()

    # Fail with a readable message if the page layout changed, rather than a
    # bare "subscript out of bounds".
    if (length(tables) < 3L) {
      stop(
        "Expected at least 3 tables at ", dictionary_url,
        " but found ", length(tables), ".",
        call. = FALSE
      )
    }

    dictionary <-
      tables[[3]] %>%
      as_tibble() %>%
      setNames(
        c("status", "typeSchema", "nameField", "usedIn", "description")
      ) %>%
      # row_number() stays valid for a zero-row table, where 1:n() would
      # evaluate to c(1, 0) and fail.
      mutate(idElement = row_number()) %>%
      select(idElement, everything())

    # One row per (entry, usedIn value), nested back to one row per entry.
    used_in <-
      dictionary %>%
      select(idElement, usedIn) %>%
      separate_rows("usedIn", sep = "\\,") %>%
      mutate(
        usedIn = str_trim(usedIn),
        # NOTE(review): no separator is inserted between the base URL and
        # `usedIn`; assumes each value carries its own leading path
        # separator -- confirm against the live table.
        urlSchema = paste0(base_url, usedIn)
      ) %>%
      group_by(idElement) %>%
      nest() %>%
      ungroup() %>%
      # `data` here is the list column created by nest().
      rename(dataElements = data) %>%
      mutate(countElements = map_dbl(dataElements, nrow))

    # idElement is only a join key, so it is dropped from the result.
    dictionary %>%
      select(-usedIn) %>%
      left_join(used_in, by = "idElement") %>%
      select(-idElement)
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.