R/dtic.R

Defines functions .parse_coi_page .dtic_coi .dictionary_dtic_names .munge_dtic_names .fix_dtic_amount .fix_dtic_dates .decode_program_element

# program elements --------------------------------------------------------



.decode_program_element <-
  function(x = "0603502N") {
    dod_program <-
      x %>% substr(1,2)
    rd_category <-
      x %>% substr(3,4)

    if (nchar(x) >= 5) {
      equip_activity <-
        x %>% substr(5,5)
    } else {
      equip_activity <- NA
    }

    if (nchar(x) >= 7) {
      serial_no <-
        x %>% substr(6,7)
    } else {
      serial_no <- NA
    }

    if (nchar(x) >= 8) {
      service <- x %>% substr(8, nchar(x))
    } else {
      service <- NA
    }


    tibble(
      codeProgramElement = x,
      idDODProgram = dod_program,
      idRDCategory = rd_category,
      idEquipmentCategory = equip_activity,
      idSerial = serial_no,
      codeService = service
    )
  }

### http://acqnotes.com/acqnote/acquisitions/program-element-pe


# utils -------------------------------------------------------------------
.fix_dtic_dates <-
  function(data) {
    date_names <- data %>% select(matches("date")) %>% names()

    if (length(date_names) == 0) {
      return(data)
    }

    data %>%
      mutate_at(date_names,
                list(function(x){
                  glue("{x}-01") %>% as.character() %>% ymd()
                }))
  }

.fix_dtic_amount <-
  function(data) {
    amount_names <- data %>% select(matches("amount")) %>% names()

    if (length(date_names) == 0) {
      return(data)
    }

    data %>%
      mutate_at(amount_names,
                list(function(x){
                  parse_number(x) * 1000000
                }))
  }

.munge_dtic_names <-
  function(data) {
    dict_names <- .dictionary_dtic_names()

    grant_names <-
      names(data)

    actual_names <-
      grant_names %>%
      map_chr(function(name) {
        df_row <-
          dict_names %>% filter(nameDTIC == name)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {name}") %>% message()
          return(name)
        }

        df_row$nameActual
      })

    data %>%
      set_names(actual_names)
  }

.dictionary_dtic_names <-
  function() {
    tibble(
      nameDTIC =  c(
        "recordNumber",
        "title",
        "pdfUrl",
        "xmlUrl",
        "snippet",
        "orgName",
        "budgetActivity",
        "programElementNum",
        "programElementTitle",
        "appropriationNum",
        "cache",
        "fiscalYear",
        "orgCode",
        "fileName",
        "cacheUrl",
        "cyear",
        "pyear",
        "docTypeIndexValue",
        "orgUrlPart",
        "AppropriationNumber",
        "AppropriationTitle",
        "BudgetActivityNumber",
        "BudgetActivityTitle",
        "BudgetCycle",
        "BudgetSubActivityNumber",
        "BudgetSubActivityTitle",
        "BudgetYear",
        "Code",
        "Description",
        "Justification",
        "LineItemNumber",
        "LineItemTitle",
        "P1LineNumber",
        "ServiceAgencyName",
        "SubmissionDate",
        "urlXML",
        "p40AItemScheduleSetting",
        "ProgramElementNumber",

        "AppropriationCode",
        "AppropriationName",
        "ProgramElementMissionDescription",
        "ProgramElementNote",
        "ProgramElementTitle",
        "R1LineNumber",
        "codeblistProgramElementNumber",
        "documentassemblyoptionsp40AItemScheduleSetting",
        "changesummarySummaryExplanation",
        "amountCost",
        "amountYearCurrentTotal",
        "amountYearFiveTotal",
        "amountYearFourTotal",
        "amountYearOneBase",
        "amountYearOneTotal",
        "amountYearPriorTotal",
        "amountYearsPriorAllTotal",
        "amountYearThreeTotal",
        "amountYearTwoTotal",
        "otherrelatedlistProgramElementNumber",
        "R1LineItemNumber",
        "CostType",
        "documentassemblyoptionssuppressP40As",
        "amountYearOneOCO"
      ),
      nameActual = c(
        "numberRecord",
        "nameBudgetLineItem",
        "urlPDF",
        "urlXML",
        "slugSnippet",
        "nameOrganization",
        "nameBudgetActivity",
        "codeProgramElement",
        "nameProgramElement",
        "slugAppropriation",
        "slugCache",
        "yearBudget",
        "slugOrganization",
        "nameFile",
        "urlCache",
        "amountItem",
        "amountItemPriorYear",
        "nameDODBudgetGroup",
        "slugOrganizationFile",

        "codeAppropriation",
        "nameAppropriation",
        "slugBudgetActivity",
        "nameBudgetActivity",
        "slugBudgetCycle",
        "slugBudgetSubActivity",
        "nameBudgetSubActivity",
        "yearBudget",
        "codeBudget",
        "descriptionAppropriation",
        "descriptionJustification",
        "codeBudgetLineItem",
        "nameBudgetLineItem",
        "slugP1LineNumber",
        "nameAgency",
        "dateSubmission",
        "urlXML",
        "slugp40AItemScheduleSetting",
        "codeProgramElement",

        "codeAppropriation",
        "nameAppropriation",
        "descriptionProgramElement",
        "noteProgramElement",
        "titleProgramElement",
        "numberLineR1",
        "codeProgramElement",
        "typeP40AItemScheduleSetting",
        "descriptionChanges",
        "amountCost",
        "amountYearCurrentTotal",
        "amountYearFiveTotal",
        "amountYearFourTotal",
        "amountYearOneBase",
        "amountYearOneTotal",
        "amountYearPriorTotal",
        "amountYearsPriorAllTotal",
        "amountYearThreeTotal",
        "amountYearTwoTotal",
        "codeProgramElementRelated",
        "numberLineR1",
        "typeCost",
        "removeSuppressP40",
        "amountYearOneOCO"
      )
    )
  }



# communities_of_interest -------------------------------------------------

### https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/


.dtic_coi <-
  function() {
    page <- read_html("https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/")

    communities <- page %>% html_nodes("h5") %>% html_text() %>% str_squish()
    descriptions <-
      page %>% html_nodes(".textwidget") %>% html_text() %>% str_squish()
    descriptions <- descriptions[2:length(descriptions)] %>%
      discard(function(x) {
        x %in% c(
          "",
          "Enter Search Term(s): //< ![CDATA[ var usasearch_config = { siteHandle:\"chieftechnologist\" }; var script = document.createElement(\"script\"); script.type = \"text/javascript\"; script.src = \"//search.usa.gov/javascripts/remote.loader.js\"; document.getElementsByTagName(\"head\")[0].appendChild(script); //]]>"
        )
      })

    descriptions <-
      descriptions %>% str_remove_all(glue("^{communities}") %>% str_c(collapse = "|")) %>%
      str_remove_all("^COUNTER-IED|^COUNTER-WMD|^ENGINEERED RESILIENT SYSTEMS") %>%
      str_remove_all("ALUMNI COI|S&T|EMS") %>%
      str_replace_all("\\)|\\(", "") %>%
      str_squish()

    descriptions <-
      case_when(descriptions == "Webpage is under construction." ~ NA_character_,
                TRUE ~ descriptions)

    data <-
      tibble(nameCommunityOfInterest = communities,
           descriptionCommunityOfInterest = descriptions)

    links <- page %>%
      html_nodes("a") %>%
      html_attr("href")

    links <-
      links[links %>% str_detect("https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/")] %>%
      unique() %>%
      discard(list(function(x){
        is.na(x)
      }))

    data <-
      data %>%
      mutate(isAlumni = nameCommunityOfInterest %>% str_detect("ALUMNI")) %>%
      separate(
        nameCommunityOfInterest,
        into = c("nameCommunityOfInterest",
                 "acronymCommunityOfInterest"),
        sep = "\\(",
        extra = "merge",
        fill = "right"
      ) %>%
      select(-acronymCommunityOfInterest) %>%
      select(isAlumni, everything()) %>%
      mutate(hasDescription = !is.na(descriptionCommunityOfInterest))

    df_links <- tibble(urlCOI = links) %>%
      mutate(
        slug = urlCOI %>% str_remove_all(
          "https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/"
        )
      ) %>%
      filter(slug != "") %>%
      mutate(slug = slug %>% str_remove_all("/$"))

    data <-
      data %>%
      mutate_if(is.character, str_squish)


    data <- data %>%
      mutate(
        slug = case_when(
          nameCommunityOfInterest == "ADVANCED ELECTRONICS" ~  "advanced-electronics",
          nameCommunityOfInterest == "AIR PLATFORMS" ~ "air-platforms",
          nameCommunityOfInterest == "AUTONOMY" ~ "autonomy",
          nameCommunityOfInterest == "BIOMEDICAL ASBREM" ~ "biomedical-asbrem",
          nameCommunityOfInterest == "C4I" ~ "c4i_coi",
          nameCommunityOfInterest == "COUNTER-IED" ~ "counter-improvised-explosive-devices-c-ied",
          nameCommunityOfInterest == "COUNTER-WMD" ~ "counter-weapons-of-mass-destruction-c-wmd",
          nameCommunityOfInterest == "CYBER" ~ "cyber",
          nameCommunityOfInterest == "ELECTRONIC WARFARE" ~ "electronic-warfare",
          nameCommunityOfInterest == "ENERGY AND POWER TECHNOLOGIES" ~ "energy-and-power-ep-technologies",
          nameCommunityOfInterest == "ENGINEERED RESILIENT SYSTEMS" ~ "engineered-resilient-systems",
          nameCommunityOfInterest == "GROUND AND SEA PLATFORMS" ~  "ground-and-sea-platforms-gsp",
          nameCommunityOfInterest == "HUMAN SYSTEMS" ~ "human-systems",
          nameCommunityOfInterest == "MATERIALS AND MANUFACTURING PROCESSES" ~ "materials-manufacturing-processes-mmp",
          nameCommunityOfInterest == "SENSORS" ~ "sensors",
          nameCommunityOfInterest == "SPACE" ~ "space"

        )
      )

    data <- data %>%
      left_join(df_links, by = "slug") %>%
      select(-slug)

    data
  }

.parse_coi_page <-
  function(url = "https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/air-platforms/") {
    page <- read_html(url)
    text <- page %>% html_nodes('.sow-accordion-title, p, #content li') %>% html_text() %>% str_squish()
    text <- text[!text %>% str_detect("CDATA")]

    data <- tibble(urlCOI = url, textCOI = text)
    has_links <- page %>% html_nodes("#content a") %>% length() > 0

    data <-
      data %>%
      filter(textCOI != "")

    if (has_links) {
      nameFile <-
        page %>% html_nodes("#content a") %>% html_text()

      urlFile <-
        page %>% html_nodes("#content a") %>% html_attr("href") %>%
        str_c("https://defenseinnovationmarketplace.dtic.mil",.)
      data <-
        data %>%
        filter(!textCOI %in% c(nameFile, "Communities of Interest")) %>%
        filter(!textCOI %>% str_detect("PDF"))

      data <- data %>%
        filter(!textCOI %in% c(nameFile, "Communities of Interest")) %>%
        filter(!textCOI %>% str_detect("PDF")) %>%
        group_by(urlCOI) %>%
        summarise(textCOI = textCOI %>% str_c(collapse = "\n")) %>%
        ungroup()

      df_urls <- tibble(nameFile, urlFile)
      data <- data %>%
        mutate(dataResearch = list(df_urls))
    } else {
      data <-
        data %>%
        filter(!textCOI %in% c(nameFile, "Communities of Interest")) %>%
        filter(!textCOI %>% str_detect("PDF")) %>%
        group_by(urlCOI) %>%
        summarise(textCOI = textCOI %>% str_c(collapse = "\n")) %>%
        ungroup()
    }

    data
  }

#' Department of Defense Communities of Interest
#'
#' Returns DTIC hosted information about the Defense Innovation Marketplace
#'
#' @param return_message if \code{TRUE} returns information about link parsing
#'
#' @return
#' @export
#'
#' @examples
dtic_communities_of_interest <-
  function(snake_names = T,
           return_message = T) {
    options(warn =  - 1)
    data <- .dtic_coi()

    urls <-
      data %>% filter(!is.na(urlCOI)) %>%
      pull(urlCOI)

    df_text <- urls %>%
      map_dfr(function(x) {
        if (return_message) {
          glue("Parsing {x}") %>% message()
        }
        .parse_coi_page(url = x)
      })

    df_text <-
      df_text %>%
      mutate(hasResearchLinks = dataResearch %>% map_dbl(length) > 0)

    data <-
      data %>%
      left_join(df_text, by = "urlCOI") %>%
      mutate(hasResearchLinks = hasResearchLinks %>% coalesce(F))

    data <-
      data %>%
      munge_data(snake_names = snake_names)


    data
  }

#' DTIC Community of Interest Taxonomy
#'
#' Acquires and OCRs information about
#' the DTIC Communities of interest
#'
#' @return tibble
#' \url{https://defenseinnovationmarketplace.dtic.mil/communities-of-interest/}
#' @export
#' @family dtic
#'
#' @examples
#' dtic_communities_of_interest_taxonomy()
dtic_communities_of_interest_taxonomy <-
  function(snake_names = T) {
    urlDTIC <- "https://defenseinnovationmarketplace.dtic.mil/wp-content/uploads/2018/02/COI_Tier1_Taxonomy_7March2016.pdf"
    df_metadata <- pdftools::pdf_info(pdf = urlDTIC) %>% flatten_df()
    df_metadata <- df_metadata %>%
      mutate_if(is.character,
                list(function(x) {
                  case_when(x == "" ~ NA_character_,
                            TRUE ~ x)
                })) %>%
      .remove_na()

    text <-
      pdftools::pdf_text(pdf = urlDTIC) %>%
      str_split("\n") %>%
      flatten_chr() %>%
      str_squish() %>%
      discard(function(x) {
        x == ""
      }) %>%
      discard(function(x) {
        x %>% str_detect("Distribution Statement")
      })

    data <-
      tibble(text) %>%
      mutate(row = 1:n())

    df_parents <- data %>%
      filter(text %>% str_detect("^[0-9][.]|^[0-9][0-9][.]")) %>%
      rename(coi = text) %>%
      separate(
        coi,
        into = c("numberCommunityOfInterest", "nameCommunityOfInterest"),
        sep = "\\. "
      ) %>%
      mutate(
        numberCommunityOfInterest = as.numeric(numberCommunityOfInterest),
        nameCommunityOfInterest =
          case_when(
            nameCommunityOfInterest == "Energy and Power (E&P) Technologies" ~ "Energy and Power Technologies (E&P)",
            TRUE ~ nameCommunityOfInterest
          )
      )

    data <-
      data %>%
      left_join(df_parents, by = "row") %>%
      fill(numberCommunityOfInterest,
           nameCommunityOfInterest) %>%
      filter(!text %>% str_detect("^[0-9][.]|^[0-9][0-9][.]")) %>%
      filter(!is.na(numberCommunityOfInterest)) %>%
      separate(
        nameCommunityOfInterest,
        into = c("nameCommunityOfInterest", "acronymnCommunityOfInterest"),
        sep = "\\(",
        extra = 'merge',
        fill = 'right'
      ) %>%
      mutate_if(is.character, list(function(x) {
        x %>% str_squish() %>% str_remove_all("\\)") %>% str_to_upper()
      }))

    data <- data %>%
      group_by(numberCommunityOfInterest,
               nameCommunityOfInterest,
               acronymnCommunityOfInterest) %>%
      summarise(textCOI = text %>% str_c(collapse = "\n")) %>%
      ungroup()

    data <-
      data %>%
      select(one_of(names(df_parents %>% select(-row))), everything()) %>%
      mutate(urlDTIC)

    df_metadata <-
      df_metadata %>%
      mutate(urlDTIC)

    data <-
      data %>%
      left_join(df_metadata, by = "urlDTIC") %>%
      select(one_of(names(df_metadata)), everything()) %>%
      munge_data(snake_names = snake_names)

    data
  }


# entities ----------------------------------------------------------------


.parse_name_entity <-
  function(text) {
    parts <-
      text %>% str_split("\\(|\\)|\\*|\\ ") %>% flatten_chr() %>% discard(function(x) {
        x == ""
      })

    data <-
      tibble(word = parts) %>%
      mutate(idWord = 1:n())

    entity <-
      data %>%
      dplyr::slice(1:(nrow(data) - 2)) %>%
      pull(word) %>%
      str_c(collapse = " ")
    loc <- (nrow(data) - 1):nrow(data)

    location <- data %>%
      dplyr::slice(loc) %>%
      pull(word) %>%
      str_c(collapse = ", ")

    tibble(nameEntity = entity, locationEntity = location)

  }

#' DTIC Entities
#'
#' List of DTIC contributing
#' entities and their organizational
#' structure.
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dtic_entites()
dtic_entites <-
  function() {
    data <-
      read_delim(
        "https://discover.dtic.mil/wp-content/uploads/2019/09/CS/corporate.txt",
        delim = "\\: ",
        col_names = F
      )

    data <-
      data %>% dplyr::slice(5:nrow(data)) %>%
      filter(X1 != "------ ") %>%
      separate(
        X1,
        "\\: ",
        into = c("X1", "X2", "X3", "X4", "X5"),
        fill = "right",
        extra = "merge"
      ) %>%
      mutate_all(str_squish) %>%
      setNames(
        c(
          "idSource",
          "nameEntity",
          "nameParentMaster",
          "nameParents",
          "nameSuborganizations"
        )
      ) %>%
      mutate_all(list(function(x) {
        ifelse(x == "", NA_character_, x)
      })) %>%
      mutate(idSource = as.numeric(idSource))

    df_suborginzations <-
      data %>%
      select(idSource, nameSuborganizations) %>%
      separate_rows(nameSuborganizations, sep = "\\|") %>%
      mutate(suborganizationsEntity = nameSuborganizations %>% str_squish()) %>%
      mutate_all(list(function(x) {
        ifelse(x == "", NA_character_, x)
      })) %>%
      filter(!is.na(nameSuborganizations)) %>%
      group_by(idSource) %>%
      summarise(
        countSubsidiaries = n(),
        nameSuborganizations = nameSuborganizations %>% str_c(collapse = "|")
      ) %>%
      ungroup()

    data <-
      data %>%
      select(-nameSuborganizations) %>%
      left_join(df_suborginzations, by = "idSource") %>%
      mutate(countSubsidiaries = ifelse(is.na(countSubsidiaries), 0 , countSubsidiaries))

    rm(df_suborginzations)


    df_parents <-
      data %>%
      filter(!is.na(nameParents)) %>%
      select(idSource, nameParents) %>%
      separate_rows(nameParents, sep = "\\|") %>%
      mutate_all(list(function(x) {
        ifelse(x == "", NA_character_, x)
      })) %>%
      filter(!is.na(nameParents)) %>%
      group_by(idSource) %>%
      summarise(countParents = n(),
                nameParents = nameParents %>% str_c(collapse = "|")) %>%
      ungroup() %>%
      mutate_if(is.numeric, list(function(x) {
        ifelse(is.na(x), 0, x)
      }))

    data <- data %>%
      select(-nameParents) %>%
      left_join(df_parents, by = "idSource") %>%
      select(one_of(names(df_parents)), everything()) %>%
      select(idSource, nameEntity, everything())

    data
  }

#' DTIC Keyword Thesaurus
#'
#' Returns all the terms
#' in the DTIC thesaurus and their
#' related parents, military uses cases
#' and closely related words
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
#' dtic_thesaurus()
dtic_thesaurus <-
  memoise::memoise(function() {
    values <-
      "https://discover.dtic.mil/wp-content/uploads/thesaurus/thesaurus.txt" %>% read_lines()
    values <-
      values %>% discard(function(x) {
        x == ""
      })

    data <-
      tibble(values) %>%
      mutate(idRow = 1:n()) %>%
      mutate(isSubField = values %>% str_detect("  ")) %>%
      select(idRow, everything())

    df_parents <-
      data %>% filter(!isSubField) %>%
      rename(parent = idRow) %>%
      select(-isSubField) %>%
      rename(termParent = values)

    data <-
      data %>%
      filter(isSubField) %>%
      mutate(parent = idRow - 1) %>%
      rename(typeTerm = values) %>%
      mutate(typeTerm = str_trim(typeTerm)) %>%
      separate(
        typeTerm,
        into = c("slugDescriptor", "nameTerm"),
        fill = "right",
        extra = "merge",
        sep = "\\  "
      ) %>%
      left_join(df_parents, by = "parent") %>%
      fill(termParent) %>%
      select(termParent, slugDescriptor, nameTerm)

    data <- data %>%
      mutate(
        nameDescriptor = case_when(
          slugDescriptor == "UF" ~ "relatedUsedFor",
          slugDescriptor == "NT" ~ "relatedTermNear",
          slugDescriptor == "BT" ~ "relatedBroadTerm",
          slugDescriptor == "RT" ~ "relatedTerm",
        )
      ) %>%
      select(termParent, nameDescriptor, slugDescriptor, everything())

    data
  })




# search_engine -----------------------------------------------------------

.generate_dtic_search <- function(term = 'China', quote = T) {
  t <- term
  if (quote) {
    term <- glue('"{term}"')
  }

  slug_term <- term %>% URLencode()
  url <-
    glue("https://discover.dtic.mil/results/?q={slug_term}") %>% as.character()
  tibble(termSearch = t, urlDTICSearch = url)
}

#' DTIC Library search tibble
#'
#' Creates URLs for items
#' in DTIC's library
#'
#' @param terms vector of terms
#' @param quote if \code{TRUE} quotes the terms
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dtic_search_tbl(terms = c("FAIL SAFE", "CATHETERIZATION", "RETINOIC ACIDS"), quote = T)
dtic_search_tbl <-
  function(terms = NULL, quote = T) {
    if (length(terms) == 0) {
      stop("Enter terms to search DTIC")
    }

    terms %>%
      map_dfr(function(term) {
        .generate_dtic_search(term = term, quote = quote)
      })
  }

# r2_pe -------------------------------------------------------------------

# https://apps.dtic.mil/dodinvestment/#/

#' DTIC Program Elements
#'
#' Returns information about DTIC programe elements
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_dtic_program_elements()
dictionary_dtic_program_elements <-
  function() {
    data <-
      "https://apps.dtic.mil/dodinvestment/data/penandlin.json" %>%
      fromJSON()

    data %>%
      as_tibble() %>%
      setNames(c("codeProgramElement", "isCustom"))
  }




# rdte --------------------------------------------------------------------

.parse_dtic_rdte_dict <-
  memoise::memoise(function(url = "https://apps.dtic.mil/dodinvestment/api/service/search/advancedSearch?budgetApn=&budgetFYear=2020&budgetNum=&budgetType=All&numberOfResults=10000&searchMethod=3&searchText=&sortBy=fy&sortOrder=desc&start=0") {
    text <- read_lines(url)
    text <- text[[2]]
    data <- text %>% fromJSON(simplifyDataFrame = T)
    rm(text)
    gc()
    data <-
      data$documents %>%
      as_tibble()

    data <-
      data %>% .munge_dtic_names()

    df_urls <-
      data %>%
      select(numberRecord, matches("url")) %>%
      mutate(numberRecord = as.integer(numberRecord))

    data <-
      data %>%
      select(-matches("url")) %>%
      .munge_data() %>%
      .remove_na() %>%
      mutate(numberRecord = as.integer(numberRecord)) %>%
      left_join(df_urls, by = "numberRecord")

    amount_cols <- data %>% select(matches("amount")) %>% names()

    if (length(amount_cols) > 0) {
      data <- data %>%
        mutate_at(amount_cols,
                  list(function(x) {
                    x %>% as.integer() * 1000000
                  }))
    }

    data <- data %>%
      mutate(urlDTICBudgetAPI = url)

    data
  })

.generate_dtic_budget_urls <-
  function(years = 2000:2020,
           results = 10000) {
    urls <-
      glue(
        "https://apps.dtic.mil/dodinvestment/api/service/search/advancedSearch?budgetApn=&budgetFYear={years}&budgetNum=&budgetType=All&numberOfResults={results}&searchMethod=3&searchText=&sortBy=fy&sortOrder=desc&start=0"
      ) %>%
      as.character()

    data <-
      tibble(yearBudget = years, urlDTICBudgetAPI = urls)

    data
  }

#' RDTE Budgets
#'
#' Returns data about DTIC warehoused
#' RDTE budgets
#'
#'
#'
#' @param years vector of years - default 2000:2020
#' @param results number of results - default 10,000
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#'    dtic_rdte_budgets()
#' }
dtic_rdte_budgets <-
  function(years = 2000:2020,
           results = 10000) {
    df_urls <- .generate_dtic_budget_urls()

    urls <- df_urls$urlDTICBudgetAPI
    .parse_dtic_rdte_dict_safe <-
      possibly(.parse_dtic_rdte_dict, tibble())
    data <-
      urls %>%
      future_map_dfr(function(url) {
        url %>% message()
        .parse_dtic_rdte_dict_safe(url = url)
      })

    data <- data %>%
      mutate(numberRecord = as.integer(numberRecord)) %>%
      mutate(idRow = 1:n())

    df_urls <-
      data %>%
      select(idRow, matches("url"))

    data <-
      data %>%
      select(-matches("url")) %>%
      .munge_data() %>%
      mutate(numberRecord = as.integer(numberRecord)) %>%
      left_join(df_urls, by = "idRow") %>%
      select(-idRow)

    data <- data %>%
      mutate(urlXML = case_when(is.na(urlXML) ~ urlCache,
                                TRUE ~ urlXML))

    data
  }

.prune_dtic_xml <-
  function(data, threshold = 2,
           character_fields = c(
             "name",
             "title",
             "budgetcycle",
             "location",
             "ContractMethod",
             "ContractType",
             "SpecsAvailableNow",
             "FundingVehicle",
             "costtocomplete",
             "summaryexplanation",
             "appropriationcode",
             "description",
             "projectnumber",
             "idcode",
             "modelaffected",
             "modificationtype",
             "remarks",
             "modificationnumber",
             "budgetcycle",
             "programelementnote",
             "programelementnumber",
             ".text",
             "ProjectNumber",
             "performancemetrics",
             "articles",
             "ProjectNote",
             ".note"
           )) {

    if (!data %>% hasName("parent")) {
      data <-
        data %>%
        separate(
          item,
          into = c("parent", "item"),
          sep = "\\.",
          extra = "merge",
          fill = "right"
        ) %>%
        mutate(idRow = 1:n())

    }


    data_item_count <-
      data %>% count(item, sort = T)

    data_item_count <-
      data %>% count(item, sort = T) %>%
      mutate(countLevels = item %>% str_count("\\."))

    max_levels <- data_item_count$countLevels %>% max()  + 1
    new_cols <- glue("X{1:max_levels}") %>% as.character()

    data_items_long <-
      data_item_count %>%
      separate(
        col = item,
        into = new_cols,
        sep = "\\.",
        fill = "right",
        extra = "merge",
        remove = F
      ) %>%
      mutate(idItem = 1:n()) %>%
      select(-countLevels) %>%
      gather(table, field, -c(item, idItem, n), na.rm = T) %>%
      arrange(idItem)

    data_names <-
      data_items_long %>%
      group_by(idItem) %>%
      mutate(numberItem = 1:n()) %>%
      do(tail(., threshold)) %>%
      mutate(field = case_when(numberItem == min(numberItem) ~ str_to_lower(field),
                               TRUE ~ field)) %>%
      ungroup() %>%
      group_by(item) %>%
      summarise(nameItem = str_c(field, collapse = "")) %>%
      mutate(slugItem = str_to_lower(item))

    char_slugs <-
      character_fields %>% str_to_lower() %>% str_c(collapse = "|")
    data_names <-
      data_names %>%
      mutate(
        typeItem = case_when(
          slugItem %>% str_detect("date") ~ "date",
          slugItem %>% str_detect(char_slugs) ~ "character",
          TRUE ~ "numeric"
        )
      )

    data_names <- data_names %>%
      mutate(
        nameItem = nameItem %>% str_replace_all("five", "Five") %>%
          str_replace_all("four", "Four") %>%
          str_replace_all("three", "Three") %>%
          str_replace_all("two", "Two") %>%
          str_replace_all("one", "One") %>%
          str_replace_all("^budgetyear", "countBudgetYear") %>%
          str_replace_all("^in", "countIn") %>%
          str_replace_all("^out", "countOut"),
        nameItem = case_when(
          nameItem == "totalcostTotal" ~ "amountCostTotal",
          nameItem %>% str_detect("totalcost") ~ nameItem %>% str_replace_all("totalcost", "amountCost"),
          nameItem %>% str_detect("deliverydate") ~ nameItem %>% str_replace_all("deliverydate", "dateDelivery"),
          nameItem %>% str_detect("contractdate") ~ nameItem %>% str_replace_all("contractdate", "dateContract"),
          nameItem  == "categoryName" ~ "nameCategory",
          nameItem == "compOnentName" ~ "nameComponent",
          nameItem == "itemName" ~ "nameCategoryItem",
          nameItem == "supportName" ~ "nameSupport",
          nameItem == "manufacturerName" ~ "nameManufacturer",
          nameItem == "costelementName" ~ "nameCostElement",
          nameItem == "logisticsName" ~ "nameLogistics",
          nameItem == "nonorganicinstallationImplementationMethodName" ~ "nameInstallationMethod",
          nameItem == "kitName" ~ "nameKit",
          nameItem == "hardwareName" ~ "nameHardware",
          nameItem == "softwareName" ~ "nameSoftware",
          nameItem == "name" ~ "nameItem",
          nameItem == "itemIdCode" ~ "codeItem",
          nameItem %in% c("modificationitemTitle", "modificationtitle") ~ "titleModification",
          nameItem %in% c("modelsaffectedlistModelAffected") ~ "nameModelsAffected",
          nameItem %in% c("modificationnumber") ~ "codeModification",
          nameItem %in% c("modificationtype") ~ "typeModification",
          nameItem == "idcode" ~ "codeItem",
          nameItem == "description" ~ "descriptionItem",
          nameItem == "remarks" ~ "remarksItem",
          nameItem == "totalobligationauthorityTotal" ~ "amountObligationAuthorityTotal",
          nameItem %>% str_detect("totalobligationauthority") ~ nameItem %>% str_replace_all("totalobligationauthority", "amountObligationAuthority"),
          nameItem %>% str_detect("unitcost") ~ nameItem %>% str_replace_all("unitcost", "amountCostPerUnit"),
          nameItem %>% str_detect("netprocurementp1") ~ nameItem %>% str_replace_all("netprocurementp1", "amountProcurementNetP1"),
          nameItem %>% str_detect("^quantity") ~ nameItem %>% str_replace_all("^quantity", "count"),
          nameItem == "manufacturerLocation" ~ "locationManufacturer",
          nameItem == "manufacturerAdminLeadTimeAfterOct1InMonths" ~ "countAdminLeadTimePostOct1Months",
          nameItem == "manufacturerProductionLeadTimeAfterOct1InMonths" ~ "countProductionLeadTimePostOct1Months",
          TRUE ~ nameItem
        )
      ) %>%
      mutate(
        periodBudget =
          case_when(
            typeItem != "character" &
              nameItem %>% str_detect("CurrentYear") ~ "YearCurrent",
            typeItem != "character" &
              nameItem %>% str_detect("AllPriorYears") ~ "YearsPriorAll",
            typeItem != "character" &
              nameItem %>% str_detect("BudgetYearOne") ~ "YearOne",
            typeItem != "character" &
              nameItem %>% str_detect("BudgetYearTwo") ~ "YearTwo",
            typeItem != "character" &
              nameItem %>% str_detect("BudgetYearThree") ~ "YearThree",
            typeItem != "character" &
              nameItem %>% str_detect("BudgetYearFour") ~ "YearFour",
            typeItem != "character" &
              nameItem %>% str_detect("BudgetYearFive") ~ "YearFive",
            typeItem != "character" &
              nameItem %>% str_detect("PriorYear") ~ "YearPrior",
            typeItem != "character" &
              nameItem %>% str_detect("ToComplete") ~ "ToComplete",
            typeItem != "character" &
              nameItem %>% str_detect("Total$") ~ "TotalBudget",
          )
      )

    period_slugs <-
      c(
        "CurrentYear",
        "AllPriorYears",
        "BudgetYearOne",
        "PriorYear",
        "BudgetYearFive",
        "BudgetYearFour",
        "BudgetYearThree",
        "BudgetYearTwo",
        "ToComplete",
        "Total"
      ) %>% str_c(collapse = "|")

    data_names <-
      data_names %>%
      mutate(nameItem = nameItem %>% str_remove_all(period_slugs)) %>%
      mutate(
        typeBudget = case_when(
          !is.na(periodBudget) &
            nameItem %>% str_detect("OCO") ~ "OCO",
          !is.na(periodBudget) &
            nameItem %>% str_detect("Base") ~ "Base",
          !is.na(periodBudget) &
            !nameItem %>% str_detect("OCO|Base") ~ "Total"
        ),
        nameItem = nameItem %>% str_remove_all("OCO|Base")
      )

    data <- data %>%
      left_join(data_names, by = "item") %>%
      select(parent, item, nameItem, everything())


    data
  }

.parse_dtic_rdte_xml <-
  function(url = "https://apps.dtic.mil/procurement/Y2020/AirForce/U_P40_000071_BSA-1_BA-7_APP-3010F_PB_2020.xml",
           table_names = c("programelement", "lineitem", "codeblist"),
           return_message = T
           ) {
    doc <- read_xml(url)

    if (return_message) {
      cat(url, fill = T)
    }

    data <-
      doc %>%
      as_list() %>%
      unlist() %>%
      enframe(name = "item")

    data <-
      data %>%
      mutate(levelField = item %>% str_count("\\.")) %>%
      mutate(isNested = levelField > 3)

    table_name_slugs <-
      str_to_lower(table_names) %>% str_c(collapse = "|")

    df_base <-
      data %>%
      filter(!isNested) %>%
      .prune_dtic_xml(threshold = 2)

    df_base <-
      df_base %>%
      mutate(nameItem = nameItem %>% str_remove_all(table_name_slugs)) %>%
      mutate(nameItem = case_when(
        !is.na(periodBudget) &
          !is.na(typeBudget) ~ str_c(nameItem, periodBudget, typeBudget, sep = ""),
        TRUE ~ nameItem
      )) %>%
      mutate(nameItem = nameItem %>% str_replace_all("^funding", "amount")) %>%
      select(nameItem, value) %>%
      distinct() %>%
      group_by(nameItem) %>%
      summarise(value = value %>% str_c(collapse = " | ")) %>%
      ungroup() %>%
      spread(nameItem, value) %>%
      .munge_dtic_names() %>%
      .fix_dtic_dates() %>%
      .fix_dtic_amount() %>%
      .remove_na() %>%
      select(matches("name"), everything())

    df_base <-
      df_base %>%
      .munge_data() %>%
      mutate(urlXML = url)

    df_nested <-
      data %>% filter(isNested)

    df_nested <-
      df_nested %>%
      separate(
        item,
        into = c("nameTable", "item"),
        sep = "\\.",
        extra = "merge",
        fill = "right"
      ) %>%
      select(-nameTable)

    df_nested <-
      df_nested %>%
      separate(
        item,
        into = c("nameTable", "item"),
        sep = "\\.",
        extra = "merge",
        fill = "right"
      ) %>%
      select(-nameTable) %>%
      separate(
        item,
        into = c("nameTable", "item"),
        sep = "\\.",
        extra = "merge",
        fill = "right"
      )

    tables <-
      df_nested %>%
      distinct(nameTable) %>%
      pull()

    data_nested <-
      tables %>%
      map(function(table){
        table %>% message()
        df <-
          df_nested %>% filter(nameTable == table)

        if (table == "ItemExhibitList") {
          df <-
            df %>%
            separate(
              item,
              into = c("parent", "item"),
              sep = "\\.",
              extra = "merge",
              fill = "right"
            ) %>%
            select(parent, item, value) %>%
            mutate(idRow = 1:n())

          df <- df %>%
            .prune_dtic_xml(threshold = 2)

          df_items <-
            df %>%
            filter(nameItem %in% c("nameItem", "titleModification")) %>%
            select(idRow) %>%
            mutate(numberItem = 1:n())

          df <- df %>%
            left_join(df_items, by = "idRow") %>%
            select(numberItem, everything()) %>%
            fill(numberItem) %>%
            mutate(urlXML = url)


          item_tables <-
            df$parent %>% unique()

          df <-
            item_tables %>%
            map(function(item_table){
              data_name <- glue("data{item_table}") %>% as.character()
              df_table <-
                df %>%
                filter(parent == item_table) %>%
                select(numberItem, nameItem, typeItem, periodBudget, typeBudget, value)

              df_base_char <-
                df_table %>% filter(typeItem == "character") %>%
                select(numberItem, nameItem, value) %>%
                group_by(numberItem, nameItem) %>%
                summarise(value = unique(value) %>% str_c(collapse = " | ")) %>%
                ungroup() %>%
                spread(nameItem, value)

              if (df_table %>% filter(typeItem == "date") %>% nrow() > 0) {
                df_dates <-
                  df_table %>% filter(typeItem == "date") %>%
                  mutate(value = value %>% str_c("-01")) %>%
                  select(-typeItem) %>%
                  mutate_if(is.character,
                            list(function(x) {
                              ifelse(is.na(x), "", x)
                            })) %>%
                  unite(item, nameItem, periodBudget, typeBudget, sep = "") %>%
                  mutate(value = value %>% ymd()) %>%
                  group_by(numberItem, item) %>%
                  summarise(value = value %>% str_c(collapse = " | ")) %>%
                  spread(item,value) %>%
                  ungroup()

                df_base_char <-
                  df_base_char %>%
                  left_join(df_dates, by = "numberItem")
              }

              df_numeric <-
                df_table %>% filter(typeItem == "numeric") %>%
                select(-typeItem) %>%
                mutate(value = parse_number(value)) %>%
                distinct() %>%
                mutate(value = case_when(
                  !nameItem %in% c("amountCostPerUnit", "count") ~ value * 1000000,
                  TRUE ~ value
                )) %>%
                mutate_if(is.character,
                          list(function(x) {
                            ifelse(is.na(x), "", x)
                          })) %>%
                unite(item,
                      nameItem,
                      periodBudget,
                      typeBudget,
                      sep = "",
                      remove = F) %>%
                group_by(numberItem, item) %>%
                mutate(numberSubGroup = 1:n()) %>%
                select(numberItem, numberSubGroup, everything()) %>%
                ungroup() %>%
                group_by(numberItem) %>%
                nest() %>%
                rename(dataValues = data) %>%
                ungroup()

              df_row <-
                df_base_char %>%
                left_join(df_numeric, by = "numberItem") %>%
                mutate(hasData = dataValues %>% map_dbl(length) > 0) %>%
                .munge_data() %>%
                mutate(urlXML = url)


              df_row %>%
                group_by(urlXML) %>%
                nest() %>%
                ungroup() %>%
                rename(!!sym(data_name) := data)


            }) %>%
            reduce(left_join)

          return(df)
        }

        if (table == "SecondaryDistribution") {
          df <-
            df %>%
            mutate(item = item %>% str_remove_all("ComponentList.Component.|Cost."))

          df <- df %>%
            separate(item,
                     into = c("parent", "item"),
                     sep = "\\.",
                     fill = "left") %>%
            select(parent, item, value) %>%
            mutate(idRow = 1:n())

          df_items <-
            df %>% filter(item == "Name") %>%
            select(idRow) %>%
            mutate(numberItem = 1:n())

          df <- df %>%
            left_join(df_items, by = "idRow") %>%
            select(numberItem, everything()) %>%
            fill(numberItem)

          df <- df %>%
            mutate(
              item = case_when(
                item == "Name" ~ "nameItem",
                parent == "Quantity" ~ glue("count{item}") %>% as.character(),
                parent == "TotalQuantity" ~ glue("countTotal{item}") %>% as.character(),
                TRUE ~ glue("amount{item}") %>% as.character()
              )
            ) %>%
            select(numberItem, item, value) %>%
            spread(item, value) %>%
            select(numberItem, nameItem, everything()) %>%
            .munge_data()

          amt_cols <- df %>% select(matches("amount")) %>% names()

          if (length(amt_cols) > 0) {
            df <- df %>%
              mutate_at(amt_cols,
                        list(function(x){
                          x %>% as.numeric() * 1000000 %>% formattable::currency(digits = 0)
                        }))
          }

          df <-
            df %>%
            mutate(urlXML = url) %>%
            group_by(urlXML) %>%
            nest() %>%
            ungroup() %>%
            rename(dataSecondaryDistribution = data)

          return(df)
        }

        if (table == "ModsOutYearDelta") {
          df <-
            df %>%
            separate(
              item,
              into = c("parent", "item"),
              sep = "\\.",
              extra = "merge",
              fill = "right"
            )
          df <-
            df %>%
            mutate(parent = parent %>% str_c("amountTotal", .),
                   value = as.numeric(value) * 1000000) %>%
            select(parent, item, amount = value) %>%
            mutate(isTotal = item == "Total") %>%
            .munge_data() %>%
            mutate(numberYearBudget = case_when(
              item %>% str_detect("One") ~ 1L,
              item %>% str_detect("Two") ~ 2L,
              item %>% str_detect("Three") ~ 3L,
              item %>% str_detect("Four") ~ 4L,
              item %>% str_detect("Five") ~ 5L,
              TRUE ~ NA_integer_
            )) %>%
            mutate(urlXML = url) %>%
            group_by(urlXML) %>%
            nest() %>%
            ungroup() %>%
            rename(dataModsOutDelta = data)
          return(df)
        }

        if (table == "ChangeSummary") {
          df <-
            df %>%
            separate(
              item,
              into = c("parent", "item"),
              sep = "\\.",
              extra = "merge",
              fill = "right"
            ) %>%
            select(parent, item, value) %>%
            mutate(idRow = 1:n())

          df <- .prune_dtic_xml(data = df)




          df <-
            df %>%
            filter(!parent %>% str_detect("AdjustmentDetails")) %>%
            mutate(parent = str_c("amount", parent)) %>%
            unite(item, parent, item, sep = "") %>%
            select(item, value) %>%
            mutate(value = parse_number(value)) %>%
            filter(!is.na(value)) %>%
            spread(item, value) %>%
            mutate(urlXML = url) %>%
            group_by(urlXML) %>%
            nest() %>%
            ungroup() %>%
            rename(dataChangeSummary = data)
          return(df)
        }

        if (table == "ProjectList") {
          df <-
            df %>%
            separate(
              item,
              into = c("parent", "item"),
              sep = "\\.",
              extra = "merge",
              fill = "right"
            ) %>%
            select(parent, item, value) %>%
            mutate(idRow = 1:n())

          df <- .prune_dtic_xml(data = df)

          df <- df %>%
            mutate(nameItem = case_when(
              !is.na(periodBudget) &
                !is.na(typeBudget) ~ str_c(nameItem, periodBudget, typeBudget, sep = ""),
              TRUE ~ nameItem
            ))

          df_items <-
            df %>%
            filter(nameItem %in% c("projecttitle")) %>%
            select(idRow) %>%
            mutate(numberItem = 1:n())

          df <- df %>%
            left_join(df_items, by = "idRow") %>%
            select(numberItem, everything()) %>%
            fill(numberItem) %>%
            mutate(urlXML = url)


          item_tables <-
            df$parent %>% unique()

          df <-
            item_tables %>%
            map(function(item_table){
              data_name <- glue("data{item_table}") %>% as.character()
              df_table <-
                df %>%
                filter(parent == item_table) %>%
                select(numberItem, nameItem, typeItem, periodBudget, typeBudget, value)

              df_base_char <- df_table %>% filter(typeItem == "character") %>%
                select(numberItem, nameItem, value) %>%
                group_by(numberItem, nameItem) %>%
                summarise(value = unique(value) %>% str_c(collapse = " | ")) %>%
                ungroup() %>%
                spread(nameItem, value)

              if (df_table %>% filter(typeItem == "date") %>% nrow() > 0) {
                df_dates <-
                  df_table %>% filter(typeItem == "date") %>%
                  distinct() %>%
                  mutate(value = value %>% str_c("-01")) %>%
                  select(-typeItem) %>%
                  mutate_if(is.character,
                            list(function(x) {
                              ifelse(is.na(x), "", x)
                            })) %>%
                  unite(item, nameItem, periodBudget, typeBudget, sep = "") %>%
                  mutate(value = value %>% ymd()) %>%
                  group_by(numberItem, item) %>%
                  dplyr::slice(1) %>%
                  ungroup() %>%
                  spread(item,value)

                df_base_char <-
                  df_base_char %>%
                  left_join(df_dates, by = "numberItem")
              }

              df_numeric <-
                df_table %>% filter(typeItem == "numeric") %>%
                select(-typeItem) %>%
                mutate(value = parse_number(value)) %>%
                distinct() %>%
                mutate(value = case_when(
                  !nameItem %in% c("amountCostPerUnit", "count") ~ value * 1000000,
                  TRUE ~ value
                )) %>%
                mutate_if(is.character,
                          list(function(x) {
                            ifelse(is.na(x), "", x)
                          })) %>%
                unite(item,
                      nameItem,
                      periodBudget,
                      typeBudget,
                      sep = "",
                      remove = F) %>%
                group_by(numberItem, item) %>%
                mutate(numberSubGroup = 1:n()) %>%
                select(numberItem, numberSubGroup, everything()) %>%
                ungroup() %>%
                group_by(numberItem) %>%
                nest() %>%
                rename(dataValues = data) %>%
                ungroup()

              df_row <-
                df_base_char %>%
                left_join(df_numeric, by = "numberItem") %>%
                mutate(hasData = dataValues %>% map_dbl(length) > 0) %>%
                .munge_data() %>%
                mutate(urlXML = url)


              df_row %>%
                filter(!is.na(numberItem)) %>%
                group_by(urlXML) %>%
                nest() %>%
                ungroup() %>%
                rename(!!sym(data_name) := data)


            }) %>%
            reduce(left_join)

          return(df)
        }

        if (table == "ResourceSummary") {
          df <-
            df %>%
            separate(item, into = c("parent", "item"), sep = "\\.",
                     fill = "right")
          df <-
            df %>%
            mutate(parent = parent %>% str_c("amount", .),
                   value = as.numeric(value) * 1000000) %>%
            select(parent, item, amount = value) %>%
            .munge_data() %>%
            mutate(
              numberYearBudget = case_when(
                item %>% str_detect("One") ~ 1L,
                item %>% str_detect("Two") ~ 2L,
                item %>% str_detect("Three") ~ 3L,
                item %>% str_detect("Four") ~ 4L,
                item %>% str_detect("Five") ~ 5L,
                TRUE ~ NA_integer_
              ),
              typeBudget = case_when(
                item %>% str_detect("OCO") ~ "OCO",
                item %>% str_detect("Base") ~ "Base",
                TRUE ~ "Total"
              )
            ) %>%
            mutate(urlXML = url) %>%
            group_by(urlXML) %>%
            nest() %>%
            ungroup() %>%
            rename(dataResourceSummary = data)

          return(df)
        }
        data_name <- glue("data{table}") %>% as.character()
        df <-
          df %>%
          mutate(urlXML = url) %>%
          group_by(urlXML) %>%
          nest() %>%
          ungroup() %>%
          rename(!!sym(data_name) := data)

        df

      }) %>%
      reduce(left_join)

    data <-
      df_base %>%
      left_join(data_nested, by = "urlXML")

    data

  }

#' Parse DTIC DOD Investment URLs
#'
#' Parses vector of XML URLs from DTIC RDTE
#' budget justicifcations from \url{https://apps.dtic.mil/dodinvestment/#/}
#'
#' @param urls vector of urls
#' @param table_names list of table names to clean out defaults to \itemize{
#' \item programelement
#' \item codeblist
#' \item lineitem
#' }
#' @param return_message if \code{TRUE} prints am essage
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' parse_dtic_rdte_xml_urls(urls =  "https://apps.dtic.mil/procurement/Y2020/AirForce/U_P40_000071_BSA-1_BA-7_APP-3010F_PB_2020.xml")
parse_dtic_investment_xml_urls <-
  function(urls =  "https://apps.dtic.mil/procurement/Y2020/AirForce/U_P40_000071_BSA-1_BA-7_APP-3010F_PB_2020.xml",
           table_names = c("programelement", "lineitem", "codeblist"),
           return_message = T) {
    .parse_dtic_rdte_xml_safe <- possibly(.parse_dtic_rdte_xml, tibble())

    all_data <-
      urls %>%
    future_map_dfr(function(url){
      .parse_dtic_rdte_xml_safe(url = url, table_names = table_names,
                           return_message = return_message)
    })

    all_data <-
      all_data %>%
      select(-matches("remove"))

    data_cols <- all_data %>%
      select(matches("data")) %>%
      names()

    df_data <-
      all_data %>%
      transmute_at(data_cols,
                   list(function(x) {
                     x %>% map_dbl(length) > 0
                   }))

    names(df_data) <- names(df_data) %>% str_replace_all("data", "has")

    all_data <- all_data %>%
      bind_cols(df_data)

    all_data <-
      all_data %>%
      select(dateSubmission, nameAgency, everything())

    all_data
  }

# research ----------------------------------------------------------------
.parse_dod_data <-
  function(url = "https://dodgrantawards.dtic.mil/grants/api/service/search/advancedSearchExport?searchMethod=EITHER&searchDateMethod=STARTING&ascDescOrder=ASC") {
    data <-
      url %>%
      fread(showProgress = FALSE) %>%
      as_tibble()

  }

.munge_dod_data <-
  function(data) {
    data <-
      data %>%
      .munge_fpds_names()

    data <-
      data %>%
      mutate_at(c("datetimeCreated",
                  "datetimeModified"),
                list(mdy_hms)) %>%
      mutate_at(c("dateAwardStart", "dateAwardEnd"),
                list(mdy))

    data <-
      data %>%
      mutate(hasFNComma = nameFirstPrincipalInvestigator %>% str_detect("\\,")) %>%
      mutate(
        nameFirstClean = case_when(
          hasFNComma ~ nameLastPrincipalInvestigator,
          TRUE ~ nameFirstPrincipalInvestigator
        ),
        nameLastClean = case_when(
          hasFNComma ~ nameFirstPrincipalInvestigator %>% str_remove_all("\\,"),
          TRUE ~ nameLastPrincipalInvestigator
        )
      ) %>%
      unite(
        namePrincipalInvestigator,
        nameFirstClean,
        nameLastClean,
        sep = " ",
        remove = F
      ) %>%
      select(-c(
        nameFirstPrincipalInvestigator,
        nameLastPrincipalInvestigator,
        hasFNComma
      )) %>%
      rename(
        nameFirstPrincipalInvestigator = nameFirstClean,
        nameLastPrincipalInvestigator = nameLastClean
      ) %>%
      select(one_of(names(data)), everything()) %>%
      unite(
        namePrincipalInvestigator,
        nameFirstPrincipalInvestigator,
        nameLastPrincipalInvestigator,
        sep = " ",
        remove = F
      )

    data <-
      data  %>%
      select(one_of(
        c(
          "idAward",
          "nameAward",
          "descriptionAward",
          "nameAgency",
          "amountAward",
          "namePrincipalInvestigator",
          "nameOffice",
          "yearFiscal",
          "nameOrganization",
          "namePrincipalInvestigator",
          "nameFirstPrincipalInvestigator",
          "nameLastPrincipalInvestigator",
          "typePrincipalInvestigator",
          "dateAwardStart",
          "dateAwardEnd",
          "datetimeCreated",
          "datetimeModified"
        )
      ), everything())

    data <- data %>%
      mutate(idOffice = idAward %>% substr(1, 6)) %>%
      select(idAward:nameAgency, idOffice, nameOffice, everything())

    data <- data %>%
      mutate_if(is.character,
                list(function(x) {
                  x %>% stringi::stri_trans_general("Latin-ASCII") %>% str_trim()
                }))

    ids <-
      data %>% distinct(idOffice) %>% pull() %>% str_c(collapse = "|")

    data <-
      data %>%
      mutate(nameOffice = nameOffice %>% str_remove_all(ids) %>% str_trim()) %>%
      .munge_data(clean_address = F) %>%
      mutate(
        nameOffice = case_when(
          nameOffice == "AFRL/RQK" ~ "AFRL RQK",
          nameOffice == "- ACC-APG-RTP DIVISION" ~ "Army Contracting Command – Aberdeen Proving Ground" %>% str_to_upper(),
          TRUE ~ nameOffice
        )
      )


    data <-
      data %>%
      mutate(
        namePrincipalInvestigator =
          namePrincipalInvestigator %>% str_replace_all("\\^DR. |PROF.|PROFESSOR|ASST.", "") %>%
          str_replace_all(", JR.", " JR") %>%
          str_replace_all(", III", " III") %>%
          str_replace_all(", PH.D|, PH. D", " PHD")
      )


    has_more_commas <- data %>%
      filter(namePrincipalInvestigator %>% str_detect("\\,")) %>% nrow()  > 0

    if (has_more_commas) {
      data <- data %>%
        filter(namePrincipalInvestigator %>% str_detect("\\,")) %>%
        separate(
          namePrincipalInvestigator,
          into = c(
            "nameLastPrincipalInvestigator",
            "nameFirstPrincipalInvestigator"
          ),
          fill = "right",
          extra = "merge",
          sep = "\\,"
        ) %>%
        mutate_if(is.character, str_trim) %>%
        unite(
          namePrincipalInvestigator,
          nameFirstPrincipalInvestigator,
          nameLastPrincipalInvestigator,
          sep = " ",
          remove = F
        ) %>%
        bind_rows(data %>% filter(!namePrincipalInvestigator %>% str_detect("\\,"))) %>%
        arrange(dateAwardStart) %>%
        select(one_of(names(data)), everything()) %>%
        .munge_data(clean_address = F)
    }

    data <-
      data %>%
      mutate(yearFiscal = year(dateAwardStart)) %>%
      .munge_organizations()

    data <-
      data %>%
      mutate(
        namePrincipalInvestigator = case_when(
          namePrincipalInvestigator %in% c("PROF. RALPH", "RALPH ETIENNE- CUMMINS") ~ "RALPH ETIENNE-CUMMINGS",
          namePrincipalInvestigator %in% c("ABDALLA DARWISH", "ABDALLA DAWISH") ~ "ABDALLA DARWISH",
          namePrincipalInvestigator %in% c("ALEX K Y JEN") ~ "ALEX JEN",
          namePrincipalInvestigator %>% str_detect("ALI JADBABAIE") ~ "ALI JADBABAIE",
          namePrincipalInvestigator %>% str_detect("BENITO GONZLEZ") ~ "BENITO GONZALEZ",
          namePrincipalInvestigator %>% str_detect("CLAY GLOSTER") ~ "CLAY GLOSTER",
          namePrincipalInvestigator %>% str_detect("DAVID BOURELL") ~ "DAVID BOURELL",
          namePrincipalInvestigator %>% str_detect("DR AVESTA SADAN|DR AVESTA SASAN") ~ "AVESTA SASAN",
          TRUE ~ namePrincipalInvestigator
        )
      )

    data <- data %>%
      refine_columns(entity_columns = "namePrincipalInvestigator") %>%
      select(-namePrincipalInvestigator) %>%
      rename(namePrincipalInvestigator = namePrincipalInvestigatorClean) %>%
      select(one_of(names(data))) %>%
      mutate(namePrincipalInvestigator = namePrincipalInvestigator %>% str_replace_all("^DR |^SSOR ", ""))


    df_last_names <-
      data %>%
      distinct(namePrincipalInvestigator) %>%
      tbl_last_name(name_column = "namePrincipalInvestigator")

    df_last_classified_names <-
      df_last_names %>%
      filter(!is.na(nameLast)) %>%
      pull(nameLast) %>%
      unique() %>%
      entities::classify_last_names()

    df_last_names <-
      df_last_names %>%
      left_join(
        df_last_classified_names %>%
          rename(typeWRUPrincipalInvestigatorPrediction = typeWRUPrediction),
        by = "nameLast"
      )

    data <- data %>%
      left_join(
        df_last_names %>%
          distinct(
            namePrincipalInvestigator,
            typeWRUPrincipalInvestigatorPrediction
          ),
        by = "namePrincipalInvestigator"
      )

    data <- data %>%
      mutate(
        typeWRUPrincipalInvestigatorPrediction = ifelse(
          is.na(typeWRUPrincipalInvestigatorPrediction),
          "Other",
          typeWRUPrincipalInvestigatorPrediction
        )
      )

    data <- data %>%
      mutate(
        nameOffice = case_when(
          nameOffice %>% str_detect("10TH CONTRACTING SQUADRON, USAF ACADEMY, CO") ~ "10TH CONTRACTING SQUADRON - USAF ACADEMY CO",
          nameOffice %>% str_detect("ARMY CONTRACTING COMMANDABERDEEN PROVING GROUND") ~ "ARMY CONTRACTING COMMAND - ABERDEEN PROVING GROUND",
          nameOffice %>% str_detect(
            "MEDICAL RESEARCH & MATERIEL COMMAND|MEDICAL RESEARCH & MATERIAL COMMAND"
          ) ~ "MEDICAL RESEARCH & MATERIEL COMMAND",
          nameOffice %>% str_detect("-W6QK ACC-APG NATICK CONTRACTING DIVISION") ~ "ARMY CONTRACTING COMMAND - NATICK CONTRACTING DIVISION",
          nameOffice %>% str_detect("ACC - WARREN") ~ "ARMY CONTRACTING COMMAND - WARREN",
          nameOffice %>% str_detect(
            "AFOSR ASIAN OFFICE OF AEROSPACE RESEARCH AND DEVELOPMENT TOKYO JPN|AFOSR ASIAN OFFICE OF AEROSPACE RESEARCH AND DEVELOPMENT, TOKYO, JPN"
          ) ~ "Air Force Office of Scientific Research - ASIAN OFFICE OF AEROSPACE RESEARCH IN TOKYO",
          nameOffice %>% str_detect(
            "AFRL MUNITIONS DIRECTORATE EGLIN AFB FL|AFRL MUNITIONS DIRECTORATE, EGLIN AFB FL"
          ) ~ "AIR FORCE RESEARCH LABS - EGLIN Air Force Base",
          nameOffice %>% str_detect("AFRL ROME RESEARCH SITE ROME NY|AFRL ROME RESEARCH SITE, ROME NY") ~ "AIR FORCE RESEARCH LABS - Rome",
          nameOffice %>% str_detect("AFRL RQK|AFRL/PZL|AFRL/PZLDB|AFRL/RQKMC") ~ "AIR FORCE RESEARCH LABS - Patterson Air Force Base",
          nameOffice %>% str_detect("AFRL RVK|DET 8 AFRL PKD") ~ "AIR FORCE RESEARCH LABS - Kirtland Air Force Base",
          nameOffice %>% str_detect(
            "AIR FORCE OFFICE OF SCIENTIFIC RESEARCH ARLINGTON VA|AIR FORCE OFFICE OF SCIENTIFIC RESEARCH, ARLINGTON, VA"
          ) ~ "AIR FORCE OFFICE OF SCIENTIFIC RESEARCH - ARLINGTON VA",
          nameOffice %>% str_detect("ARMY CONTRACTING COMMAND - ACC-NJ") ~ "ARMY CONTRACTING COMMAND - Picatinny NJ",
          nameOffice %>% str_detect("ARMY CONTRACTING COMMAND -RTP DIVISION") ~ "ARMY CONTRACTING COMMAND - RESEARCH TRIANGLE PARK",
          nameOffice %>% str_detect("ARMY CONTRACTING COMMAND-REDSTONE") ~ "ARMY CONTRACTING COMMAND - REDSTONE",
          nameOffice %>% str_detect("DARPA CONTRACTS MANAGEMENT OFFICE") ~ "DARPA - CONTRACTS MANAGEMENT OFFICE",
          nameOffice %>% str_detect("DEPARTMENT OF DEFENSE EDUCATION ACTIVITY") ~ "DEPARTMENT OF DEFENSE - EDUCATION ACTIVITY",
          nameOffice %>% str_detect("NAVAL INFORMATION WARFARE CENTER ATLANTIC") ~ "NAVAL INFORMATION WARFARE CENTER - ATLANTIC",
          nameOffice %>% str_detect("NAVAL INFORMATION WARFARE CENTER PACIFIC|SSC PACIFIC") ~ "NAVAL INFORMATION WARFARE CENTER - PACIFIC",
          nameOffice %>% str_detect("NAVAL SURFACE WARFARE CENTER CRANE") ~ "NAVAL SURFACE WARFARE CENTER - CRANE INDIANA",
          nameOffice %>% str_detect("NSWC IHEODTD INDIAN HEAD") ~ "NAVAL SURFACE WARFARE CENTER - INDIAN HEAD",
          nameOffice %>% str_detect("NAVAL UNDERSEA WARFARE CENTER DIVISION KEYPORT") ~ "NAVAL UNDERSEA WARFARE CENTER DIVISION - KEYPORT WASHINGTON",
          nameOffice %>% str_detect("NAVSUP FLC SAN DIEGO") ~ "NAVSUP Fleet Logistics Center - San Diego",
          nameOffice %>% str_detect("NAVSUP FLC SIGONELLA NAPLES|NAVSUP FLC SIGONELLA, NAPLES") ~ "NAVSUP Fleet Logistics Center - Naples",
          nameOffice %>% str_detect(
            "OFFICE OF NAVAL RESEARCH LONDON ENGLAND|OFFICE OF NAVAL RESEARCH, LONDON, ENGLAND"
          ) ~ "OFFICE OF NAVAL RESEARCH - LONDON ENGLAND",
          nameOffice %>% str_detect("U.S. ARMY CORPS OF ENGINEERS - ALBUQUERQUE DISTRICT") ~ "US ARMY CORPS OF ENGINEERS - ALBUQUERQUE",
          nameOffice %>% str_detect("US ARMY CORPS OF ENGINEERS ALASKA DISTRICT") ~ "US ARMY CORPS OF ENGINEERS - ALASKA",
          nameOffice %>% str_detect("US ARMY CORPS OF ENGINEERS OMAHA") ~ "US ARMY CORPS OF ENGINEERS - OMAHA",
          nameOffice %>% str_detect("USACE- ERDC") ~ "US ARMY CORPS OF ENGINEERS - VICKSBURG",
          TRUE ~ nameOffice
        ),
        nameOffice = nameOffice %>% str_to_upper()
      ) %>%
      rename(nameOfficeFull = nameOffice)


    data <- data %>% mutate(
      nameOfficeFull = case_when(
        idOffice == "FA7000" ~ "10TH CONTRACTING SQUADRON - USAF ACADEMY CO",
        idOffice == "N00421" ~ "NAVAL AIR WARFARE CENTER",
        idOffice %in%  c("N00014", "N62909") ~ "OFFICE OF NAVAL RESEARCH",
        idOffice == "N00014" ~ "OFFICE OF NAVAL RESEARCH - LONDON ENGLAND",
        TRUE ~ nameOfficeFull
      )
    ) %>%
      separate(
        nameOfficeFull,
        into = c("nameOffice", "nameOfficeDetail"),
        remove = F,
        fill = "right",
        extra = "merge",
        sep = "\\ - "
      )

    data <- data %>%
      mutate(typeWRUPrincipalInvestigatorPrediction = typeWRUPrincipalInvestigatorPrediction %>% str_to_upper())

    data
  }

.dod_grants <-
  memoise::memoise(function() {
    data <-
      fread(
        "https://asbcllc.com/r_packages/govtrackR/data/dod_grants.tsv.gz",
        showProgress = FALSE
      ) %>%
      as_tibble() %>%
      .munge_data(clean_address = F)

    data
  })


#' Department of Defense Grants
#'
#' Returns historic Department
#' of Defense scientific grant data.
#'
#' @return
#' @export
#'
#' @examples
dod_grants <-
  function(snake_names = F) {
    data <- .dod_grants()
    data <-
      data %>%
      mutate(nameAgencyParent = "DEPARTMENT OF DEFENSE")


    data <- data %>% munge_data(snake_names = snake_names)

    data

  }



# pubdefense --------------------------------------------------------------


.parse_pubdefense_url <-
  function(url = "https://publicaccess.dtic.mil/padf_public/api/service/search/simpleSearch?ascDescOrder=ASC&newSearch=0&numberOfResults=1000000&orderProp=&performSimpleSearch=&searchText=%22china%22") {
    data <- fromJSON(url)
    data
  }


# dtic_investment ---------------------------------------------------------

# https://apps.dtic.mil/dodinvestment/#/browse


# categories --------------------------------------------------------------

.dtic_categories <- function() {
  page <-
    read_html("https://discover.dtic.mil/thesaurus/subject-categories/")
  nodes <- page %>% html_nodes(".fl-col-group-nested a")
  items <- nodes %>% html_text() %>% str_squish() %>% str_to_upper()

  urls <-
    nodes %>% html_attr("href")

  data <-
    tibble(nameCategory = items, urlCategoryDTIC = urls) %>%
    mutate(
      idCategory = 1:n(),
      idCategory = case_when(
        nchar(idCategory) == 1 ~ glue("0{idCategory}") %>% as.character(),
        TRUE ~ as.character(idCategory)
      )
    ) %>%
    select(idCategory, everything())

  data
}

.parse_dtic_category_url <-
  memoise::memoise(function(url = "https://discover.dtic.mil/thesaurus/subject-categories/12mathematical/") {
    page <- read_html(url)
    data <-
      page %>% html_table(fill = F) %>%
      .[[1]] %>%
      as_tibble() %>%
      setNames(c("idSubject", "nameSubject", "descriptionSubject")) %>%
      mutate(urlCategoryDTIC = url) %>%
      .munge_data()

    data <- data %>%
      mutate(idSubject = case_when(
        nchar(idSubject) == 1 ~ glue("0{idSubject}") %>% as.character(),
        TRUE ~ as.character(idSubject)
      ))

    df_subjects <- data %>%
      select(idSubject, descriptionSubject) %>%
      separate_rows(descriptionSubject, sep = "\\;") %>%
      mutate_all(list(function(x){
        x %>% str_remove_all("\\.") %>% str_squish()
      })) %>%
      nest(dataSubjects = c(descriptionSubject)) %>%
      mutate(countSubjects = dataSubjects %>% map_dbl(nrow)) %>%
      select(idSubject, countSubjects, dataSubjects)

    data <-
      data %>%
      left_join(df_subjects, by = "idSubject")

    data
  })

#' DTIC Thesaurus Subject Categories
#'
#' DTIC has identified 25 broad subject fields and 251 groups to categorize t
#' areas of scientific and technical interest.
#'
#' These fields and groups provide the structure for the subject grouping of technical reports in DTIC's collection and are used to define the areas of need-to-know in distributing these reports.
#'
#' Through this site, you will find the subject coverage for each subject category as well as cross-references to related fields and groups.
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' \dontrun{
#'    dtic_subject_categories()
#' }
dtic_subject_categories <-
  function() {
    data <-
      .dtic_categories()

    .parse_dtic_category_url_safe <-
      possibly(.parse_dtic_category_url, tibble())

    df_subjects <-
      data$urlCategoryDTIC %>%
      future_map_dfr(function(url){
        .parse_dtic_category_url_safe(url = url)
      })

    data <- data %>%
      left_join(df_subjects, by = "urlCategoryDTIC") %>%
      select(idCategory, nameCategory, idSubject, nameSubject, everything())

    data <-
      data %>%
      mutate(codeSubject = glue("{idCategory}/{idSubject}") %>% as.character()) %>%
      select(codeSubject, everything(),
             descriptionSubject)

    data
  }


# dtic_budgets ------------------------------------------------------------

# https://budget.dtic.mil/

.dtic_active_budget <-
  function(url = "https://budget.dtic.mil/") {
    page <- read_html(url)

    1:9 %>%
      map_dfr(function(x) {
        css <-
          glue("table:nth-child(4) tr+ tr td:nth-child({x})") %>% as.character()
        page %>% html_nodes(css)
      })


  }

.dictionary_dtic_budget_names <-
  function() {
    tibble(
      idColumn = 1:9,
      nameFile = c(
        "Summary Request",
        "Research Development, Test & Evaluation",
        "Research Development, Test & Evaluation",
        "Procurement",
        "Procurement",
        "Operations & Maintence",
        "Operations & Maintence",
        "Personnel",
        "Personnel"
      ),
      nameColumn = c(
        "urlSummaryPDF",
        "urlRDTEPDF",
        "urlRDTEXLS",
        "urlProcurementPDF",
        "urlProcurementXLS",
        "urlOMPDF",
        "urlOMXLS",
        "urlPersonnelPDF",
        "urlPersonnelXLS"
      ),
      typeFile = c("PDF",
                   "PDF",
                   "XLS",
                   "PDF",
                   "XLS",
                   "PDF",
                   "XLS",
                   "PDF",
                   "XLS"),
    )

  }

.parse_dtic_active_budget_page <-
  function(file_loc = "Downloads/index.html") {
    page <-
      read_html(file_loc)

    df_names <- .dictionary_dtic_budget_names()
    data <-
      1:9 %>%
      map_dfr(function(x) {
        css <-
          glue("table+ table tr+ tr td:nth-child({x}) a") %>% as.character()

        nodes <-
          page %>% html_nodes(css)

        text <- nodes %>% html_text()

        d <- tibble(
          idColumn = x,
          text = nodes %>% html_text(),
          url = nodes %>% html_attr("href")
        ) %>%
          filter(!text %>% str_detect("Access")) %>%
          mutate(idRowColumn = 1:n())

        d %>%
          mutate(
            slug = url %>% str_remove_all("https://budget.dtic.mil/|") %>%
              str_remove_all("pdfs|spreadsheets")
          ) %>%
          select(idColumn, idRowColumn, text, slug, url)
      }) %>%
      filter(text != "") %>%
      left_join(df_names, by = "idColumn") %>%
      select(idColumn, names(df_names), everything()) %>%
      arrange(idRowColumn) %>%
      mutate(idRow = 1:n()) %>%
      select(idRow, idRowColumn, everything())

    df_base <-
      data %>%
      filter(nameFile == "Summary Request") %>%
      select(idRow, nameColumn, text, slug, url) %>%
      separate(
        slug,
        into = c("base", "yearBudget"),
        fill = "right",
        extra = "merge",
        sep = "/"
      ) %>%
      mutate(yearBudget = yearBudget %>% substr(1, 6) %>% parse_number(),
             idRow = 1 + idRow) %>%
      select(idRow,
             yearBudget,
             nameBudget = text,
             urlSummaryPDF = url) %>%
      separate(
        nameBudget,
        into = c("nameBudget", "detailsBudget"),
        fill = "right",
        extra = "merge",
        sep = "\\("
      ) %>%
      mutate(detailsBudget = detailsBudget %>% str_remove_all("\\)")) %>%
      mutate(nameBudget = nameBudget %>% str_remove_all("FY[0-9][0-9][0-9][0-9]|[0-9][0-9]MB")) %>%
      mutate_if(is.character,
                str_squish) %>%
      .munge_data()

    df_other <-
      data %>%
      filter(nameFile != "Summary Request")

    data <-
      df_other %>%
      select(idRow, nameFile, nameColumn, typeFile, text, url) %>%
      mutate(sizeFileKB = case_when(
        text %>% str_detect("KB") ~ parse_number(text),
        TRUE ~ parse_number(text) * 1000
      )) %>%
      select(-text) %>%
      left_join(df_base, by = "idRow") %>%
      select(idRow,
             yearBudget,
             nameBudget,
             detailsBudget,
             urlSummaryPDF,
             everything()) %>%
      fill(yearBudget) %>%
      fill(detailsBudget) %>%
      fill(nameBudget) %>%
      fill(urlSummaryPDF) %>%
      select(-idRow)

    data
  }

.dtic_historic_budgets <-
  function(url = "https://budget.dtic.mil/previous_reports.html") {
    page <- read_html(url)

    df_names <- .dictionary_dtic_budget_names()
    data <-
      1:9 %>%
      map_dfr(function(x) {
        css <-
          glue("tr+ tr td:nth-child({x}) a") %>% as.character()
        nodes <-
          page %>% html_nodes(css)

        text <- nodes %>% html_text()

        tibble(
          idColumn = x,
          text = nodes %>% html_text(),
          slug = nodes %>% html_attr("href"),
          url = str_c("https://budget.dtic.mil/", slug, sep = "")
        ) %>%
          filter(text != "") %>%
          mutate(idRowColumn = 1:n())
      }) %>%
      left_join(df_names, by = "idColumn") %>%
      select(idColumn, names(df_names), everything()) %>%
      arrange(idRowColumn) %>%
      mutate(idRow = 1:n()) %>%
      select(idRow, idRowColumn, everything())

    df_base <-
      data %>%
      filter(nameFile == "Summary Request") %>%
      select(idRow, nameColumn, text, slug, url) %>%
      separate(
        slug,
        into = c("base", "yearBudget"),
        fill = "right",
        sep = "/"
      ) %>%
      mutate(yearBudget = yearBudget %>% substr(1, 6) %>% parse_number(),
             idRow = 1 + idRow) %>%
      select(idRow,
             yearBudget,
             nameBudget = text,
             urlSummaryPDF = url) %>%
      separate(
        nameBudget,
        into = c("nameBudget", "detailsBudget"),
        fill = "right",
        extra = "merge",
        sep = "\\("
      ) %>%
      mutate(detailsBudget = detailsBudget %>% str_remove_all("\\)")) %>%
      mutate(nameBudget = nameBudget %>% str_remove_all("FY[0-9][0-9][0-9][0-9]|[0-9][0-9]MB")) %>%
      mutate_if(is.character,
                str_squish) %>%
      .munge_data()

    df_other <-
      data %>%
      filter(nameFile != "Summary Request")

    data <-
      df_other %>%
      select(idRow, nameFile, nameColumn, typeFile, text, url) %>%
      mutate(sizeFileKB = case_when(
        text %>% str_detect("KB") ~ parse_number(text),
        TRUE ~ parse_number(text) * 1000
      )) %>%
      select(-text) %>%
      left_join(df_base, by = "idRow") %>%
      select(idRow,
             yearBudget,
             nameBudget,
             detailsBudget,
             urlSummaryPDF,
             everything()) %>%
      fill(yearBudget) %>%
      fill(detailsBudget) %>%
      fill(nameBudget) %>%
      fill(urlSummaryPDF) %>%
      select(-idRow) %>%
      mutate(urlHistoricBudgets = url)


    data


  }



.dictionary_dtic_budget_urls <-
  function() {
    fread("https://asbcllc.com/r_packages/govtrackR/data/dtic_budget_urls.csv.tz",showProgress = FALSE) %>%
      as_tibble()
  }

#' DTIC Budget URL Dictionary
#'
#' This returns URL data for DTIC
#' budgets from 2007 to 2020
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_dtic_budget_urls()
dictionary_dtic_budget_urls <-
  function() {
    .tt <- memoise::memoise(.dictionary_dtic_budget_urls)
    data <- .tt()

    data <-
      data %>%
      left_join(
        data %>% distinct(yearBudget) %>% filter(yearBudget == max(yearBudget)) %>%
          mutate(isBudgetCurrentYear = T),
        by = "yearBudget"
      ) %>%
      select(yearBudget, isBudgetCurrentYear, everything()) %>%
      mutate(isBudgetCurrentYear = case_when(isBudgetCurrentYear %>% is.na() ~ F,
                                             T ~ isBudgetCurrentYear))

    data
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.