R/matchers.R

Defines functions .match_companies

#' Match contract solicitations to companies that share their NAICS code
#'
#' Keeps the contracts that have a non-missing `idNAICS`, looks up every
#' company in `data_companies` registered under each distinct code, and
#' attaches the matching legal names (collapsed with `" | "`) to each
#' contract. Contracts whose NAICS code has no matching company are dropped.
#' One status message is emitted per distinct NAICS code.
#'
#' @param data_companies Data frame of companies; must contain `idNAICS`
#'   and `nameCompanyLegal`.
#' @param data_contracts Data frame of contracts; must contain `idNAICS`,
#'   `datePosted`, `nameAgency`, `nameSolicitation`, `amountContract` and
#'   `urlSolicitation`.
#' @return A tibble of matched contracts sorted by `idNAICS`, with `idNAICS`
#'   and `nameNAICS` first, followed by the contract columns,
#'   `nameCompanyMatches`, and any further columns contributed by
#'   `dictionary_naics_codes()`.
#' @noRd
.match_companies <-
  function(data_companies = df_naics,
           data_contracts = df_sample_contracts) {
    # Default arguments are lazy promises evaluated inside this function's
    # frame. Force the company table before creating any locals so the
    # default can never resolve to an intermediate object built below.
    force(data_companies)

    df_contracts <-
      data_contracts %>%
      filter(!is.na(idNAICS)) %>%
      select(
        idNAICS,
        datePosted,
        nameAgency,
        nameSolicitation,
        amountContract,
        urlSolicitation
      )

    naics_codes <- unique(df_contracts$idNAICS)

    # One collapsed string of company names per NAICS code. Codes without any
    # company yield NA so the filter below can discard their contracts.
    matched_names <-
      vapply(
        naics_codes,
        function(naics) {
          df_match <-
            data_companies %>%
            filter(idNAICS == naics)

          if (nrow(df_match) == 0) {
            glue::glue("No active matches for NAICS: {naics}") %>% message()
            return(NA_character_)
          }

          glue::glue("{nrow(df_match)} matches for NAICS: {naics}") %>% message()
          str_c(df_match$nameCompanyLegal, collapse = " | ")
        },
        character(1),
        USE.NAMES = FALSE
      )

    # Built column-wise so both columns exist (with the right types) even
    # when there are no NAICS codes at all; the join below relies on that.
    df_matches <-
      tibble(idNAICS = naics_codes, nameCompanyMatches = matched_names)

    df_contracts %>%
      left_join(df_matches, by = "idNAICS") %>%
      filter(!is.na(nameCompanyMatches)) %>%
      # NOTE(review): natural join on whatever columns are shared with the
      # dictionary (presumably `idNAICS`, which also supplies `nameNAICS`) --
      # confirm and make `by` explicit.
      left_join(dictionary_naics_codes()) %>%
      arrange(idNAICS) %>%
      select(idNAICS, nameNAICS, everything())
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.