R/sam.R

Defines functions .add_address .naics_2012 .naics_2007 .naics_2002

## asb_key <- "D4cKoXpvlody0ff5Dg0lCKd4uQDseRaPGih8iuNu"
# mungers -----------------------------------------------------------------

.naics_2002 <-
  function() {
    data <-
      "https://www.census.gov/eos/www/naics/reference_files_tools/2002/naics_6_02.txt" %>%
      read_tsv(col_names = F)

    data <-
      data %>%
      dplyr::slice(5:nrow(data))

    data <-
      data %>%
      separate("X1", into = c('idNAICS', "nameNAICS"),
               extra = "merge",
               sep = "\\  ") %>%
      mutate(
        idNAICS = as.numeric(idNAICS),
        nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
        yearCodeBookNAICS = 2002
      )

    data
  }

.naics_2007 <-
  function() {
    data <-
      "https://www.census.gov/eos/www/naics/reference_files_tools/2007/naics07_6.xls" %>% download_excel_file()

    data <-
      data %>%
      dplyr::slice(3:nrow(data)) %>%
      setNames(c("idNAICS", "nameNAICS")) %>%
      mutate(
        idNAICS = as.numeric(idNAICS),
        nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
        yearCodeBookNAICS = 2007
      )

    data
  }


.naics_2012 <-
  function() {
    data <-
      download_excel_file(url = "https://www.census.gov/eos/www/naics/reference_files_tools/2012/2012_NAICS_Structure.xls") %>%
      select(2,3)

    data <-
      data %>%
      dplyr::slice(4:nrow(data)) %>%
      setNames(c("idNAICS", "nameNAICS")) %>%
      mutate(idNAICS = as.numeric(idNAICS),
             nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
             yearCodeBookNAICS = 2012)

    data
  }

#' NAICS Dictionary
#'
#' @param url location of csv file
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_naics_codes()
dictionary_naics_codes <-
  memoise::memoise(function(url = "https://www.census.gov/eos/www/naics/2017NAICS/2017_NAICS_Structure.xlsx") {
      options(warn = -1)
      data <-
        rio::import(url)

      data <-
        data %>%
        as_tibble() %>%
        select(2, 3)


      data <-
        data %>% dplyr::slice(3:nrow(data)) %>%
        set_names("idNAICS", "nameNAICS") %>%
        mutate(
          isT = nameNAICS %>% str_detect("[a-z]T"),
          nameNAICS = case_when(isT ~ nameNAICS %>% substr(1, nchar(nameNAICS) -
                                                             1),
                                TRUE ~ nameNAICS) %>% str_to_upper()
        ) %>%
        mutate(nameNAICS = case_when(isT ~ nameNAICS %>% str_remove_all("T$"),
                                     TRUE ~ nameNAICS))
      df_sectors <-
        data %>%
        filter(nchar(idNAICS) == 2 | idNAICS %>% str_detect("\\-")) %>%
        distinct(idSectorNAICS = idNAICS, nameSectorNAICS = nameNAICS, isT) %>%
        separate_rows("idSectorNAICS", sep = "\\-") %>%
        mutate(idSectorNAICS = as.numeric(idSectorNAICS)) %>%
        mutate(nameSectorNAICS = case_when(isT ~ nameSectorNAICS %>% str_remove_all("T$"),
                                     TRUE ~ nameSectorNAICS)) %>%
        select(-isT) %>%
        bind_rows(
          tibble(idSectorNAICS = 32, nameSectorNAICS = "MANUFACTURING")
        ) %>%
        arrange(idSectorNAICS)

      data <-
        data %>%
        group_by(nameNAICS) %>%
        filter(idNAICS == max(idNAICS)) %>%
        ungroup() %>%
        select(-isT) %>%
        mutate(idNAICS = as.integer(idNAICS)) %>%
        mutate(yearCodeBookNAICS = 2017)

      df_2012 <- .naics_2012()

      df_2007 <- .naics_2007()

      df_2002 <- .naics_2002()

      data <-
        list(data, df_2002, df_2007, df_2012) %>%
        reduce(bind_rows)


      data <-
        data %>%
        mutate(char = nchar(idNAICS))



      df_subsectors <-
        data %>% filter(char == 3) %>%
        distinct(idSubSectorNAICS = idNAICS, nameSubSectorNAICS = nameNAICS)

      df_industry_groups <-
        data %>% filter(char == 4) %>%
        distinct(idIndustryGroupNAICS = idNAICS,
                 nameIndustryGroupNAICS = nameNAICS)

      df_industries <-
        data %>% filter(char == 5) %>%
        distinct(idIndustryNAICS = idNAICS, nameIndustryNAICS = nameNAICS)

      data <-
        data %>%
        filter(char >= 6) %>%
        mutate(
          isNAICSSameNational = idNAICS %>% str_detect("0$"),
          idSectorNAICS =  idNAICS %>% substr(1, 2),
          idSubSectorNAICS =  idNAICS %>% substr(1, 3),
          idIndustryGroupNAICS =  idNAICS %>% substr(1, 4),
          idIndustryNAICS = idNAICS %>% substr(1, 5)
        ) %>%
        mutate_at(
          c(
            "idIndustryNAICS",
            "idIndustryGroupNAICS",
            "idSubSectorNAICS",
            "idSectorNAICS"
          ),
          as.numeric
        ) %>%
        group_by(idNAICS, nameNAICS) %>%
        filter(yearCodeBookNAICS == max(yearCodeBookNAICS)) %>%
        ungroup() %>%
        left_join(df_sectors, by = "idSectorNAICS") %>%
        left_join(df_subsectors, by = "idSubSectorNAICS") %>%
        left_join(df_industry_groups, by = "idIndustryGroupNAICS") %>%
        left_join(df_industries, by = "idIndustryNAICS") %>%
        select(-char)

      data <-
        data %>%
        select(yearCodeBookNAICS,
               idNAICS,
               nameNAICS,
               matches("name"),
               everything())

      data
  })

.add_address <-
  function(data) {
    if (data %>% tibble::has_name("location")) {
      return(data)
    }

    address_parts <-
      data %>%
      select(-matches("Incorp")) %>%
      select(matches("address|city|country|state|zipcode")) %>% names()

    if (length(address_parts) > 0) {
      address <- c()
      has_2  <- "addressStreet2" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
      if (has_2) {
        column <-
          address_parts["addressStreet2" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = " ")

        address <- address %>% append(value)
      }
      has_1  <-
        "addressStreet1" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
      if (has_1) {
        column <-
          address_parts["addressStreet1" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = " ")
        address <- address %>% append(value)
      }
      has_city  <- "city" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
      if (has_city) {
        column <-
          address_parts["city" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = " ")
        value <- glue::glue(" {value}") %>% as.character()
        address <- address %>% append(value)
      }

      has_state  <- "state" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0

      if (has_state) {
        column <-
          address_parts["state" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = ", ")
        value <- glue::glue(", {value}") %>% as.character()
        address <- address %>% append(value)
      }
      has_zip  <- "zipcode" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
      if (has_zip) {
        column <-
          address_parts["zipcode" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = " ")
        value <- glue::glue(" {value}") %>% as.character()
        address <- address %>% append(value)
      }
      has_country  <- "codeCountry" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
      if (has_country) {
        column <-
          address_parts["codeCountry" %>% str_detect(address_parts)]
        value <- data %>% pull(column) %>% str_c(collapse = ", ")
        value <- glue::glue(", {value}") %>% as.character()
        address <- address %>% append(value)
      }

      location <-
        address %>% str_c(collapse = "")
      data <- data %>%
        mutate(
          location = glue::glue(location) %>% as.character() %>% str_to_upper() %>% gsub("\\s+", " ", .) %>% str_trim()
        )
    }

    data
  }


.add_name <-
  function(data) {
    add_name <-
      data %>% tibble::has_name("nameFirst") &
      data %>% tibble::has_name("nameLast") &
      !data %>% tibble::has_name("namePerson")

    if (add_name) {
      data <-
        data %>%
        unite(namePerson,
              nameFirst,
              nameLast,
              sep = " ",
              remove = F)
    }

    data
  }

.munge_data_sam_names <-
  function(data) {
    dict_names <- dictionary_sam_names()
    sam_names <-
      names(data)

    actual_names <-
      sam_names %>%
      map_chr(function(name) {
        df_row <-
          dict_names %>% filter(nameSAM == name)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {name}") %>% message()
          return(name)
        }

        df_row$nameActual
      })

    data %>%
      set_names(actual_names)
  }





# parseers ----------------------------------------------------------------

.parse_far_responses <-
  function(data) {
    slugFAR <- data$id

    answers <- data$answers

    df_row_names <-
      seq_along(answers) %>%
      map_df(function(x) {
        x %>% message()
        rows <- answers[[x]]
        if (length(rows) == 0) {
          return(tibble())
        }
        rows_name <- rows  %>% names()

        tibble(nameRow = rows_name) %>%
          mutate(idRow = x) %>%
          select(idRow, everything())
      })

    answer_names <-
      df_row_names %>% filter(nameRow != "section") %>% pull(nameRow) %>% unique()

    all_data <-
      answer_names %>%
      map(function(answer_name) {
        if (answer_name == "answerText") {
          skip_rows <-
            df_row_names %>%
            filter(nameRow %in% c("SamPointOfContact", "samFacility", "naics",
                                  "Software")) %>% pull(idRow)
          rows <-
            df_row_names %>%
            filter(!idRow %in% skip_rows) %>%
            pull(idRow) %>%
            unique()

          d <- answers[rows]

          all_data <-
            seq_along(d) %>%
            map_df(function(x) {
              df_row <-
                d[[x]] %>% as_tibble() %>% select(-matches("naics"))
              df_row <-
                df_row %>%
                .munge_data_sam_names()

              if (df_row %>% tibble::has_name("textAnswer")) {
                df_row <-
                  df_row %>%
                  mutate(
                    isResponseTRUE = case_when(
                      textAnswer == "No" ~ FALSE,
                      textAnswer == "Yes" ~ TRUE,
                      TRUE ~ NA
                    )
                  )
              }
              df_row
            })

          all_data <-
            all_data %>%
            left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
            select(slugFAR, descriptionFAR, everything()) %>%
            nest(.key = "dataFARAnswers")

          return(all_data)
        }

        rows <-
          df_row_names %>%
          filter(nameRow == answer_name) %>%
          pull(idRow) %>%
          unique()

        if (length(rows) == 0) {
          return(tibble())
        }
        d <- answers[rows]
        if (length(d) == 0) {
          return(tibble())
        }

        if (answer_name == "Software") {
          all_data <-
            seq_along(d) %>%
            map_df(function(x) {
              df_row <- d[[x]]
              data_table <-
                df_row[names(df_row) %in% c("section", "answerText")] %>%
                as_tibble() %>%
                .munge_data_sam_names() %>%
                .munge_data() %>%
                left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
                select(slugFAR, descriptionFAR, everything()) %>%
                filter(!is.na(textAnswer))

              data_software <-
                df_row$Software %>%
                discard(is_null) %>%
                flatten_df() %>%
                as_tibble()

              if (length(data_software) > 0) {
                data_software <-
                  data_software %>%
                  set_names(c("nameSoftware", "idSoftware", "typeProduct")) %>%
                  .munge_data()
                data_table <-
                  data_table %>%
                  mutate(dataSoftware = list(data_software))
              }

              data_table
            }) %>%
            nest(.key = "dataFARSoftware")
          return(all_data)
        }

        if (answer_name == "naics") {
          all_data <-
            seq_along(d) %>%
            map_df(function(x) {
              df_row <- d[[x]]
              data_table <-
                df_row[names(df_row) %in% c("section", "answerText")] %>%
                as_tibble() %>%
                .munge_data_sam_names() %>%
                .munge_data()

              data_table

              data_naics <-
                df_row$naics %>%
                discard(is_null) %>%
                flatten_df() %>%
                as_tibble() %>%
                .munge_data_sam_names() %>%
                .munge_data()

              tibble(
                name = c("answer", "naics"),
                data = list(data_table, data_naics)
              ) %>%
                mutate(number = x)

            })

          tables <- all_data$name %>% unique()
          all_data <-
            tables %>%
            map(function(table) {
              table_name <- case_when(table == "answer" ~ "dataFARNAICSAnswers",
                                      TRUE ~ "dataFARNAICS")

              df_row <-
                all_data %>%
                filter(name == table) %>%
                select(data) %>%
                unnest()

              if (table == "answer") {
                df_row <-
                  df_row %>%
                  mutate(
                    isResponseTRUE = case_when(
                      textAnswer == "No" ~ FALSE,
                      textAnswer == "Yes" ~ TRUE,
                      TRUE ~ NA
                    )
                  ) %>%
                  left_join(dictionary_sam_table("far"), by = "slugFAR")
              }


              df_row %>% nest(.key = UQ(table_name))
            }) %>%
            purrr::reduce(bind_cols)
          return(all_data)
        }

        if (answer_name == "SamPointOfContact") {
          d <- answers[[rows]]
          all_data <-
            d$SamPointOfContact %>%
            as_tibble() %>%
            mutate(section = d$section) %>%
            .munge_data_sam_names() %>%
            .munge_data() %>%
            left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
            select(slugFAR, descriptionFAR, everything()) %>%
            nest(.key = "dataContactSAM")
          return(all_data)
        }

        if (answer_name == "samFacility") {
          all_data <-
            seq_along(d) %>%
            map_df(function(x) {
              df_row <- d[[x]]
              facility <-
                df_row$samFacility %>% purrr::discard(is_null) %>% flatten_df()
              names(facility) <-
                names(facility) %>% str_remove_all("\\.")
              df_row <-
                df_row[, 1:2] %>%
                .munge_data_sam_names() %>%
                bind_cols(facility %>%
                            .munge_data_sam_names() %>%
                            .munge_data(clean_address = F)) %>%
                as_tibble()
              df_row
            }) %>%
            mutate(isResponseTRUE = case_when(
              textAnswer == "No" ~ FALSE,
              textAnswer == "Yes" ~ TRUE,
              TRUE ~ NA
            )) %>%
            left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
            select(slugFAR, descriptionFAR, everything())

          all_data <-
            all_data %>%
            nest(.key = "dataFacilitySAM")
          return(all_data)
        }

      }) %>%
      purrr::reduce(bind_cols)

    all_data
  }

.parse_dfar <-
  function(data) {
    if (names(data) %>% str_count("answers.answerText") %>% sum(na.rm = T) > 0) {
      data <-
        data %>% as_tibble()

      names(data) <- names(data) %>% str_remove_all("answers.")

      if (data %>% tibble::has_name("id")) {
        data <-
          data %>%
          select(-one_of("section"))
        data <-
          data %>%
          rename(slugFAR = id)
      }



      data <-
        data %>%
        .munge_data_sam_names() %>%
        .munge_data()

      data <-
        data %>%
        mutate(isResponseTRUE = case_when(textAnswer == "No" ~ FALSE,
                                          textAnswer == "Yes" ~ TRUE,
                                          TRUE ~ NA)) %>%
        nest(.key = "dataDFARAnswers")

      return(data)
    }

    if ((names(data) == "answers") %>% sum(na.rm = T) == 0) {
      return(tibble())
    }

    answers <- data$answers

    all_data <-
      seq_along(answers) %>%
      map_df(function(x) {
        answers[[x]] %>% as_tibble() %>%
          .munge_data_sam_names() %>%
          .munge_data(clean_address = F)
      }) %>%
      mutate(isResponseTRUE = case_when(textAnswer == "No" ~ FALSE,
                                        textAnswer == "Yes" ~ TRUE,
                                        TRUE ~ NA)) %>%
      nest(.key = "dataDFARAnswers")

    all_data
  }

.parse_certification_data <-
  function(data) {
    data <-
      data[names(data) %in%  c("farResponses", "dfarResponses")]
    has_far <-
      (names(data) == "farResponses") %>% sum(na.rm = T) > 0
    has_dfar <-
      (names(data) == "dfarResponses") %>% sum(na.rm = T) > 0

    if (sum(as.numeric(has_far), as.numeric(has_dfar)) == 0) {
      return(tibble())
    }

    if (has_far) {
      df_far <-
        data[["farResponses"]] %>% .parse_far_responses()
      df <- tibble(dataFAR = list(df_far))
    }

    if (has_dfar) {
      df_dfar <-
        data[["dfarResponses"]] %>% .parse_dfar()
      if ('df' %>% exists()) {
        df <- df %>% mutate(dataDFAR = list(df_dfar))
      } else {
        df <-
          tibble(dataDFAR = list(df_dfar))
      }
    }
    df
  }


# utils -------------------------------------------------------------------
.pad_dun <-
  function(duns = 81267103,
           zero_base = 13) {
    inital_chars <- nchar(duns)
    first_zero <- 9 - nchar(duns)

    if (first_zero > 0) {
      first_zero <- rep(0, first_zero) %>% str_c(collapse = "")
      duns <- glue::glue("{first_zero}{duns}") %>% as.character()
    }

    zero_count <-
      zero_base - nchar(duns)
    zeros <- rep(0, zero_count) %>% str_c(collapse = "")

    if (length(zeros) == 0) {
      slug_duns <-
        glue::glue("{duns}") %>% as.character()
    } else {
      slug_duns <-
        glue::glue("{duns}{zeros}") %>% as.character()
    }

    tibble(idDUNS = as.integer(duns), slugDUNS = slug_duns)
  }

.pad_duns <-
  function(duns = 81267103,
           zero_base = 13) {
    duns %>%
      map_dfr(function(x){
        .pad_dun(duns = x, zero_base = zero_base)
      })

  }

#' Pad DUNS
#'
#' Pad vector of duns for SAM API
#'
#' @param duns vector of duns numbers
#'
#' @return
#' @export
#'
#' @examples
#' pad_duns()
pad_duns <-
  function(duns = 608176715) {
    duns %>%
      map_dfr(function(duns) {
        .pad_duns(duns = duns)
      })
  }

#' SAM column dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_names()
dictionary_sam_names <-
  function() {
    tibble(
      nameSAM = c(
        "lastUpdateDate",
        "businessStartDate",
        "congressionalDistrict",
        "corporateUrl",
        "corporateStructureCode",
        "dunsPlus4",
        "debtSubjectToOffset",
        "cage",
        "fiscalYearEndCloseDate",
        "publicDisplay",
        "registrationDate",
        "expirationDate",
        "purposeOfRegistration",
        "submissionDate",
        "creditCardUsage",
        "countryOfIncorporation",
        "businessTypes",
        "corporateStructureName",
        "statusMessage",
        "stateOfIncorporation",
        "hasKnownExclusion",
        "legalBusinessName",
        "duns",
        "activationDate",
        "status",
        "naicsName",
        "isPrimary",
        "naicsCode",
        "zip",
        "city",
        "countryCode",
        "stateorProvince",
        "line2",
        "line1",
        "zipPlus4",
        "companyDivision",
        "doingBusinessAsName",
        "idDUNS",
        "pscName",
        "pscCode",
        "firstName",
        "lastName",
        "title",
        "fax",
        "usPhone",
        "email",
        "usPhoneExt",
        "discId",
        "discDefinition",
        "businessTypeCode",
        "businessTypeName",
        "purposeCode",
        "purposeName",
        "farId",
        "farDescription",
        "countryCode ",
        "countryName",
        "expID",
        "expDescription",
        "revCode",
        "revDescription",
        "answerText",
        "section",
        "ownerName",
        "ownerAddresszip",
        "ownerAddressstateOrProvince",
        "ownerAddresscity",
        "ownerAddresscountryCode",
        "ownerAddressline1",
        "plantAddresszip",
        "plantAddressstateOrProvince",
        "plantAddresscity",
        "plantAddresscountryCode",
        "plantAddressline1",
        "ExcpCounter",
        "isSmallBusiness",
        "duns_plus4",
        "samAddress.zip",
        "samAddress.stateOrProvince",
        "samAddress.city",
        "samAddress.countryCode",
        "samAddress.zip4",
        "samAddress.line1",
        "stateOrProvince",
        "zip4",
        "slugFAR",
        "financialAssistanceResponse",
        "divisionNumber",
        "middleInitial",
        "ownerAddressline2",
        "plantAddressline2",
        "EndProduct.countryCode", "EndProduct.name",
        "Company.name",
        "Company.tin",
        "immediateOwnerCage.legalBusinessName",
        "immediateOwnerCage.cageCode",
        "immediateOwnerCage.hasOwner",
        "state",
        "ncage",
        "sensitivity",
        "dbaName",
        "activeDate",
        "businessTypeCounter",
        "businessType",
        "corporateURL",
        "stateProvision",
        "cageCode",
        "correspondenceFlag",
        "delinquentFedDebtFlag",
        "entityStructure",
        "noPublicDisplayFlag",
        "recentPredecessorCageCode",
        "recentPredecessorBusName",
        "secondRecentPredecessorCageCode",
        "secondRecentPredecessorBusName",
        "thirdRecentPredecessorCageCode",
        "thirdRecentPredecessorBusName",
        "immedOwnerPredecessorCageCode",
        "immedOwnerRecentPredecessorBusName",
        "highestOwnerPredecessorCageCode",
        "highestOwnerRecentPredecessorBusName",
        "filterName",
        "profitStructure",
        "organizationStructure",
        "entityType",
        "dodaac",
        "exclusionStatusFlag",
        "samAddress.address1",
        "samAddress.address2",
        "samAddress.addressCity",
        "samAddress.addressState",
        "samAddress.addressZip",
        "samAddress.addressZipPlus4",
        "samAddress.country",
        "mailAddress.address1",
        "mailAddress.address2",
        "mailAddress.addressCity",
        "mailAddress.addressState",
        "mailAddress.addressZip",
        "mailAddress.addressZipPlus4",
        "mailAddress.country",
        "primaryNaics",
        "registryFlag",
        "geographicalAreaServed",
        "averageNumberOfEmployees",
        "linkForFARReport",
        "linkForDFARSReport",
        "pocType",
        "pocFirstName",
        "pocMiddleName",
        "pocLastName",
        "pocTitle",
        "pocUSPhone",
        "pocUSPhoneExt",
        "pocNonUSPhone",
        "pocFax",
        "pocEmail",
        "pocAddress.address1",
        "pocAddress.address2",
        "pocAddress.addressCity",
        "pocAddress.addressState",
        "pocAddress.addressZip",
        "pocAddress.addressZipPlus4",
        "pocAddress.country",
        "pscList",
        "orgKey", "a11TacCode", "agencyName", "categoryDesc", "categoryId",
        "cfdaBur", "cfdaCode", "cfdaOmb", "createdDate", "description",
        "fpdsCode", "fpdsOrgId", "cgac", "fullParentPath", "fullParentPathName",
        "isSourceCfda", "isSourceCwCfda", "isSourceFpds", "lastModifiedBy",
        "lastModifiedDate", "modStatus", "name", "ombAgencyCode", "orgCode",
        "shortName", "l1ShortName", "sourceCfdaPk", "startDate", "summary",
        "tas2Code", "tas3Code", "level", "logoUrl", "code", "sendEmail",
        "l1OrgKey", "l1Name", "createdBy", "endDate", "ingestedOn", "sourceParentCfdaPk",
        "ediInformationFlag",
        "version",
        "pages",
        "encrypted",
        "linearized",
        "Author",
        "Creator",
        "Producer",
        "created",
        "modified",
        "metadata",
        "locked",
        "attachments",
        "layout",
        "Comments", "Company", "Keywords", "SourceModified", "Subject",
        "Title",
        "LastSaved",
        "Created"
      ),
      nameActual = c(
        "datetimeLastUpdated",
        "dateBusinessStart",
        "slugCongressionalDistrict",
        "urlCompany",
        "slugCorporateStructure",
        "slugDUNSPlus4",
        "isDebtSubjectToOffset",
        "slugCAGE",
        "slugFiscalYearEnd",
        "isPublicDisplay",
        "datetimeRegistration",
        "datetimeExpiration",
        "typeRegistration",
        "datetimeSubmission",
        "hasCreditCardUsage",
        "countryIncorporation",
        "slugBusinessType",
        "typeCorporateStructure",
        "typeStatus",
        "stateIncorporation",
        "hasKnownExclusion",
        "nameCompanyLegal",
        "idDUNS",
        "datetimeActivated",
        "statusCompany",
        "nameNAICS",
        "isPrimaryNAICS",
        "idNAICS",
        "zipcode",
        "city",
        "codeCountry",
        "state",
        "addressStreet1",
        "addressStreet2",
        "zipPlus4",
        "nameCompanyDivision",
        "nameCompanyDBA",
        "idDUNS",
        "nameProductService",
        "codeProductService",
        "nameFirst",
        "nameLast",
        "title",
        "fax",
        "telephone",
        "email",
        "telephoneExtension",
        "idDiscipline",
        "descriptionDiscipline",
        "slugBusinessType",
        "typeBusiness",
        "slugPurpose",
        "typePurpose",
        "slugFAR",
        "descriptionFAR",
        "codeCountry",
        "nameCountry",
        "codeExperience",
        "typeExperience",
        "idRevenue",
        "typeRevenue",
        "textAnswer",
        "slugFAR",
        "nameOwner",
        "zipcodeOwner",
        "stateOwner",
        "cityOwner",
        "codeCountryOwner",
        "addressStreet1Owner",
        "zipcodePlant",
        "statePlant",
        "cityPlant",
        "codeCountyPlant",
        "addressStreet1Plant",
        "countException",
        "isSmallBusiness",
        "dunsSlug",
        "zipcodeCompany",
        "stateCompany",
        "cityCompany",
        "codeCountryCompany",
        "zipcode4Company",
        "addressStreet1Company",
        "state",
        "zipcode4",
        "slugFAR",
        "hasFinancialAssistanceResponse",
        "numberDivision",
        "initialMiddle",
        "addressStreet2Owner",
        "addressStreet2Plant",
        "codeCountryProduct",
        "idNameEndProduct",
        "nameCompany",
        "tinCompany",
        "nameCompanyLegalCAGEOwner",
        "codeCAGEOwner",
        "hasCAGEOwner",
        "state",
        "slugCAGE",
        "typeSensitivity",
        "nameCompanyDBA",
        "dateActivated",
        "typeBusinessCounter",
        "typeEntityStructure",
        "urlCompany",
        "nameState",
        "slugCAGE",
        "hasCorrespondenceFlag",
        "hasDelinquentFedDebtFlag",
        "typeCorporateStructure",
        "hasNoPublicDisplayFlag",
        "slugCAGEPrior",
        "nameCompanyPrior",
        "slugCAGEPrior2",
        "nameCompanyPrior2",
        "slugCAGEPrior3",
        "nameCompanyPrior3",
        "slugCAGEImmedOwner",
        "nameCompanyPriorImmedOwner",
        "slugCAGEPriorHighest",
        "nameCompanyPriorHighest",
        "nameFilter",
        "typeProfitStructure",
        "typeOrganizationStructure",
        "typeEntity",
        "slugDeptDefenseAddressCode",
        "hasExclusionStatusFlag",
        "addressStreet1SAM",
        "addressStreet2SAM",
        "citySAM",
        "stateSAM",
        "zipcodeSAM",
        "zip4SAM",
        "countrySAM",
        "addressStreet1Mail",
        "addressStreet2Mail",
        "cityMail",
        "state4Mail",
        "zipcodeMail",
        "zip4Mail",
        "countryMail",
        "idNAICSPrimary",
        "hasRegistry",
        "areasGeographicalServed",
        "countEmployeesAverage",
        "urlFAR",
        "urlDFAR",
        "typePOC",
        "nameFirstPOC",
        "nameMiddlePOC",
        "nameLastPOC",
        "titlePOC",
        "phoneUSPOC",
        "phoneUSPOCExt",
        "phoneNonUSPOC",
        "faxPOC",
        "emailPOC",
        "addressStreet1POC",
        "addressStreet2POC",
        "cityPOC",
        "statePOC",
        "zipcodePOC",
        "zp4POC",
        "countryPOC",
        "pscList",

        "idSAM", "idA11TAC", "nameAgencyRemove", "typeCategory", "slugCategory",
        "idCFDABUR", "idCFDA", "idOMB", "datetimeCreated", "descriptionAgency",
        "codeAgency", "codeOrganization", "codeGovernmentAccuntingSystem", "idParentPath", "nameAgencyRemove2",
        "isCFDA", "isCWCDFDA", "isFPDS", "personLastModified",
        "datetimeLastUpdated", "status", "nameAgency", "idAgencyOMB", "codeOrganizationOther",
        "slugAgency", "slugAgencyOther", "keyCFDAP", "datetimeStarted", "summaryAgency",
        "idTAS2", "idTAS3", "idLevel", "urlLogo", "codeAgencyOther", "hasDSP",
        "idSAML1", "nameAgencyL1", "entryCreatedBy", "datetimeEnd", "datetimeIngested", "keyCFDAPOther",
        "hasEDIInformationFlag",
        "idVersion",
        "countPages",
        "isEncrypted",
        "isLinearized",
        "nameAuthor",
        "nameCreator",
        "typeProducer",
        "dateCreated",
        "dateModified",
        "xmlMetadata",
        "isLocked",
        "hasAttachments",
        "typeLayout",
        "descriptionComments", "companyLicense", "keywordLicense", "sourceModified", "subjectLicense",
        "titleFile",
        "codeLastSaved",
        "codeCreated"
      )

    )
  }

#' SAM Table dictionary
#'
#' @return
#' @export
#'
#' @examples
dictionary_sam_table_urls <-
  function() {
    tibble(
      nameDictionary = c(
        "discipline",
        "business type",
        "purpose",
        "far",
        "countries",
        "experience",
        "revenue"
      ),
      urlJSON = c(
        "http://gsa.github.io/sam_api/static/discipline.json",
        "http://gsa.github.io/sam_api/static/businessTypes.json",
        "http://gsa.github.io/sam_api/static/purpose.json",
        "http://gsa.github.io/sam_api/static/far.json",
        "http://gsa.github.io/sam_api/static/country.json",
        "http://gsa.github.io/sam_api/static/experience.json",
        "http://gsa.github.io/sam_api/static/revenue.json"
      )
    )
  }


#' SAM Dictionary
#'
#' Provides dictionary for a SAM table
#'
#' @param table \itemize{
#' \item discipline
#' \item business type
#' \item purpose
#' \item far
#' \item countries
#' \item experience
#' \item revenue
#' }
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_sam_table(table = "discipline")
dictionary_sam_table <-
  function(table = "revenue") {
    if (length(table) == 0) {
      "Please provide a table" %>% message()
      return(invisible())
    }

    df_urls <- dictionary_sam_table_urls()
    table_slug <- table %>% str_to_lower()

    df_row <- df_urls %>% filter(nameDictionary == table)

    if (nrow(df_row) == 0) {
      glue::glue("No matching dictionary for {table}") %>% message()
      return(tibble())
    }
    table <- df_row$nameDictionary
    url <- df_row$urlJSON
    data <-
      url %>%
      fromJSON(simplifyDataFrame = T) %>%
      as_tibble() %>%
      mutate_if(is.character,
                list(function(x) {
                  ifelse(x == "", NA_character_, x) %>% str_trim()
                }))

    if (table == "far") {
      data <-
        data %>%
        fill(farDescription, .direction = "down")
    }

    data <- data %>% .munge_data_sam_names()

    if (table == "revenue") {
      data <-
        data %>%
        left_join(tibble(
          idRevenue = 1:10,
          amountRevenueMin = c(
            0,
            100000,
            250000,
            500000,
            1000000,
            2000000,
            5000000,
            10000000,
            25000000,
            50000000
          ),
          amountRevenueMax = c(
            99999,
            249999,
            499999,
            999999,
            1999999,
            4999999,
            9999999,
            24999999,
            49999999,
            NA_integer_
          )
        ),
        by = 'idRevenue') %>%
        .munge_data()
    }

    data
  }





# duns --------------------------------------------------------------------

#' Generate SAM DUNS API Calls
#'
#' @param duns vector of DUNS
#' @param base base call
#' @param version version - defaults to 3
#' @param api_key api key, defaults to demo_key
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
sam_api_urls <-
  function(duns = c(867393167, 608176715),
           base = "https://api.data.gov/sam/",
           version = 8,
           api_key = "DEMO_KEY") {
    options(scpen = 9999)
    df_duns <- pad_duns(duns = duns)
    slug <- df_duns$slugDUNS
    url <-
      glue::glue("{base}v{version}/registrations/{slug}?api_key={api_key}") %>% as.character()

    df_duns <-
      df_duns %>%
      mutate(urlAPI = url)

    df_duns
  }

.parse_sam_duns_url <-
  function(url = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY") {
    json_data <-
      fromJSON(url, flatten = T, simplifyDataFrame = T)

    data <- json_data$sam_data$registration

    df_classes <-
      data %>%
      map_df(class) %>%
      gather(column, class)

    base_cols <-
      df_classes %>% filter(!class %>% str_detect("data.frame|list|NULL")) %>% pull(column)

    list_cols <-
      df_classes %>% filter(class %>% str_detect("data.frame|list")) %>% pull(column)

    df_base <-
      data[names(data) %in% base_cols]

    df_base <-
      df_base %>% flatten_df()

    column_ids <- tibble(name = names(df_base)) %>%
      mutate(idRow = 1:n()) %>%
      filter(!name == "") %>%
      pull(idRow)

    duns <- df_base$duns %>% as.numeric()

    df_base <-
      df_base[, column_ids] %>%
      as_tibble() %>%
      mutate(duns = as.numeric(duns)) %>%
      as_tibble()


    df_base <-
      df_base %>%
      .munge_data_sam_names() %>%
      .munge_data()

    df_list <-
      list_cols %>%
      map(function(column) {
        column %>% message()
        df <- data[[column]]


        if (length(df) == 0) {
          return(invisible())
        }

        if (column == "disasterRelief") {
          df <- df[["geographicalAreas"]]
          if (length(df) == 0) {
            return(NULL)
          }
          df <- df %>% .munge_data_sam_names()
          df <- df %>%
            mutate(idDUNS = duns) %>%
            as_tibble() %>%
            nest(-idDUNS, .key = "dataDisasterRelief")
          return(df)
        }

        if (column == "qualifications") {
          df <- df[["acass"]][["answers"]]
          if (length(df) == 0) {
            return(NULL)
          }
          df <-
            df %>%
            .munge_data_sam_names() %>%
            as_tibble()

          if (df %>% tibble::has_name("slugFAR")) {
            df <-
              df %>% rename(slugSection = slugFAR)
          }
          df <-
            df %>% mutate(idDUNS = duns) %>%
            nest(-idDUNS, .key = "dataQualifications")

          return(df)
        }

        if (column == "certificationsURL") {
          pdf_url <- df %>% purrr::flatten() %>% as.character()
          if (length(pdf_url) > 0) {
            df_base <<-
              df_base %>%
              mutate(urlCertificationPDF = pdf_url)
          }
          return(invisible())
        }

        if (column == "certifications") {
          .parse_certification_data_safe <-
            possibly(.parse_certification_data, tibble())

          df <-
            df %>% .parse_certification_data_safe()

          if (nrow(df) == 0) {
            df <- tibble(idDUNS = duns)
          } else {
            df <- df %>%
              mutate(idDUNS = duns) %>%
              select(idDUNS, everything())
          }
          return(df)
        }

        col_class <- class(df)
        substr(column, 1, 1) <-
          str_to_upper(substr(column, 1, 1))
        column_name <-
          case_when(
            column == "Naics" ~ "dataNAICS",
            column == "PscCodes" ~ "dataPSC",
            column == "PastPerformancePoc" ~ "dataContactPersonPastPerformance",
            column == "AltPastPerformancePoc" ~ "dataContactPersonPastPerformanceOther",
            column == "ElectronicBusinessPoc" ~ "dataContactPersonElectronic",
            column == "AltElectronicBusinessPoc" ~ "dataContactPersonElectronicOther",
            column == "BondingInformation" ~ "dataBonding",
            column == "GovtBusinessPoc" ~ "dataContactPersonGovernment",
            column == "AltGovtBusinessPoc" ~ "dataContactPersonGovernmentOther",
            TRUE ~ str_c("data", column, collapse = "")
          )

        if (col_class %>% str_detect("data")) {
          df <-
            df %>% as_tibble() %>% mutate(idDUNS = duns) %>%
            select(idDUNS, everything()) %>%
            unique()

          df <-
            df %>%
            .munge_data_sam_names() %>%
            .munge_data()

          df <-
            df %>%
            nest(-idDUNS, .key = UQ(column_name))

          return(df)
        }

        df <-
          df %>%
          flatten_df()

        df <-
          df %>%
          .munge_data_sam_names()

        df <-
          df %>%
          mutate(idDUNS  = duns) %>%
          nest(-idDUNS, .key = UQ(column_name))
      }) %>%
      discard(function(x) {
        length(x) == 0
      }) %>%
      reduce(left_join) %>%
      suppressMessages()

    df_base <-
      df_base %>%
      left_join(df_list, by = "idDUNS") %>%
      mutate(urlAPI = url) %>%
      dplyr::select(one_of(
        c(
          "idDUNS",
          "nameCompanyDBA",
          "nameCompanyLegal" ,
          "nameCompanyDivision"
        )
      ), everything()) %>%
      suppressWarnings() %>%
      mutate_if(is.character, str_trim)

    df_base
  }

#' Parse same DUNS urls
#'
#' @param urls vector of URLS
#' @param sleep_time if not \code{NULL} sleeptime between API calls
#' @param return_message if \code{TRUE} return message
#'
#' @return
#' @export
#'
#' @examples
#'parse_sam_duns_urls(urls = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY")
parse_sam_duns_urls <-
  function(urls = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY",
           sleep_time = NULL,
           return_message = T) {
    df <-
      tibble()

    success <- function(res) {
      url <-
        res$url

      if (return_message) {
        glue::glue("Parsing {url}") %>%
          message()
      }
      .parse_sam_duns_url_safe <-
        purrr::possibly(.parse_sam_duns_url, tibble())

      all_data <-
        .parse_sam_duns_url(url = url)

      if (length(sleep_time) > 0) {
        Sys.sleep(time = sleep_time)
      }


      df <<-
        df %>%
        bind_rows(all_data)
    }
    failure <- function(msg) {
      tibble()
    }
    urls %>%
      map(function(x) {
        curl_fetch_multi(url = x, success, failure)
      })
    multi_run()
    df
  }

#' SAM search by DUNS
#'
#' @param duns vector of DUNS numbers
#' @param sleep_time if \code{}
#' @param api_key data.gov API key, defaults to public demo key
#' @param return_message if \code{TRUE} returns message
#'
#' @return a \code{tibble}
#' @export
#'
#' @examples
#' sam_duns(duns = 826857757,)
sam_duns <-
  function(duns = NULL,
           sleep_time = NULL,
           api_key = "DEMO_KEY",
           return_message = T) {
    if (length(duns) == 0) {
      stop("Enter DUNS")
    }
    options(scipen = 9999)
    df_urls <- sam_api_urls(duns = duns, api_key = api_key)

    urls <- df_urls$urlAPI
    .parse_sam_duns_url_safe <-
      possibly(.parse_sam_duns_url, tibble())

    all_data <-
      urls %>%
      map_dfr(function(url) {
        if (return_message) {
          glue::glue("Parsing {url}") %>% message()
        }
        data <- .parse_sam_duns_url_safe(url = url)
        if (length(sleep_time) > 0) {
          Sys.sleep(time = sleep_time)
        }
        data
      }) %>%
      suppressWarnings()

    if (nrow(all_data) == 0) {
      "No matches" %>% message()
      return(invisible())
    }

    all_data <-
      all_data %>%
      mutate(isActive = statusCompany == "ACTIVE") %>%
      select(
        one_of(
          "idDUNS",
          "nameCompanyLegal",
          "nameCompanyDBA",
          "nameCompanyDivision",
          "isActive",
          "statusCompany",
          "dateBusinessStart",
          "datetimeRegistration"
        ),
        everything()
      ) %>%
      suppressWarnings()

    if (all_data %>% tibble::has_name("isActive") &
        all_data %>% tibble::has_name("datetimeExpiration") &
        all_data %>% tibble::has_name("datetimeRegistration")) {
      all_data <-
        all_data %>%
        mutate(countGovernmentWorkDays = case_when(
          !isActive ~ (
            as.Date(datetimeExpiration) - as.Date(datetimeRegistration)
          ) %>% as.integer(),
          TRUE ~ NA_integer_
        ))
    }

    all_data <- all_data %>%
      mutate(idRow = 1:n())

    df_list <- all_data %>%
      select(idDUNS, matches("data")) %>%
      transmute_if(is.list,
                   .funs = list(function(x) {
                     x %>% map_dbl(length) > 0
                   })) %>%
      mutate(idRow = 1:n())

    names(df_list) <-
      names(df_list) %>% str_replace_all("^data", "has")

    all_data <-
      all_data %>%
      left_join(df_list, by = 'idRow') %>%
      select(-idRow)

    col_order <-
      c(all_data %>% select(-matches("data")) %>% names(),
        all_data %>% select(matches("data")) %>% names())



    all_data <-
      all_data %>%
      select(one_of(col_order))


    all_data
  }



# search ------------------------------------------------------------------

.generate_sam_search_url <-
  function(legal_name = NULL,
           dba = NULL,
           cage = NULL,
           duns = NULL,
           registration_status = NULL,
           disaster_response = NULL,
           city = NULL,
           country = NULL,
           state = NULL,
           zip = NULL,
           congressional_district = NULL,
           naics_sb = NULL,
           naics_any = NULL,
           registration_purpose = NULL,
           is_minority_owned = F,
           is_woman_owned = F,
           is_vet_owned = F,
           is_service_vet = F,
           is_8a = F,
           is_hubzone = F,
           is_ability_one = F,
           api_version = 3,
           api_key = "DEMO_KEY") {
    url_base <-
      glue::glue("https://api.data.gov/sam/v{api_version}/registrations?qterms=") %>% as.character()
    api_slug <- glue::glue("api_key={api_key}") %>% as.character()
    length_slug <- glue::glue("length=500000000")
    if (length(legal_name) > 0) {
      slugs <- legal_name %>% map_chr(URLencode)
      legal_names <- legal_name %>% str_c(collapse = " | ")
      slug_legal_name <-
        c("legalBusinessName:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_legal_name <- ""
      legal_names <- legal_name
    }

    if (length(dba) > 0) {
      slugs <- dba %>% map_chr(URLencode)
      dbas <- dba %>% str_c(collapse = " | ")
      slug_dba <-
        c("doingBusinessAs:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_dba <- ""
      dbas <- dba
    }

    if (length(cage) > 0) {
      slugs <- cage %>% map_chr(URLencode)
      cages <- cage %>% str_c(collapse = " | ")
      slug_cage <-
        c("cage:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_cage <- ""
      cages <- cage
    }

    if (length(duns) > 0) {
      slugs <-
        duns %>% map_chr(URLencode)
      duns_nos <- duns %>% str_c(collapse = " | ")
      slug_duns <-
        c("duns:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_duns <- ""
      duns_nos <- duns
    }

    if (length(city) > 0) {
      slugs <- city %>% map_chr(URLencode)
      cities <- str_c(city, collapse  = " | ")
      slug_city <-
        c("samAddress.city:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_city <- ""
      cities <- city
    }
    if (length(country) > 0) {
      slugs <- country %>% map_chr(URLencode)
      countries <- str_c(country, collapse = " | ")
      slug_country <-
        c("samAddress.country:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_country <- ""
      countries <- country
    }

    if (length(state) > 0) {
      slugs <- state %>% map_chr(URLencode)
      states <- states %>% str_c(sep = " | ")
      slug_state <-
        c("samAddress.stateOrProvince:(",
          str_c(slugs, collapse = ","),
          ")") %>% str_c(collapse = "")
    } else {
      slug_state <- ""
      states <- state
    }

    if (length(zip) > 0) {
      slugs <- zip %>% as.character() %>% map_chr(URLencode)
      zips <- str_c(zip, collapse = " | ")
      slug_zip <-
        c("samAddress.zip:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_zip <- ""
      zips <- zip
    }

    if (length(congressional_district) > 0) {
      slugs <- congressional_district %>% map_chr(URLencode)
      districts <- str_c(congressional_district, collapse = " | ")
      slug_congressional_district <-
        c("congressionalDistrict:(",
          str_c(slugs, collapse = ","),
          ")") %>% str_c(collapse = "")
    } else {
      slug_congressional_district <- ""
      districts <- congressional_district
    }

    if (length(naics_sb) > 0) {
      slugs <- naics_sb
      naics_sbs <- naics_sb %>% str_c(collapse = " | ")
      slug_naics_sb <-
        c("naicsLimitedSB:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_naics_sb <- ""
      naics_sbs <- naics_sb
    }

    if (length(naics_any) > 0) {
      slugs <- naics_any
      naics_anys <- str_c(naics_any, collapse = " | ")
      slug_naics_any <-
        c("naicsLimitedSB:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_naics_any <- ""
      naics_anys <- naics_any
    }

    if (length(registration_purpose) > 0) {
      slugs <- registration_purpose %>% map_chr(URLencode)
      purposes <- str_c(registration_purpose, collapse = " | ")
      slug_registration_purpose <-
        c("purpose:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_registration_purpose <- ""
      purposes <- registration_purpose
    }

    if (length(registration_status) > 0) {
      slugs <- registration_status %>% map_chr(URLencode)
      status <- str_c(registration_status, collapse =  " | ")
      slug_registration_status <-
        c("registrationStatus:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_registration_status <- ""
      status <- registration_status
    }

    if (length(disaster_response) > 0) {
      slugs <- disaster_response %>% map_chr(URLencode)
      responses <- str_c(disaster_response, collapse = " | ")
      slug_disaster_response <-
        c("disasterResponse:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
    } else {
      slug_disaster_response <- ""
      responses <- disaster_response
    }

    if (is_minority_owned) {
      slug_is_minority_owned <- "minorityOwned:TRUE"
    } else {
      slug_is_minority_owned <- ""
    }

    if (is_woman_owned) {
      slug_is_woman_owned <- "womanOwned:TRUE"
    } else {
      slug_is_woman_owned <- ""
    }

    if (is_vet_owned) {
      slug_is_vet_owned <- "veteranOwned:TRUE"
    } else {
      slug_is_vet_owned <- ""
    }

    if (is_service_vet) {
      slug_is_service_vet <- "serviceDisabledVeteranOwned:TRUE"
    } else {
      slug_is_service_vet <- ""
    }

    if (is_8a) {
      slug_is_8a <- "sba8AProgram:TRUE"
    } else {
      slug_is_8a <- ""
    }

    if (is_hubzone) {
      slug_is_hubzone <- "sbaHubzoneProgram:TRUE"
    } else {
      slug_is_hubzone <- ""
    }

    if (is_ability_one) {
      slug_is_ability_one <- "ability1:TRUE"
    } else {
      slug_is_ability_one <- ""
    }

    api_call <- c(
      slug_legal_name,
      slug_dba,
      slug_cage,
      slug_duns,
      slug_registration_status,
      slug_disaster_response,
      slug_city,
      slug_country,
      slug_state,
      slug_zip,
      slug_congressional_district,
      slug_naics_sb,
      slug_naics_any,
      slug_registration_purpose,
      slug_is_minority_owned,
      slug_is_woman_owned,
      slug_is_vet_owned,
      slug_is_service_vet,
      slug_is_8a,
      slug_is_ability_one,
      api_slug,
      length_slug
    ) %>%
      purrr::discard(function(x) {
        x == ""
      }) %>%
      str_c(collapse = "&")

    url <- glue::glue("{url_base}{api_call}") %>% as.character()

    search_term <- list(
      legal_name = legal_names,
      dba = dbas,
      cage = cages,
      duns = duns_nos,
      registration_status = status,
      disaster_response = responses,
      city = cities,
      country = countries,
      state = states,
      zip = zips,
      congressional_district = districts,
      naics_sb = naics_sbs,
      naics_any = naics_anys,
      registration_purpose = purposes,
      is_minority_owned = is_minority_owned,
      is_woman_owned = is_woman_owned,
      is_vet_owned = is_vet_owned,
      is_service_vet = is_service_vet,
      is_8a = is_8a,
      is_ability_one = is_ability_one
    ) %>%
      flatten_df() %>%
      gather(item, value) %>%
      filter(!value == "FALSE") %>%
      unite(item, item, value, sep = ": ") %>%
      pull(item) %>%
      str_c(collapse = " & ")

    tibble(termSearch = search_term, urlAPI = url)
  }


.parse_sam_search_url <-
  memoise::memoise(function(url = "https://api.data.gov/sam/v3/registrations?qterms=legalBusinessName:(JBG)&api_key=DEMO_KEY&length=500") {
    json_data <-
      url %>% fromJSON(simplifyDataFrame = T, flatten = T)
    data <- json_data$results
    names(data) <- names(data) %>% str_remove_all("samAddress.")
    data <- data[!names(data) %in% "links"] %>% as_tibble()
    pad_duns_safe <- possibly(pad_duns, tibble())
    df_duns <- data$duns %>% pad_duns_safe()

    data <-
      data %>%
      .munge_data_sam_names() %>%
      .munge_data(clean_address = F) %>%
      mutate(urlAPI = url) %>%
      mutate_if(is.character, str_trim)

    if (data %>% hasName("statusCompany")) {
      data <-
        data %>%
        mutate(isActiveSAM = case_when(
        statusCompany == "ACTIVE" ~ TRUE,
        TRUE ~ F
      ))
    }

    if (data %>% tibble::has_name("idDUNS")) {
      data <- data %>%
        mutate(idDUNS = as.numeric(idDUNS))
    }

    if (nrow(df_duns) > 0) {
      data <-
        data %>%
        left_join(df_duns, by = "idDUNS")
    }

    data
  })

#' Search SAM API by item
#'
#' @param legal_name
#' @param dba
#' @param cage
#' @param duns
#' @param registration_status
#' @param disaster_response
#' @param city
#' @param country
#' @param state
#' @param zip
#' @param congressional_district
#' @param naics_sb
#' @param naics_any
#' @param registration_purpose
#' @param is_minority_owned
#' @param is_woman_owned
#' @param is_vet_owned
#' @param is_service_vet
#' @param is_8a
#' @param is_hubzone
#' @param is_ability_one
#' @param api_version
#' @param api_key
#'
#' @return
#' @export
#'
#' @examples
#' library(tidyverse)
#' library(govtrackR)
#' df <-  sam_search(legal_name = c("JBG", "Booz", "Lockheed"))
#' df %>% glimpse()
#'
#' df %>% group_by(idDUNS) %>% summarise(count = n(), entities = str_c(nameCompanyLegal, collapse = " | ")) %>% arrange(desc(count))
#'dict_naics <- dictionary_naics_codes()
#'codes <-
#'dict_naics %>%
#'filter(nameNAICS %>% str_detect("ICE CREAM|COOKIE")) %>%
#'pull(idNAICS)
#'
#'df_cookies_ice_cream <- sam_search(naics_any = c(codes))
#'df_cookies_ice_cream %>%
#'count(statusCompany, state)
#'
#'
sam_search <-
  function(legal_name = NULL,
           dba = NULL,
           cage = NULL,
           duns = NULL,
           registration_status = NULL,
           disaster_response = NULL,
           city = NULL,
           country = NULL,
           state = NULL,
           zip = NULL,
           congressional_district = NULL,
           naics_sb = NULL,
           naics_any = NULL,
           registration_purpose = NULL,
           is_minority_owned = F,
           is_woman_owned = F,
           is_vet_owned = F,
           is_service_vet = F,
           is_8a = F,
           is_hubzone = F,
           is_ability_one = F,
           api_version = 3,
           api_key = "DEMO_KEY",
           return_message = T) {
    no_terms <-
      list(
        legal_name = legal_name[[1]],
        dba = dba[[1]],
        cage = cage[[1]],
        duns = duns[[1]],
        registration_status = registration_status[[1]],
        disaster_response = disaster_response[[1]],
        city = city[[1]],
        country = country[[1]],
        state = state[[1]],
        zip = zip[[1]],
        congressional_district = congressional_district[[1]],
        naics_sb = naics_sb[[1]],
        naics_any = naics_any[[1]],
        registration_purpose = registration_purpose[[1]]
      ) %>% flatten_df() %>% nrow() == 0

    if (no_terms) {
      stop("Enter search term") %>% message()
    }

    df_urls <-
      .generate_sam_search_url(
        legal_name = legal_name,
        dba = dba,
        cage = cage,
        duns = duns,
        registration_status = registration_status,
        disaster_response = disaster_response,
        city = city,
        country = country,
        state = state,
        zip = zip,
        congressional_district = congressional_district,
        naics_sb = naics_sb,
        naics_any = naics_any,
        registration_purpose = registration_purpose,
        is_minority_owned = is_minority_owned,
        is_woman_owned = is_woman_owned ,
        is_vet_owned = is_vet_owned,
        is_service_vet = is_service_vet,
        is_8a = is_8a,
        is_hubzone = is_hubzone,
        is_ability_one = is_ability_one ,
        api_version = api_version,
        api_key = api_key
      )

    .parse_sam_search_url_safe <-
      possibly(.parse_sam_search_url, tibble())

    all_data <-
      df_urls$urlAPI %>%
      map_dfr(function(url) {
        if (return_message) {
          glue("Parsing {url}") %>% message()
        }
        .parse_sam_search_url_safe(url = url)
      })

    if (nrow(all_data)  == 0) {
      "No results"
      return(invisible())
    }
    all_data <-
      all_data %>%
      left_join(df_urls, by = "urlAPI") %>%
      select(termSearch, idDUNS, nameCompanyLegal, everything())

    all_data
  }


# sam_beta ----------------------------------------------------------------

# http://beta.sam.gov

.parse_sam_duns_v2 <-
  function(url = "https://api.sam.gov/prod/entities/961539384?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1568729993777&sort=name") {
    json_data <-
      fromJSON(url, simplifyDataFrame = T, flatten = T)

    data <- json_data$entityInfo
    df_classes <-
      data %>%
      map_df(class) %>%
      gather(column, class)

    base_cols <-
      df_classes %>% filter(!class %in% "list") %>% pull(column)

    df_base <-
      data %>% select(base_cols) %>% as_tibble()

    names(df_base) <-
      names(df_base) %>% str_remove_all("repsAndCerts.|coreData.|generalInfo.|generalInfo.|assertions.")

    df_base <- df_base %>% .munge_data_sam_names()

    names(df_base) <-
      names(df_base) %>% str_replace_all("datetime", "date")

    df_base <-
      df_base %>%
      .munge_data(clean_address = F)

    df_base


    df_base <- df_base %>%
      unite(
        addressStreetSAM,
        addressStreet1SAM,
        addressStreet2SAM,
        sep = " ",
        remove = F
      ) %>%
      unite(cityStateSAM,
            citySAM,
            stateSAM,
            sep = ", ",
            remove = F) %>%
      unite(cityStateZip,
            cityStateSAM,
            zipcodeSAM,
            sep = " ",
            remove = F) %>%
      unite(locationSAM,
            addressStreetSAM,
            cityStateZip,
            sep = ", ")

    list_cols <-  df_classes %>% filter(class %in% "list")

    df_pocs <- data[["mandatoryPOCs"]][[1]] %>% as_tibble()

    if (length(df_pocs) > 0) {
      df_pocs <-
        df_pocs %>% .munge_data_sam_names() %>% .munge_data(clean_address = F)
      df_pocs <- df_pocs %>%
        mutate(namePersonPOC = str_c(nameFirstPOC, nameLastPOC, sep = " ")) %>%
        select(namePersonPOC, everything()) %>%
        unite(
          addressStreetPOC,
          addressStreet1POC,
          addressStreet2POC,
          sep = " ",
          remove = F
        ) %>%
        unite(cityStatePOC,
              cityPOC,
              statePOC,
              sep = ", ",
              remove = F) %>%
        unite(cityStateZip,
              cityStatePOC,
              zipcodePOC,
              sep = " ",
              remove = F) %>%
        unite(locationPOC,
              addressStreetPOC,
              cityStateZip,
              sep = ", ")

      df_base <-
        df_base %>%
        mutate(dataPOCMandatory = list(df_pocs))
    }

    df_pocs_other <-
      data[["optionalPOCs"]][[1]] %>% as_tibble()

    if (length(df_pocs_other) > 0) {
      df_pocs_other <-
        df_pocs_other %>% .munge_data_sam_names() %>% .munge_data(clean_address = F)
      df_pocs_other <- df_pocs_other %>%
        mutate(namePersonPOC = str_c(nameFirstPOC, nameLastPOC, sep = " ")) %>%
        select(namePersonPOC, everything()) %>%
        unite(
          addressStreetPOC,
          addressStreet1POC,
          addressStreet2POC,
          sep = " ",
          remove = F
        ) %>%
        unite(cityStatePOC,
              cityPOC,
              statePOC,
              sep = ", ",
              remove = F) %>%
        unite(cityStateZip,
              cityStatePOC,
              zipcodePOC,
              sep = " ",
              remove = F) %>%
        unite(locationPOC,
              addressStreetPOC,
              cityStateZip,
              sep = ", ")

      df_base <-
        df_base %>%
        mutate(dataPOCOptional = list(df_pocs_other))
    }

    df_exclusions <-
      data[["exclusionsList"]][[1]]

    if (length(df_exclusions) > 0) {
      df_base <- df_base %>%
        mutate(hasExclusions = T)
    }

    naics <- data[["assertions.naicsList"]][[1]]
    if (length(naics) > 0) {
      df_naics <- dictionary_naics_codes() %>% suppressMessages()
      df_naics_company <-
        df_naics %>% filter(idNAICS %in% as.integer(naics))
      df_base <-
        df_base %>% left_join(df_naics %>% select("idNAICSPrimary" = idNAICS, nameNAICSPrimary = nameNAICS),
                              by = "idNAICSPrimary")
      df_base <- df_base %>%
        mutate(dataNAICS = list(df_naics_company))
    }


    psc <-
      data[["assertions.pscList"]][[1]]

    if (length(psc) > 0) {
      df_psc <-
        dictionary_psc_active() %>% filter(codeProductService %in% psc) %>% suppressWarnings() %>% suppressMessages()
      df_base <- df_base %>% mutate(dataPSC = list(df_psc))
    }

    df_base <-
      df_base %>%
      mutate(urlAPI = url)

    df_base <-
      df_base %>%
      .remove_na()

    df_base
  }


.generate_sam_v2_urls <-
  function(base_url = "https://api.sam.gov/prod/entities", duns = 961539384, api_key = NULL) {

    if (length(api_key) == 0) {
      api_key <- "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii"
    }
    duns_slug <- .pad_duns(duns = duns, zero_base = 9) %>% pull(slugDUNS)

    url <- glue("{base_url}/{duns_slug}?api_key={api_key}") %>% as.character()
    url
  }

#' SAM data by DUNS
#'
#' Acquires data from SAM V2 API for
#' vector of DUNS
#'
#' @param duns numeric vector of DUNS numbers
#' @param api_key if not \code{NULL} api key
#' @param return_message if \code{TRUE} returns mesage
#'
#' @return
#' @export
#'
#' @examples
#' sam_duns_v2(175311393)
sam_duns_v2 <-
  function(duns = NULL, api_key = NULL, return_message = T) {

    if (length(duns) == 0) {
      stop("Please enter DUNS")
    }

    urls <-
      duns %>%
      map_chr(function(x){
        .generate_sam_v2_urls(duns = x, api_key = api_key)
      }) %>%
      unique()
    .parse_sam_duns_v2_safe <- possibly(.parse_sam_duns_v2, tibble())

    all_data <-
      urls %>%
      map_dfr(function(url){
        if (return_message) {
          glue("Parsing {url}") %>% message()
        }
        .parse_sam_duns_v2_safe(url = url)    %>% suppressMessages() %>%
          suppressWarnings()
      })


    if (all_data %>% tibble::has_name("dateExpiration") &
        all_data %>% tibble::has_name("dateRegistration")) {
      all_data <-
        all_data %>%
        mutate(countGovernmentWorkDays = (dateExpiration - dateRegistration) %>% as.integer())
    }
    all_data <-
      all_data %>%
      mutate(idRow = 1:n())

    df_list <-
      all_data %>%
      select(idDUNS, matches("data")) %>%
      transmute_if(is.list,
                   .funs = list(function(x) {
                     x %>% map_dbl(length) > 0
                   })) %>%
      mutate(idRow = 1:n())

    names(df_list) <-
      names(df_list) %>% str_replace_all("^data", "has")

    all_data <-
      all_data %>%
      left_join(df_list, by = 'idRow') %>%
      select(-idRow)

    col_order <-
      c(all_data %>% select(-matches("data")) %>% names(),
        all_data %>% select(matches("data")) %>% names())

    all_data

  }



#' SAM government agencies
#'
#' SAM summary of
#' government agencies.
#'
#' @return
#' @export
#'
#' @examples
#' sam_agencies()
sam_agencies <- function() {

  data <- "https://api.sam.gov/prod/federalorganizations/v1/organizations/departments/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1568828547464" %>%
    fromJSON(simplifyDataFrame = T)
  data <- data[[1]]
  df_base <- data$org %>% as_tibble()
  df_logos <- data[[2]] %>% as_tibble()
  df_logos <- tibble(urlSAMAPI = df_logos$self[[1]], urlLogo = df_logos$logo$href)

  df_base <- df_base %>% select(-links)
  df_base_cols <- df_base %>% map(class) %>% as_tibble() %>% gather(column, type)
  base_cols <-
    df_base_cols %>%
    filter(!type %in% c("list", "data.frame")) %>%
    pull(column)

  data <-
    df_base %>%
    select(one_of(base_cols))

data <-
    data %>%
    .munge_data_sam_names() %>%
    select(-matches("remove")) %>%
    .remove_na() %>%
    .munge_data(clean_address = F) %>%
    select(idSAM, everything())

  data <-
    data %>%
    select(idSAM, nameAgency, codeAgency, descriptionAgency, summaryAgency, everything())

  data
}
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.