R/v2_sam.R

Defines functions .generate_active_sam_fbo_urls .generate_sam_v2_url .parse_sam_fbo .parse_sam_fbo_contract

# sam_fbo -----------------------------------------------------------------

.parse_sam_fbo_contract <-
  function(url = "https://api.sam.gov/prod/federalorganizations/v1/organizations/100081847?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1571237477558&sort=name&mode=slim") {

  }
.parse_sam_fbo <- function(url = "https://api.sam.gov/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&&index=opp&page=0&sort=-modifiedDate&size=5000&is_active=true") {
  json_data <-
    url %>% fromJSON(simplifyDataFrame = T)

  data <-
    json_data[1][[1]]$results %>% as_tibble() %>%
    mutate(idRow = 1:n()) %>%
    select(idRow, everything())

  df_cols <-
    data %>% map_df(class) %>% gather(column, class) %>%
    mutate(idColumn = 1:n()) %>%
    select(idColumn, everything())

  base_cols <-
    df_cols %>% filter(!class %>% str_detect("list|data")) %>% pull(idColumn)

  df_base <-
    data[,base_cols] %>% as_tibble()


  df_base <-
    .munge_biz_opps_names(data = df_base) %>%
    .munge_data(clean_address = F) %>%
    select(-matches("remove"))

  df_nested_cols <-
    df_cols %>% filter(class %>% str_detect("list|data"))

  df_nested <-
    1:nrow(df_nested_cols) %>%
    map(function(x) {
      df_row <-
        df_nested_cols %>% dplyr::slice(x)

      col <-
        df_row$column

      glue("Parsing {col}") %>% message()

      if (col == "descriptions") {
        df <- data %>% select(idRow, descriptions)
        df <-
          df %>% mutate(hasDescriptions = descriptions %>% map_dbl(length) > 0) %>%
          filter(hasDescriptions) %>%
          select(-hasDescriptions) %>%
          unnest()
        read_html_safe <- possibly(read_html, NULL)

        df_descriptions <-
          1:nrow(df) %>%
          map_dfr(function(y) {
            y %>% message()
            text <- df$content[[y]]
            html_row_text <- HTML(text)
            value <- read_html_safe(html_row_text)
            if (as.character(html_row_text) == ".") {
              return(invisible())
            }
            if (length(value) > 0) {
              value <- rvest::html_text(value) %>% str_squish() %>% str_to_upper()
            } else {
              value <- text %>% str_to_upper() %>% str_squish()
            }

            df <-
              tibble(idRow = df$idRow[[y]],
                     descriptionContract = value)
            df
          })

        df <-
          df_descriptions %>%
          left_join(df %>% select(idRow, datetimeModified = lastModifiedDate),
                    by = "idRow") %>%
          distinct() %>%
          mutate(datetimeModified = datetimeModified %>% ymd_hms()) %>%
          group_by(idRow, datetimeModified) %>%
          summarise(descriptionContract = descriptionContract %>% str_c(collapse = " ")) %>%
          ungroup()

        return(df)
      }

      if (col == "placeOfPerformance") {
        df <-
          data %>% select(idRow, placeOfPerformance)
        df <-
          df %>% mutate(hasPOP = placeOfPerformance %>% map_dbl(length) > 0) %>%
          filter(hasPOP) %>%
          select(-hasPOP)

        df <-
          df %>% unnest() %>% mutate_if(is.character, str_squish) %>%
          setNames(
            c(
              "idRow",
              "zipcodePerformance",
              "codeCityPerformance",
              "addressStreet1Performance",
              "addressStreetPerformance",
              "statePerformance"
            )
          ) %>%
          .remove_na()

        return(df)
      }

      if (col == "modifications") {
        df <- data %>% select(idRow, one_of(col))
        df <-
          data %>% select(idRow, one_of(col))

        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            count <-
              df_r$modifications$count

            if (length(count) >= 1) {
              df_count  <-
                tibble(countModifications = count) %>%
                mutate(idRow = row_x) %>%
                select(idRow, everything())
              return(df_count)
            }
          })

        return(df)

      }

      if (col == "naics") {
        df <- data %>% select(idRow, one_of(col))

        df <-
          df %>% unnest() %>%
          setNames(c("idRow", "idNAICS", "keyNAICS", "descriptionNAICS")) %>%
          mutate(idNAICS = as.numeric(idNAICS),
                 descriptionNAICS = str_to_upper(descriptionNAICS))
        return(df)
      }

      if (col == "organizationHierarchy") {
        df <-
          data %>% select(idRow, one_of(col))
        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            df_org <-
              df_r$organizationHierarchy

            df_org <- df_org[[1]] %>% as_tibble()


            df_org <-
              df_org %>% select(-address) %>% setNames(c("idOrganization", "idLevel", "nameOrganization")) %>%
              bind_cols(df_org$address %>% as_tibble() %>%
                          setNames(
                            c(
                              "zipcodeOrganization",
                              "countryOrganization",
                              "cityOrganization",
                              "streetOrganization",
                              "streetOrganization",
                              "stateOrganization"
                            )
                          )) %>%
              mutate(idRow = row_x) %>%
              select(idRow, idLevel, everything())

            df_org

          })

        df <- df %>%
          nest_legacy(-idRow, .key = "dataOrganization")

        return(df)
      }

      if (col == "award") {
        df <- data %>% select(idRow, award)
        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            df_award <-
              df_r$award


            all_data <- list(
              df_award %>% select(-c(
                awardee, justificationAuthority, fairOpportunity
              )) %>%
                as_tibble() %>%
                setNames(
                  c(
                    "dateContract",
                    "idContract",
                    "amountContract",
                    "descriptionAdditionalInformation",
                    "orderNumberDelivery"
                  )
                )
              ,
              df_award$awardee %>% flatten_df() %>% setNames(
                c(
                  "nameVendor",
                  "idDUNSVendor",
                  "zipcodeVendor",
                  "countryVendor",
                  "cityVendor",
                  "addressStreet1Vendor",
                  "addressStreet2Vendor",
                  "stateVendor"
                )
              ),
              df_award$justificationAuthority %>% flatten_df() %>%
                setNames(
                  c(
                    "codeJustification",
                    "descriptionJustification",
                    "codeModJustification"
                  )
                ),
              df_award$fairOpportunity %>% flatten_df() %>% setNames(
                c("codeFairOpportunity", "descriptionFairOpportunity")
              )
            ) %>%
              reduce(bind_cols) %>%
              mutate(idRow = row_x)

            all_data %>%
              .remove_na()
          })

        df <-
          df %>%
          .munge_data(clean_address = F) %>%
          filter(!is.na(dateContract)) %>%
          mutate(isAward = T) %>%
          select(idRow, everything())

        if (df %>% hasName("codeModJustification")) {
          df <-
            df %>%
            mutate(
              codeModJustification = case_when(
                codeModJustification == "N/A" ~ NA_character_,
                TRUE ~ codeModJustification
              )
            )
        }


        return(df)
      }

      if (col == "additionalReporting") {
        df <-
          data %>% select(idRow, one_of(col))

        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            reporting <-
              df_r$additionalReporting[[1]]

            if (length(reporting) >= 1) {
              df_reporting <-
                tibble(reporting) %>%
                mutate(idRow = row_x) %>%
                select(idRow, everything())
              return(df_reporting)
            }
          })

        df <- df %>%
          rename(typeReporting = reporting)


        return(df)
      }

      if (col == "type") {
        df <-
          data %>% select(idRow, one_of(col))

        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            df_type <-
              df_r$type %>% as_tibble()

            if (nrow(df_type) > 0) {
              df_type <-
                df_type %>% setNames(c("groupSolicitation", "typeSolicitation")) %>% mutate(idRow = row_x)
              return(df_type)
            }
          })

        df <-
          df %>% mutate_if(is.character, str_to_upper)

        return(df)
      }


      if (col == "pointOfContacts") {
        df <-
          data %>% select(idRow, pointOfContacts) %>%
          unnest()

        df <-
          df %>% select(-one_of("additionalInfo")) %>% suppressMessages()

        df <-
          df %>%
          .munge_fpds_names() %>%
          nest(-idRow, .key = "dataContacts") %>%
          mutate(countContacts = dataContacts %>% map_dbl(nrow))

        return(df)

      }

      if (col == "psc") {
        df <-
          data %>%
          select(idRow, one_of(col)) %>%
          unnest()
        df <- df %>% filter(!is.na(code))

        df <-
          df %>%
          setNames(c(
            "idRow",
            "codePSC",
            "idPSC",
            "descriptionCodeProductService"
          )) %>%
          mutate(descriptionCodeProductService = str_to_upper(descriptionCodeProductService)) %>%
          unite(codeProductService,
                codePSC,
                idPSC,
                remove = F,
                sep = "")
        return(df)
      }

      if (col == "solicitation") {
        df <- data %>% select(idRow, solicitation)
        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            df_all <-
              tibble(idRow = row_x)
            df_original_set_aside <-
              df_r$solicitation$originalSetAside %>% as_tibble() %>%
              filter(!is.na(value))

            if (nrow(df_original_set_aside) > 0) {
              df_all <-
                df_all %>%
                bind_rows(df_original_set_aside %>% mutate(idRow = row_x, type = "original"))
            }

            df_set_aside <- df_r$solicitation$setAside %>%
              as_tibble() %>%
              filter(!is.na(value))

            if (nrow(df_set_aside) > 0) {
              df_all <-
                df_all %>%
                bind_rows(df_set_aside %>% mutate(idRow = row_x, type = "new"))
            }
            df_all
          })



        if (ncol(df) == 1) {
          return(NULL)
        }
        df <-
          df %>%
          filter(!is.na(value))
        df <-
          df %>% nest_legacy(-idRow) %>%
          rename(dataSetAside = data)
        return(df)
      }

      if (col == "suggestion") {
        df <- data %>% select(idRow, one_of(col))
        df <-
          1:nrow(df) %>%
          map_dfr(function(row_x) {
            row_x %>% message()
            df_r <- df %>% dplyr::slice(row_x)
            contexts <-
              df_r$suggestion$input[[1]][1] %>%
              str_split("\\,") %>%
              flatten_chr() %>%
              str_trim() %>%
              str_to_upper() %>%
              str_c(collapse = " | ")

            if (length(contexts) > 0) {
              df_context <-
                tibble(idRow = row_x, keywordsFBO = contexts)
              return(df_context)
            }

          })
        return(df)
      }
    })

  df_nested <-
    df_nested %>%
    discard(function(x){
      x %>% length() == 0
    }) %>%
    reduce(left_join) %>% suppressMessages()

  data <-
    df_base %>%
    select(-one_of( "datetimeModified")) %>%
    left_join(df_nested, by = "idRow") %>%
    select(-matches("remove|idRow"))

  df_list <-
    data %>%
    transmute_if(is.list,
                 .funs = list(function(x) {
                   x %>% map_dbl(length) > 0
                 })) %>%
    mutate(idRow = 1:n())

  names(df_list) <-
    names(df_list) %>% str_replace_all("^data", "has")

  data <-
    data %>%
    mutate(idRow = 1:n()) %>%
    left_join(df_list, by = 'idRow') %>%
    select(-idRow) %>%
    mutate(urlSAMV2API = url)

  data
}


.generate_sam_v2_url <-
  function(base = "",
           version = "v1",
           api_key = ""){}

.generate_active_sam_fbo_urls <-
  function(url = "https://api.sam.gov/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&index=opp&page=0&sort=-modifiedDate&size=5000&is_active=true", size = 10) {
    url_test <-
      glue("https://api.sam.gov/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&index=opp&page=0&sort=-modifiedDate&size={size}&is_active=true")

    data <- url_test %>% fromJSON(simplifyDataFrame = T)
    pages <- data[[3]]$totalPages
    pages <- 0:pages
    urls <-
      glue("https://api.sam.gov/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&index=opp&page={pages}&sort=-modifiedDate&size={size}&is_active=true") %>%
      as.character()
    urls

    u}
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.