# tools -------------------------------------------------------------------
#' Dictionary for SAM Office Funding ID Types
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
dictionary_sam_funding_ids <-
function(data) {
tibble(idFunding = c(1,2,3,5),
typeFunding = c("CONTRACT FUNDING","CONTRACT AWARDS",
"FINANCIAL ASSISTANCE AWARDS",
"FINANCIAL ASSISTANCE FUNDING"))
}
.dictionary_psc_names <-
function() {
tibble(
namePSC = c(
"pscId",
"pscCode",
"pscName",
"activeInd",
"activeStartDate",
"updatedDate",
"pscNote",
"activeEndDate",
"pscFullName",
"parentPscCode",
"pscInclude",
"pscExclude",
"naicsId",
"naicsCode",
"naicsTitle",
"naicsSize",
"sourceYear",
"parentNaicsCode",
"numberTable",
"table",
"elements",
"element_id",
"value",
"code",
"description"
),
nameActual = c(
"codeProductService",
"idSolicitationGroup",
"nameProductService",
"isActivePSC",
"dateActive",
"dateUpdated",
"notesProductService",
"dateActiveEnd",
"nameFullPSC",
"idSolicitationGroup",
"descriptionPSCIncldues",
"descriptionPSCExcludes",
"codeNAICS",
"idNAICS",
"nameNAICS",
"numberNAICSLevel",
"yearNAICSCodebook",
"idNAICSParent",
"numberTable",
"slugTable",
"elements",
"idElement",
"nameElement",
"codeElement",
"descriptionElement"
)
)
}
.munge_psc_names <-
function(data) {
names_dict <- names(data)
dict <- .dictionary_psc_names()
actual_names <-
names_dict %>%
map_chr(function(name) {
df_row <-
dict %>% filter(namePSC == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
.col_class_df <-
function(data) {
data %>% map_df(class) %>%
gather(column, type) %>%
mutate(isNested = type %>% str_detect("data|list"))
}
.stream_json <-
function(url, pagesize = 500000900, ...) {
json <- curl(url) %>% stream_in(pagesize = pagesize, ...)
names(json) <- names(json) %>% str_remove_all("\\_")
json
}
.dictionary_federal_names <-
function() {
tibble(
nameSAM = c(
"orgKey",
"name",
"parentOrgKey",
"fullParentPath",
"createdBy",
"createdDate",
"lastModifiedBy",
"lastModifiedDate",
"fullParentPathName",
"startDate",
"endDate",
"agencyName",
"fpdsCode",
"modStatus",
"cgac",
"level",
"fpdsOrgId",
"l1ShortName",
"code",
"l1OrgKey",
"l2OrgKey",
"l1Name",
"l3Name",
"l4Name",
"l5Name",
"parentOrg",
"l2Name",
"isSourceFpds",
"aacCode",
"description",
"summary",
"categoryDesc",
"orgCode",
"sourceCfdaPk",
"address_key",
"org_key",
"street_address",
"street_address_2",
"city",
"state",
"zipcode",
"country_code",
"created_by",
"created_date",
"last_modified_by",
"last_modified_date",
"mod_status",
"is_source_fpds",
"is_source_cfda",
"is_source_fh_app",
"type",
"office_type",
"start_date",
"end_date",
"office_id",
"is_latest",
"office_type_id",
"office_type_name",
"position",
"shortName",
"tas2Code",
"cfdaCode",
"ombAgencyCode",
"isSourceCfda",
"tas3Code",
"a11TacCode",
"logoUrl",
"cfdaBur",
"cfdaOmb",
"isSourceCwCfda",
"sourceParentCfdaPk",
"ombBureauCode",
"regionCode"
),
nameActual = c(
"idOrganizationSAM",
"nameOrganization",
"idOrganizationParent",
"idsPathSAM",
"partyCreatedBy",
"datetimeCreated",
"partyLastModifiedBy",
"datetimeLastModified",
"namesPathSAM",
"datetimeOrganizationStart",
"datetimeOrganizationEnd",
"nameDepartmentAgency",
"idAgencyFPDS",
"statusModification",
"slugCGAC",
"idOrganizationLevel",
"idOfficeFPDS",
"slugDepartment",
"idOffice",
"idOrganizationSAMDepartment",
"idOrganizationSAMAgency",
"nameDepartment",
"nameCommandMajor",
"nameCommandSub",
"nameOffice",
"nameOrganizationParent",
"nameAgency",
"isSourceFPDS",
"idOfficeAAC",
"descriptionOffice",
"summaryOffice",
"typeOrganization",
"codeOrganization",
"keyCFDA",
"idAddressSAM",
"idOrganizationSAM",
"addressStreet1Organization",
"addressStreet2Organization",
"cityOrganization",
"stateOrganization",
"zipcodeOrganization",
"codeCountryOrganization",
"remove_created_by",
"remove_created_date",
"remove_last_modified_by",
"remove_last_modified_date",
"remove_mod_status",
"remove_is_source_fpds",
"remove_is_source_cfda",
"remove_is_source_fh_app",
"remove_type",
"remove_office_type",
"remove_start_date",
"remove_end_date",
"remove_office_id",
"remove_is_latest",
"remove_office_type_id",
"typeFunding",
"remove_position",
"slugOrganization",
"idCGAC",
"bureauCFDA",
"codeAgency",
"isCFDA",
"idTAS3",
"idA11TAC",
"urlLogo",
"idCFDABureau",
"codeAgencyOMB",
"isSourceCFDA",
"keyCFDAOrganization",
"codeBureau",
"codeRegion"
)
)
}
.munge_federal_organization_names <-
function(data) {
names_dict <- names(data)
dict <- .dictionary_federal_names()
actual_names <-
names_dict %>%
map_chr(function(name) {
df_row <-
dict %>% filter(nameSAM == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
# dicitonaries ------------------------------------------------------------
.parse_sam_org_json <-
function(json) {
data <-
json[[1]] %>% as_tibble()
if (nrow(data) == 0) {
return(invisible())
}
data <- json[[1]][[1]] %>% as_tibble()
data <-
data %>%
mutate_if(is.character, list(function(x) {
ifelse(x == "", NA, x) %>% str_squish()
})) %>%
.remove_na()
df_types <-
data %>%
.col_class_df()
base_cols <- df_types %>% filter(!isNested) %>% pull(column)
nested_cols <- df_types %>% filter(isNested) %>% pull(column)
df_nested <-
data %>% select(nested_cols)
df_nested_length <-
df_nested %>%
transmute_all(list(function(x) {
x %>% map_dbl(length)
}))
names(df_nested_length) <-
names(df_nested_length) %>% str_c("Length")
df_nested <-
df_nested %>%
bind_cols(df_nested_length) %>%
mutate(orgKey = data$orgKey) %>%
select(orgKey, everything())
data <-
data %>%
select(one_of(base_cols)) %>%
mutate_if(is.character, list(function(x) {
ifelse(x %in% c("", "\\?", "\\."), NA_character_, x)
}))
data <-
data %>% .munge_federal_organization_names()
data <-
data %>%
mutate_if(is.character, list(function(x) {
ifelse(x == "", NA, x) %>% str_squish()
})) %>%
.munge_data(clean_address = F) %>%
.remove_na()
if (df_nested %>% hasName("orgAddressesLength")) {
df_addresses <-
df_nested %>%
filter(orgAddressesLength > 0) %>%
select(orgAddresses) %>%
unnest()
df_addresses <- df_addresses %>% .munge_federal_organization_names() %>%
select(-matches("remove"))
if (df_addresses %>% nrow() > 0) {
data <- data %>%
left_join(df_addresses, by = "idOrganizationSAM")
}
}
if (df_nested %>% hasName("orgOfficeTypesLength")) {
df_office_types <-
df_nested %>%
filter(orgOfficeTypesLength > 0) %>%
select(orgOfficeTypes) %>%
unnest_legacy()
df_office_types <- df_office_types %>% .munge_federal_organization_names() %>%
select(-matches("remove"))
if (nrow(df_office_types) >0) {
df_office_types <- df_office_types %>%
mutate(typeFunding = case_when(
typeFunding == "CONTRACT FUNDING" ~ "hasContractFunding",
typeFunding == "CONTRACT AWARDS" ~ "hasContractAwards",
typeFunding == "FINANCIAL ASSISTANCE FUNDING" ~ "hasFinancialAssistanceFunding",
typeFunding == "FINANCIAL ASSISTANCE AWARDS" ~ "hasFinancialAssistanceAwards",
)) %>%
mutate(value = TRUE) %>%
spread(typeFunding, value) %>%
mutate_if(is.logical,
list(function(x){
x %>% coalesce(FALSE)
}))
data <-
data %>%
left_join(df_office_types, by = "idOrganizationSAM")
}
}
data
}
.dict_org_types <-
function() {
tibble(
type = c(
"DEPARTMENT",
"AGENCY",
"OFFICE",
"MAJOR COMMAND",
"SUB COMMAND"
),
name = c(
"Department",
"Agency",
"Office",
"CommandMajor",
"CommandSub"
)
)
}
#' SAM NAICS Dictionary
#'
#' Returns date for all searchable
#' NAICS codes from the new SAM API
#'
#' @param only_master if \code{TRUE} returns only master NAICS groups
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_sam_naics_codes()
dictionary_sam_naics_codes <-
memoise::memoise(function(only_master = F) {
url <- "https://beta.sam.gov/api/prod/locationservices/v1/api/naics"
data <-
url %>%
.stream_json()
data <- data[["embedded"]] %>% as_tibble()
data <- data[[data %>% names()]][[1]] %>% as_tibble()
data <-
data %>% select(-matches("_links")) %>%
.munge_psc_names() %>%
.munge_data()
if (data %>% hasName("isActivePSC")){
data <-
data %>%
rename(isActiveNAICS = isActivePSC)
}
if (data %>% hasName("numberNAICSLevel")){
data <-
data %>%
mutate(numberNAICSLevel = as.numeric(numberNAICSLevel))
}
if (data %>% hasName("yearNAICSCodebook")){
data <-
data %>%
mutate(yearNAICSCodebook = as.numeric(yearNAICSCodebook))
}
data <- data %>%
mutate(
idNAICSParent = case_when(idNAICSParent == idNAICS ~ NA_real_,
TRUE ~ idNAICSParent),
isNAICSMasterGroup = numberNAICSLevel == 2
) %>%
select(codeNAICS, isNAICSMasterGroup, everything())
if (only_master) {
data <-
data %>%
filter(isNAICSMasterGroup)
}
data
})
#' SAM Product Service Group Solicitation Group Dictionary
#'
#' Returns the searchable parent PSC solicitation groups.
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_psc_groups()
dictionary_sam_psc_groups <-
memoise::memoise(function() {
url <-
"https://beta.sam.gov/api/prod/locationservices/v1/api/psc?q=&active=ALL&advanceSearch=Y"
data <- .stream_json(url = url)
data <- data$embedded[[1]]
data <-
data[[1]] %>% as_tibble() %>% select(-matches("_links"))
data <-
data %>%
.munge_psc_names() %>%
.munge_data() %>%
filter(isActivePSC)
data
})
#' SAM setaside dictionary
#'
#' @param url json url
#'
#' @return
#' @export
#'
#' @examples
dictionary_sam_set_asides <-
memoise::memoise(function(url = "https://beta.sam.gov/api/prod/locationservices/v1/api/setAside?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1574113310771&searchby=setAsideName") {
data <- .stream_json(url = url)
data <-
data$embedded %>% as_tibble() %>% unnest()
data <-
data %>%
select(-matches("_links")) %>%
.remove_na() %>%
.munge_biz_opps_names() %>%
.munge_data(clean_address = F)
data
})
#' SAM Beta Solicitation type
#'
#' Information on the SAM/FBO solicitation
#' types.
#'
#' Returns groups for procurement type, justification authority
#' and statutory authority.
#'
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictonary_sam_solicitation_types()
dictonary_sam_solicitation_types <-
memoise::memoise(function() {
url = "https://beta.sam.gov/api/prod/opps/v2/dictionaries?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1574198133721&ids=procurement_type,additional_reporting,ja_statutory_authority,fo_justification_authority,justification_aquisition_authority"
data <- .stream_json(url = url)
data <-
data$embedded$dictionaries[[1]] %>%
as_tibble() %>%
unnest() %>%
.munge_data(clean_address = F) %>%
.remove_na() %>%
setNames(
c(
"idElement",
"codeElement",
"nameSolicitationType",
"isActiveType",
"sort",
"idURL"
)
) %>%
mutate(codeElement = str_to_lower(codeElement))
data <- data %>%
mutate(typeData = case_when(
idURL %>% str_detect("PROCUREMENT_TYPE") ~ "PROCUREMENT",
idURL %>% str_detect("JA_STATUTORY_AUTHORITY") ~ "STATUTORY AUTHORITY",
idURL %>% str_detect("ADDITIONAL_REPORTING") ~ "ADDITIONAL REPORTING",
idURL %>% str_detect("FO_JUSTIFICATION_AUTHORITY|JUSTIFICATION_AQUISITION_AUTHORITY") ~ "JUSTIFICATION AUTHORITY"
)) %>%
select(typeData, everything()) %>%
mutate(nameSolicitationType = str_squish(nameSolicitationType)) %>%
select(-one_of(c("sort", "idURL")))
data
})
.parse_sam_organizations <-
function(url = "https://beta.sam.gov/api/prod/federalorganizations/v1/organizations/departments/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii") {
json <- fromJSON(url, simplifyDataFrame = T)
data <- json[[1]] %>% as_tibble()
data <- data[["org"]] %>% as_tibble()
data <-
data %>%
mutate_if(is.character, list(function(x) {
ifelse(x == "", NA, x)
})) %>%
.remove_na()
df_types <-
data %>%
.col_class_df()
base_cols <- df_types %>% filter(!isNested) %>% pull(column)
df_base <-
data %>%
select(one_of(base_cols)) %>%
.munge_biz_opps_names() %>%
.munge_data(clean_address = F)
if (df_base %>% hasName("descriptionAttachment")) {
df_base <- df_base %>%
rename(descriptionOffice = descriptionAttachment)
}
if (df_base %>% hasName("nameFile")) {
df_base <- df_base %>%
rename(nameOffice = nameFile)
}
df_base
}
#' SAM Entity Exclusion Program Dictionary
#'
#' Types of exclusions for entities
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_exclusion_programs()
dictionary_sam_exclusion_programs <-
function() {
tibble(
codeElement = c("Reciprocal", "NonProcurement", "Procurement"),
nameElement = c("Reciprocal", "Non Procurement", "Procurement")
)
}
#' SAM Entity Classification Dictionary
#'
#' SAM classification type data
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_sam_classification_types()
dictionary_sam_classification_types <-
function() {
tibble(
codeElement = c(
"Firm",
"Individual",
"Special%20Entity%20Designation",
"Vessel"
),
nameElement = c("Firm", "Individual", "Special Entity Designation", "Vessel")
)
}
#' SAM Exclusion Type Dictionary
#'
#' Exclusion types
#'
#' @return
#' @export
#'
#' @examples
dictionary_sam_exclusion_types <-
function() {
tibble(
codeElement = 1:4,
nameElement = c("Ineligible (Proceedings Pending)", "Ineligible (Proceedings Completed)",
"Prohibition/Restriction", "Voluntary Exclusion")
)
}
.unembedded_data <-
function(data) {
}
.parse_sam_city_url <-
function(url = "https://beta.sam.gov/api/prod/locationservices/v1/api/cities?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573667358453&cc=&q=&searchby=statecode&searchvalue=MD") {
}
# url generators --------------------------------------------------------------
function(url = "https://beta.sam.gov/search?keywords=&sort=-modifiedDate&index=opp&is_active=true&page=1&psc=&naics=&set_aside=¬ice_type=&date_filter_index=0&inactive_filter_values=false") {
data <- url %>% url_parse() %>% flatten_df()
data$query
}
.generate_url_sam_beta <-
function(type = "Opportunities",
active = T,
keywords = NULL,
title = NULL,
quote_words = T,
set_asides = NULL,
notice_type = NULL,
psc_codes = NULL,
naics_codes = NULL,
zipcodes = NULL,
sort = "date",
mode = "search",
version = 1,
publish_date_from = NULL,
publish_date_to = NULL,
response_date_from = NULL,
response_date_to = NULL,
updated_date_from = NULL,
updated_date_to = NULL,
inactive_date_from = NULL,
inactive_date_to = NULL,
performance_zipcode = NULL,
perfomance_state = NULL,
awardee = NULL,
award_minimum = NULL,
award_maximum = NULL,
awardee_state = NULL,
awardee_zipcode = NULL,
contract_office_zipcode = NULL,
contract_office_state = NULL,
contract_id = NULL,
contract_title = NULL,
classification = NULL,
country = NULL,
authority = NULL,
award_idv = NULL,
award_type = NULL,
contract_type = NULL,
vendor_zipcode = NULL,
api_key = NULL,
size = 5000,
return_message = T) {
type_slug <- str_to_lower(type)
type_slug <-
case_when(
type_slug %>% str_detect("opportunities") ~ "opp",
type_slug %>% str_detect("contract data") ~ "fpds",
type_slug %>% str_detect("all award data") ~ "",
type_slug %>% str_detect("assistance") ~ "cfda",
type_slug %>% str_detect("entity") ~ "ei",
type_slug %>% str_detect("federal") ~ "fh"
)
sort_slug <-
case_when(sort %>% str_detect("date") ~ "-modifiedDate",
TRUE ~ "title")
search_slug <- c()
search_params <- c()
publish_from_slug <-
case_when(length(publish_date_from) > 0 ~ ymd(publish_date_from) %>% as.character() %>% str_c("&publish_date.from=",., "-12:00","&"),
TRUE ~ "")
publish_to_slug <-
case_when(length(publish_date_to) > 0 ~ ymd(publish_date_to) %>% as.character() %>% str_c("&publish_date.to=",., "-12:00","&"),
TRUE ~ "")
response_from_slug <-
case_when(length(response_date_from) > 0 ~ ymd(response_date_from) %>% as.character() %>% str_c("&response_date.from=",., "-12:00","&"),
TRUE ~ "")
response_to_slug <-
case_when(length(response_date_to) > 0 ~ ymd(response_date_to) %>% as.character() %>% str_c("&response_date.to=",., "-12:00","&"),
TRUE ~ "")
updated_from_slug <-
case_when(length(updated_date_from) > 0 ~ ymd(updated_date_from) %>% as.character() %>% str_c("&modified_date.from=",., "-12:00","&"),
TRUE ~ "")
updated_to_slug <-
case_when(length(updated_date_to) > 0 ~ ymd(updated_date_to) %>% as.character() %>% str_c("&modified_date.to=",., "-12:00","&"),
TRUE ~ "")
inactive_from_slug <-
case_when(length(inactive_date_from) > 0 ~ ymd(updated_date_from) %>% as.character() %>% str_c("&modified_date.from=",., "-12:00","&"),
TRUE ~ "")
inactive_to_slug <-
case_when(length(inactive_date_from) > 0 ~ ymd(inactive_date_from) %>% as.character() %>% str_c("&modified_date.to=",., "-12:00","&"),
TRUE ~ "")
performance_zipcode_slug <-
case_when(length(performance_zipcode) > 0 ~
str_c("&pop_zip=",str_c(performance_zipcode, collapse = ","),"&"),
TRUE ~ "")
awardee_slug <-
case_when(length(awardee) > 0 ~
str_c("&awarded_name=",str_c(glue('"{awardee}"'), collapse = ","),"&"),
TRUE ~ "")
award_minimum_slug <-
case_when(length(award_minimum) > 0 ~
str_c("&award_amount.from=",str_c(award_minimum, collapse = ","),"&"),
TRUE ~ "")
award_maximum_slug <-
case_when(length(award_maximum) > 0 ~
str_c("&award_amount.to=",str_c(award_maximum, collapse = ","),"&"),
TRUE ~ "")
awardee_zipcode_slug <-
case_when(length(awardee_zipcode) > 0 ~
str_c("&awardee_zip=",str_c(awardee_zipcode, collapse = ","),"&"),,
TRUE ~ "")
perfomance_state_slug <- case_when(length(perfomance_state) > 0 ~
str_c("&pop_state=", str_c(perfomance_state, collapse = ","), "&"),
TRUE ~ "")
awardee_state_slug <- case_when(length(awardee_state) > 0 ~
str_c("&awardee_state=", str_c(awardee_state, collapse = ","), "&"),
TRUE ~ "")
contract_office_zipcode_slug <-
case_when(length(contract_office_zipcode) > 0 ~
str_c("&office_zip=",str_c(contract_office_zipcode, collapse = ","),"&"),
TRUE ~ "")
contract_office_state_slug <-
case_when(length(contract_office_state) > 0 ~
str_c("&office_state=",str_c(perfomance_state, collapse = ","),"&"),
TRUE ~ "")
classification_slug <-
case_when(length(classification) > 0 ~
str_c("&classification=",str_c(classification, collapse = ","),"&"),
TRUE ~ "")
country_slug <-
case_when(length(country) > 0 ~
str_c("&country=",str_c(country, collapse = ","),"&"),
TRUE ~ "")
naics_slug <-
case_when(length(naics_codes) > 0 ~
str_c("&naics=", str_c(naics_codes, collapse = ","), "&"),
TRUE ~ "")
psc_slugs <-
case_when(length(psc_codes) > 0 ~
str_c("&psc=", str_c(psc_codes, collapse = ","), "&"),
TRUE ~ "")
authority_slug
additional_reporting_slug
notice_type_slug
award_type_slug
state_slug
county_slug
duns_slug
entity_slug
contract_type_slug
applicant_type_slug
beneficiary_type_slug
if (length(keywords) > 0) {
keyword_terms <-
case_when(quote_words ~ glue('"{keywords}"') %>% as.character(),
TRUE ~ str_c(keywords))
keyword_slugs <-
keyword_terms %>% str_c(collapse = " ") %>% URLencode() %>% str_to_lower()
keyword_slugs <- glue("&q={keyword_slugs}") %>% as.character()
keyword_terms <-
glue("Keywords: {keyword_terms}") %>% as.character()
search_params <-
append(search_params, keyword_terms) %>% str_c(collapse = "&")
search_slug <-
append(search_slug, keyword_slugs) %>% str_c(collapse = "")
}
if (length(psc) > 0) {
psc_slugs <- psc %>% str_to_upper() %>% str_c(collapse = "|")
psc_url_slug <- dictionary_federal_supply_codes() %>%
filter(nameSolicitationGroup %>% str_detect(psc_slugs)) %>%
pull(idSolicitationGroup) %>%
str_c(collapse = ",")
psc_url_slug <-
str_c("&psc=", psc_url_slug, collapse = "")
psc_terms <-
glue("PSC: {psc_slugs}")
search_params <-
append(search_params, psc_slugs) %>% str_c(collapse = "&")
search_slug <-
append(search_slug, psc_url_slug) %>% str_c(collapse = "")
}
if (length(set_asides) > 0) {
setaside_slugs <-
set_asides %>% str_to_upper() %>% str_c(collapse = "|")
df_s <- dictionary_sam_set_asides()
setaside_url_slug <-
df_s %>%
filter(nameSetAside %>% str_detect(setaside_slugs)) %>%
pull(codeSetAside) %>%
str_c(collapse = ",")
setaside_url_slug <-
str_c("&set_aside=", setaside_url_slug, collapse = "")
setaside_slugs <-
str_c(search_params, setaside_slugs, collapse = "&")
setaside_slugs <-
df_s %>%
filter(nameSetAside %>% str_detect(setaside_slugs)) %>%
pull(nameSetAside) %>%
str_c(collapse = "|")
setaside_terms <-
glue("Set Asides: {setaside_slugs}")
search_params <-
append(search_params, setaside_terms) %>% str_c(collapse = "&")
search_slug <-
append(search_slug, setaside_url_slug) %>% str_c(collapse = "")
}
if (length(notice_type) > 0) {
notice_types <-
notice_type %>% str_to_upper() %>% str_c(collapse = "|")
notice_slug <-
dictonary_sam_solicitation_types() %>%
filter(nameSolicitationType %>% str_detect(notice_types)) %>%
pull(codeElement) %>%
str_c(collapse = ",")
notice_url_slug <-
str_c("¬ice_type=", notice_slug, collapse = "")
setaside_slugs <- glue("Notice Types: {notice_types}")
search_params <-
append(search_params, setaside_slugs) %>% str_c(collapse = "&")
search_slug <-
append(search_slug, notice_url_slug) %>% str_c(collapse = "")
}
if (length(zipcode) > 0) {
zipcodes <-
zipcode %>% str_to_upper() %>% str_c(collapse = "|")
zipcode_slug <-
zipcode %>%
str_c(collapse = ",")
zipcode_url_slug <-
str_c("&zipcode=", zipcode_slug, collapse = "")
zipcode_slugs <- glue("Zipcode: {zipcodes}")
search_params <-
append(search_params, zipcode_slugs) %>% str_c(collapse = "&")
search_slug <-
append(search_slug, zipcode_url_slug) %>% str_c(collapse = "")
}
if (length(search_slug) == 0) {
search_slug <- ""
search_params <- "Unfiltered search"
}
if (length(api_key) > 0) {
api_slug <- glue("api_key={api_key}&") %>% as.character()
} else {
api_slug <- ""
}
url <-
glue(
"https://beta.sam.gov/api/prod/sgs/v1/search/?{api_slug}index={type_slug}{search_slug}&is_active={str_to_lower(active)}&mode={mode}&{sort_slug}&size=1"
) %>%
as.character()
json <- .stream_json(url = url)
actual_size <- max(size, 10000)
df_results <- json$page %>% as_tibble()
if (length(df_results) == 0) {
"No Results" %>% message
}
results <- df_results$totalElements
all_pages <- ceiling(results / size)
total_pages <- 0:(all_pages - 1)
if (return_message) {
p_slugs <- search_params %>% str_split("\\&") %>% flatten_chr() %>%
unique() %>%
str_c(collapse = "\n")
glue("Found {comma(results, digits = 0)} results in {type} for:\n\n{p_slugs}") %>% message()
}
if (results == 0) {
"No Results"
return(invisible())
}
urls <-
glue(
"https://beta.sam.gov/api/prod/sgs/v1/search/?{api_slug}index={type_slug}{search_slug}&is_active={str_to_lower(active)}&mode={mode}&{sort_slug}&page={total_pages}&size={size}"
) %>%
as.character()
data <- tibble(typeSearch = type, urlSAMAPI = urls)
if (length(search_params) > 0) {
data <- data %>%
mutate(searchParameters = search_params) %>%
select(typeSearch, searchParameters, everything())
}
data
}
#' Generate SAM Beta URLs for Parsing
#'
#' Return a \code{tibble} of URLs
#' based off of the user defined search parameters
#'
#' @param types SAM search type options include \itemize{
#' \item Opportunities - FBO opportunities (default)
#' \item Contract Data - FPDS contract data
#' \item All Award Data - All Government Award data
#' \item Assistance - Government Grants
#' \item Entity - Federally Registered entities
#' \item Federal - Federal Organizations
#' }
#' @param keywords vector of keywords to search
#' @param set_asides
#' @param sort method of sorting \itemize{
#' \item relevence
#' \item date - date modified
#'
#' }
#' @param mode search mode \itemize{
#' \item none
#' \item search - default
#' }
#' @param quote_words if \code{TRUE} quotes a keyword
#' @param active if \code{TRUE} only active listings
#' @param version default of 1
#' @param api_key apikey
#' @param psc if not NULL string of words to search PSCs
#' @param naics if not NULL string of words to search NAICS
#' @param size size of the results - defaults to 1000
#' @param return_message if \code{TRUE} returns a message
#' @param notice_types
#'
#' @return
#' @export
#'
#' @examples
sam_beta_urls <-
function(types = "Opportunities",
keywords = NULL,
quote_words = T,
notice_type = NULL,
set_asides = NULL,
sort = "date",
mode = "search",
active = T,
version = 1,
api_key = "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
psc = NULL,
naics = NULL,
size = 5000,
return_message = T) {
df_urls <-
types %>%
map_dfr(function(type){
.generate_url_sam_beta(type = type,
keywords = keywords,
quote_words = quote_words,
set_asides = set_asides,
notice_type = notice_type,
psc = psc,
naics = naics,
sort = sort,
mode = mode,
active = active,
version = version,
api_key = api_key,
size = size,
return_message = return_message)
})
df_urls
}
.generate_sam_fbo_urls <-
function(url = "https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=0&sort=-modifiedDate&mode=search&is_active=true&size=1",
json_size = 1000) {
json <- curl(url) %>% stream_in()
df <- json$page %>% as_tibble()
if (json_size > 10000) {
json_size <- 10000
}
total_pages <-
round(df$totalElements %>% as.integer() / json_size, digits = 0)
parts <-
url %>% str_split("&size") %>% flatten_chr() %>% .[[1]] %>% str_split("page=0") %>% flatten_chr()
df_schema <-
url %>% httr::parse_url() %>% flatten_df()
pages <- 0:(total_pages - 1)
urls <-
glue("{parts[[1]]}page={pages}{parts[2]}&size={json_size}") %>% as.character()
data <- tibble(numberPage = pages, urlSAMAPI = urls)
data
}
# parse -------------------------------------------------------------------
.parse_sam_fbo_json_url <-
function(url = "https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=0&sort=-modifiedDate&mode=search&is_active=true&size=100",
return_message = T) {
if (return_message) {
glue("\n\nParsing {url}\n\n") %>% message()
}
.stream_json_safe <- possibly(.stream_json, NULL)
json <- .stream_json_safe(url = url)
if (length(json) == 0) {
return(tibble())
}
data <-
json %>% .parse_sam_fbo_embedded()
df_schema <- httr::parse_url(url) %>% flatten_df()
if (df_schema %>% hasName("api_key")) {
api_key <- df_schema %>% pull(api_key)
data <-
data %>%
mutate(
urlOpportunityAttachmentAPI = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources?api_key={api_key}&excludeDeleted=false&withScanResult=false"
) %>% as.character(),
urlOpportunityAttachmentZIP = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources/download/zip?api_key={api_key}&"
) %>% as.character(),
urlOpportunityAPI = glue(
"https://beta.sam.gov/api/prod/opps/v2/opportunities/{idNotice}?api_key={api_key}"
) %>% as.character(),
urlSAMAPI = url
)
} else {
api_key <- NULL
data <-
data %>%
mutate(
urlOpportunityAttachmentAPI = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources?excludeDeleted=false&withScanResult=false"
) %>% as.character(),
urlOpportunityAttachmentZIP = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources/download/zip"
) %>% as.character(),
urlOpportunityAPI = glue(
"https://beta.sam.gov/api/prod/opps/v2/opportunities/{idNotice}"
) %>% as.character(),
urlSAMAPI = url
)
}
id_cols <- data %>% select(matches("id")) %>%
select(-matches("idSolici")) %>%
select_if(is.character) %>% names()
if (length(id_cols) > 0) {
data <- data %>%
mutate_at(id_cols,
str_to_lower)
}
data <-
data %>%
.remove_na()
data
}
#' Parse Vector of API Calls
#'
#' @param urls vector of JSON urls
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
#' parse_sam_fbo_page_urls(urls = "https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=0&sort=-modifiedDate&mode=search&is_active=true&size=500")
parse_sam_fbo_page_urls <-
function(urls = c(
"https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=0&sort=-modifiedDate&mode=search&is_active=true&size=500",
"https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=1&sort=-modifiedDate&mode=search&is_active=true&size=500",
"https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=2&sort=-modifiedDate&mode=search&is_active=true&size=500",
"https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=3&sort=-modifiedDate&mode=search&is_active=true&size=500",
"https://beta.sam.gov/api/prod/sgs/v1/search/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573665296402&index=opp&q=&page=4&sort=-modifiedDate&mode=search&is_active=true&size=500"
),
return_message = T) {
.parse_sam_fbo_json_url_m <-
memoise::memoise(.parse_sam_fbo_json_url)
.parse_sam_fbo_json_url_safe <-
possibly(.parse_sam_fbo_json_url, tibble())
data <-
unique(urls) %>%
map_dfr(function(url) {
.parse_sam_fbo_json_url_safe(url = url, return_message = return_message)
})
data <-
data %>%
mutate(
isActive = datetimeResponse > Sys.Date(),
countDaysToRespond = (as.Date(datetimeResponse) - Sys.Date()) %>% as.numeric(),
countDaysToRespond = case_when(is.na(countDaysToRespond) ~ 0,
TRUE ~ countDaysToRespond),
countDaysToRespond = pmax(0, countDaysToRespond, na.rm = T),
countDaysOpenToRespond = (as.Date(datetimeResponse) - as.Date(datetimePublished)) %>% as.numeric(),
countDaysOpenToRespond = pmax(0, countDaysOpenToRespond, na.rm = T)
)
data
}
.parse_sam_fbo_embedded <-
function(json, return_message = T) {
data <- json$embedded$results[[1]]
if (data %>% hasName("_type")) {
data <- data %>%
rename(typeMatch = `_type`)
}
data <- data %>%
mutate(idRow = 1:n()) %>%
select(idRow, everything())
names(data) <- names(data) %>% str_remove_all("\\_")
df_types <-
data %>%
.col_class_df()
d_types <-
data$type %>%
as_tibble() %>%
setNames(c("codeSolicitation", "typeSolicitation")) %>%
mutate(idRow = 1:n()) %>%
.munge_data(clean_address = F) %>%
filter(!is.na(codeSolicitation))
nested_cols <-
df_types %>%
filter(type %>% str_detect("data|list")) %>%
pull(column)
base_cols <-
df_types %>% filter(!column %in% nested_cols) %>%
pull(column)
df_base <-
data %>%
select(one_of(base_cols)) %>%
as_tibble() %>%
.munge_biz_opps_names()
nested_cols <-
c("organizationHierarchy",
"modifications",
"descriptions",
"award")
df_nested <-
nested_cols %>%
map(function(col) {
if (return_message) {
col %>% message()
}
d <-
data[c("idRow", col)]
if (col == "modifications") {
mods <-
d$modifications$count
d <- d %>%
select(idRow) %>%
mutate(countModifications = mods) %>%
as_tibble()
return(d)
}
if (col == "organizationHierarchy") {
if (nrow(d) == 0) {
return(invisible())
}
d <-
d %>%
as_tibble() %>%
mutate(hasOrganizationHierarchy = organizationHierarchy %>% map_dbl(length) > 0)
d <- d %>%
filter(hasOrganizationHierarchy)
if (nrow(d) == 0) {
return(invisible())
}
df <-
d %>% unnest()
d_base <-
df %>%
select(-one_of("address")) %>%
rename(idOrganizationSAM = organizationId)
d_base <-
d_base %>% select(-one_of(c(
"hasOrganizationHierarchy", "status", "level"
))) %>%
gather(item, value, -c("idRow", "type")) %>%
left_join(.dict_org_types(), by = "type") %>%
select(-type) %>%
unite(item, item, name, sep = "")
d_cols <- c("idRow", d_base$item %>% unique())
d_base <- d_base %>%
group_by(idRow, item) %>%
summarise(value = value %>% str_c(collapse = " | ")) %>%
ungroup() %>%
spread(item, value) %>%
select(one_of(d_cols)) %>%
select(idRow, matches("name"), everything())
d_base <- d_base %>%
separate(nameAgency,
into = c("nameAgency", "slugAgency"),
fill = "right",
sep = "\\(") %>%
mutate(slugAgency = slugAgency %>% str_remove_all("\\)")) %>%
separate(
nameOffice,
into = c("nameOffice", "idOffice"),
fill = "right",
sep = "\\(",
extra = "merge"
) %>%
mutate(idOffice = idOffice %>% str_remove_all("\\)|\\)")) %>%
separate(
nameDepartment,
into = c("nameDept1", "nameDept2"),
fill = "right",
sep = "\\, ",
remove = F
) %>%
mutate(nameDepartment = case_when(
!is.na(nameDept2) ~ str_c(nameDept2, nameDept1, sep = " "),
TRUE ~ nameDept1
)) %>%
separate(
nameAgency,
into = c("nameAg1", "nameAg2"),
sep = "\\, ",
fill = "right",
remove = F
) %>%
mutate(nameAgency = case_when(
!is.na(nameAg2) ~ str_c(nameAg2, nameAg1, sep = " "),
TRUE ~ nameAg1
)) %>%
select(-matches("nameAg1|nameAg2|nameDept1|nameDept2")) %>%
.munge_data(clean_address = F)
df_addresses <-
df$address %>% as_tibble() %>% mutate(idRow2 = 1:n())
df_addresses <-
df %>%
mutate(idRow2 = 1:n()) %>%
select(idRow, type, organizationId, idRow2) %>%
left_join(df_addresses, by = "idRow2")
df_addresses <-
df_addresses %>%
rename(
zipcode = zip,
addressStreet = streetAddress,
addressStreet1 = streetAddress2
) %>%
mutate(country = case_when(country == "US" ~ "USA",
TRUE ~ country)) %>%
select(addressStreet,
addressStreet1,
city,
state,
zipcode,
country,
everything())
df_locations <-
df_addresses %>%
select(-one_of(c("organizationId", "idRow"))) %>%
gather(item, value, -c(idRow2,type), na.rm = T) %>%
arrange(idRow2) %>%
group_by(idRow2, type) %>%
summarise(location = str_c(value, collapse = " ")) %>%
ungroup()
df_addresses <-
df_locations %>%
left_join(df_addresses, by = c("idRow2", "type")) %>%
select(idRow, organizationId, everything()) %>%
select(-idRow2,)
df_addresses <-
df_addresses %>%
gather(item, value, -c("idRow", "type")) %>%
left_join(.dict_org_types(), by = "type") %>%
select(-type) %>%
unite(item, item, name, sep = "") %>%
group_by(idRow, item) %>%
summarise(value = value %>% str_c(collapse = " | ")) %>%
ungroup() %>%
spread(item, value) %>%
select(idRow, matches("location"), everything())
d_base <-
d_base %>%
left_join(df_addresses, by = "idRow")
return(d_base)
}
if (col == "descriptions") {
d <-
d %>%
as_tibble() %>%
mutate(hasDescription = descriptions %>% map_dbl(length) > 0)
d <- d %>%
filter(hasDescription)
if (nrow(d) == 0) {
return(invisible())
}
df_descriptions <-
d %>% unnest() %>%
rename(datetimeLastModified = lastModifiedDate,
description = content) %>%
select(-hasDescription)
df_descriptions <-
df_descriptions %>% mutate(html = glue("<html>{description}</html>") %>% as.character())
descriptions <- df_descriptions$html %>%
map_chr(function(x) {
x %>% read_html() %>% html_text() %>% str_squish()
})
df_descriptions <- df_descriptions %>%
select(idRow, datetimeLastModified) %>%
mutate(descriptions) %>%
.munge_data() %>%
filter(!is.na(descriptions)) %>%
group_by(idRow) %>%
summarise(
datetimeLastModified = max(datetimeLastModified),
descriptionSolicitation = str_c(descriptions, collapse = " ")
) %>%
ungroup
}
if (col == "award") {
d <-
d$award[[1]] %>%
as_tibble() %>%
mutate(idRow = 1:n()) %>%
select(c("idRow", "name", "duns")) %>%
setNames(c("idRow", "nameAddressAwardee", "idDUNSAwardee")) %>%
select(idRow, idDUNSAwardee, everything()) %>%
filter(!is.na(nameAddressAwardee))
if (nrow(d) == 0) {
return(invisible())
}
d <- d %>%
mutate(idDUNSAwardee = as.numeric(idDUNSAwardee)) %>%
mutate(nameAddressAwardee = nameAddressAwardee %>% str_replace_all("\\;", "\\ | ")) %>%
separate(
col = nameAddressAwardee,
into = c("nameAwardee", "locationAwardee"),
extra = "merge",
fill = "right",
sep = "\\|"
) %>%
separate(
locationAwardee,
into = c(
"addressStreetAwardee",
"cityAwardee",
"stateAwardee",
"zipcodeAwardee"
),
sep = "\\|",
fill = "right",
remove = F
) %>%
mutate_if(is.character, str_squish) %>%
mutate(locationAwardee = locationAwardee %>% str_remove_all("\\|")) %>%
.munge_data(clean_address = F)
return(d)
}
}) %>%
discard(function(x) {
is.null(x)
})
df_nested <-
df_nested %>% reduce(left_join) %>% suppressMessages()
data <-
df_base %>%
left_join(df_nested, by = "idRow") %>%
left_join(d_types, by = "idRow") %>%
.munge_data(clean_address = F) %>%
select(-one_of("scoreMatch", "idRow")) %>%
select(
codeSolicitation,
typeSolicitation,
idNotice,
idNoticeParent,
idSolicitation,
nameSolicitation,
everything()
) %>%
mutate_at(c("idNotice", "idNoticeParent"), str_to_lower) %>%
mutate(urlOpportunity = glue("https://beta.sam.gov/opp/{idNotice}/view") %>% as.character())
data
}
# opportunities -----------------------------------------------------------
.parse_sam_fbo_opportunity <-
function(url = "https://beta.sam.gov/api/prod/opps/v2/opportunities/31a7ddbbadd04b3f9a0158042c41d922?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
return_message = T) {
if (return_message) {
glue("\n\nContract Opportunity\nParsing {url}\n\n") %>% message()
}
data <- .stream_json(url = url)
df_schema <-url %>% httr::parse_url() %>% flatten_df()
if (df_schema %>% hasName("api_key")) {
api_key <-
df_schema %>% pull(api_key)
api_slug <- glue("api_key={api_key}&") %>% as.character()
} else {
api_slug <- ""
}
df_types <-
data %>%
.col_class_df()
nested_cols <-
df_types %>%
filter(type %>% str_detect("data|list")) %>%
pull(column)
base_cols <-
df_types %>% filter(!column %in% nested_cols) %>%
pull(column)
df_base <-
data %>% select(one_of(base_cols)) %>% as_tibble() %>%
select(-one_of("id")) %>%
.munge_biz_opps_names()
if (df_base %>% hasName("datePosted")) {
df_base <- df_base %>% rename(datetimePosted = datePosted)
}
df_base <- df_base %>%
.munge_data(clean_address = F)
id_notice <- df_base$idNotice
nested_cols <-
nested_cols[!nested_cols %>% str_detect("links|permissions")]
data_nested <-
nested_cols %>%
map(function(col) {
col %>% message()
df <- data[[col]]
if (col == "data") {
d_types <-
df %>%
.col_class_df()
n_cols <-
d_types %>%
filter(type %>% str_detect("data|list")) %>%
pull(column) %>%
unique()
base_cols <-
d_types %>% filter(!column %in% n_cols) %>%
pull(column) %>%
unique()
d_base <-
df %>% select(one_of(base_cols)) %>% as_tibble() %>%
select(-one_of("id")) %>%
.remove_na() %>%
.munge_biz_opps_names() %>%
.munge_data(clean_address = F) %>%
mutate(idNotice = id_notice) %>%
select(idNotice, everything())
n_cols <- unique(n_cols)
d_nested <-
n_cols %>%
map(function(n_col) {
n_col %>% message()
df_n <- df[[n_col]]
if (n_col == "award") {
if (names(df_n) %>% length() == 0) {
return(invisible())
}
award_names <- df_n %>% names()
if (!"awardee" %in% award_names) {
return(invisible())
}
if (nrow(df_n) == 0) {
return(invisible())
}
d_awardee <-
df_n %>% select(-awardee) %>% as_tibble()
if (d_awardee %>% hasName("fairOpportunity")) {
if (d_awardee$fairOpportunity %>% ncol() > 0) {
message("CHECK FAIR OPP")
}
}
if (d_awardee %>% hasName("justificationAuthority")) {
if (d_awardee$justificationAuthority %>% ncol() > 0) {
message("CHECK JUSTIFICATION")
}
}
d_awardee <-
d_awardee %>% select(-matches("fairOpportunity|justificationAutho"))
d_awardee <-
d_awardee %>% .munge_biz_opps_names() %>%
.munge_data(clean_address = F)
df_awardee <-
d_awardee %>% select(one_of("duns", "name")) %>%
.munge_biz_opps_names() %>%
as_tibble()
if (df_awardee %>% hasName("nameFile")) {
df_awardee <- df_awardee %>% rename(nameAwardee = nameFile)
}
if ("location" %in% names(d_awardee)) {
items <- d_awardee$location %>% t() %>% rownames()
values <-
d_awardee$location %>% t() %>% as.character()
df_loc <-
tibble(items, values) %>% spread(items, values) %>%
.munge_biz_opps_names() %>%
select(-matches("remove"))
names(df_loc) <-
names(df_loc) %>% str_c("Awardee", sep = "")
df_awardee <- df_awardee %>% bind_cols(df_loc)
}
df_awardee <- df_awardee %>%
mutate(idNotice = id_notice)
return(df_awardee)
}
if (n_col == "naics") {
df_n <- df_n[[1]] %>%
as_tibble() %>%
unnest() %>%
setNames(c("typeNAICS", "idNAICS")) %>%
.munge_data() %>%
mutate(idNotice = id_notice)
return(df_n)
}
if (n_col == "archive") {
df_n <-
df_n %>%
as_tibble() %>%
setNames(c("dateArchive", "typeArchive")) %>%
.munge_data(clean_address = F) %>%
mutate(idNotice = id_notice)
return(df_n)
}
if (n_col == "solicitation") {
if (!"deadlines" %in% names(df_n)) {
return(invisible())
}
df_deadlines <- df_n$deadlines %>% as_tibble()
df_deadlines <-
df_deadlines %>%
.munge_biz_opps_names() %>%
.munge_data() %>%
mutate(idNotice = id_notice)
if ("setAside" %in% names(df_n)) {
typeSetAside <- df_n$setAside
df_deadlines <- df_deadlines %>%
mutate(typeSetAside) %>%
select(typeSetAside, everything())
}
return(df_deadlines)
}
if (n_col == "pointOfContact") {
df_n <- df_n[[1]] %>% as_tibble()
if (df_n %>% hasName("fullName")) {
df_n <- df_n %>% rename(name = fullName)
}
name_cols <-
df_n %>% select(-matches("additionalInfo")) %>% names()
names(df_n)[names(df_n) %in% name_cols] <-
names(df_n)[names(df_n) %in% name_cols] %>% str_c("Contact", sep = "")
if (df_n %>% hasName("additionalInfo")) {
df_n <- df_n %>% select(-additionalInfo)
}
df_n <-
df_n %>%
.remove_na() %>%
mutate(idNotice = id_notice)
df_n <- df_n %>%
gather(column, value, -c(typeContact, idNotice)) %>%
mutate(
typeContact = str_c(
typeContact %>% substr(1, 1) %>% str_to_upper(),
typeContact %>% substr(2, nchar(typeContact))
)
) %>%
unite(column, column, typeContact, sep = "") %>%
arrange(column) %>%
spread(column, value)
return(df_n)
}
if (n_col == "placeOfPerformance") {
if (ncol(df_n) == 0) {
return(invisible())
}
items <- df_n %>% t() %>% rownames()
values <- df_n %>% t() %>% as.character()
df_address <- tibble(items, values) %>%
spread(items, values) %>%
.munge_biz_opps_names() %>%
select(-matches("remove")) %>%
select(
one_of(
"addressStreeet",
"city",
"state",
"zipcde",
"country"
),
everything()
)
names(df_address) <-
names(df_address) %>% str_c("Performance")
df_address <- df_address %>%
mutate(idNotice = id_notice) %>%
select(idNotice, everything())
return(df_address)
}
if (n_col == "additionalReporting") {
descriptionAdditionalReporting <- df_n[[1]]
df_n <-
tibble(descriptionAdditionalReporting) %>%
mutate(idNotice = id_notice)
return(df_n)
}
}) %>%
discard(function(x) {
x %>% is_null()
}) %>%
reduce(left_join) %>%
suppressMessages() %>%
.remove_na()
d_base <- d_base %>% left_join(d_nested, by = "idNotice")
return(d_base)
}
if (col == "additionalInfo") {
df$sections
}
if (col == "parent") {
if (ncol(df) == 0) {
return(invisible())
}
}
if (col == "related") {
if (ncol(df) == 0) {
return(invisible())
}
}
if (col == "status") {
if (ncol(df) == 0) {
return(invisible())
}
status <- df$value
df <-
tibble(typeStatusSAM = status, idNotice = id_notice)
return(df)
}
if (col == "description") {
if (length(df[[1]]) == 0) {
return(invisible())
}
df <- df[[1]]
if (ncol(df) == 0) {
return(invisible())
}
df <- df %>%
as_tibble() %>%
.munge_biz_opps_names()
df <-
df %>% mutate(html = glue("<html>{body}</html>") %>% as.character())
descriptions <- df$html %>%
map_chr(function(x) {
x %>% read_html() %>% html_text() %>% str_squish()
}) %>%
str_to_upper()
df <-
df %>%
mutate(descriptionSolicitation = descriptions) %>%
.munge_data() %>%
select(-one_of(c("body", "html")))
return(df)
}
}) %>%
discard(function(x) {
x %>% is_null()
})
if (length(data_nested) > 0) {
data_nested <-
data_nested %>% reduce(left_join) %>% .remove_na() %>% suppressMessages() %>%
select(-one_of("datetimeModified")) %>%
suppressMessages()
df_base <- df_base %>% left_join(data_nested, by = "idNotice")
}
df_base <-
df_base %>%
select(nameSolicitation,
idSolicitation,
matches("date"),
one_of(c(
"typeStatusSAM", "descriptionSolicitation"
)),
everything()) %>%
mutate(urlOpportunityAPI = url) %>%
.munge_data(clean_address = F)
df_base <-
df_base %>%
mutate(
urlOpportunityAttachmentAPI = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources?{api_slug}excludeDeleted=false&withScanResult=false"
) %>% as.character(),
urlOpportunityAttachmentZIP = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources/download/zip?{api_slug}"
) %>% as.character()
)
df_base
}
#' Parse Vector of SAM Contract Opporunity URLs
#'
#' @param urls vector of contract api urls
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
#' parse_sam_fbo_opportunity_urls("https://beta.sam.gov/api/prod/opps/v2/opportunities/99fc745be6f84ad196e3b19c64716cf7?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii")
parse_sam_fbo_opportunity_urls <-
function(urls = c(
"https://beta.sam.gov/api/prod/opps/v2/opportunities/d3e90fba44ef479da331dc609080e825?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
"https://beta.sam.gov/api/prod/opps/v2/opportunities/99fc745be6f84ad196e3b19c64716cf7?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
"https://beta.sam.gov/api/prod/opps/v2/opportunities/a567aad2665e43a19323083b13bd533c?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
"https://beta.sam.gov/api/prod/opps/v2/opportunities/2f1547c4e0394ddf9d04ce2fc1332c0f?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii",
"https://beta.sam.gov/api/prod/opps/v2/opportunities/46af91ac4c2f4aac94b8a2ace865566c?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii"
),
return_message = T) {
.parse_sam_fbo_opportunity_m <-
memoise::memoise(.parse_sam_fbo_opportunity)
.parse_sam_fbo_opportunity_safe <-
possibly(.parse_sam_fbo_opportunity_m, tibble())
api_key <-
urls %>% sample(1) %>% httr::parse_url() %>% flatten_df() %>% pull(api_key)
all_data <- urls %>%
map_dfr(function(url) {
.parse_sam_fbo_opportunity_safe(url = url, return_message = return_message)
})
all_data
}
.dictionary_org_id_pages <-
function() {
tibble(idLevel = 1:5, countPages = c(1, 5, 10, 15, 10))
}
.generate_federal_api_urls <-
function(levels = 1:5, results = 25000, status = FALSE,
api_key = "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii") {
df_pages <- .dictionary_org_id_pages()
urls <-
levels %>%
map(function(level){
max_p <- df_pages %>% filter(idLevel == level) %>% pull(countPages)
pages <- 1:max_p
glue(
"https://beta.sam.gov/api/prod/federalorganizations/v1/search?api_key={api_key}&q=&pageNum={pages}&pageSize={results}&orderBy=name&ascending=asc&searchType=general&levels={as.character(level)}&status={str_to_lower(status)}"
) %>% as.character()
}) %>%
flatten_chr()
urls
}
.parse_federal_json <-
function(levels = 1:5,
results = 25000,
status = FALSE,
api_key = "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii") {
urls <-
.generate_federal_api_urls(
levels = levels,
results = results,
status = status,
api_key = api_key
)
data <- tibble()
.parse_sam_org_json_safe <-
possibly(.parse_sam_org_json, tibble::tibble())
urls %>%
future_walk(function(url) {
url %>% message()
fromJSON_safe <- possibly(jsonlite::fromJSON, tibble::tibble())
json <- fromJSON_safe(url, simplifyDataFrame = T)
if (class(json) != "list") {
return(invisible())
}
df <-
.parse_sam_org_json_safe(json = json)
rm(json)
gc()
data <<-
data %>%
bind_rows(df) %>%
distinct()
})
data <- data %>%
group_by(idsPathSAM, namesPathSAM) %>%
slice(1) %>%
ungroup()
data <-
data %>%
select(-slugDepartment) %>%
left_join(data %>%
distinct(slugDepartment, nameDepartment) %>%
filter(!is.na(slugDepartment)),
by = "nameDepartment") %>%
select(names(data), everything()) %>%
distinct()
if (data %>% hasName("codeAgency")) {
df_agency_omb <-
data %>%
select(nameDepartment,
codeAgency,
codeAgencyOMB) %>%
distinct() %>%
mutate(codeAgency = case_when(
is.na(codeAgency) ~ as.character(codeAgencyOMB),
TRUE ~ codeAgency
)) %>%
select(-codeAgencyOMB) %>%
rename(codeAgencyOMB = codeAgency) %>%
filter(!is.na(codeAgencyOMB)) %>%
distinct() %>%
group_by(nameDepartment) %>%
slice(1) %>%
ungroup() %>%
mutate(char = nchar((codeAgencyOMB)),
zeros = case_when(char == 1 ~ "00",
char == 2 ~ "0",
char == 3 ~ "")) %>%
unite(codeAgencyOMB, codeAgencyOMB, zeros, sep = "") %>%
select(-char)
data <- data %>%
select(-c(codeAgency, codeAgencyOMB)) %>%
left_join(df_agency_omb, by = "nameDepartment") %>%
select(one_of(names(data)), everything())
}
df_depts <- data %>%
distinct(nameDepartment) %>%
mutate(nameDepartmentActual = nameDepartment)
data <- data %>%
fix_usg_organization_col(org_col = "nameDepartment")
data <- data %>%
fix_usg_organization_col(org_col = "nameOrganization")
data <- data %>%
fix_usg_organization_col(org_col = "nameOrganizationParent")
data <- data %>%
fix_usg_organization_col(org_col = "nameAgency")
data <- data %>%
fix_usg_organization_col(org_col = "nameCommandMajor")
data <- data %>%
fix_usg_organization_col(org_col = "nameCommandSub")
data <- data %>%
fix_usg_organization_col(org_col = "nameOffice")
data <- data %>%
fix_usg_organization_col(org_col = "nameDepartmentAgency")
data <- data %>%
mutate(isActive = case_when(statusModification == "ACTIVE" ~ 1,
TRUE ~ 0))
data <-
data %>%
group_by(namesPathSAM, idOffice, idOfficeFPDS) %>%
filter(isActive == max(isActive)) %>%
filter(idOrganizationSAM == max(idOrganizationSAM)) %>%
ungroup()
data <- data %>%
mutate(isActive = case_when(is.na(datetimeOrganizationEnd) ~ 1,
TRUE ~ isActive))
data <-
data %>%
mutate(
typeOrganization = case_when(
idOrganizationLevel == 1 ~ "DEPARTMENT",
idOrganizationLevel == 2 ~ "AGENCY",
idOrganizationLevel == 3 ~ "MAJOR COMMAND",
idOrganizationLevel == 4 ~ "SUB COMMAND",
idOrganizationLevel == 5 ~ "OFFICE"
)
)
data <-
data %>%
mutate(
idOffice = case_when(is.na(idOffice) ~ idOfficeFPDS,
TRUE ~ idOffice),
idOfficeFPDS = case_when(is.na(idOfficeFPDS) ~ idOffice,
TRUE ~ idOfficeFPDS)
)
data <- data %>%
select(-one_of("summaryOffice"))
group_cols <-
names(data)[names(data) %in% c("namesPathSAM",
"idAgencyFPDS",
"idOffice",
"idOfficeAAC",
"idOfficeFPDS")]
data <-
data %>%
mutate_at(group_cols, as.character)
data <-
data %>%
group_by(!!!syms(group_cols)) %>%
filter(idOrganizationSAM == max(idOrganizationSAM)) %>%
ungroup()
data <-
data %>%
group_by(!!!syms(group_cols)) %>%
filter(isActive == max(isActive)) %>%
filter(idOrganizationSAM == max(idOrganizationSAM)) %>%
ungroup()
data <- data %>%
group_by(namesPathSAM, idOffice) %>%
filter(isActive == max(isActive)) %>%
ungroup()
data <- data %>%
group_by(namesPathSAM, idOfficeFPDS) %>%
filter(idOrganizationSAM == max(idOrganizationSAM)) %>%
ungroup()
has_all_names <-
names(data) %in% c("nameDepartment",
"nameAgency",
"nameCommandMajor",
"nameCommandSub",
"nameOffice") %>% sum() == 5
if (has_all_names) {
data <- data %>%
mutate(
nameOrganization = case_when(
idOrganizationLevel == 1 ~ nameDepartment,
idOrganizationLevel == 2 ~ nameAgency,
idOrganizationLevel == 3 ~ nameCommandMajor,
idOrganizationLevel == 4 ~ nameCommandSub,
idOrganizationLevel == 5 ~ nameOffice
)
)
data <- data %>%
mutate(
nameOrganizationParent = case_when(
idOrganizationLevel == 1 ~ NA_character_,
idOrganizationLevel == 2 ~ nameDepartment,
idOrganizationLevel == 3 ~ nameAgency,
idOrganizationLevel == 4 ~ nameCommandMajor,
idOrganizationLevel == 5 ~ nameCommandSub
)
)
}
data <- data %>%
group_by(namesPathSAM , codeOrganization) %>%
filter(isActive == max(isActive)) %>%
ungroup() %>%
mutate(isActive = as.logical(isActive))
data <-
data %>%
.munge_data()
data <-
data %>%
select(
typeOrganization,
isActive,
idOrganizationSAM,
namesPathSAM,
idsPathSAM,
one_of(
"nameOrganization",
"nameDepartment",
"nameDepartmentAgency",
"nameAgency",
"nameCommandMajor",
"nameOrganizationParent",
"nameOffice",
"nameCommandMajor",
"nameCommandSub"
),
matches("id[A-Z]"),
everything()
)
data
}
# federal organizations ---------------------------------------------------
#' Federal Organizations from New SAM API
#'
#' Returns information about U.S. government
#' organizations by user stated level
#'
#' @param levels vector of depth levels 1:5 \itemize{
#' \item 1 - Department
#' \item 2 - Agency
#' \item 3 - Major Command
#' \item 4 - Sub Command
#' \item 5 - Office
#' }
#' @param results number of results
#' @param status if \code{TRUE} includes
#' @param api_key status
#' @param only_active if \code{TRUE} returns only active
#' organizations
#'
#' @return
#' @export
#'
#' @examples
sam_federal_organizations <-
memoise::memoise(function(levels = 1:5,
results = 25000,
status = FALSE,
only_active = F,
munge_distinct = F,
join_addresses = T,
snake_names = F,
api_key = "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii") {
data <-
.parse_federal_json(
levels = levels,
results = results,
status = status,
api_key = api_key
)
if (only_active) {
data <-
data %>%
filter(isActive)
}
if (data %>% hasName("nameOffice")) {
data <-
data %>%
filter(!nameOffice %in% c("ACTION"))
}
if (data %>% hasName("nameDepartment")) {
data <-
data %>%
filter(!nameDepartment %in% c("BNJBNIKJN", "NJKLNOUINUJ"))
}
if (munge_distinct) {
group_cols <-
names(data)[names(data) %in% c("idOrganizationLevel",
data %>% select(matches("name")) %>% names())]
data <-
data %>%
group_by(!!!syms(group_cols)) %>%
arrange(desc(datetimeCreated)) %>%
dplyr::slice(1) %>%
ungroup()
}
data <-
data %>%
select(one_of(
c(
"isActive",
"idOrganizationSAM",
"nameDepartment",
"slugDepartment",
"nameAgency",
"nameOffice",
"nameCommandMajor",
"typeOrganization"
)
), everything())
tbl_omb <- dictionary_omb_cgac_accounts()
if (data %>% hasName("idAgencyFPDS")) {
df_offices <- data %>%
filter(!is.na(idAgencyFPDS)) %>%
select(idAgencyFPDS) %>%
distinct() %>%
mutate(idCGACAgency = idAgencyFPDS %>% substr(1, 2) %>% as.numeric())
df_offices <-
df_offices %>%
left_join(tbl_omb %>% select(-slugCGAC),
by = c("idCGACAgency" = "idCGAC")) %>%
distinct()
data <- data %>%
left_join(df_offices, by = "idAgencyFPDS")
}
if (data %>% hasName("datetimeOrganizationStart")) {
data <- data %>%
mutate(dateOrganizationStart = as.Date(datetimeOrganizationStart))
}
if (join_addresses) {
data <-
data %>%
build_address() %>%
distinct()
}
if (snake_names) {
data <-
data %>%
clean_names()
}
data
})
# attachments -------------------------------------------------------------
.parse_attachment_url <-
function(url = "https://beta.sam.gov/api/prod/opps/v3/opportunities/c377e74a72896a7c4569deb03d1b248b/resources?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1573668320989&excludeDeleted=false&withScanResult=false",
return_message = T) {
data <- .stream_json(url = url)
if (names(data) %>% str_detect("embedded") == F) {
return(invisible())
}
data <- data$embedded
if (return_message) {
glue("\n\nParsing {url} for attachments\n\n") %>% message()
}
data <-
data$opportunityAttachmentList %>% enframe(name = "id") %>%
unnest() %>%
unnest() %>%
select(-id) %>%
.munge_biz_opps_names()
logical_cols <- data %>% select(matches("^is|^has")) %>% names()
if (length(logical_cols) > 0) {
data <- data %>%
mutate_at(logical_cols,
list(function(x) {
x %>% readr::parse_number() %>% as.logical()
}))
}
data <- data %>%
.munge_data(clean_address = F) %>%
mutate(urlOpportunityAttachmentAPI = url)
id_cols <- data %>% select(matches("id")) %>%
select_if(is.character) %>% names()
if (length(id_cols) > 0) {
data <- data %>%
mutate_at(id_cols,
str_to_lower)
}
data <-
data %>%
arrange(numberAttachment) %>%
.remove_na()
data
}
#' Parse SAM/FBO Contract Attachment JSON
#'
#' Extract attachment data from vector of URLs
#' relating to SAM contract opportunities
#'
#' @param urls vector of URLS
#' @param nest_data if \code{TRUE} nests data - defaults to TRUE
#' @param return_message if \code{TRUE} returns a messaage
#'
#' @return
#' @export
#'
#' @examples
#' parse_contract_attachment_urls(urls = "https://beta.sam.gov/api/prod/opps/v3/opportunities/52bc1e4035dd4f5484bad81174264ce8/resources?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&excludeDeleted=false&withScanResult=false", return_message = T)
parse_contract_attachment_urls <-
function(urls = c(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/9d3960a1166c45b49ffc9cdc0c87584d/resources?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&excludeDeleted=false&withScanResult=false",
"https://beta.sam.gov/api/prod/opps/v3/opportunities/52bc1e4035dd4f5484bad81174264ce8/resources?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&excludeDeleted=false&withScanResult=false"
),
nest_data = T,
return_message = T) {
.parse_attachment_url_m <- memoise::memoise(.parse_attachment_url)
.parse_attachment_url_safe <-
possibly(.parse_attachment_url_m, tibble())
all_data <- urls %>%
map_dfr(function(url) {
.parse_attachment_url_safe(url = url, return_message = return_message)
})
all_data <-
all_data %>%
nest(-idNotice) %>%
rename(dataAttachments = data)
all_data <-
all_data %>%
mutate(
hasAttchments = length(dataAttachments) > 0,
countAttachments = dataAttachments %>% map_dbl(nrow),
urlOpportunityAttachmentZIP = glue(
"https://beta.sam.gov/api/prod/opps/v3/opportunities/{idNotice}/resources/download/zip"
) %>% as.character()
)
if (!nest_data) {
all_data <-
all_data %>%
unnest(dataAttachments)
}
all_data
}
# zip ---------------------------------------------------------------------
#https://beta.sam.gov/api/prod/opps/v3/opportunities/3485b97b372b498a84069d6132c59228/resources/download/zip?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&token=
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.