## asb_key <- "D4cKoXpvlody0ff5Dg0lCKd4uQDseRaPGih8iuNu"
# mungers -----------------------------------------------------------------
.naics_2002 <-
function() {
data <-
"https://www.census.gov/eos/www/naics/reference_files_tools/2002/naics_6_02.txt" %>%
read_tsv(col_names = F)
data <-
data %>%
dplyr::slice(5:nrow(data))
data <-
data %>%
separate("X1", into = c('idNAICS', "nameNAICS"),
extra = "merge",
sep = "\\ ") %>%
mutate(
idNAICS = as.numeric(idNAICS),
nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
yearCodeBookNAICS = 2002
)
data
}
.naics_2007 <-
function() {
data <-
"https://www.census.gov/eos/www/naics/reference_files_tools/2007/naics07_6.xls" %>% download_excel_file()
data <-
data %>%
dplyr::slice(3:nrow(data)) %>%
setNames(c("idNAICS", "nameNAICS")) %>%
mutate(
idNAICS = as.numeric(idNAICS),
nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
yearCodeBookNAICS = 2007
)
data
}
.naics_2012 <-
function() {
data <-
download_excel_file(url = "https://www.census.gov/eos/www/naics/reference_files_tools/2012/2012_NAICS_Structure.xls") %>%
select(2,3)
data <-
data %>%
dplyr::slice(4:nrow(data)) %>%
setNames(c("idNAICS", "nameNAICS")) %>%
mutate(idNAICS = as.numeric(idNAICS),
nameNAICS = nameNAICS %>% str_remove("T$") %>% str_to_upper(),
yearCodeBookNAICS = 2012)
data
}
#' NAICS Dictionary
#'
#' @param url location of csv file
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_naics_codes()
dictionary_naics_codes <-
memoise::memoise(function(url = "https://www.census.gov/eos/www/naics/2017NAICS/2017_NAICS_Structure.xlsx") {
options(warn = -1)
data <-
rio::import(url)
data <-
data %>%
as_tibble() %>%
select(2, 3)
data <-
data %>% dplyr::slice(3:nrow(data)) %>%
set_names("idNAICS", "nameNAICS") %>%
mutate(
isT = nameNAICS %>% str_detect("[a-z]T"),
nameNAICS = case_when(isT ~ nameNAICS %>% substr(1, nchar(nameNAICS) -
1),
TRUE ~ nameNAICS) %>% str_to_upper()
) %>%
mutate(nameNAICS = case_when(isT ~ nameNAICS %>% str_remove_all("T$"),
TRUE ~ nameNAICS))
df_sectors <-
data %>%
filter(nchar(idNAICS) == 2 | idNAICS %>% str_detect("\\-")) %>%
distinct(idSectorNAICS = idNAICS, nameSectorNAICS = nameNAICS, isT) %>%
separate_rows("idSectorNAICS", sep = "\\-") %>%
mutate(idSectorNAICS = as.numeric(idSectorNAICS)) %>%
mutate(nameSectorNAICS = case_when(isT ~ nameSectorNAICS %>% str_remove_all("T$"),
TRUE ~ nameSectorNAICS)) %>%
select(-isT) %>%
bind_rows(
tibble(idSectorNAICS = 32, nameSectorNAICS = "MANUFACTURING")
) %>%
arrange(idSectorNAICS)
data <-
data %>%
group_by(nameNAICS) %>%
filter(idNAICS == max(idNAICS)) %>%
ungroup() %>%
select(-isT) %>%
mutate(idNAICS = as.integer(idNAICS)) %>%
mutate(yearCodeBookNAICS = 2017)
df_2012 <- .naics_2012()
df_2007 <- .naics_2007()
df_2002 <- .naics_2002()
data <-
list(data, df_2002, df_2007, df_2012) %>%
reduce(bind_rows)
data <-
data %>%
mutate(char = nchar(idNAICS))
df_subsectors <-
data %>% filter(char == 3) %>%
distinct(idSubSectorNAICS = idNAICS, nameSubSectorNAICS = nameNAICS)
df_industry_groups <-
data %>% filter(char == 4) %>%
distinct(idIndustryGroupNAICS = idNAICS,
nameIndustryGroupNAICS = nameNAICS)
df_industries <-
data %>% filter(char == 5) %>%
distinct(idIndustryNAICS = idNAICS, nameIndustryNAICS = nameNAICS)
data <-
data %>%
filter(char >= 6) %>%
mutate(
isNAICSSameNational = idNAICS %>% str_detect("0$"),
idSectorNAICS = idNAICS %>% substr(1, 2),
idSubSectorNAICS = idNAICS %>% substr(1, 3),
idIndustryGroupNAICS = idNAICS %>% substr(1, 4),
idIndustryNAICS = idNAICS %>% substr(1, 5)
) %>%
mutate_at(
c(
"idIndustryNAICS",
"idIndustryGroupNAICS",
"idSubSectorNAICS",
"idSectorNAICS"
),
as.numeric
) %>%
group_by(idNAICS, nameNAICS) %>%
filter(yearCodeBookNAICS == max(yearCodeBookNAICS)) %>%
ungroup() %>%
left_join(df_sectors, by = "idSectorNAICS") %>%
left_join(df_subsectors, by = "idSubSectorNAICS") %>%
left_join(df_industry_groups, by = "idIndustryGroupNAICS") %>%
left_join(df_industries, by = "idIndustryNAICS") %>%
select(-char)
data <-
data %>%
select(yearCodeBookNAICS,
idNAICS,
nameNAICS,
matches("name"),
everything())
data
})
.add_address <-
function(data) {
if (data %>% tibble::has_name("location")) {
return(data)
}
address_parts <-
data %>%
select(-matches("Incorp")) %>%
select(matches("address|city|country|state|zipcode")) %>% names()
if (length(address_parts) > 0) {
address <- c()
has_2 <- "addressStreet2" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_2) {
column <-
address_parts["addressStreet2" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = " ")
address <- address %>% append(value)
}
has_1 <-
"addressStreet1" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_1) {
column <-
address_parts["addressStreet1" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = " ")
address <- address %>% append(value)
}
has_city <- "city" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_city) {
column <-
address_parts["city" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = " ")
value <- glue::glue(" {value}") %>% as.character()
address <- address %>% append(value)
}
has_state <- "state" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_state) {
column <-
address_parts["state" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = ", ")
value <- glue::glue(", {value}") %>% as.character()
address <- address %>% append(value)
}
has_zip <- "zipcode" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_zip) {
column <-
address_parts["zipcode" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = " ")
value <- glue::glue(" {value}") %>% as.character()
address <- address %>% append(value)
}
has_country <- "codeCountry" %>% str_detect(address_parts) %>% sum(na.rm = T) > 0
if (has_country) {
column <-
address_parts["codeCountry" %>% str_detect(address_parts)]
value <- data %>% pull(column) %>% str_c(collapse = ", ")
value <- glue::glue(", {value}") %>% as.character()
address <- address %>% append(value)
}
location <-
address %>% str_c(collapse = "")
data <- data %>%
mutate(
location = glue::glue(location) %>% as.character() %>% str_to_upper() %>% gsub("\\s+", " ", .) %>% str_trim()
)
}
data
}
.add_name <-
function(data) {
add_name <-
data %>% tibble::has_name("nameFirst") &
data %>% tibble::has_name("nameLast") &
!data %>% tibble::has_name("namePerson")
if (add_name) {
data <-
data %>%
unite(namePerson,
nameFirst,
nameLast,
sep = " ",
remove = F)
}
data
}
.munge_data_sam_names <-
function(data) {
dict_names <- dictionary_sam_names()
sam_names <-
names(data)
actual_names <-
sam_names %>%
map_chr(function(name) {
df_row <-
dict_names %>% filter(nameSAM == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
# parseers ----------------------------------------------------------------
.parse_far_responses <-
function(data) {
slugFAR <- data$id
answers <- data$answers
df_row_names <-
seq_along(answers) %>%
map_df(function(x) {
x %>% message()
rows <- answers[[x]]
if (length(rows) == 0) {
return(tibble())
}
rows_name <- rows %>% names()
tibble(nameRow = rows_name) %>%
mutate(idRow = x) %>%
select(idRow, everything())
})
answer_names <-
df_row_names %>% filter(nameRow != "section") %>% pull(nameRow) %>% unique()
all_data <-
answer_names %>%
map(function(answer_name) {
if (answer_name == "answerText") {
skip_rows <-
df_row_names %>%
filter(nameRow %in% c("SamPointOfContact", "samFacility", "naics",
"Software")) %>% pull(idRow)
rows <-
df_row_names %>%
filter(!idRow %in% skip_rows) %>%
pull(idRow) %>%
unique()
d <- answers[rows]
all_data <-
seq_along(d) %>%
map_df(function(x) {
df_row <-
d[[x]] %>% as_tibble() %>% select(-matches("naics"))
df_row <-
df_row %>%
.munge_data_sam_names()
if (df_row %>% tibble::has_name("textAnswer")) {
df_row <-
df_row %>%
mutate(
isResponseTRUE = case_when(
textAnswer == "No" ~ FALSE,
textAnswer == "Yes" ~ TRUE,
TRUE ~ NA
)
)
}
df_row
})
all_data <-
all_data %>%
left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
select(slugFAR, descriptionFAR, everything()) %>%
nest(.key = "dataFARAnswers")
return(all_data)
}
rows <-
df_row_names %>%
filter(nameRow == answer_name) %>%
pull(idRow) %>%
unique()
if (length(rows) == 0) {
return(tibble())
}
d <- answers[rows]
if (length(d) == 0) {
return(tibble())
}
if (answer_name == "Software") {
all_data <-
seq_along(d) %>%
map_df(function(x) {
df_row <- d[[x]]
data_table <-
df_row[names(df_row) %in% c("section", "answerText")] %>%
as_tibble() %>%
.munge_data_sam_names() %>%
.munge_data() %>%
left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
select(slugFAR, descriptionFAR, everything()) %>%
filter(!is.na(textAnswer))
data_software <-
df_row$Software %>%
discard(is_null) %>%
flatten_df() %>%
as_tibble()
if (length(data_software) > 0) {
data_software <-
data_software %>%
set_names(c("nameSoftware", "idSoftware", "typeProduct")) %>%
.munge_data()
data_table <-
data_table %>%
mutate(dataSoftware = list(data_software))
}
data_table
}) %>%
nest(.key = "dataFARSoftware")
return(all_data)
}
if (answer_name == "naics") {
all_data <-
seq_along(d) %>%
map_df(function(x) {
df_row <- d[[x]]
data_table <-
df_row[names(df_row) %in% c("section", "answerText")] %>%
as_tibble() %>%
.munge_data_sam_names() %>%
.munge_data()
data_table
data_naics <-
df_row$naics %>%
discard(is_null) %>%
flatten_df() %>%
as_tibble() %>%
.munge_data_sam_names() %>%
.munge_data()
tibble(
name = c("answer", "naics"),
data = list(data_table, data_naics)
) %>%
mutate(number = x)
})
tables <- all_data$name %>% unique()
all_data <-
tables %>%
map(function(table) {
table_name <- case_when(table == "answer" ~ "dataFARNAICSAnswers",
TRUE ~ "dataFARNAICS")
df_row <-
all_data %>%
filter(name == table) %>%
select(data) %>%
unnest()
if (table == "answer") {
df_row <-
df_row %>%
mutate(
isResponseTRUE = case_when(
textAnswer == "No" ~ FALSE,
textAnswer == "Yes" ~ TRUE,
TRUE ~ NA
)
) %>%
left_join(dictionary_sam_table("far"), by = "slugFAR")
}
df_row %>% nest(.key = UQ(table_name))
}) %>%
purrr::reduce(bind_cols)
return(all_data)
}
if (answer_name == "SamPointOfContact") {
d <- answers[[rows]]
all_data <-
d$SamPointOfContact %>%
as_tibble() %>%
mutate(section = d$section) %>%
.munge_data_sam_names() %>%
.munge_data() %>%
left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
select(slugFAR, descriptionFAR, everything()) %>%
nest(.key = "dataContactSAM")
return(all_data)
}
if (answer_name == "samFacility") {
all_data <-
seq_along(d) %>%
map_df(function(x) {
df_row <- d[[x]]
facility <-
df_row$samFacility %>% purrr::discard(is_null) %>% flatten_df()
names(facility) <-
names(facility) %>% str_remove_all("\\.")
df_row <-
df_row[, 1:2] %>%
.munge_data_sam_names() %>%
bind_cols(facility %>%
.munge_data_sam_names() %>%
.munge_data(clean_address = F)) %>%
as_tibble()
df_row
}) %>%
mutate(isResponseTRUE = case_when(
textAnswer == "No" ~ FALSE,
textAnswer == "Yes" ~ TRUE,
TRUE ~ NA
)) %>%
left_join(dictionary_sam_table("far"), by = "slugFAR") %>%
select(slugFAR, descriptionFAR, everything())
all_data <-
all_data %>%
nest(.key = "dataFacilitySAM")
return(all_data)
}
}) %>%
purrr::reduce(bind_cols)
all_data
}
.parse_dfar <-
function(data) {
if (names(data) %>% str_count("answers.answerText") %>% sum(na.rm = T) > 0) {
data <-
data %>% as_tibble()
names(data) <- names(data) %>% str_remove_all("answers.")
if (data %>% tibble::has_name("id")) {
data <-
data %>%
select(-one_of("section"))
data <-
data %>%
rename(slugFAR = id)
}
data <-
data %>%
.munge_data_sam_names() %>%
.munge_data()
data <-
data %>%
mutate(isResponseTRUE = case_when(textAnswer == "No" ~ FALSE,
textAnswer == "Yes" ~ TRUE,
TRUE ~ NA)) %>%
nest(.key = "dataDFARAnswers")
return(data)
}
if ((names(data) == "answers") %>% sum(na.rm = T) == 0) {
return(tibble())
}
answers <- data$answers
all_data <-
seq_along(answers) %>%
map_df(function(x) {
answers[[x]] %>% as_tibble() %>%
.munge_data_sam_names() %>%
.munge_data(clean_address = F)
}) %>%
mutate(isResponseTRUE = case_when(textAnswer == "No" ~ FALSE,
textAnswer == "Yes" ~ TRUE,
TRUE ~ NA)) %>%
nest(.key = "dataDFARAnswers")
all_data
}
.parse_certification_data <-
function(data) {
data <-
data[names(data) %in% c("farResponses", "dfarResponses")]
has_far <-
(names(data) == "farResponses") %>% sum(na.rm = T) > 0
has_dfar <-
(names(data) == "dfarResponses") %>% sum(na.rm = T) > 0
if (sum(as.numeric(has_far), as.numeric(has_dfar)) == 0) {
return(tibble())
}
if (has_far) {
df_far <-
data[["farResponses"]] %>% .parse_far_responses()
df <- tibble(dataFAR = list(df_far))
}
if (has_dfar) {
df_dfar <-
data[["dfarResponses"]] %>% .parse_dfar()
if ('df' %>% exists()) {
df <- df %>% mutate(dataDFAR = list(df_dfar))
} else {
df <-
tibble(dataDFAR = list(df_dfar))
}
}
df
}
# utils -------------------------------------------------------------------
.pad_dun <-
function(duns = 81267103,
zero_base = 13) {
inital_chars <- nchar(duns)
first_zero <- 9 - nchar(duns)
if (first_zero > 0) {
first_zero <- rep(0, first_zero) %>% str_c(collapse = "")
duns <- glue::glue("{first_zero}{duns}") %>% as.character()
}
zero_count <-
zero_base - nchar(duns)
zeros <- rep(0, zero_count) %>% str_c(collapse = "")
if (length(zeros) == 0) {
slug_duns <-
glue::glue("{duns}") %>% as.character()
} else {
slug_duns <-
glue::glue("{duns}{zeros}") %>% as.character()
}
tibble(idDUNS = as.integer(duns), slugDUNS = slug_duns)
}
.pad_duns <-
function(duns = 81267103,
zero_base = 13) {
duns %>%
map_dfr(function(x){
.pad_dun(duns = x, zero_base = zero_base)
})
}
#' Pad DUNS
#'
#' Pad vector of duns for SAM API
#'
#' @param duns vector of duns numbers
#'
#' @return
#' @export
#'
#' @examples
#' pad_duns()
pad_duns <-
function(duns = 608176715) {
duns %>%
map_dfr(function(duns) {
.pad_duns(duns = duns)
})
}
#' SAM column dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_sam_names()
dictionary_sam_names <-
function() {
tibble(
nameSAM = c(
"lastUpdateDate",
"businessStartDate",
"congressionalDistrict",
"corporateUrl",
"corporateStructureCode",
"dunsPlus4",
"debtSubjectToOffset",
"cage",
"fiscalYearEndCloseDate",
"publicDisplay",
"registrationDate",
"expirationDate",
"purposeOfRegistration",
"submissionDate",
"creditCardUsage",
"countryOfIncorporation",
"businessTypes",
"corporateStructureName",
"statusMessage",
"stateOfIncorporation",
"hasKnownExclusion",
"legalBusinessName",
"duns",
"activationDate",
"status",
"naicsName",
"isPrimary",
"naicsCode",
"zip",
"city",
"countryCode",
"stateorProvince",
"line2",
"line1",
"zipPlus4",
"companyDivision",
"doingBusinessAsName",
"idDUNS",
"pscName",
"pscCode",
"firstName",
"lastName",
"title",
"fax",
"usPhone",
"email",
"usPhoneExt",
"discId",
"discDefinition",
"businessTypeCode",
"businessTypeName",
"purposeCode",
"purposeName",
"farId",
"farDescription",
"countryCode ",
"countryName",
"expID",
"expDescription",
"revCode",
"revDescription",
"answerText",
"section",
"ownerName",
"ownerAddresszip",
"ownerAddressstateOrProvince",
"ownerAddresscity",
"ownerAddresscountryCode",
"ownerAddressline1",
"plantAddresszip",
"plantAddressstateOrProvince",
"plantAddresscity",
"plantAddresscountryCode",
"plantAddressline1",
"ExcpCounter",
"isSmallBusiness",
"duns_plus4",
"samAddress.zip",
"samAddress.stateOrProvince",
"samAddress.city",
"samAddress.countryCode",
"samAddress.zip4",
"samAddress.line1",
"stateOrProvince",
"zip4",
"slugFAR",
"financialAssistanceResponse",
"divisionNumber",
"middleInitial",
"ownerAddressline2",
"plantAddressline2",
"EndProduct.countryCode", "EndProduct.name",
"Company.name",
"Company.tin",
"immediateOwnerCage.legalBusinessName",
"immediateOwnerCage.cageCode",
"immediateOwnerCage.hasOwner",
"state",
"ncage",
"sensitivity",
"dbaName",
"activeDate",
"businessTypeCounter",
"businessType",
"corporateURL",
"stateProvision",
"cageCode",
"correspondenceFlag",
"delinquentFedDebtFlag",
"entityStructure",
"noPublicDisplayFlag",
"recentPredecessorCageCode",
"recentPredecessorBusName",
"secondRecentPredecessorCageCode",
"secondRecentPredecessorBusName",
"thirdRecentPredecessorCageCode",
"thirdRecentPredecessorBusName",
"immedOwnerPredecessorCageCode",
"immedOwnerRecentPredecessorBusName",
"highestOwnerPredecessorCageCode",
"highestOwnerRecentPredecessorBusName",
"filterName",
"profitStructure",
"organizationStructure",
"entityType",
"dodaac",
"exclusionStatusFlag",
"samAddress.address1",
"samAddress.address2",
"samAddress.addressCity",
"samAddress.addressState",
"samAddress.addressZip",
"samAddress.addressZipPlus4",
"samAddress.country",
"mailAddress.address1",
"mailAddress.address2",
"mailAddress.addressCity",
"mailAddress.addressState",
"mailAddress.addressZip",
"mailAddress.addressZipPlus4",
"mailAddress.country",
"primaryNaics",
"registryFlag",
"geographicalAreaServed",
"averageNumberOfEmployees",
"linkForFARReport",
"linkForDFARSReport",
"pocType",
"pocFirstName",
"pocMiddleName",
"pocLastName",
"pocTitle",
"pocUSPhone",
"pocUSPhoneExt",
"pocNonUSPhone",
"pocFax",
"pocEmail",
"pocAddress.address1",
"pocAddress.address2",
"pocAddress.addressCity",
"pocAddress.addressState",
"pocAddress.addressZip",
"pocAddress.addressZipPlus4",
"pocAddress.country",
"pscList",
"orgKey", "a11TacCode", "agencyName", "categoryDesc", "categoryId",
"cfdaBur", "cfdaCode", "cfdaOmb", "createdDate", "description",
"fpdsCode", "fpdsOrgId", "cgac", "fullParentPath", "fullParentPathName",
"isSourceCfda", "isSourceCwCfda", "isSourceFpds", "lastModifiedBy",
"lastModifiedDate", "modStatus", "name", "ombAgencyCode", "orgCode",
"shortName", "l1ShortName", "sourceCfdaPk", "startDate", "summary",
"tas2Code", "tas3Code", "level", "logoUrl", "code", "sendEmail",
"l1OrgKey", "l1Name", "createdBy", "endDate", "ingestedOn", "sourceParentCfdaPk",
"ediInformationFlag",
"version",
"pages",
"encrypted",
"linearized",
"Author",
"Creator",
"Producer",
"created",
"modified",
"metadata",
"locked",
"attachments",
"layout",
"Comments", "Company", "Keywords", "SourceModified", "Subject",
"Title",
"LastSaved",
"Created"
),
nameActual = c(
"datetimeLastUpdated",
"dateBusinessStart",
"slugCongressionalDistrict",
"urlCompany",
"slugCorporateStructure",
"slugDUNSPlus4",
"isDebtSubjectToOffset",
"slugCAGE",
"slugFiscalYearEnd",
"isPublicDisplay",
"datetimeRegistration",
"datetimeExpiration",
"typeRegistration",
"datetimeSubmission",
"hasCreditCardUsage",
"countryIncorporation",
"slugBusinessType",
"typeCorporateStructure",
"typeStatus",
"stateIncorporation",
"hasKnownExclusion",
"nameCompanyLegal",
"idDUNS",
"datetimeActivated",
"statusCompany",
"nameNAICS",
"isPrimaryNAICS",
"idNAICS",
"zipcode",
"city",
"codeCountry",
"state",
"addressStreet1",
"addressStreet2",
"zipPlus4",
"nameCompanyDivision",
"nameCompanyDBA",
"idDUNS",
"nameProductService",
"codeProductService",
"nameFirst",
"nameLast",
"title",
"fax",
"telephone",
"email",
"telephoneExtension",
"idDiscipline",
"descriptionDiscipline",
"slugBusinessType",
"typeBusiness",
"slugPurpose",
"typePurpose",
"slugFAR",
"descriptionFAR",
"codeCountry",
"nameCountry",
"codeExperience",
"typeExperience",
"idRevenue",
"typeRevenue",
"textAnswer",
"slugFAR",
"nameOwner",
"zipcodeOwner",
"stateOwner",
"cityOwner",
"codeCountryOwner",
"addressStreet1Owner",
"zipcodePlant",
"statePlant",
"cityPlant",
"codeCountyPlant",
"addressStreet1Plant",
"countException",
"isSmallBusiness",
"dunsSlug",
"zipcodeCompany",
"stateCompany",
"cityCompany",
"codeCountryCompany",
"zipcode4Company",
"addressStreet1Company",
"state",
"zipcode4",
"slugFAR",
"hasFinancialAssistanceResponse",
"numberDivision",
"initialMiddle",
"addressStreet2Owner",
"addressStreet2Plant",
"codeCountryProduct",
"idNameEndProduct",
"nameCompany",
"tinCompany",
"nameCompanyLegalCAGEOwner",
"codeCAGEOwner",
"hasCAGEOwner",
"state",
"slugCAGE",
"typeSensitivity",
"nameCompanyDBA",
"dateActivated",
"typeBusinessCounter",
"typeEntityStructure",
"urlCompany",
"nameState",
"slugCAGE",
"hasCorrespondenceFlag",
"hasDelinquentFedDebtFlag",
"typeCorporateStructure",
"hasNoPublicDisplayFlag",
"slugCAGEPrior",
"nameCompanyPrior",
"slugCAGEPrior2",
"nameCompanyPrior2",
"slugCAGEPrior3",
"nameCompanyPrior3",
"slugCAGEImmedOwner",
"nameCompanyPriorImmedOwner",
"slugCAGEPriorHighest",
"nameCompanyPriorHighest",
"nameFilter",
"typeProfitStructure",
"typeOrganizationStructure",
"typeEntity",
"slugDeptDefenseAddressCode",
"hasExclusionStatusFlag",
"addressStreet1SAM",
"addressStreet2SAM",
"citySAM",
"stateSAM",
"zipcodeSAM",
"zip4SAM",
"countrySAM",
"addressStreet1Mail",
"addressStreet2Mail",
"cityMail",
"state4Mail",
"zipcodeMail",
"zip4Mail",
"countryMail",
"idNAICSPrimary",
"hasRegistry",
"areasGeographicalServed",
"countEmployeesAverage",
"urlFAR",
"urlDFAR",
"typePOC",
"nameFirstPOC",
"nameMiddlePOC",
"nameLastPOC",
"titlePOC",
"phoneUSPOC",
"phoneUSPOCExt",
"phoneNonUSPOC",
"faxPOC",
"emailPOC",
"addressStreet1POC",
"addressStreet2POC",
"cityPOC",
"statePOC",
"zipcodePOC",
"zp4POC",
"countryPOC",
"pscList",
"idSAM", "idA11TAC", "nameAgencyRemove", "typeCategory", "slugCategory",
"idCFDABUR", "idCFDA", "idOMB", "datetimeCreated", "descriptionAgency",
"codeAgency", "codeOrganization", "codeGovernmentAccuntingSystem", "idParentPath", "nameAgencyRemove2",
"isCFDA", "isCWCDFDA", "isFPDS", "personLastModified",
"datetimeLastUpdated", "status", "nameAgency", "idAgencyOMB", "codeOrganizationOther",
"slugAgency", "slugAgencyOther", "keyCFDAP", "datetimeStarted", "summaryAgency",
"idTAS2", "idTAS3", "idLevel", "urlLogo", "codeAgencyOther", "hasDSP",
"idSAML1", "nameAgencyL1", "entryCreatedBy", "datetimeEnd", "datetimeIngested", "keyCFDAPOther",
"hasEDIInformationFlag",
"idVersion",
"countPages",
"isEncrypted",
"isLinearized",
"nameAuthor",
"nameCreator",
"typeProducer",
"dateCreated",
"dateModified",
"xmlMetadata",
"isLocked",
"hasAttachments",
"typeLayout",
"descriptionComments", "companyLicense", "keywordLicense", "sourceModified", "subjectLicense",
"titleFile",
"codeLastSaved",
"codeCreated"
)
)
}
#' SAM Table dictionary
#'
#' @return
#' @export
#'
#' @examples
dictionary_sam_table_urls <-
function() {
tibble(
nameDictionary = c(
"discipline",
"business type",
"purpose",
"far",
"countries",
"experience",
"revenue"
),
urlJSON = c(
"http://gsa.github.io/sam_api/static/discipline.json",
"http://gsa.github.io/sam_api/static/businessTypes.json",
"http://gsa.github.io/sam_api/static/purpose.json",
"http://gsa.github.io/sam_api/static/far.json",
"http://gsa.github.io/sam_api/static/country.json",
"http://gsa.github.io/sam_api/static/experience.json",
"http://gsa.github.io/sam_api/static/revenue.json"
)
)
}
#' SAM Dictionary
#'
#' Provides dictionary for a SAM table
#'
#' @param table \itemize{
#' \item discipline
#' \item business type
#' \item purpose
#' \item far
#' \item countries
#' \item experience
#' \item revenue
#' }
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_sam_table(table = "discipline")
dictionary_sam_table <-
function(table = "revenue") {
if (length(table) == 0) {
"Please provide a table" %>% message()
return(invisible())
}
df_urls <- dictionary_sam_table_urls()
table_slug <- table %>% str_to_lower()
df_row <- df_urls %>% filter(nameDictionary == table)
if (nrow(df_row) == 0) {
glue::glue("No matching dictionary for {table}") %>% message()
return(tibble())
}
table <- df_row$nameDictionary
url <- df_row$urlJSON
data <-
url %>%
fromJSON(simplifyDataFrame = T) %>%
as_tibble() %>%
mutate_if(is.character,
list(function(x) {
ifelse(x == "", NA_character_, x) %>% str_trim()
}))
if (table == "far") {
data <-
data %>%
fill(farDescription, .direction = "down")
}
data <- data %>% .munge_data_sam_names()
if (table == "revenue") {
data <-
data %>%
left_join(tibble(
idRevenue = 1:10,
amountRevenueMin = c(
0,
100000,
250000,
500000,
1000000,
2000000,
5000000,
10000000,
25000000,
50000000
),
amountRevenueMax = c(
99999,
249999,
499999,
999999,
1999999,
4999999,
9999999,
24999999,
49999999,
NA_integer_
)
),
by = 'idRevenue') %>%
.munge_data()
}
data
}
# duns --------------------------------------------------------------------
#' Generate SAM DUNS API Calls
#'
#' @param duns vector of DUNS
#' @param base base call
#' @param version version - defaults to 3
#' @param api_key api key, defaults to demo_key
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
sam_api_urls <-
function(duns = c(867393167, 608176715),
base = "https://api.data.gov/sam/",
version = 8,
api_key = "DEMO_KEY") {
options(scpen = 9999)
df_duns <- pad_duns(duns = duns)
slug <- df_duns$slugDUNS
url <-
glue::glue("{base}v{version}/registrations/{slug}?api_key={api_key}") %>% as.character()
df_duns <-
df_duns %>%
mutate(urlAPI = url)
df_duns
}
.parse_sam_duns_url <-
function(url = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY") {
json_data <-
fromJSON(url, flatten = T, simplifyDataFrame = T)
data <- json_data$sam_data$registration
df_classes <-
data %>%
map_df(class) %>%
gather(column, class)
base_cols <-
df_classes %>% filter(!class %>% str_detect("data.frame|list|NULL")) %>% pull(column)
list_cols <-
df_classes %>% filter(class %>% str_detect("data.frame|list")) %>% pull(column)
df_base <-
data[names(data) %in% base_cols]
df_base <-
df_base %>% flatten_df()
column_ids <- tibble(name = names(df_base)) %>%
mutate(idRow = 1:n()) %>%
filter(!name == "") %>%
pull(idRow)
duns <- df_base$duns %>% as.numeric()
df_base <-
df_base[, column_ids] %>%
as_tibble() %>%
mutate(duns = as.numeric(duns)) %>%
as_tibble()
df_base <-
df_base %>%
.munge_data_sam_names() %>%
.munge_data()
df_list <-
list_cols %>%
map(function(column) {
column %>% message()
df <- data[[column]]
if (length(df) == 0) {
return(invisible())
}
if (column == "disasterRelief") {
df <- df[["geographicalAreas"]]
if (length(df) == 0) {
return(NULL)
}
df <- df %>% .munge_data_sam_names()
df <- df %>%
mutate(idDUNS = duns) %>%
as_tibble() %>%
nest(-idDUNS, .key = "dataDisasterRelief")
return(df)
}
if (column == "qualifications") {
df <- df[["acass"]][["answers"]]
if (length(df) == 0) {
return(NULL)
}
df <-
df %>%
.munge_data_sam_names() %>%
as_tibble()
if (df %>% tibble::has_name("slugFAR")) {
df <-
df %>% rename(slugSection = slugFAR)
}
df <-
df %>% mutate(idDUNS = duns) %>%
nest(-idDUNS, .key = "dataQualifications")
return(df)
}
if (column == "certificationsURL") {
pdf_url <- df %>% purrr::flatten() %>% as.character()
if (length(pdf_url) > 0) {
df_base <<-
df_base %>%
mutate(urlCertificationPDF = pdf_url)
}
return(invisible())
}
if (column == "certifications") {
.parse_certification_data_safe <-
possibly(.parse_certification_data, tibble())
df <-
df %>% .parse_certification_data_safe()
if (nrow(df) == 0) {
df <- tibble(idDUNS = duns)
} else {
df <- df %>%
mutate(idDUNS = duns) %>%
select(idDUNS, everything())
}
return(df)
}
col_class <- class(df)
substr(column, 1, 1) <-
str_to_upper(substr(column, 1, 1))
column_name <-
case_when(
column == "Naics" ~ "dataNAICS",
column == "PscCodes" ~ "dataPSC",
column == "PastPerformancePoc" ~ "dataContactPersonPastPerformance",
column == "AltPastPerformancePoc" ~ "dataContactPersonPastPerformanceOther",
column == "ElectronicBusinessPoc" ~ "dataContactPersonElectronic",
column == "AltElectronicBusinessPoc" ~ "dataContactPersonElectronicOther",
column == "BondingInformation" ~ "dataBonding",
column == "GovtBusinessPoc" ~ "dataContactPersonGovernment",
column == "AltGovtBusinessPoc" ~ "dataContactPersonGovernmentOther",
TRUE ~ str_c("data", column, collapse = "")
)
if (col_class %>% str_detect("data")) {
df <-
df %>% as_tibble() %>% mutate(idDUNS = duns) %>%
select(idDUNS, everything()) %>%
unique()
df <-
df %>%
.munge_data_sam_names() %>%
.munge_data()
df <-
df %>%
nest(-idDUNS, .key = UQ(column_name))
return(df)
}
df <-
df %>%
flatten_df()
df <-
df %>%
.munge_data_sam_names()
df <-
df %>%
mutate(idDUNS = duns) %>%
nest(-idDUNS, .key = UQ(column_name))
}) %>%
discard(function(x) {
length(x) == 0
}) %>%
reduce(left_join) %>%
suppressMessages()
df_base <-
df_base %>%
left_join(df_list, by = "idDUNS") %>%
mutate(urlAPI = url) %>%
dplyr::select(one_of(
c(
"idDUNS",
"nameCompanyDBA",
"nameCompanyLegal" ,
"nameCompanyDivision"
)
), everything()) %>%
suppressWarnings() %>%
mutate_if(is.character, str_trim)
df_base
}
#' Parse same DUNS urls
#'
#' @param urls vector of URLS
#' @param sleep_time if not \code{NULL} sleeptime between API calls
#' @param return_message if \code{TRUE} return message
#'
#' @return
#' @export
#'
#' @examples
#'parse_sam_duns_urls(urls = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY")
parse_sam_duns_urls <-
function(urls = "https://api.data.gov:443/sam/v8/registrations/6081767150000?api_key=DEMO_KEY",
sleep_time = NULL,
return_message = T) {
df <-
tibble()
success <- function(res) {
url <-
res$url
if (return_message) {
glue::glue("Parsing {url}") %>%
message()
}
.parse_sam_duns_url_safe <-
purrr::possibly(.parse_sam_duns_url, tibble())
all_data <-
.parse_sam_duns_url(url = url)
if (length(sleep_time) > 0) {
Sys.sleep(time = sleep_time)
}
df <<-
df %>%
bind_rows(all_data)
}
failure <- function(msg) {
tibble()
}
urls %>%
map(function(x) {
curl_fetch_multi(url = x, success, failure)
})
multi_run()
df
}
#' SAM search by DUNS
#'
#' @param duns vector of DUNS numbers
#' @param sleep_time if \code{}
#' @param api_key data.gov API key, defaults to public demo key
#' @param return_message if \code{TRUE} returns message
#'
#' @return a \code{tibble}
#' @export
#'
#' @examples
#' sam_duns(duns = 826857757,)
sam_duns <-
function(duns = NULL,
sleep_time = NULL,
api_key = "DEMO_KEY",
return_message = T) {
if (length(duns) == 0) {
stop("Enter DUNS")
}
options(scipen = 9999)
df_urls <- sam_api_urls(duns = duns, api_key = api_key)
urls <- df_urls$urlAPI
.parse_sam_duns_url_safe <-
possibly(.parse_sam_duns_url, tibble())
all_data <-
urls %>%
map_dfr(function(url) {
if (return_message) {
glue::glue("Parsing {url}") %>% message()
}
data <- .parse_sam_duns_url_safe(url = url)
if (length(sleep_time) > 0) {
Sys.sleep(time = sleep_time)
}
data
}) %>%
suppressWarnings()
if (nrow(all_data) == 0) {
"No matches" %>% message()
return(invisible())
}
all_data <-
all_data %>%
mutate(isActive = statusCompany == "ACTIVE") %>%
select(
one_of(
"idDUNS",
"nameCompanyLegal",
"nameCompanyDBA",
"nameCompanyDivision",
"isActive",
"statusCompany",
"dateBusinessStart",
"datetimeRegistration"
),
everything()
) %>%
suppressWarnings()
if (all_data %>% tibble::has_name("isActive") &
all_data %>% tibble::has_name("datetimeExpiration") &
all_data %>% tibble::has_name("datetimeRegistration")) {
all_data <-
all_data %>%
mutate(countGovernmentWorkDays = case_when(
!isActive ~ (
as.Date(datetimeExpiration) - as.Date(datetimeRegistration)
) %>% as.integer(),
TRUE ~ NA_integer_
))
}
all_data <- all_data %>%
mutate(idRow = 1:n())
df_list <- all_data %>%
select(idDUNS, matches("data")) %>%
transmute_if(is.list,
.funs = list(function(x) {
x %>% map_dbl(length) > 0
})) %>%
mutate(idRow = 1:n())
names(df_list) <-
names(df_list) %>% str_replace_all("^data", "has")
all_data <-
all_data %>%
left_join(df_list, by = 'idRow') %>%
select(-idRow)
col_order <-
c(all_data %>% select(-matches("data")) %>% names(),
all_data %>% select(matches("data")) %>% names())
all_data <-
all_data %>%
select(one_of(col_order))
all_data
}
# search ------------------------------------------------------------------
.generate_sam_search_url <-
function(legal_name = NULL,
dba = NULL,
cage = NULL,
duns = NULL,
registration_status = NULL,
disaster_response = NULL,
city = NULL,
country = NULL,
state = NULL,
zip = NULL,
congressional_district = NULL,
naics_sb = NULL,
naics_any = NULL,
registration_purpose = NULL,
is_minority_owned = F,
is_woman_owned = F,
is_vet_owned = F,
is_service_vet = F,
is_8a = F,
is_hubzone = F,
is_ability_one = F,
api_version = 3,
api_key = "DEMO_KEY") {
url_base <-
glue::glue("https://api.data.gov/sam/v{api_version}/registrations?qterms=") %>% as.character()
api_slug <- glue::glue("api_key={api_key}") %>% as.character()
length_slug <- glue::glue("length=500000000")
if (length(legal_name) > 0) {
slugs <- legal_name %>% map_chr(URLencode)
legal_names <- legal_name %>% str_c(collapse = " | ")
slug_legal_name <-
c("legalBusinessName:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_legal_name <- ""
legal_names <- legal_name
}
if (length(dba) > 0) {
slugs <- dba %>% map_chr(URLencode)
dbas <- dba %>% str_c(collapse = " | ")
slug_dba <-
c("doingBusinessAs:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_dba <- ""
dbas <- dba
}
if (length(cage) > 0) {
slugs <- cage %>% map_chr(URLencode)
cages <- cage %>% str_c(collapse = " | ")
slug_cage <-
c("cage:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_cage <- ""
cages <- cage
}
if (length(duns) > 0) {
slugs <-
duns %>% map_chr(URLencode)
duns_nos <- duns %>% str_c(collapse = " | ")
slug_duns <-
c("duns:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_duns <- ""
duns_nos <- duns
}
if (length(city) > 0) {
slugs <- city %>% map_chr(URLencode)
cities <- str_c(city, collapse = " | ")
slug_city <-
c("samAddress.city:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_city <- ""
cities <- city
}
if (length(country) > 0) {
slugs <- country %>% map_chr(URLencode)
countries <- str_c(country, collapse = " | ")
slug_country <-
c("samAddress.country:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_country <- ""
countries <- country
}
if (length(state) > 0) {
slugs <- state %>% map_chr(URLencode)
states <- states %>% str_c(sep = " | ")
slug_state <-
c("samAddress.stateOrProvince:(",
str_c(slugs, collapse = ","),
")") %>% str_c(collapse = "")
} else {
slug_state <- ""
states <- state
}
if (length(zip) > 0) {
slugs <- zip %>% as.character() %>% map_chr(URLencode)
zips <- str_c(zip, collapse = " | ")
slug_zip <-
c("samAddress.zip:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_zip <- ""
zips <- zip
}
if (length(congressional_district) > 0) {
slugs <- congressional_district %>% map_chr(URLencode)
districts <- str_c(congressional_district, collapse = " | ")
slug_congressional_district <-
c("congressionalDistrict:(",
str_c(slugs, collapse = ","),
")") %>% str_c(collapse = "")
} else {
slug_congressional_district <- ""
districts <- congressional_district
}
if (length(naics_sb) > 0) {
slugs <- naics_sb
naics_sbs <- naics_sb %>% str_c(collapse = " | ")
slug_naics_sb <-
c("naicsLimitedSB:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_naics_sb <- ""
naics_sbs <- naics_sb
}
if (length(naics_any) > 0) {
slugs <- naics_any
naics_anys <- str_c(naics_any, collapse = " | ")
slug_naics_any <-
c("naicsLimitedSB:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_naics_any <- ""
naics_anys <- naics_any
}
if (length(registration_purpose) > 0) {
slugs <- registration_purpose %>% map_chr(URLencode)
purposes <- str_c(registration_purpose, collapse = " | ")
slug_registration_purpose <-
c("purpose:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_registration_purpose <- ""
purposes <- registration_purpose
}
if (length(registration_status) > 0) {
slugs <- registration_status %>% map_chr(URLencode)
status <- str_c(registration_status, collapse = " | ")
slug_registration_status <-
c("registrationStatus:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_registration_status <- ""
status <- registration_status
}
if (length(disaster_response) > 0) {
slugs <- disaster_response %>% map_chr(URLencode)
responses <- str_c(disaster_response, collapse = " | ")
slug_disaster_response <-
c("disasterResponse:(", str_c(slugs, collapse = ","), ")") %>% str_c(collapse = "")
} else {
slug_disaster_response <- ""
responses <- disaster_response
}
if (is_minority_owned) {
slug_is_minority_owned <- "minorityOwned:TRUE"
} else {
slug_is_minority_owned <- ""
}
if (is_woman_owned) {
slug_is_woman_owned <- "womanOwned:TRUE"
} else {
slug_is_woman_owned <- ""
}
if (is_vet_owned) {
slug_is_vet_owned <- "veteranOwned:TRUE"
} else {
slug_is_vet_owned <- ""
}
if (is_service_vet) {
slug_is_service_vet <- "serviceDisabledVeteranOwned:TRUE"
} else {
slug_is_service_vet <- ""
}
if (is_8a) {
slug_is_8a <- "sba8AProgram:TRUE"
} else {
slug_is_8a <- ""
}
if (is_hubzone) {
slug_is_hubzone <- "sbaHubzoneProgram:TRUE"
} else {
slug_is_hubzone <- ""
}
if (is_ability_one) {
slug_is_ability_one <- "ability1:TRUE"
} else {
slug_is_ability_one <- ""
}
api_call <- c(
slug_legal_name,
slug_dba,
slug_cage,
slug_duns,
slug_registration_status,
slug_disaster_response,
slug_city,
slug_country,
slug_state,
slug_zip,
slug_congressional_district,
slug_naics_sb,
slug_naics_any,
slug_registration_purpose,
slug_is_minority_owned,
slug_is_woman_owned,
slug_is_vet_owned,
slug_is_service_vet,
slug_is_8a,
slug_is_ability_one,
api_slug,
length_slug
) %>%
purrr::discard(function(x) {
x == ""
}) %>%
str_c(collapse = "&")
url <- glue::glue("{url_base}{api_call}") %>% as.character()
search_term <- list(
legal_name = legal_names,
dba = dbas,
cage = cages,
duns = duns_nos,
registration_status = status,
disaster_response = responses,
city = cities,
country = countries,
state = states,
zip = zips,
congressional_district = districts,
naics_sb = naics_sbs,
naics_any = naics_anys,
registration_purpose = purposes,
is_minority_owned = is_minority_owned,
is_woman_owned = is_woman_owned,
is_vet_owned = is_vet_owned,
is_service_vet = is_service_vet,
is_8a = is_8a,
is_ability_one = is_ability_one
) %>%
flatten_df() %>%
gather(item, value) %>%
filter(!value == "FALSE") %>%
unite(item, item, value, sep = ": ") %>%
pull(item) %>%
str_c(collapse = " & ")
tibble(termSearch = search_term, urlAPI = url)
}
.parse_sam_search_url <-
memoise::memoise(function(url = "https://api.data.gov/sam/v3/registrations?qterms=legalBusinessName:(JBG)&api_key=DEMO_KEY&length=500") {
json_data <-
url %>% fromJSON(simplifyDataFrame = T, flatten = T)
data <- json_data$results
names(data) <- names(data) %>% str_remove_all("samAddress.")
data <- data[!names(data) %in% "links"] %>% as_tibble()
pad_duns_safe <- possibly(pad_duns, tibble())
df_duns <- data$duns %>% pad_duns_safe()
data <-
data %>%
.munge_data_sam_names() %>%
.munge_data(clean_address = F) %>%
mutate(urlAPI = url) %>%
mutate_if(is.character, str_trim)
if (data %>% hasName("statusCompany")) {
data <-
data %>%
mutate(isActiveSAM = case_when(
statusCompany == "ACTIVE" ~ TRUE,
TRUE ~ F
))
}
if (data %>% tibble::has_name("idDUNS")) {
data <- data %>%
mutate(idDUNS = as.numeric(idDUNS))
}
if (nrow(df_duns) > 0) {
data <-
data %>%
left_join(df_duns, by = "idDUNS")
}
data
})
#' Search SAM API by item
#'
#' @param legal_name
#' @param dba
#' @param cage
#' @param duns
#' @param registration_status
#' @param disaster_response
#' @param city
#' @param country
#' @param state
#' @param zip
#' @param congressional_district
#' @param naics_sb
#' @param naics_any
#' @param registration_purpose
#' @param is_minority_owned
#' @param is_woman_owned
#' @param is_vet_owned
#' @param is_service_vet
#' @param is_8a
#' @param is_hubzone
#' @param is_ability_one
#' @param api_version
#' @param api_key
#'
#' @return
#' @export
#'
#' @examples
#' library(tidyverse)
#' library(govtrackR)
#' df <- sam_search(legal_name = c("JBG", "Booz", "Lockheed"))
#' df %>% glimpse()
#'
#' df %>% group_by(idDUNS) %>% summarise(count = n(), entities = str_c(nameCompanyLegal, collapse = " | ")) %>% arrange(desc(count))
#'dict_naics <- dictionary_naics_codes()
#'codes <-
#'dict_naics %>%
#'filter(nameNAICS %>% str_detect("ICE CREAM|COOKIE")) %>%
#'pull(idNAICS)
#'
#'df_cookies_ice_cream <- sam_search(naics_any = c(codes))
#'df_cookies_ice_cream %>%
#'count(statusCompany, state)
#'
#'
sam_search <-
function(legal_name = NULL,
dba = NULL,
cage = NULL,
duns = NULL,
registration_status = NULL,
disaster_response = NULL,
city = NULL,
country = NULL,
state = NULL,
zip = NULL,
congressional_district = NULL,
naics_sb = NULL,
naics_any = NULL,
registration_purpose = NULL,
is_minority_owned = F,
is_woman_owned = F,
is_vet_owned = F,
is_service_vet = F,
is_8a = F,
is_hubzone = F,
is_ability_one = F,
api_version = 3,
api_key = "DEMO_KEY",
return_message = T) {
no_terms <-
list(
legal_name = legal_name[[1]],
dba = dba[[1]],
cage = cage[[1]],
duns = duns[[1]],
registration_status = registration_status[[1]],
disaster_response = disaster_response[[1]],
city = city[[1]],
country = country[[1]],
state = state[[1]],
zip = zip[[1]],
congressional_district = congressional_district[[1]],
naics_sb = naics_sb[[1]],
naics_any = naics_any[[1]],
registration_purpose = registration_purpose[[1]]
) %>% flatten_df() %>% nrow() == 0
if (no_terms) {
stop("Enter search term") %>% message()
}
df_urls <-
.generate_sam_search_url(
legal_name = legal_name,
dba = dba,
cage = cage,
duns = duns,
registration_status = registration_status,
disaster_response = disaster_response,
city = city,
country = country,
state = state,
zip = zip,
congressional_district = congressional_district,
naics_sb = naics_sb,
naics_any = naics_any,
registration_purpose = registration_purpose,
is_minority_owned = is_minority_owned,
is_woman_owned = is_woman_owned ,
is_vet_owned = is_vet_owned,
is_service_vet = is_service_vet,
is_8a = is_8a,
is_hubzone = is_hubzone,
is_ability_one = is_ability_one ,
api_version = api_version,
api_key = api_key
)
.parse_sam_search_url_safe <-
possibly(.parse_sam_search_url, tibble())
all_data <-
df_urls$urlAPI %>%
map_dfr(function(url) {
if (return_message) {
glue("Parsing {url}") %>% message()
}
.parse_sam_search_url_safe(url = url)
})
if (nrow(all_data) == 0) {
"No results"
return(invisible())
}
all_data <-
all_data %>%
left_join(df_urls, by = "urlAPI") %>%
select(termSearch, idDUNS, nameCompanyLegal, everything())
all_data
}
# sam_beta ----------------------------------------------------------------
# http://beta.sam.gov
.parse_sam_duns_v2 <-
function(url = "https://api.sam.gov/prod/entities/961539384?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1568729993777&sort=name") {
json_data <-
fromJSON(url, simplifyDataFrame = T, flatten = T)
data <- json_data$entityInfo
df_classes <-
data %>%
map_df(class) %>%
gather(column, class)
base_cols <-
df_classes %>% filter(!class %in% "list") %>% pull(column)
df_base <-
data %>% select(base_cols) %>% as_tibble()
names(df_base) <-
names(df_base) %>% str_remove_all("repsAndCerts.|coreData.|generalInfo.|generalInfo.|assertions.")
df_base <- df_base %>% .munge_data_sam_names()
names(df_base) <-
names(df_base) %>% str_replace_all("datetime", "date")
df_base <-
df_base %>%
.munge_data(clean_address = F)
df_base
df_base <- df_base %>%
unite(
addressStreetSAM,
addressStreet1SAM,
addressStreet2SAM,
sep = " ",
remove = F
) %>%
unite(cityStateSAM,
citySAM,
stateSAM,
sep = ", ",
remove = F) %>%
unite(cityStateZip,
cityStateSAM,
zipcodeSAM,
sep = " ",
remove = F) %>%
unite(locationSAM,
addressStreetSAM,
cityStateZip,
sep = ", ")
list_cols <- df_classes %>% filter(class %in% "list")
df_pocs <- data[["mandatoryPOCs"]][[1]] %>% as_tibble()
if (length(df_pocs) > 0) {
df_pocs <-
df_pocs %>% .munge_data_sam_names() %>% .munge_data(clean_address = F)
df_pocs <- df_pocs %>%
mutate(namePersonPOC = str_c(nameFirstPOC, nameLastPOC, sep = " ")) %>%
select(namePersonPOC, everything()) %>%
unite(
addressStreetPOC,
addressStreet1POC,
addressStreet2POC,
sep = " ",
remove = F
) %>%
unite(cityStatePOC,
cityPOC,
statePOC,
sep = ", ",
remove = F) %>%
unite(cityStateZip,
cityStatePOC,
zipcodePOC,
sep = " ",
remove = F) %>%
unite(locationPOC,
addressStreetPOC,
cityStateZip,
sep = ", ")
df_base <-
df_base %>%
mutate(dataPOCMandatory = list(df_pocs))
}
df_pocs_other <-
data[["optionalPOCs"]][[1]] %>% as_tibble()
if (length(df_pocs_other) > 0) {
df_pocs_other <-
df_pocs_other %>% .munge_data_sam_names() %>% .munge_data(clean_address = F)
df_pocs_other <- df_pocs_other %>%
mutate(namePersonPOC = str_c(nameFirstPOC, nameLastPOC, sep = " ")) %>%
select(namePersonPOC, everything()) %>%
unite(
addressStreetPOC,
addressStreet1POC,
addressStreet2POC,
sep = " ",
remove = F
) %>%
unite(cityStatePOC,
cityPOC,
statePOC,
sep = ", ",
remove = F) %>%
unite(cityStateZip,
cityStatePOC,
zipcodePOC,
sep = " ",
remove = F) %>%
unite(locationPOC,
addressStreetPOC,
cityStateZip,
sep = ", ")
df_base <-
df_base %>%
mutate(dataPOCOptional = list(df_pocs_other))
}
df_exclusions <-
data[["exclusionsList"]][[1]]
if (length(df_exclusions) > 0) {
df_base <- df_base %>%
mutate(hasExclusions = T)
}
naics <- data[["assertions.naicsList"]][[1]]
if (length(naics) > 0) {
df_naics <- dictionary_naics_codes() %>% suppressMessages()
df_naics_company <-
df_naics %>% filter(idNAICS %in% as.integer(naics))
df_base <-
df_base %>% left_join(df_naics %>% select("idNAICSPrimary" = idNAICS, nameNAICSPrimary = nameNAICS),
by = "idNAICSPrimary")
df_base <- df_base %>%
mutate(dataNAICS = list(df_naics_company))
}
psc <-
data[["assertions.pscList"]][[1]]
if (length(psc) > 0) {
df_psc <-
dictionary_psc_active() %>% filter(codeProductService %in% psc) %>% suppressWarnings() %>% suppressMessages()
df_base <- df_base %>% mutate(dataPSC = list(df_psc))
}
df_base <-
df_base %>%
mutate(urlAPI = url)
df_base <-
df_base %>%
.remove_na()
df_base
}
.generate_sam_v2_urls <-
function(base_url = "https://api.sam.gov/prod/entities", duns = 961539384, api_key = NULL) {
if (length(api_key) == 0) {
api_key <- "O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii"
}
duns_slug <- .pad_duns(duns = duns, zero_base = 9) %>% pull(slugDUNS)
url <- glue("{base_url}/{duns_slug}?api_key={api_key}") %>% as.character()
url
}
#' SAM data by DUNS
#'
#' Acquires data from SAM V2 API for
#' vector of DUNS
#'
#' @param duns numeric vector of DUNS numbers
#' @param api_key if not \code{NULL} api key
#' @param return_message if \code{TRUE} returns mesage
#'
#' @return
#' @export
#'
#' @examples
#' sam_duns_v2(175311393)
sam_duns_v2 <-
function(duns = NULL, api_key = NULL, return_message = T) {
if (length(duns) == 0) {
stop("Please enter DUNS")
}
urls <-
duns %>%
map_chr(function(x){
.generate_sam_v2_urls(duns = x, api_key = api_key)
}) %>%
unique()
.parse_sam_duns_v2_safe <- possibly(.parse_sam_duns_v2, tibble())
all_data <-
urls %>%
map_dfr(function(url){
if (return_message) {
glue("Parsing {url}") %>% message()
}
.parse_sam_duns_v2_safe(url = url) %>% suppressMessages() %>%
suppressWarnings()
})
if (all_data %>% tibble::has_name("dateExpiration") &
all_data %>% tibble::has_name("dateRegistration")) {
all_data <-
all_data %>%
mutate(countGovernmentWorkDays = (dateExpiration - dateRegistration) %>% as.integer())
}
all_data <-
all_data %>%
mutate(idRow = 1:n())
df_list <-
all_data %>%
select(idDUNS, matches("data")) %>%
transmute_if(is.list,
.funs = list(function(x) {
x %>% map_dbl(length) > 0
})) %>%
mutate(idRow = 1:n())
names(df_list) <-
names(df_list) %>% str_replace_all("^data", "has")
all_data <-
all_data %>%
left_join(df_list, by = 'idRow') %>%
select(-idRow)
col_order <-
c(all_data %>% select(-matches("data")) %>% names(),
all_data %>% select(matches("data")) %>% names())
all_data
}
#' SAM government agencies
#'
#' SAM summary of
#' government agencies.
#'
#' @return
#' @export
#'
#' @examples
#' sam_agencies()
sam_agencies <- function() {
data <- "https://api.sam.gov/prod/federalorganizations/v1/organizations/departments/?api_key=O4kzViWGVYNumPqhAzUhYGiZZZwW3RKUEYJOI6ii&random=1568828547464" %>%
fromJSON(simplifyDataFrame = T)
data <- data[[1]]
df_base <- data$org %>% as_tibble()
df_logos <- data[[2]] %>% as_tibble()
df_logos <- tibble(urlSAMAPI = df_logos$self[[1]], urlLogo = df_logos$logo$href)
df_base <- df_base %>% select(-links)
df_base_cols <- df_base %>% map(class) %>% as_tibble() %>% gather(column, type)
base_cols <-
df_base_cols %>%
filter(!type %in% c("list", "data.frame")) %>%
pull(column)
data <-
df_base %>%
select(one_of(base_cols))
data <-
data %>%
.munge_data_sam_names() %>%
select(-matches("remove")) %>%
.remove_na() %>%
.munge_data(clean_address = F) %>%
select(idSAM, everything())
data <-
data %>%
select(idSAM, nameAgency, codeAgency, descriptionAgency, summaryAgency, everything())
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.