.dict_xml_parent_items <-
function() {
parent_items <-
c(
"OtherTransactionAward",
"OtherTransactionIDV",
"award",
"awardID",
"IDV",
"referencedIDVID",
"awardContractID",
"contractMarketingData",
"treasuryAccount",
"relevantContractDates",
"dollarValues",
"totalDollarValues",
"purchaserInformation",
"contractData",
"listOfTreasuryAccounts",
"legislativeMandates",
"productOrServiceInformation",
"vendor",
"vendorSocioEconomicIndicators",
"vendorSiteDetails",
"businessOrOrganizationType",
"minorityOwned",
"vendorBusinessTypes",
"federalGovernment",
"localGovernment",
"vendorHeader",
"businessOrOrganizationType",
"vendorLineOfBusiness",
"vendorRelationshipWithFederalGovernment",
"typeOfGovernmentEntity",
"vendorOrganizationFactors",
"listOfContractAdministrationsDelegated",
"profitStructure",
"typeOfEducationalEntity",
"contractDetail",
"preferencePrograms",
"vendorCertifications",
"vendorLocation",
"vendorDUNSInformation",
"ccrRegistrationDetails",
"placeOfPerformance",
"principalPlaceOfPerformance",
"competition",
"transactionInformation",
"preferenceProgramssubcontractPlan",
"NASASpecificAwardElements",
"agencySpecificAwardElements",
"agencySpecificIDVElements",
"NASASpecificIDVElements",
"principalInvestigator",
"alternatePrincipalInvestigator"
)
tibble(parent_item = parent_items)
}
.parse_xml_entry_new <- function(xml_entry) {
xml_entry_nodes <- xml_contents(xml_entry)
xml_entry_names <- xml_name(xml_entry_nodes)
parent_items <-
pull(.dict_xml_parent_items())
dict_names <-
dictionary_fpds_lower_names
seq_along(xml_entry_names) %>%
map_dfc(function(x) {
table <-
xml_entry_names[[x]]
if (table == "title") {
df <- tibble(nameContract = xml_text(xml_entry_nodes[1]) %>% str_to_upper() %>% str_squish())
return(df)
}
if (table == "link") {
df <- tibble(urlFPDSContractAtom = xml_entry_nodes[[x]] %>% html_attr("href"))
return(df)
}
if (table == "modified") {
value <-
xml_text(xml_entry_nodes[x])
if (value == "") {
return(invisible())
}
df <- tibble(datetimeContractModified = lubridate::ymd_hms(value))
return(df)
}
xml_part <- xml_entry_nodes[x]
xml_nodes <-
xml_find_all(xml_part, ".//*")
items <-
as.character(xml_name(xml_nodes))
text <-
xml_text(xml_nodes)
node_attrs <-
xml_attrs(xml_nodes)
df_attrs <-
seq_along(node_attrs) %>%
map_dfr(function(x) {
if (x == 1) {
return(invisible())
}
value <- as.character(node_attrs[[x]])
if (length(value) == 0) {
return(invisible())
}
item <- names(node_attrs[[x]])
tibble(item, value) %>%
mutate(idNode = x) %>%
select(idNode, everything())
})
df_new <-
tibble(nameFPDS = items, text) %>%
mutate(idNode = 1:n()) %>%
filter(text != "")
df_new <-
df_new %>%
filter(!nameFPDS %>% str_detect("genericTags|genericStrings|genericString01")) %>%
filter(
!nameFPDS %in% c(
"award",
"IDVID",
"IDV",
"contractID",
"OtherTransactionAwardID",
"OtherTransactionAward",
"OtherTransactionIDV",
"OtherTransactionIDVID",
"OtherTransactionAwardContractID",
"OtherTransactionIDVContractID"
)
) %>%
select(idNode, everything()) %>%
mutate_if(is.character, str_squish)
df_attrs <- df_attrs %>%
filter(
!item %in% c(
"city",
"country",
"county",
"departmentID",
"departmentName",
"productOrServiceType"
)
) %>%
arrange(idNode, item)
df_descriptions <-
df_attrs %>%
group_by(idNode) %>%
summarise(description = value %>% str_c(collapse = " | ")) %>%
mutate(description = str_squish(description))
df_new <-
df_new %>%
left_join(df_descriptions, by = "idNode")
df_parents <-
df_new %>%
filter(nameFPDS %in% parent_items) %>%
select(idNode, parent = nameFPDS) %>%
mutate(idNode = idNode + 1)
df_parents <-
df_parents %>%
mutate(
slug = case_when(
parent == "principalPlaceOfPerformance" ~ "Performance",
parent == "vendorLocation" ~ "Vendor",
parent %in% c("referencedIDVID", "IDV") ~ "IDV",
parent %in% c("OtherTransactionAward") ~ "OTA",
parent %in% c("agencySpecificAwardElements") ~ "AgencySpecificAward",
parent %in% c("principalInvestigator") ~ "Investigator",
parent %in% c("alternatePrincipalInvestigator") ~ "InvestigatorAlternate",
TRUE ~ NA_character_
)
)
df_new <-
df_new %>%
filter(!nameFPDS %in% c(parent_items)) %>%
left_join(df_parents, by = "idNode") %>%
select(parent, everything()) %>%
select(-slug) %>%
fill(parent) %>%
left_join(df_parents %>% select(-idNode), by = "parent")
tbl_off_id <- df_new %>% filter(nameFPDS == "contractingOfficeID")
if (nrow(tbl_off_id) > 0) {
office_id <-
tbl_off_id %>% pull(text)
df_new <- df_new %>%
mutate(description = description %>% str_remove_all(office_id) %>% str_squish())
}
df_names <-
df_new %>%
distinct(nameFPDS, .keep_all = F) %>%
mutate(nameFPDSLower = str_to_lower(nameFPDS)) %>%
left_join(dict_names %>% select(nameFPDSLower, nameActual), by = "nameFPDSLower")
missing_symbol <-
df_names %>%
filter(is.na(nameActual)) %>%
nrow() > 0
if (missing_symbol) {
missing_items <-
df_names %>%
filter(is.na(nameActual)) %>%
pull(nameFPDS)
missing_items %>%
walk(function(x) {
glue("Missing: {x}") %>% message()
})
df_names <-
df_names %>%
mutate(nameActual = case_when(is.na(nameActual) ~ nameFPDS,
TRUE ~ nameActual))
}
df_new <-
df_new %>%
left_join(df_names, by = "nameFPDS") %>%
distinct() %>%
mutate(nameActual = case_when(is.na(slug) ~ nameActual,
TRUE ~ str_c(nameActual, slug, sep = "")))
filter_agency <-
df_new %>% filter(idNode == 4) %>% pull("nameFPDS") == "agencyID"
if (filter_agency) {
df_new <- df_new %>%
filter(idNode != 4)
}
df_new <-
df_new %>%
unite(n, parent, nameFPDS, sep = "_", remove = F) %>%
filter(n != "awardContractID_agencyID") %>%
select(-n)
df_new <-
df_new %>%
select(idNode, nameFPDS, nameActual, text, description) %>%
group_by(nameActual) %>%
filter(idNode == min(idNode)) %>%
ungroup()
df_base <-
df_new %>%
select(nameActual, text) %>%
mutate(
text = case_when(
nameActual %>% str_detect('zipcode') ~ text %>% substr(1, 5),
nameActual %>% str_detect("nameFirst") ~ text %>% str_remove_all("\\,"),
nameActual %>% str_detect("nameLast") ~ text %>% str_remove_all("\\,"),
TRUE ~ text
)
)
df_base <-
spread(df_base, nameActual, text)
has_desc <-
filter(df_new, !is.na(description)) %>% nrow() > 0
if (has_desc) {
df_desc <- df_new %>%
filter(!is.na(description)) %>%
filter(nameActual != "descriptionObligation") %>%
select(nameActual, description) %>%
mutate(
nameActual = nameActual %>% str_replace_all("^code|^is|^has", "type") %>% str_replace_all("^id", "name") %>%
str_replace_all("^count", "description") %>%
str_replace_all("typeCountry", "nameCountry") %>%
str_replace_all("typeState", "nameState") %>%
str_replace_all("typeProductService", "nameProductService")
) %>%
mutate(description = str_to_upper(description)) %>%
filter(
!nameActual %>% str_detect(
"nameContract|^transaction|nameCountryVendor|faxVendor|zipcode"
)
) %>%
mutate(nameActual = case_when(
nameActual == "typeOrganization" ~ "stateOrganized",
TRUE ~ nameActual
)) %>%
spread(nameActual, description)
df_new <-
df_desc %>%
bind_cols(df_base) %>%
select(one_of(names(df_base)), everything())
}
df_new <-
df_new %>%
select(matches("idContract|idAgency|nameAgency|idOffice|nameOffice"),
everything())
if (df_new %>% hasName("slugDUNS")) {
df_new <- df_new %>%
mutate(idDUNS = as.numeric(slugDUNS))
}
if (df_new %>% hasName("slugDUNSParent")) {
df_new <- df_new %>%
mutate(idDUNSParent = as.numeric(slugDUNSParent))
}
df_new
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.