.guess_duns_type <-
function(data, metric = 5) {
if (!data %>% hasName("idDUNS")) {
return(data)
}
if (!data %>% hasName("idDUNSParent")) {
return(data)
}
if (data %>% hasName("idDUNSAnalysis")) {
return(data)
}
col_length <- data %>%
dplyr::select(idDUNS, idDUNSParent, matches("^is")) %>%
dplyr::select(idDUNS,
idDUNSParent,
matches("Government|NotForProfit|Federal|^isState")) %>%
names() %>%
length()
if (col_length < 6) {
return(data)
}
df_test <-
data %>%
dplyr::select(idDUNS, idDUNSParent, matches("^is")) %>%
dplyr::select(-matches("FBO|Vendor|MissingDUNS")) %>%
group_by(idDUNS, idDUNSParent) %>%
dplyr::select(idDUNS,
idDUNSParent,
matches(
"Government|NotForProfit|Federal|isState|College|School|University|Transit|International|isInternationalOrganization|Hospital|Airport|Institution|College"
)) %>%
summarise_all(sum) %>%
ungroup() %>%
gather(item, value, -c(idDUNS, idDUNSParent)) %>%
group_by(idDUNS, idDUNSParent) %>%
summarise(value = sum(value)) %>%
ungroup()
df_test <-
df_test %>%
mutate(has_parent = idDUNS != idDUNSParent) %>%
mutate(idDUNSAnalysis = case_when(value <= metric ~ idDUNSParent,
TRUE ~ idDUNS)) %>%
dplyr::select(idDUNS, idDUNSParent, idDUNSAnalysis)
data %>%
left_join(df_test, by = c("idDUNS", "idDUNSParent")) %>%
dplyr::select(one_of(names(df_test)), everything())
}
.add_budget_year <- function(data) {
if (!data %>% hasName("dateObligation")) {
return(data)
}
data %>%
mutate(
yearBudget = case_when(
lubridate::month(dateObligation) %>% as.numeric() >= 10 ~ lubridate::year(dateObligation) %>% as.numeric() + 1,
TRUE ~ lubridate::year(dateObligation) %>% as.numeric()
)
)
}
.add_dod_type <-
function(data) {
has_columns <-
data %>% hasName("codeDepartmentAward") &&
data %>% hasName("codeDepartmentFunding")
if (!has_columns) {
return(data)
}
data <-
data %>%
mutate(
typeDODAward = case_when(
codeDepartmentAward == 97 & codeDepartmentFunding == 97 ~ "AF",
codeDepartmentAward != 97 &
codeDepartmentFunding == 97 ~ "F",
codeDepartmentAward == 97 &
codeDepartmentFunding != 97 ~ "A",
codeDepartmentAward == 97 &
is.na(codeDepartmentFunding) != 97 ~ "A"
)
)
data
}
.fix_duns <-
function(data) {
duns_names <-
data %>% hasName("idDUNS") && data %>% hasName("idDUNSParent")
if (!duns_names) {
return(data)
}
data <-
data %>%
mutate_at(c("idDUNS", "idDUNSParent"), as.numeric) %>%
mutate(
idDUNS = case_when(idDUNS == 0 ~ NA_real_,
TRUE ~ idDUNS),
idDUNSParent = case_when(idDUNSParent == 0 ~ NA_real_,
TRUE ~ idDUNSParent),
hasDUNSParent = !is.na(idDUNSParent),
isMissingDUNS = is.na(idDUNS) & is.na(idDUNS),
idDUNSParent = case_when(is.na(idDUNSParent) ~ idDUNS,
TRUE ~ idDUNSParent)
)
data
}
.fix_foreign_reference <-
function(data) {
has_code <- data %>% hasName("codeForeignFunding")
has_type <- data %>% hasName("typeForeignFunding")
if (!has_code | !has_type) {
return(data)
}
data %>%
mutate(
typeForeignFunding = case_when(
codeForeignFunding == "A" ~ "FOREIGN FUNDS FMS",
codeForeignFunding == "B" ~ "FOREIGN FUNDS NON-FMS",
typeForeignFunding == "N" ~ "NOT APPLICABLE",
codeForeignFunding %in% c("X", "N") ~ "NOT APPLICABLE",
TRUE ~ typeForeignFunding
),
codeForeignFunding = case_when(
typeForeignFunding == "NOT APPLICABLE" ~ "N",
typeForeignFunding == "FOREIGN FUNDS NON-FMS" ~ "B",
typeForeignFunding == "FOREIGN FUNDS FMS" ~ "A",
TRUE ~ codeForeignFunding
)
)
}
.fix_sam_exceptions <-
function(data) {
has_code <- data %>% hasName("typeSAMException")
has_type <- data %>% hasName("idSAMException")
if (!has_code | !has_type) {
return(data)
}
data <- data %>%
mutate(
typeSAMException = case_when(
typeSAMException == "C" ~ "CLASSIFIED CONTRACTS",
typeSAMException == "A" ~ "AWARDS TO FOREIGN VENDORS FOR WORK PERFORMED OUTSIDE THE UNITED STATES",
typeSAMException == "G" ~ "GOVERNMENT - WIDE COMMERCIAL PURCHASE CARD",
typeSAMException == "M" ~ "MICRO-PURCHASES THAT DO NOT USE THE EFT",
TRUE ~ typeSAMException
)
)
data <- data %>%
mutate(
idSAMException = case_when(
typeSAMException %in% c(
"GOVERNMENT - WIDE COMMERCIAL PURCHASE CARD",
"GOVERNMENT-WIDE COMMERCIAL PURCHASE CARD"
) ~ 1,
typeSAMException == "CLASSIFIED CONTRACTS" ~ 2,
typeSAMException == "CONTRACTING OFFICERS DEPLOYED IN THE COURSE OF MILITARY OPERATIONS" ~ 3,
typeSAMException == "CONTRACTING OFFICERS CONDUCTING EMERGENCY OPERATIONS" ~ 4,
typeSAMException == "CONTRACTS TO SUPPORT UNUSUAL OR COMPELLING NEEDS" ~ 5,
typeSAMException == "AWARDS TO FOREIGN VENDORS FOR WORK PERFORMED OUTSIDE THE UNITED STATES" ~ 6,
typeSAMException == "MICRO-PURCHASES THAT DO NOT USE THE EFT" ~ 7,
TRUE ~ idSAMException
)
)
}
.add_analysis_contract <- function(data) {
if (data %>% hasName("idContractAnalysis")) {
return(data)
}
has_contract <- data %>% hasName("idContract")
has_parent <-
data %>% hasName("idContractParent") |
data %>% hasName("idContractIDV")
if (!has_contract | !has_parent) {
if (data %>% hasName("idContract")) {
data <- data %>%
mutate(idContractAnalysis = idContract) %>%
dplyr::select(idContractAnalysis, everything())
}
return(data)
}
if (!data %>% hasName("idContractParent")) {
data <- data %>%
rename(idContractParent = idContractIDV)
}
data <- data %>%
mutate(
countCharsContract = nchar(idContract),
countCharsParent = nchar(idContractParent),
lettersContract = idContract %>% str_count("[A-Z]"),
idContractAnalysis = case_when(
countCharsContract <= 5 &
!is.na(idContractParent) ~ idContractParent,
is.na(idContractParent) ~ idContract,
!is.na(idContractParent) &
lettersContract <= 1 ~ idContractParent,
TRUE ~ idContract
)
) %>%
mutate(
typeContractIDAnalysis = case_when(idContractAnalysis == idContractParent ~ "IDV",
TRUE ~ "Contract")
) %>%
mutate(idContractAnalysis = case_when(
is.na(idContractAnalysis) ~ idContractParent,
TRUE ~ idContractAnalysis
)) %>%
dplyr::select(idContract, idContractAnalysis , everything()) %>%
dplyr::select(-c(countCharsContract, countCharsParent, lettersContract))
data <- data %>%
rename(idContractIDV = idContractParent)
data
}
.add_agency_cgacs <-
function(data) {
cleaned_names <- data %>% clean_names() %>% names()
needs_agency_cgac_funding <- str_detect(cleaned_names, "name_agency_cgac_funding") %>% sum(na.rm = T) == 0
needs_agency_cgac_award <- str_detect(cleaned_names, "name_agency_cgac_award") %>% sum(na.rm = T) == 0
needs_agency_cgac_idv <- str_detect(cleaned_names, "name_cgac_agency_idv") %>% sum(na.rm = T) == 0
none_needed <- needs_agency_cgac_idv + needs_agency_cgac_award + needs_agency_cgac_funding == 0
if (none_needed) {
return(data)
}
df_cgac <-
.cgac_codes()
if (needs_agency_cgac_funding) {
if (data %>% hasName("idAgencyFunding") &
!data %>% hasName("idCGACFunding")) {
df_agencies <-
data %>%
dplyr::select(idAgencyFunding) %>%
distinct() %>%
filter(!is.na(idAgencyFunding)) %>%
mutate(idCGACFunding = case_when(
!is.na(idAgencyFunding) ~ idAgencyFunding %>% substr(1, 2) %>% as.numeric(),
TRUE ~ NA_real_
)) %>%
left_join(
df_cgac %>% dplyr::select(
idCGACFunding = idCGAC,
nameAgencyCGACFunding = nameAgencyCGAC,
slugCGACFunding = slugCGAC
),
by = "idCGACFunding"
) %>%
distinct()
data <-
data %>%
left_join(df_agencies, by = "idAgencyFunding")
}
}
if (needs_agency_cgac_award) {
if (data %>% hasName("idAgencyAward") &
!data %>% hasName("idCGACAward")) {
df_agencies <-
data %>%
dplyr::select(idAgencyAward) %>%
distinct() %>%
filter(!is.na(idAgencyAward)) %>%
mutate(idCGACAward = case_when(
!is.na(idAgencyAward) ~ idAgencyAward %>% substr(1, 2) %>% as.numeric(),
TRUE ~ NA_real_
))
df_ids <-
df_cgac %>% dplyr::select(
idCGACAward = idCGAC,
nameAgencyCGACAward = nameAgencyCGAC,
slugCGACAward = slugCGAC
) %>%
filter(idCGACAward %in% (df_agencies$idCGACAward %>% unique())) %>%
group_by(idCGACAward) %>%
dplyr::slice(1) %>%
ungroup()
df_agencies <-
df_agencies %>%
left_join(df_ids,
by = "idCGACAward") %>%
distinct()
data <- data %>%
left_join(df_agencies, by = "idAgencyAward")
}
}
if (needs_agency_cgac_idv) {
if (data %>% hasName("idAgencyAwardIDV") &
!data %>% hasName("idCGACIDV")) {
df_agencies <-
data %>%
dplyr::select(idAgencyAwardIDV) %>%
distinct() %>%
filter(!is.na(idAgencyAwardIDV)) %>%
mutate(
idCGACAgencyIDV = case_when(
!is.na(idAgencyAwardIDV) ~ idAgencyAwardIDV %>% substr(1, 2) %>% as.numeric(),
TRUE ~ NA_real_
)
) %>%
left_join(
df_cgac %>% dplyr::select(
idCGACAgencyIDV = idCGAC,
nameCGACAgencyIDV = nameAgencyCGAC,
slugCGACAgencyIDV = slugCGAC
),
by = "idCGACAgencyIDV"
) %>%
distinct()
data <-
data %>%
left_join(df_agencies, by = "idAgencyAwardIDV")
}
}
data
}
.allocate_federal_accounts <-
function(data) {
if (!data %>% hasName("dateObligation")) {
return(data)
}
if (data %>% hasName("idFederalAccounts")) {
data <-
data %>%
rename(idFederalAccount = idFederalAccounts)
}
if (!data %>% hasName("idFederalAccount")) {
return(data)
}
if (!data %>% hasName("idContractAnalysis")) {
return(data)
}
if (data %>% hasName("idFederalAccountResolved")) {
return(data)
}
df_accounts <-
data %>%
dplyr::select(idContractAnalysis, idFederalAccount, amountObligation) %>%
filter(!is.na(idContractAnalysis)) %>%
filter(!is.na(idFederalAccount)) %>%
mutate(countAccounts = idFederalAccount %>% str_count("\\;") + 1)
df_account_resolved <-
df_accounts %>% dplyr::select(idContractAnalysis, idFederalAccount) %>%
separate_rows(idFederalAccount, sep = "\\;") %>%
group_by(idContractAnalysis) %>%
summarise(
idFederalAccountResolved = unique(idFederalAccount) %>% sort() %>% str_c(collapse = " | "),
countFederalAccounts = n_distinct(idFederalAccount)
) %>%
ungroup()
df_transactions <-
data %>%
dplyr::select(dateObligation,
idContractAnalysis,
idFederalAccount,
amountObligation) %>%
left_join(df_account_resolved, by = "idContractAnalysis") %>%
dplyr::select(dateObligation,
idFederalAccount,
idFederalAccountResolved,
everything()) %>%
mutate(
countAccounts = idFederalAccount %>% str_count("\\;") + 1,
countAccountsResolved = idFederalAccountResolved %>% str_count("\\|") + 1,
amountObligationAllocated = case_when(
is.na(idFederalAccount) &
is.na(idFederalAccountResolved) ~ amountObligation,
!is.na(idFederalAccount) ~ (amountObligation * (1 / countAccounts)),
TRUE ~ (amountObligation * (1 / countAccountsResolved))
)
)
df_transactions <-
df_transactions %>%
dplyr::select(
dateObligation,
idContractAnalysis,
amountObligation,
idFederalAccountResolved,
amountObligationAllocated
) %>%
mutate(hasFederalAccount = !is.na(idFederalAccountResolved))
data <-
data %>%
left_join(df_transactions,
by = c("idContractAnalysis", "dateObligation", "amountObligation")) %>%
distinct()
data
}
.resolve_duns_listed <-
function(data, exclude_bloat = F) {
if (exclude_bloat) {
return(data)
}
if (!data %>% hasName("nameVendor")) {
return(data)
}
if (!data %>% hasName("idDUNS")) {
return(data)
}
if (data %>% hasName("namesVendorListed")) {
return(data)
}
if (data %>% filter(!is.na(idDUNS)) %>% filter(!is.na(nameVendor)) %>% nrow() == 0) {
return(data)
}
data <- data %>%
dplyr::select(-one_of(
"countVendorsListed",
"hasMultipleVendors",
"namesVendorListed"
)) %>%
suppressWarnings()
df_duns <-
data %>% count(idDUNS, nameVendor, name = "count", sort = T)
df_duns <-
df_duns %>%
group_by(idDUNS) %>%
summarise(namesVendorListed = unique(nameVendor) %>% str_c(collapse = " | ")) %>%
ungroup() %>%
mutate(
countVendorsListed = namesVendorListed %>% str_count("\\|") + 1,
hasMultipleVendors = countVendorsListed > 1
) %>%
ungroup()
data <- data %>%
left_join(df_duns, by = "idDUNS")
data
}
.resolve_parent_duns_listed <-
function(data, exclude_bloat = F) {
if (exclude_bloat) {
return(data)
}
if (!data %>% hasName("nameVendorParent")) {
return(data)
}
if (!data %>% hasName("idDUNSParent")) {
return(data)
}
if (data %>% hasName("namesVendorParentListed")) {
return(data)
}
data <- data %>%
dplyr::select(
-one_of(
"countVendorsParentListed",
"hasMultipleParentVendors",
"namesVendorParentListed"
)
) %>%
suppressWarnings()
if (data %>% filter(!is.na(idDUNSParent)) %>% filter(!is.na(nameVendorParent)) %>% nrow() == 0) {
return(data)
}
df_duns <-
data %>% count(idDUNSParent,
nameVendorParent,
name = "count",
sort = T) %>%
group_by(idDUNSParent) %>%
summarise(namesVendorParentListed = unique(nameVendorParent) %>% str_c(collapse = " | ")) %>%
ungroup() %>%
mutate(
countVendorsParentListed = namesVendorParentListed %>% str_count("\\|") + 1,
hasMultipleParentVendors = countVendorsParentListed > 1
) %>%
ungroup()
data <- data %>%
left_join(df_duns, by = "idDUNSParent")
rm(df_duns)
gc()
data
}
.add_original_dates <-
function(data) {
if (!data %>% hasName("idContractAnalysis")) {
return(data)
}
if (!data %>% hasName("dateObligation")) {
return(data)
}
if (!data %>% hasName("dateContractCompletionCurrent")) {
return(data)
}
if (!data %>% hasName("amountBaseAllOptionTotal")) {
return(data)
}
if (!data %>% hasName("amountBaseAllOptionTotalOriginal")) {
return(data)
}
df_first_dates <-
data %>%
group_by(idContractAnalysis) %>%
filter(dateObligation == min(dateObligation)) %>%
ungroup() %>%
group_by(idContractAnalysis) %>%
summarise(
amountBaseAllOptionTotalOriginal = max(amountBaseAllOptionTotal),
dateContractCompletionOriginal = max(dateContractCompletionCurrent)
) %>%
ungroup()
data <- data %>%
left_join(df_first_dates, by = c("idContractAnalysis")) %>%
mutate(
hasChangedBaseAmount = amountBaseAllOptionTotalOriginal != amountBaseAllOptionTotal,
hasChangedCompletionDate = dateContractCompletionCurrent != dateContractCompletionOriginal
)
data
}
#' Resolve DUNS numbers for listed data
#'
#' Attempts to resolve for DUNS and Parent DUNS when listed within a tibble
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
resolve_listed_duns <-
function(data, exclude_bloat = F) {
data <-
.resolve_duns_listed(data = data, exclude_bloat = exclude_bloat)
data <-
.resolve_parent_duns_listed(data = data, exclude_bloat = exclude_bloat)
data <-
.resolve_cage_codes(data = data)
data
}
.add_department_funding_codes <-
function(data) {
if (data %>% hasName("nameDepartmentFunding")) {
return(data)
}
if (!data %>% hasName("idAgencyFunding")) {
return(data)
}
df_fpds_offices <- .fpds_departments()
df_agencies <-
data %>%
filter(!is.na(idAgencyFunding)) %>%
distinct(idAgencyFunding)
df_agencies <- df_agencies %>%
left_join(
df_fpds_offices %>% distinct(
idAgencyFunding = idAgency,
idDepartmentFunding = idDepartment,
nameDepartmentFunding = nameDepartment
),
by = "idAgencyFunding"
) %>%
group_by(idAgencyFunding) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(
idDepartmentFunding = case_when(idAgencyFunding == "9700" ~ "9700",
TRUE ~ idDepartmentFunding),
nameDepartmentFunding = case_when(
idAgencyFunding == "9700" ~ "DEPARTMENT OF DEFENSE",
TRUE ~ nameDepartmentFunding
)
) %>%
mutate(codeDepartmentFunding = idDepartmentFunding %>% substr(1, 2) %>% as.numeric())
data <-
data %>%
left_join(df_agencies, by = "idAgencyFunding")
data
}
.add_department_award_codes <-
function(data) {
if (data %>% hasName("nameDepartmentAward")) {
return(data)
}
if (!data %>% hasName("idAgencyAward")) {
return(data)
}
df_fpds_offices <- .fpds_departments()
df_agencies <-
data %>%
select(idAgencyAward) %>%
filter(!is.na(idAgencyAward)) %>%
distinct()
df_offices <-
df_fpds_offices %>% distinct(
idAgencyAward = idAgency,
idDepartmentAward = idDepartment,
nameDepartmentAward = nameDepartment
) %>%
filter(idAgencyAward %in% (df_agencies$idAgencyAward))
df_agencies <-
df_agencies %>%
left_join(df_offices,
by = "idAgencyAward") %>%
group_by(idAgencyAward) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(
idDepartmentAward = case_when(idAgencyAward == "9700" ~ "9700",
TRUE ~ idDepartmentAward),
nameDepartmentAward = case_when(
idAgencyAward == "9700" ~ "DEPARTMENT OF DEFENSE",
TRUE ~ nameDepartmentAward
)
) %>%
mutate(codeDepartmentAward = idDepartmentAward %>% substr(1, 2) %>% as.numeric())
data <-
data %>%
left_join(df_agencies, by = "idAgencyAward")
data
}
#' Adds missing FPDS department award and funding codes
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
add_department_codes <-
function(data) {
data <-
.add_department_award_codes(data = data)
data <-
.add_department_funding_codes(data = data)
data
}
.resolve_cage_codes <-
function(data) {
if (!data %>% hasName("cageVendor")) {
return(data)
}
if (data %>% hasName("idDUNS")) {
return(data)
}
df_cage <- data %>%
dplyr::select(idDUNS, cageVendor) %>%
filter(!is.na(cageVendor)) %>%
group_by(idDUNS) %>%
summarise(cageVendorResolved = unique(cageVendor) %>% sort() %>% str_c(sep = " | ")) %>%
ungroup() %>%
mutate(countCAGEResolved = cageVendorResolved %>% str_count("\\ | ") + 1)
data <-
data %>%
left_join(df_cage, by = "idDUNS")
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.