# dictionaries ------------------------------------------------------------
#' USA Spending name dictionary
#'
#' Dictionary of names from USA Spending to
#' govtrackR
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_usa_spending_names()
dictionary_usa_spending_names <-
function() {
tibble(
nameUSASpending = c(
"contract_transaction_unique_key",
"contract_award_unique_key",
"award_id_piid",
"modification_number",
"transaction_number",
"parent_award_agency_id",
"parent_award_agency_name",
"parent_award_id",
"parent_award_modification_number",
"federal_action_obligation",
"total_dollars_obligated",
"base_and_exercised_options_value",
"current_total_value_of_award",
"base_and_all_options_value",
"potential_total_value_of_award",
"action_date",
"action_date_fiscal_year",
"period_of_performance_start_date",
"period_of_performance_current_end_date",
"period_of_performance_potential_end_date",
"ordering_period_end_date",
"solicitation_date",
"awarding_agency_code",
"awarding_agency_name",
"awarding_sub_agency_code",
"awarding_sub_agency_name",
"awarding_office_code",
"awarding_office_name",
"funding_agency_code",
"funding_agency_name",
"funding_sub_agency_code",
"funding_sub_agency_name",
"funding_office_code",
"funding_office_name",
"treasury_accounts_funding_this_award",
"federal_accounts_funding_this_award",
"foreign_funding",
"foreign_funding_description",
"sam_exception",
"sam_exception_description",
"recipient_duns",
"recipient_name",
"recipient_doing_business_as_name",
"cage_code",
"recipient_parent_name",
"recipient_parent_duns",
"recipient_country_code",
"recipient_country_name",
"recipient_address_line_1",
"recipient_address_line_2",
"recipient_city_name",
"recipient_state_code",
"recipient_state_name",
"recipient_zip_4_code",
"recipient_congressional_district",
"recipient_phone_number",
"recipient_fax_number",
"primary_place_of_performance_country_code",
"primary_place_of_performance_country_name",
"primary_place_of_performance_city_name",
"primary_place_of_performance_county_name",
"primary_place_of_performance_state_code",
"primary_place_of_performance_state_name",
"primary_place_of_performance_zip_4",
"primary_place_of_performance_congressional_district",
"award_or_idv_flag",
"award_type_code",
"award_type",
"idv_type_code",
"idv_type",
"multiple_or_single_award_idv_code",
"multiple_or_single_award_idv",
"type_of_idc_code",
"type_of_idc",
"type_of_contract_pricing_code",
"type_of_contract_pricing",
"award_description",
"action_type_code",
"action_type",
"solicitation_identifier",
"number_of_actions",
"inherently_governmental_functions",
"inherently_governmental_functions_description",
"product_or_service_code",
"product_or_service_code_description",
"contract_bundling_code",
"contract_bundling",
"dod_claimant_program_code",
"dod_claimant_program_description",
"naics_code",
"naics_description",
"recovered_materials_sustainability_code",
"recovered_materials_sustainability",
"domestic_or_foreign_entity_code",
"domestic_or_foreign_entity",
"dod_acquisition_program_code",
"dod_acquisition_program_description",
"information_technology_commercial_item_category_code",
"information_technology_commercial_item_category",
"epa_designated_product_code",
"epa_designated_product",
"country_of_product_or_service_origin_code",
"country_of_product_or_service_origin",
"place_of_manufacture_code",
"place_of_manufacture",
"subcontracting_plan_code",
"subcontracting_plan",
"extent_competed_code",
"extent_competed",
"solicitation_procedures_code",
"solicitation_procedures",
"type_of_set_aside_code",
"type_of_set_aside",
"evaluated_preference_code",
"evaluated_preference",
"research_code",
"research",
"fair_opportunity_limited_sources_code",
"fair_opportunity_limited_sources",
"other_than_full_and_open_competition_code",
"other_than_full_and_open_competition",
"number_of_offers_received",
"commercial_item_acquisition_procedures_code",
"commercial_item_acquisition_procedures",
"small_business_competitiveness_demonstration_program",
"simplified_procedures_for_certain_commercial_items_code",
"simplified_procedures_for_certain_commercial_items",
"a76_fair_act_action_code",
"a76_fair_act_action",
"fed_biz_opps_code",
"fed_biz_opps",
"local_area_set_aside_code",
"local_area_set_aside",
"price_evaluation_adjustment_preference_percent_difference",
"clinger_cohen_act_planning_code",
"clinger_cohen_act_planning",
"materials_supplies_articles_equipment_code",
"materials_supplies_articles_equipment",
"labor_standards_code",
"labor_standards",
"construction_wage_rate_requirements_code",
"construction_wage_rate_requirements",
"interagency_contracting_authority_code",
"interagency_contracting_authority",
"other_statutory_authority",
"program_acronym",
"parent_award_type_code",
"parent_award_type",
"parent_award_single_or_multiple_code",
"parent_award_single_or_multiple",
"major_program",
"national_interest_action_code",
"national_interest_action",
"cost_or_pricing_data_code",
"cost_or_pricing_data",
"cost_accounting_standards_clause_code",
"cost_accounting_standards_clause",
"gfe_gfp_code",
"gfe_gfp",
"sea_transportation_code",
"sea_transportation",
"undefinitized_action_code",
"undefinitized_action",
"consolidated_contract_code",
"consolidated_contract",
"performance_based_service_acquisition_code",
"performance_based_service_acquisition",
"multi_year_contract_code",
"multi_year_contract",
"contract_financing_code",
"contract_financing",
"purchase_card_as_payment_method_code",
"purchase_card_as_payment_method",
"contingency_humanitarian_or_peacekeeping_operation_code",
"contingency_humanitarian_or_peacekeeping_operation",
"alaskan_native_owned_corporation_or_firm",
"american_indian_owned_business",
"indian_tribe_federally_recognized",
"native_hawaiian_owned_business",
"tribally_owned_business",
"veteran_owned_business",
"service_disabled_veteran_owned_business",
"woman_owned_business",
"women_owned_small_business",
"economically_disadvantaged_women_owned_small_business",
"joint_venture_women_owned_small_business",
"joint_venture_economic_disadvantaged_women_owned_small_bus",
"minority_owned_business",
"subcontinent_asian_asian_indian_american_owned_business",
"asian_pacific_american_owned_business",
"black_american_owned_business",
"hispanic_american_owned_business",
"native_american_owned_business",
"other_minority_owned_business",
"contracting_officers_determination_of_business_size",
"contracting_officers_determination_of_business_size_code",
"emerging_small_business",
"community_developed_corporation_owned_firm",
"labor_surplus_area_firm",
"us_federal_government",
"federally_funded_research_and_development_corp",
"federal_agency",
"us_state_government",
"us_local_government",
"city_local_government",
"county_local_government",
"inter_municipal_local_government",
"local_government_owned",
"municipality_local_government",
"school_district_local_government",
"township_local_government",
"us_tribal_government",
"foreign_government",
"organizational_type",
"corporate_entity_not_tax_exempt",
"corporate_entity_tax_exempt",
"partnership_or_limited_liability_partnership",
"sole_proprietorship",
"small_agricultural_cooperative",
"international_organization",
"us_government_entity",
"community_development_corporation",
"domestic_shelter",
"educational_institution",
"foundation",
"hospital_flag",
"manufacturer_of_goods",
"veterinary_hospital",
"hispanic_servicing_institution",
"receives_contracts",
"receives_grants",
"receives_contracts_and_grants",
"airport_authority",
"council_of_governments",
"housing_authorities_public_tribal",
"interstate_entity",
"planning_commission",
"port_authority",
"transit_authority",
"subchapter_scorporation",
"limited_liability_corporation",
"foreign_owned_and_located",
"for_profit_organization",
"nonprofit_organization",
"other_not_for_profit_organization",
"the_ability_one_program",
"number_of_employees",
"annual_revenue",
"private_university_or_college",
"state_controlled_institution_of_higher_learning",
"1862_land_grant_college",
"1890_land_grant_college",
"1994_land_grant_college",
"minority_institution",
"historically_black_college",
"tribal_college",
"alaskan_native_servicing_institution",
"native_hawaiian_servicing_institution",
"school_of_forestry",
"veterinary_college",
"dot_certified_disadvantage",
"self_certified_small_disadvantaged_business",
"small_disadvantaged_business",
"c8a_program_participant",
"historically_underutilized_business_zone_hubzone_firm",
"sba_certified_8a_joint_venture",
"highly_compensated_officer_1_name",
"highly_compensated_officer_1_amount",
"highly_compensated_officer_2_name",
"highly_compensated_officer_2_amount",
"highly_compensated_officer_3_name",
"highly_compensated_officer_3_amount",
"highly_compensated_officer_4_name",
"highly_compensated_officer_4_amount",
"highly_compensated_officer_5_name",
"highly_compensated_officer_5_amount",
"last_modified_date",
"assistance_transaction_unique_key",
"assistance_award_unique_key",
"award_id_fain",
"award_id_uri",
"sai_number",
"non_federal_funding_amount",
"total_funding_amount",
"face_value_of_loan",
"original_subsidy_cost",
"total_subsidy_cost",
"total_loan_value",
"recipient_city_code",
"recipient_county_code",
"recipient_county_name",
"recipient_zip_code",
"recipient_zip_last_4_code",
"recipient_foreign_city_name",
"recipient_foreign_province_name",
"recipient_foreign_postal_code",
"primary_place_of_performance_code",
"primary_place_of_performance_county_code",
"primary_place_of_performance_foreign_location",
"cfda_number",
"cfda_title",
"assistance_type_code",
"assistance_type_description",
"business_funds_indicator_code",
"business_funds_indicator_description",
"business_types_code",
"business_types_description",
"correction_delete_indicator_code",
"correction_delete_indicator_description",
"action_type_description",
"record_type_code",
"record_type_description",
"agency_id",
"abbreviation",
"agency_name",
"congressional_justification_url",
"active_fy",
"active_fq",
"outlay_amount",
"obligated_amount",
"budget_authority_amount",
"current_total_budget_authority_amount",
"percentage_of_total_budget_authority",
"parent_award_id_piid",
"usaspending_permalink",
"total_obligated_amount",
"total_non_federal_funding_amount",
"original_loan_subsidy_cost",
"total_face_value_of_loan",
"total_loan_subsidy_cost",
"primary_place_of_performance_scope",
"government_furnished_property_code",
"government_furnished_property",
"alaskan_native_corporation_owned_firm",
"native_hawaiian_organization_owned_firm",
"tribally_owned_firm",
"receives_financial_assistance",
"receives_contracts_and_financial_assistance",
"foreign_owned"
),
nameActual =
c(
"keyContract",
"keyAwardcontract_award_unique_key",
"idContract",
"codeModification",
"numberTransaction",
"idAgencyAwardIDV",
"nameAgencyAwardIDV",
"idContractIDV",
"codeModificationIDV",
"amountObligation",
"amountObligationTotal",
"amountBaseAndExercisedOption",
"amountAwardCurrentTotalValue",
"amountBaseAllOptions",
"amountAwardPotentialTotal",
"dateObligation",
"yearFiscalObligation",
"datePerformanceStart",
"datePerformanceEnd",
"datePerformanceEndPotential",
"dateOrderingEnd",
"dateSolicitation",
"idDepartmentAward",
"nameDepartmentAward",
"idAgencyAward",
"nameAgencyAward",
"idOfficeAward",
"nameOfficeAward",
"idDepartmentFunding",
"nameDepartmentFunding",
"idAgencyFunding",
"nameAgencyFunding",
"idOfficeFunding",
"nameOfficeFunding",
"idTreasuryAccounts",
"idFederalAccounts",
"codeForeignFunding",
"typeForeignFunding",
"idSAMException",
"typeSAMException",
"idDUNS",
"nameVendor",
"nameVendorDBA",
"cageVendor",
"nameVendorParent",
"idDUNSParent",
"codeCountryVendor",
"nameCountryVendor",
"addressStreet1Vendor",
"addressStreet2Vendor",
"cityVendor",
"codeStateVendor",
"nameStateVendor",
"zipcodeVendor",
"codeCongressionalDistrictVendor",
"telephoneVendor",
"faxVendor",
"codeCountryPerformance",
"nameCountryPerformance",
"cityPerformance",
"nameCountyPerformance",
"codeStatePerformance",
"nameStatePerformance",
"zipcodePerformance",
"codeCongressionalDistrictPerformance",
"typeAwardFlag",
"codeAward",
"typeAward",
"codeIDV",
"typeIDV",
"codeSingleMultipleIDV",
"typeSingleMultipleIDV",
"codeIDC",
"typeIDC",
"codeContractPricing",
"typeContractPricing",
"descriptionAward",
"codeActionType",
"typeAction",
"idSolicitation",
"countActions",
"codeInherentlyGovernmentFunction",
"typeInherentlyGovernmentFunction",
"codeProductService",
"nameProductService",
"codeContractBundling",
"typeContractBundling",
"codeDODClaimant",
"typeDODClaimant",
"idNAICS",
"nameNAICS",
"codeRecoveredMaterials",
"typeRecoveredMaterials",
"codeDomesticForeignEntity",
"typeDomesticForeignEntity",
"codeDODAcquisition",
"typeDODAcquisition",
"codeTechnologyItem",
"typeTechnologyItem",
"codeEPADesignatedProduct",
"typeEPADesignatedProduct",
"codeCountryOriginProductService",
"nameCountryOriginProductService",
"codePlaceOfManufacture",
"typePlaceOfManufacture",
"codeSubContractingPlan",
"typeSubContractingPlan",
"codeContractCompetition",
"typeContractCompetition",
"codeSolicitationProcedures",
"typeSolicitationProcedures",
"codeSetAside",
"typeSetAside",
"codeEvaluatedPreference",
"typeEvaluatedPreference",
"codeResearch",
"typeResearch",
"codeFairOpportunityException",
"typeFairOpportunityException",
"codeOtherThanFullOpenCompetition",
"typeOtherThanFullOpenCompetition",
"countOffersReceived",
"codeCommercialAcquisitionProcedures",
"typeCommercialAcquisitionProcedures",
"isSmallBusinessCompetitivenessDemonstration",
"hasSimplifiedProcedures",
"removeSimplifiedProcedures",
"hasA76FairActAction",
"removeA76FairActAction",
"isFBOSourced",
"removeFBO",
"hasLocalAreaSetAside",
"removeLocalAreaSetAside",
"priceEvaluationAdjustmentPreferenceDifference",
"hasClingerCohenActPlanning",
"removeClinger",
"hasMaterialsSuppliesArticles",
"removeMaterialsSuppliesArticles",
"hasLaborStandardsClause",
"removeLabor",
"hasConstructionWageRequirements",
"removeConstruction",
"codeInterAgencyContractingAuthority",
"typeInterAgencyContractingAuthority",
"typeStatutoryAuthority",
"codeProgram",
"codeParentAward",
"typeParentAward",
"codeMultipleSingleAward",
"typeMultipleSingleAward",
"nameMajorProgram",
"codeNationalInterestAction",
"nameNationalInterestAction",
"codeCostPricingData",
"typeCostPricingData",
"codeCostAccountingClause",
"typeCostAccountingClause",
"hasGFEOrGFP",
"removeGFE",
"codeSeaTransported",
"removeSeaTransportation",
"codeUndefinitizedAction",
"typeUndefinitizedAction",
"codeConsolidatedContract",
"typeConsolidatedContract",
"codePerformanceBasedServiceContract",
"typePerformanceBasedServiceContract",
"isMultiYearContract",
"removeMultiYearContract",
"codeContractFinancing",
"typeContractFinancing",
"hasCreditCardPurchaseMethod",
"removeCreditCardPurchaseMethod",
"codeHumanitarianPeackeepingOperation",
"typeHumanitarianPeackeepingOperation",
"isAlaskanNativeOwnedBusiness",
"isAmericanIndianOwnedBusiness",
"isIndianTribeFederallyRecognized",
"isNativeHawaiianOwnedBusiness",
"isTriballyOwnedBusiness",
"isVeteranOwnedBusiness",
"isServiceDisabledVeteranOwnedBusiness",
"isWomanOwnedBusiness",
"isWomanOwnedSmallBusiness",
"isEconomicallyDisadvantagedWomanOwnedSmallBusiness",
"isJointVentureWomanOwnedSmallBusiness",
"isJointVentureEconomicallyDisadvantagedWomanOwnedSmallBusiness",
"isMinorityOwnedBusiness",
"isSubcontinentAsianIndianAmericanOwnedBusiness",
"isAsianPacificAmericanOwnedBusiness",
"isBlackAmericanOwnedBusiness",
"isHispanicAmericanOwnedBusiness",
"isNativeAmericanOwnedBusiness",
"isOtherMinorityOwnedBusiness",
"typeContractingOfficerBusinessSize",
"codeContractingOfficerBusinessSize",
"isEmergingSmallBusiness",
"isCommunityDevelopedCorporationOwnedFirm",
"isLaborSurplusAreaFirm",
"isUSFederalGovernment",
"isFederallyFundedResearchAndDevelopmentCorp",
"isFederalAgency",
"isStateGovernment",
"isLocalGovernment",
"isCityGovernment",
"isCountyGovernment",
"isInterMunicipalLocalGovernment",
"isLocalGovernmentOwned",
"isMunicipalityLocalGovernment",
"isSchoolDistrictLocalGovernment",
"isTownshipLocalGovernment",
"isTribalGovernment",
"isForeignGovernment",
"typeOrganization",
"isCorporateEntityNonTaxExempt",
"isCorporateEntityTaxExempt",
"isPartnershipOrLLP",
"isSoleProprietorship",
"isSmallAgriculturalCooperative",
"isInternationalOrganization",
"isUSGovernmentEntity",
"isCommunityDevelopmentCorporation",
"isDomesticShelter",
"isEducationalInstitution",
"isFoundation",
"isHospitalFlag",
"isManufacturerOfGoods",
"isVeterinaryHospital",
"isHispanicServicingInstitution",
"hasContracts",
"hasGrants",
"hasContractsAndGrants",
"isAirportAuthority",
"isCouncilOfGovernments",
"isHousingAuthoritiesPublicOrTribal",
"isInterstateEntity",
"isPlanningCommission",
"isPortAuthority",
"isTransitAuthority",
"isSubchapterSCorporation",
"isLimitedLiabilityCorporation",
"isForeignOwnedAndLocated",
"isForProfitOrganization",
"isNotForProfitOrganization",
"isOtherNotForProfitOrganization",
"isAbilityOneProgram",
"countEmployeesVendor",
"amountRevenueAnnualVendor",
"isPrivateUniversityOrCollege",
"isStateControlledInstitutionOfHigherLearning",
"is1862LandGrantCollege",
"is1890LandGrantCollege",
"is1994LandGrantCollege",
"isMinorityInstitution",
"isHistoricallyBlackCollegeOrUniversity",
"isTribalCollege",
"isAlaskanNativeServicingInstitution",
"isNativeHawaiianServicingInstitution",
"isSchoolOfForestry",
"isVeterinaryCollege",
"isDOTCertifiedDisadvantagedBusinessEnterprise",
"isSelfCertifiedSmallDisadvantagedBusiness",
"isSBACertifiedSmallDisadvantagedBusiness",
"isC8AParticipant",
"isUnderUtilizedHUBZone",
"isSBACertified8AJointVenture",
"nameOfficerHighCompensated1",
"amountOfficerHighCompensated1",
"nameOfficerHighCompensated2",
"amountOfficerHighCompensated2",
"nameOfficerHighCompensated3",
"amountOfficerHighCompensated3",
"nameOfficerHighCompensated4",
"amountOfficerHighCompensated4",
"nameOfficerHighCompensated5",
"amountOfficerHighCompensated5",
"datetimeLastModified",
"keyAssistanceTransaction",
"keyAssistanceAward",
"idFAIN",
"uriAward",
"codeSAI",
"amountNonFederalFunding",
"amountFundingTotal",
"amountLoanFaceValue",
"amountSubsidyOriginal",
"amountSubsidyTotal",
"amountLoanTotal",
"codeCityVendor",
"idCountyVendor",
"nameCountyVendor",
"zipcodeVendor",
"zip4Vendor",
"nameCityForeignVendor",
"nameProvinceForeignVendor",
"zipcodeForeignVendor",
"codePrimaryPerformance",
"idCountyPrimaryPerformance",
"typeLocationForeignPerformance",
"idCFDA",
"nameProgram",
"idAssistance",
"typeAssistance",
"codeBusinessFunds",
"typeBusinessFunds",
"codeBusinessType",
"typeBusiness",
"codeDeleteIndicator",
"typeDeleteIndicator",
"typeAction",
"idRecord",
"typeRecord",
"idAgency",
"slugAgency",
"nameAgency",
"urlCongressionalJustification",
"yearBudget",
"quarterMostRecent",
"amountSpent",
"amountObligated",
"amountBudgeted",
"amountBudgetTotal",
"pctBudgetTotal",
"idContractIDV",
"urlUSASpendingPermalink",
"amountObligationTotal",
"amountNonFederalFundingTotal",
"amountSubsidyOriginal",
"amountLoanFaceValueTotal",
"amountSubsidyTotal",
"typePrimaryPerformanceScope",
"codeGovernmentFurnishedProperty",
"typeGovernmentFurnishedProperty",
"isAlaskanNativeOwnedBusiness",
"isNativeHawaiianOwnedBusiness",
"isTriballyOwnedBusiness",
"hasAssistance",
"hasContractsAndGrants",
"isForeignOwned"
)
)
}
#' USA Spending Departments
#'
#' Returns searchable USA Spending
#' departments
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' usa_spending_departments()
usa_spending_departments <-
memoise::memoise(function() {
headers = c(
`Connection` = 'close',
`Accept` = 'application/json, text/plain, */*',
`Origin` = 'https://www.usaspending.gov',
`User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
`Content-Type` = 'application/json;charset=UTF-8',
`Sec-Fetch-Site` = 'same-site',
`Sec-Fetch-Mode` = 'cors',
`Referer` = 'https://www.usaspending.gov/',
`Accept-Encoding` = 'gzip, deflate, br',
`Accept-Language` = 'en-US,en;q=0.9'
)
data = '{"agency":0}'
res <-
httr::POST(url = 'https://api.usaspending.gov/api/v2/bulk_download/list_agencies/',
httr::add_headers(.headers = headers),
body = data)
.res_to_data(res = res) %>%
as_tibble() %>%
setNames(c("nameDepartment", "idDepartment", "idAgency")) %>%
.munge_data()
})
# utils -------------------------------------------------------------------
.clean_org_usa <-
function(data,
col = "nameAgencyAward",
clean_entity_column = T) {
if (!data %>% hasName(col)) {
return(data)
}
id_col <-
col %>% str_replace_all("name", "id")
id_col <- case_when(
col %in% c("nameVendor") ~ c("idDUNS"),
col %in% c("nameVendorParent") ~ "idDUNSParent",
TRUE ~ id_col
)
if (!data %>% hasName(id_col)) {
return(data)
}
df_count <-
data %>%
count(!!sym(id_col), !!sym(col), sort = T, name = "count") %>%
.clean_usg_organizations(column = col)
if (clean_entity_column) {
df_count <- df_count %>%
clean_entity_data(entity_column = col)
df_count <- df_count %>% select(-col)
names(df_count) <- names(df_count) %>% str_remove_all("Clean")
}
df_count <- df_count %>%
filter(!is.na(!!sym(col))) %>%
arrange(!!sym(id_col)) %>%
group_by(!!sym(id_col)) %>%
filter(count == max(count)) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(-count)
data <-
data %>%
select(-one_of(col)) %>%
left_join(df_count, by = id_col) %>%
select(names(data), everything())
data
}
.clean_organization <-
function(data,
col = "nameVendor",
clean_entity_column = T) {
if (!data %>% hasName(col)) {
return(data)
}
new_name <-
col %>% str_c("Clean")
df_count <-
data %>%
count(!!sym(col), sort = T, name = "count")
new_values <- df_count %>% pull(col)
df_count <-
df_count %>%
mutate(
UQ(new_name) :=
new_values %>%
str_remove_all('\\"') %>%
str_remove_all("\\(") %>%
str_remove_all("\\)") %>%
str_replace_all('\\, INC.|\\, INC$', ' INC') %>%
str_replace_all("\\, INCORPORATED$", " INCORPORATED") %>%
str_replace_all("\\, CORP.$|\\, CORP$", " CORP") %>%
str_replace_all("\\, G.P.$|\\, GP$", " GP") %>%
str_replace_all("\\, P.C.$|\\, PC$", " PC") %>%
str_replace_all("\\, LTD$|\\, LTD.$", " LTD") %>%
str_replace_all("\\, A ", "\\ A ") %>%
str_replace_all("\\, AN ", "\\ AN ") %>%
str_replace_all("\\, OF ", "\\ OF ") %>%
str_replace_all("\\, LIMITED$", "\\ LIMITED") %>%
str_replace_all("\\, LIMITED PARTNERSHIP$", "\\ LIMITED PARTNERSHIP") %>%
str_replace_all("\\, PLLC$|\\, PLLC", " PLLC") %>%
str_replace_all("\\, LLC$|\\, LLC|\\, L.L.C|\\, L.L.C$", " LLC") %>%
str_replace_all("\\, LLP$|\\, LLP|\\, L.L.P|\\, L.L.P$", " LLP") %>%
str_replace_all("\\, LLLP$|\\, LLLP|\\, L.L.L.P|\\, L.L.L.P$", " LLLP") %>%
str_replace_all("\\, LP$|\\, L.P.$|\\, L.P$", " LP") %>%
str_remove_all("\\, ,$| ,$|^%|^ - |\\/ |\\& ") %>%
str_remove_all("^-") %>%
str_squish()
)
df_count <-
df_count %>%
filter(!is.na(!!sym(new_name))) %>%
.clean_usg_organizations(column = new_name) %>%
mutate(UQ(new_name) := !!sym(new_name) %>% str_remove_all("\\,") %>% str_squish())
if (clean_entity_column) {
col_order <- names(data)
df_count <-
df_count %>%
clean_entity_data(
entity_column = new_name,
use_business_suffix = T,
ignore_words = NULL,
use_n_gram_merge = F
)
df_count <-
df_count %>%
select(-new_name) %>%
select(-count)
data <-
data %>%
left_join(df_count, by = col)
data <-
data %>%
select(-col)
names(data) <-
names(data) %>% str_remove_all("Clean")
df_names <-
tibble(name = names(data)) %>% mutate(column = 1:n())
cols <-
df_names %>% group_by(name) %>% filter(column == min(column)) %>% ungroup() %>% pull(column)
data <- data[, cols]
data <-
data %>%
select(one_of(col_order), everything())
} else {
col_order <- names(data)
data <-
data %>%
left_join(df_count %>%
select(-count)
, by = col)
data <-
data %>%
select(-col)
names(data) <- names(data) %>% str_remove_all("Clean")
data <- data %>%
select(one_of(col_order), everything())
}
data
}
# endpoints ---------------------------------------------------------------
# https://api.usaspending.gov/docs/endpoints
# schemas -----------------------------------------------------------------
# https://fiscal.treasury.gov/data-transparency/DAIMS-current.html
.res_to_data <-
function(res) {
res %>%
content(as = "text", encoding = "UTF-8") %>%
fromJSON(simplifyDataFrame = T) %>%
flatten_df()
}
# bulk --------------------------------------------------------------------
## https://www.usaspending.gov/#/download_center/award_data_archive
.us_bulk_urls <- function(year = 2008,
agency = NULL,
api_version = 2,
type = "contracts") {
slug_type <-
case_when(type %>% str_to_lower() %>% str_detect("contract") ~ "contracts",
TRUE ~ "assistance")
agency_slug <-
ifelse(length(agency) == 0, "all", str_to_lower(agency))
headers <-
c(
`Connection` = 'close',
`Pragma` = 'no-cache',
`Cache-Control` = 'no-cache',
`Accept` = 'application/json, text/plain, */*',
`Origin` = 'https://www.usaspending.gov',
`User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
`Content-Type` = 'application/json;charset=UTF-8',
`Sec-Fetch-Site` = 'same-site',
`Sec-Fetch-Mode` = 'cors',
`Referer` = 'https://www.usaspending.gov/',
`Accept-Encoding` = 'gzip, deflate, br',
`Accept-Language` = 'en-US,en;q=0.9'
)
data <-
list(agency = agency_slug,
fiscal_year = year,
type = slug_type) %>%
toJSON(auto_unbox = T)
url <-
glue(
'https://api.usaspending.gov/api/v{api_version}/bulk_download/list_monthly_files/'
) %>% as.character()
res <-
httr::POST(url = url,
httr::add_headers(.headers = headers),
body = data)
data <-
res %>%
.res_to_data()
data <-
data %>%
filter(!str_to_upper(file_name) %>% str_detect("DELTA")) %>%
mutate(fiscal_year = year) %>%
select(
yearFiscal = fiscal_year,
nameAgency = agency_name,
typeAward = type,
dateUpdated = updated_date,
urlBulkZip = url
) %>%
mutate(dateUpdated = ymd(dateUpdated)) %>%
mutate_at(c("typeAward", "nameAgency"), str_to_upper)
data
}
#' USA Spending Bulk URLs
#'
#' Generates a tibble containing
#' zip URLs for specified inputs
#'
#' @param years vector of years starting in 2001
#' @param types type of award \itemize{
#' \item contracts - contract spending
#' \item assistance - financial assistance awards
#' }
#' @param api_version API version, as of December 2019 it is 2
#' @param agencies if not \code{NA} vector of agency names, \code{NA} returns
#' all agencies
#' @param return_message if \code{TRUE} returns a message
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' bulk_usa_spending_urls(years = 2001:2020, types = c('contracts', 'assistance'), agencies = NA)
bulk_usa_spending_urls <-
function(years = 2001:2020,
types = c("contracts", "assistance"),
agencies = NA,
api_version = 2,
return_message = T) {
df_rows <-
tidyr::expand_grid(year = years,
type = types,
agency = agencies)
.us_bulk_urls_safe <-
possibly(.us_bulk_urls, tibble())
all_data <-
1:nrow(df_rows) %>%
map_dfr(function(x) {
df_row <- df_rows %>% dplyr::slice(x)
year_row <- df_row$year
type_row <- df_row$type
if (return_message) {
glue("Finding {type_row} for {year_row}") %>% message()
}
if (df_row$agency %>% is.na()) {
agency_slug <- NULL
} else {
agency_slug <- df_row$agency
}
.us_bulk_urls_safe(
year = year_row,
agency = agency_slug,
api_version = api_version,
type = type_row
)
})
all_data
}
#' Munge USA Spending names
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
munge_usa_spending_names <-
function(data) {
dict_names <- dictionary_usa_spending_names()
usa_names <-
names(data)
actual_names <-
usa_names %>%
map_chr(function(name) {
df_row <-
dict_names %>% filter(nameUSASpending == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
.munge_usa_bulk <-
function(data, clean_entity_column = T) {
start_rows <- nrow(data)
data <- data %>%
select(-matches("^remove[A-Z]")) %>%
.remove_na()
date_time_names <-
data %>% select(matches("^datetime[A-Z]|^date[A-Z]")) %>%
select_if(is.character) %>%
names()
data <- data %>%
select(-matches("^slug"))
if (length(date_time_names) > 0) {
data <-
data %>%
mutate_at(date_time_names,
list(function(x) {
case_when(nchar(x) == 10 ~ ymd(x),
TRUE ~ ymd_hms(x) %>% as.Date())
}))
}
logical_names <-
data %>% select(matches("^is[A-Z]|^has[A-Z]")) %>%
select_if(is.character) %>%
names()
if (length(logical_names)) {
data <-
data %>%
mutate_at(logical_names,
list(function(x) {
x %>% str_to_upper()
})) %>%
mutate_at(logical_names,
list(function(x) {
case_when(
x %in% c("T", "Y", "Y:", "TRUE", "YES",
"Y: YES") ~ TRUE,
x %in% c("F", "N", "FALSE",
"N: NO", "N:") ~ FALSE,
TRUE ~ NA
)
}))
}
code_cols <- data %>% select(matches("^code")) %>% names()
if (length(code_cols) > 0) {
data <- data %>%
mutate_at(code_cols, as.character)
}
data <-
.clean_org_usa(data = data,
col = "nameAgencyAwardIDV",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameDepartmentAward",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameAgencyAward",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameOfficeAward",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameDepartmentFunding",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameAgencyFunding",
clean_entity_column = clean_entity_column)
data <-
.clean_org_usa(data = data,
col = "nameOfficeFunding",
clean_entity_column = clean_entity_column)
if (clean_entity_column) {
data <-
.clean_organization(data = data,
clean_entity_column = clean_entity_column,
col = "nameVendor") %>%
distinct()
data <-
.clean_organization(data = data,
clean_entity_column = clean_entity_column,
col = "nameVendorParent") %>% distinct()
}
end_rows <-
nrow(data)
if (start_rows != end_rows) {
stop("MERGE GOT SCREWED UP")
}
data
}
.dl_year_zip <-
function(url = "https://files.usaspending.gov/award_data_archive/2012_all_Contracts_Full_20191210.zip",
path = "Desktop/data/usa_spending/") {
oldwd <- getwd()
year <-
url %>% str_remove_all("https://files.usaspending.gov/award_data_archive/") %>%
str_remove_all("^FY") %>%
substr(1, 4)
type <-
case_when(url %>% str_to_lower() %>% str_detect("contracts") ~ "contracts",
TRUE ~ "assistance")
setwd("~")
setwd(dir = path)
outfile <- tempfile("download", fileext = ".zip")
file <- curl::curl_download(url, outfile)
folder <- glue("{type}_{year}") %>% as.character()
unz_files <- unzip(file, exdir = folder)
file %>% unlink()
gc()
return(invisible())
}
#' Download USA Spending URLs
#'
#' Download basic USA spending urls
#'
#' @param years
#' @param types
#' @param path
#'
#' @return
#' @export
#'
#' @examples
download_usa_spending <-
function(years = 2013:2020,
types = "contracts",
path = "Desktop/usa_contracts/") {
oldwd <- getwd()
df_urls <-
bulk_usa_spending_urls(years = years, types = types)
df_urls$urlBulkZip %>%
walk(function(url) {
url %>% message()
.dl_year_zip(url = url, path = path)
})
if (oldwd != getwd()) {
setwd(oldwd)
}
"FINISHED" %>% message()
return(invisible())
}
.dl_usa_spending_bulk <-
function(url = "https://files.usaspending.gov/award_data_archive/2012_all_Contracts_Full_20191210.zip",
clean_entity_column = F,
return_message = TRUE) {
if (return_message) {
glue("Downloading {url}") %>% message()
}
outfile <- tempfile("download", fileext = ".zip")
file <- curl::curl_download(url, outfile)
unz_files <- unzip(file, exdir = "xml")
glue::glue("Has {length(unz_files)} files") %>% cat()
data <-
unz_files %>%
map_dfr(function(x) {
x %>% message()
data <- fread(x, showProgress = FALSE)
data <-
data %>%
munge_usa_spending_names() %>%
as_tibble()
if (data %>% hasName("cageVendor")) {
data <- data %>%
mutate(cageVendor = as.character(cageVendor))
}
if (data %>% hasName("idAssistance")) {
data <- data %>%
mutate(
codeAssistance = case_when(
idAssistance == 2 ~ "A",
idAssistance == 3 ~ "A",
idAssistance == 4 ~ "B",
idAssistance == 5 ~ "B",
idAssistance == 6 ~ "C",
idAssistance == 7 ~ "E",
idAssistance == 8 ~ "F",
idAssistance == 9 ~ "G",
idAssistance == 10 ~ "D",
idAssistance == 11 ~ NA_character_
),
typeAssistance = case_when(
idAssistance == 2 ~ "BLOCK GRANT",
idAssistance == 3 ~ "FORMULA GRANT",
idAssistance == 4 ~ "PROJECT GRANT",
idAssistance == 5 ~ "COOPERATIVE AGREEMENT",
idAssistance == 6 ~ "DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY OR OTHER NON-REIMBURSABLE DIRECT FINANCIAL AID",
idAssistance == 7 ~ "DIRECT LOAN",
idAssistance == 8 ~ "GUARANTEED/INSURED LOAN",
idAssistance == 9 ~ "INSURANCE",
idAssistance == 10 ~ "DIRECT PAYMENT WITH UNRESTRICTED USE (RETIREMENT, PENSION, VETERANS BENEFITS, ETC.)",
idAssistance == 11 ~ "OTHER REIMBURSABLE, CONTINGENT, INTANGIBLE, OR INDIRECT FINANCIAL ASSISTANCE"
)
)
}
data <-
data %>%
mutate_if(is.character,
list(function(x) {
ifelse(x == "", NA_character_, x) %>% str_squish()
})) %>%
.remove_na()
data <- data %>%
mutate_if(is.character,
list(function(x) {
case_when(x %in% c("NAN", "N/A", "NA") ~ NA_character_,
TRUE ~ x)
})) %>%
.remove_na()
num_col <-
data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()
data <-
data %>%
mutate_at(num_col,
list(function(x) {
x %>% as.character() %>% parse_number()
})) %>%
.remove_na()
char_col <-
data %>% select(
matches(
"idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
)
) %>% names()
data <-
data %>%
mutate_at(char_col,
list(function(x) {
x %>% as.character()
})) %>%
.remove_na()
data <-
.munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)
data
})
file %>% unlink()
unlink("xml", recursive = T)
unz_files %>% unlink()
gc()
rm(unz_files)
data <- data %>%
mutate(
nameVendor = case_when(
is.na(nameVendor) & !is.na(nameVendorParent) ~ nameVendorParent,
is.na(nameVendor) &
is.na(nameVendor) ~ "UNDISCLOSED VENDOR",
TRUE ~ nameVendor
),
nameVendor = case_when(nameVendor == "0" ~ "UNDISCLOSED VENDOR",
TRUE ~ nameVendor),
nameVendorParent = case_when(
is.na(nameVendorParent) & !is.na(nameVendor) ~ nameVendorParent,
is.na(nameVendor) &
is.na(nameVendor) ~ "UNDISCLOSED VENDOR PARENT",
TRUE ~ nameVendorParent
),
nameVendorParent = case_when(
nameVendorParent == "0" ~ "UNDISCLOSED VENDOR PARENT",
TRUE ~ nameVendorParent
),
) %>%
mutate_at(c("idDUNS", "idDUNSParent"),
list(function(x) {
ifelse(x == 0, NA_integer_, x)
})) %>%
mutate(hasParent = idDUNSParent != idDUNS)
data <-
data %>%
.munge_usa_bulk(clean_entity_column = clean_entity_column)
data <- data %>%
mutate_if(is.character, str_squish)
data <- data %>%
mutate_at(c("nameVendor", "nameVendorParent"),
list(function(x) {
x %>% str_remove_all("^/|^-|^*") %>% str_squish()
}))
data <-
data %>%
mutate(urlBulkZip = url)
data
}
.fix_dl <-
function(folder = "Desktop/usa_contracts/contracts_2011/",
clean_entity_column = T) {
oldwd <- getwd()
if (getwd != "/Users/alexbresler") {
setwd("~")
}
setwd(folder)
files <-
list.files() %>%
discard(function(x) {
x %>% str_detect("tsv")
})
files %>%
map_dfr(function(x) {
x %>% message()
data <- fread(x, showProgress = FALSE)
data <-
data %>%
munge_usa_spending_names() %>%
as_tibble()
data <-
data %>%
add_solicitation_group()
data <-
data %>%
mutate_if(is.character, str_squish) %>%
.remove_na()
data <-
data %>%
mutate_if(is.character,
list(function(x) {
case_when(x %in% c("", "NAN", "N/A", "NA") ~ NA_character_,
TRUE ~ x) %>% str_squish()
})) %>%
.remove_na()
num_col <-
data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()
data <-
data %>%
mutate_at(num_col,
list(function(x) {
x %>% as.character() %>% parse_number()
})) %>%
.remove_na()
char_col <-
data %>% select(
matches(
"idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
)
) %>% names()
data <-
data %>%
mutate_at(char_col,
list(function(x) {
x %>% as.character()
})) %>%
.remove_na()
data <-
.munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)
new_file <-
x %>% str_replace_all(".csv", ".tsv.gz")
data %>%
data.table::fwrite(new_file)
unlink(x)
rm(data)
gc()
return(invisible())
})
"FINISHED" %>%
message()
return(invisible)
}
.fix_dls <-
function(folder = "Desktop/usa_contracts/",
type = "assistance",
clean_entity_column = T) {
oldwd <- getwd()
setwd("~")
setwd(folder)
folders <-
list.files() %>%
discard(function(x) {
!x %>% str_detect(type)
})
folders %>%
walk(function(f) {
folder <- glue("Desktop/usa_contracts/{f}") %>% as.character()
setwd("~")
setwd(folder)
files <-
list.files() %>%
discard(function(x) {
x %>% str_detect("tsv")
})
files %>%
map_dfr(function(x) {
x %>% message()
data <- fread(x, showProgress = FALSE)
data <-
data %>%
munge_usa_spending_names() %>%
as_tibble()
data <-
data %>%
add_solicitation_group()
data <-
data %>%
mutate_if(is.character, str_squish) %>%
.remove_na()
data <-
data %>%
mutate_if(is.character,
list(function(x) {
case_when(x %in% c("", "NAN", "N/A", "NA") ~ NA_character_,
TRUE ~ x) %>% str_squish()
})) %>%
.remove_na()
num_col <-
data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()
data <-
data %>%
mutate_at(num_col,
list(function(x) {
x %>% as.character() %>% parse_number()
})) %>%
.remove_na()
char_col <-
data %>% select(
matches(
"idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
)
) %>% names()
data <-
data %>%
mutate_at(char_col,
list(function(x) {
x %>% as.character()
})) %>%
.remove_na()
data <-
.munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)
new_file <-
x %>% str_replace_all(".csv", ".tsv.gz")
data %>%
data.table::fwrite(new_file)
unlink(x)
rm(data)
gc()
return(invisible())
})
"FINISHED" %>%
message()
return(invisible)
})
if (oldwd != getwd()) {
setwd(oldwd)
}
return(invisible())
}
.parse_dl_files <-
function(data_path = "Desktop/usa_contracts/",
final_path = "Desktop/data/usa_spending") {
oldwd <- getwd()
setwd("~")
setwd(data_path)
types <-
list.files() %>% str_remove_all("\\_|[0-9]") %>% unique()
types %>%
walk(function(type) {
folders <-
list.files()[list.files() %>%
str_detect(type)]
folders %>%
walk(function(folder) {
folder %>% message()
setwd(folder)
year <-
folder %>% str_remove_all("contracts|assistance|_") %>% as.numeric()
files <- list.files()
data <-
files %>%
map_dfr(function(x) {
df <- x %>%
fread(showProgress = FALSE) %>%
as_tibble() %>%
mutate_if(is.character, list(function(x) {
if_else(x == "", NA_character_, x)
}))
if (df %>% hasName("type_of_idc")) {
df <- df %>%
rename(typeIDC = type_of_idc)
}
if (df %>% hasName("usaspending_permalink")) {
df <- df %>%
rename(urlUSASpending = usaspending_permalink)
}
char_cols <-
df %>% select(
matches(
"codeCity|zipcode|idAgency|idOffice|idCFDA|slug|code|telephone|fax|type|cageVendor"
)
) %>% names()
num_cols <-
df %>% select(matches("amount")) %>% names()
df <- df %>% mutate_at(char_cols, as.character)
df <- df %>% mutate_at(num_cols, as.numeric)
df
})
final_data <-
glue("{final_path}/{type}/{year}.tsv.gz")
if (data %>% hasName("amountObligation")) {
amt <-
data %>% pull(amountObligation) %>% sum(na.rm = T) %>% currency(digits = 0)
glue(
"{nrow(data) %>% comma(digits = 0)} {type} transactions in {year} totaling {amt}"
) %>% message()
}
setwd("~")
to_logical_cols <-
data %>% select(matches("^is|^has")) %>% select_if(is.character) %>% names()
if (length(to_logical_cols) > 0) {
data <- data %>%
mutate_at(to_logical_cols,
list(function(x) {
case_when(x %in% c("f", "F") ~ F,
TRUE ~ T)
}))
}
data <-
data %>% .add_budget_year()
data <-
data %>% add_department_codes()
data <-
data %>% .add_dod_type()
data <-
data %>% .fix_foreign_reference()
data <-
data %>% .add_analysis_contract()
data <-
data %>% .add_original_dates()
data <-
data %>% .allocate_federal_accounts()
data <-
data %>% .guess_duns_type()
data <-
data %>% .add_agency_cgacs()
data %>% write_csv(final_data)
rm(data)
setwd(data_path)
return(invisible())
})
})
}
#' Fix USA Spending Downloads
#'
#' @param data_path
#' @param final_path
#' @param clean_entity_column
#'
#' @return
#' @export
#'
#' @examples
fix_usa_spending_downloads <-
function(data_path = "Desktop/usa_contracts/",
final_path = "Desktop/data/usa_spending",
clean_entity_column = F) {
oldwd <- getwd()
setwd("~")
if (oldwd != getwd()) {
setwd(data_path)
}
types <-
list.files() %>%
str_to_lower() %>%
str_remove_all("\\_") %>%
str_remove_all("[0-9]") %>%
unique()
types %>%
walk(function(type) {
search_type <-
case_when(
type %>% str_detect("contract") ~ "contract",
type %>% str_detect("assistance") ~ "assistance"
)
.fix_dls(folder = data_path,
type = search_type,
clean_entity_column = clean_entity_column)
})
.parse_dl_files(data_path = data_path,
final_path = final_path)
}
#' Parse USA Spending Bulk URLs
#'
#' Downloads and parses vector of
#' USA spending bulk urls.
#'
#' @param urls vector of USA Spending monthly zip files
#' @param clean_entity_column if \code{TRUE} cleans entity columns
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
parse_usa_spending_bulk_urls <-
function(urls,
clean_entity_column = F,
return_message = T) {
.dl_usa_spending_bulk_safe <-
possibly(.dl_usa_spending_bulk, tibble())
data <-
urls %>%
map_dfr(function(url) {
.dl_usa_spending_bulk_safe(
url = url,
clean_entity_column = clean_entity_column,
return_message = return_message
)
})
data
}
#' Bulk US Spending Download
#'
#' Downloads bulk monthly files
#' for specified spending types and
#' years
#'
#' @param years vector of years from 2008 to current year
#' @param types type of spending \itemize{
#' \item contract - contract spending that gets recorded in FPDS
#' \item assistance - non FPDS financial assistance spending
#' }
#' @param agencies specific agency if applicable, defaults to all which includes
#' all agencies
#' @param api_version 2
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
bulk_usa_spending <-
function(years = 2001,
clean_entity_column = F,
types = "contracts",
agencies = NA,
api_version = 2,
return_message = T,
...) {
df_urls <-
bulk_usa_spending_urls(
years = years,
types = types,
agencies = agencies,
api_version = api_version,
return_message = return_message
)
data <-
parse_usa_spending_bulk_urls(urls = df_urls$urlBulkZip,
clean_entity_column = clean_entity_column)
data
}
#' Treasury Account Table Dictionary
#'
#' Dictionary of Treasury account
#' symbols for budget search
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_tas()
dictionary_tas <-
function() {
data <-
"https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/dev/usaspending_api/data/tas_list.csv" %>%
fread(verbose = F, showProgress = FALSE) %>%
as_tibble()
data <-
.munge_fpds_names(data = data)
data <- data %>%
rename(dateEstablished = datetimeEstablished,
dateEnd = datetimeEnd) %>%
mutate_at(c("dateEstablished",
"dateEnd"),
list(function(x) {
x %>% substr(1, 10) %>% ymd()
})) %>%
.munge_data()
data
}
# data_dictioanry ---------------------------------------------------------
#' USA Spending Schema
#'
#' Includes all the relevent fields and schema
#' descriptions for USASpending.gov
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
#' dictionary_usa_spending_schema()
dictionary_usa_spending_schema <-
function() {
data <-
"https://files.usaspending.gov/docs/Data_Dictionary_Crosswalk.xlsx" %>%
download_excel_file()
df_headers <-
tibble(
col1 = data %>% dplyr::slice(1) %>% as.character(),
col2 = data %>% dplyr::slice(2) %>% as.character()
)
actual_names <- df_headers %>%
mutate(
col1 = case_when(
col1 == "Schema Data Label & Description" ~ "schema",
col1 == "USA Spending Downloads" ~ "usaspending",
col1 == "Legacy USA Spending" ~ "usaspendingLegacy",
TRUE ~ col1
)
) %>%
fill(col1) %>%
mutate(col2 = col2 %>% str_remove_all("\\ ")) %>%
unite(name, col1, col2, sep = "") %>%
pull(name)
data <- data %>% dplyr::slice(3:nrow(data)) %>%
mutate_all(list(function(x) {
ifelse(x == "N/A", NA, x)
})) %>%
setNames(actual_names)
data
}
# dictionaries ------------------------------------------------------------
# https://api.usaspending.gov/api/v1/references/glossary/?limit=500000
# https://github.com/fedspendingtransparency/usaspending-api/tree/dev/usaspending_api/data
#' U.S. Government Term Dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_government_terms()
dictionary_government_terms <-
function() {
json_data <-
"https://api.usaspending.gov/api/v2/references/data_dictionary/" %>%
jsonlite::fromJSON(simplifyDataFrame = T)
headers <- json_data$document$headers$raw
data <-
json_data$document$rows %>% as_tibble() %>%
set_names(headers)
data
}
# database_dumps ----------------------------------------------------------
# https://files.usaspending.gov/database_download/
# parse -------------------------------------------------------------------
.parse_spending_url <-
function(url = "https://api.usaspending.gov/api/v1/awards/27217896/") {
json_data <-
url %>% jsonlite::fromJSON(simplifyDataFrame = T, flatten = T)
json_data %>% flatten_df()
}
.parse_agency_url <- function() {
}
#' USA Spending term dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_us_spending()
dictionary_us_spending <-
function() {
data <-
"https://github.com/fedspendingtransparency/usaspending-api/blob/master/usaspending_api/data/USAspendingGlossary.xlsx?raw=true" %>%
rio::import() %>%
as_tibble() %>%
set_names(
c(
"nameTerm",
"descriptionTerm",
"nameSchema",
"descriptionSchema",
"resourcesMore",
"markdownResources"
)
)
data
}
function() {
data <-
"https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/master/usaspending_api/data/tas_list.csv" %>% read_csv()
}
#' U.S. Federal Budgets
#'
#' United States federal budgets by category since 1976 with
#' projections going out 4 years.
#'
#'
#' @return
#' @export
#'
#' @examples
#' us_budgets()
us_budgets <-
memoise::memoise(function() {
data <-
"https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/master/usaspending_api/data/budget_authority/budget_authority.csv" %>%
fread(verbose = F, showProgress = FALSE) %>%
as_tibble()
data <-
data %>%
.munge_fpds_names()
gather_cols <-
data %>% select(-one_of(data %>% select(matches("fy")) %>% names())) %>% names()
data <-
data %>%
gather(period, amount, -gather_cols)
data <-
data %>%
rename(isOnBudget = typeBudget) %>%
mutate(
isOnBudget = isOnBudget == "On-budget",
amount = formattable::currency(amount, digits = 0) * 1000
) %>%
.munge_data(clean_address = F)
current_year <- year(Sys.Date())
data <-
data %>%
mutate(
yearBudget = readr::parse_number(period),
isCurrentYear = yearBudget == current_year,
isProjection = yearBudget > current_year,
isActualBudget = yearBudget < current_year,
dateBudget = ifelse(
!is.na(yearBudget),
glue("{yearBudget}-09-30") %>% as.character(),
NA
)
) %>%
suppressWarnings() %>%
mutate(dateBudget = ymd(dateBudget)) %>%
select(yearBudget,
isProjection,
isActualBudget,
isCurrentYear,
dateBudget,
everything())
data
})
#' Dictionary of Government Agencies and Offices
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_government_agencies()
dictionary_government_agencies <-
function() {
data <-
"https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/c83b9cdb8978eb3445b6771f3fec9277c00de0b9/usaspending_api/data/authoritative_agency_list.csv" %>%
vroom::vroom()
data <-
data %>%
.munge_fpds_names() %>%
rename(slugAgency1 = idAgency) %>%
.munge_data() %>%
rename(idAgency = slugAgency1) %>%
select(-matches("Admin|ADMIN"))
data <-
data %>%
select(idAgency, nameAgency, nameOffice, everything())
data <- data %>%
mutate(icon = ifelse(
is.na(icon),
NA,
glue("https://www.usaspending.gov/graphics/agency/{icon}.jpg") %>% as.character()
)) %>%
mutate_at(c("idAgency"),
as.numeric) %>%
select(-matches("REGISTERED"))
data
}
#' 2019 Product Service Code Dictionary
#'
#' Returns 2019 product service code dictionary
#'
#' @param only_active if \code{TRUE} returns only active PSCs
#' @param url
#' @param snake_names
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_psc_active()
dictionary_psc_active <-
memoise::memoise(function(url = "https://www.fpds.gov/downloads/PSC_Data_March_2020.xls", only_active = T, snake_names = F) {
options(warn = -1)
tmp <-
tempfile()
curl_download(url, tmp)
data <-
tmp %>% read_excel(sheet = 2)
unlink(tmp)
data <-
data %>%
setNames(
c(
"codeProductService",
"nameProductService",
"dateStart",
"dateEnd",
"nameProductServiceFull",
"detailsProductServiceExcludes",
"detailsProductServiceNotes",
"detailsProductServiceIncludes"
)
)
data <-
data %>%
mutate_at(c("dateStart", "dateEnd"),
list(function(x) {
as.Date(x)
}))
data <- data %>%
mutate_if(is.character, list(function(x){
x %>% str_replace_all("—", ", ")
}))
data <- data %>%
.munge_data(clean_address = F, unformat = T)
data <- data %>%
mutate_if(is.character,
list(function(x) {
str_remove_all(x, "\\.")
}))
data <-
data %>%
mutate(
isParentPSC = nchar(codeProductService) %in% c(1:2),
isActivePSC = is.na(dateEnd)
) %>%
mutate(
dateStart = dateStart %>% as.character() %>% str_replace_all("2105", "2015") %>% ymd(),
typePSC = case_when(
codeProductService %>% substr(1, 1) %in% c("1", "2", "3", "4", "5", "6", "7", "8", "9") ~ "PRODUCT",
TRUE ~ "SERVICE"
),
idSolicitationGroup =
case_when(
typePSC == "SERVICE" ~ substr(codeProductService, 1, 1),
TRUE ~ substr(codeProductService, 1, 2)
)
) %>%
select(isParentPSC,
isActivePSC,
typePSC,
idSolicitationGroup,
everything())
df_groups <-
data %>% distinct(idSolicitationGroup) %>%
left_join(
data %>% select(
idSolicitationGroup = codeProductService,
nameSolicitationGroup = nameProductService,
dateStart
),
by = "idSolicitationGroup"
) %>% distinct() %>%
group_by(idSolicitationGroup) %>%
filter(dateStart == max(dateStart)) %>%
ungroup() %>%
select(-dateStart)
data <- data %>%
left_join(df_groups,
by = "idSolicitationGroup")
data <-
data %>%
select(
isParentPSC,
isActivePSC,
typePSC,
idSolicitationGroup,
nameSolicitationGroup,
everything()
) %>%
distinct()
data <-
data %>%
mutate(
detailsProductServiceIncludes = case_when(
detailsProductServiceIncludes == "" ~ NA_character_,
TRUE ~ detailsProductServiceIncludes
),
countItemsIncluded = str_count(detailsProductServiceIncludes, pattern = ";") %>% coalesce(0L),
countItemsExcluded = str_count(detailsProductServiceExcludes, pattern = ";") %>% coalesce(0L),
countDaysActive = case_when(
!is.na(dateEnd) ~ (dateEnd - dateStart) %>% as.integer(),
TRUE ~ (Sys.Date() - dateStart) %>% as.integer()
)
)
data <- data %>%
separate(
nameProductService,
into = c("namePSC", "detailsPSC"),
sep = " - ",
extra = "merge",
fill = "right",
remove = F
)
data <- data %>%
mutate(groupProductService = case_when(
nchar(codeProductService) > 2 ~ codeProductService %>% substr(1, 3),
TRUE ~ str_c(codeProductService %>% substr(1, 2), "0")
)) %>%
select(
isParentPSC,
isActivePSC,
typePSC,
idSolicitationGroup,
nameSolicitationGroup,
groupProductService,
codeProductService,
everything()
)
if (only_active) {
data <-
data %>%
filter(isActivePSC)
}
if (snake_names) {
data <- data %>% janitor::clean_names()
}
data
})
#' Historic Product Service Code Dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_psc_historic()
dictionary_psc_historic <-
memoise::memoise(function(snake_names = F) {
data <-
"https://gist.githubusercontent.com/abresler/beb09e6957a4f64cb03c5c5778a96247/raw/5b108ec0bd5348b1681d54bbe1486bc66bb8c524/dict_psc.csv" %>%
read_csv() %>%
suppressWarnings() %>%
suppressMessages() %>%
.munge_data()
names(data)[1:2] <- c("codeProductService", "nameFullPSC")
df_groups <-
data %>%
select(codeProductService) %>%
mutate(
letternumber = codeProductService %>% substr(1, 1),
isNumber = !is.na(letternumber %>% readr::parse_number())
) %>%
mutate(
idSolicitationGroup = case_when(
isNumber ~ codeProductService %>% substr(1, 2),
TRUE ~ codeProductService %>% substr(1, 1)
)
) %>%
select(codeProductService, idSolicitationGroup) %>%
left_join(dictionary_solicitation_groups(), by = "idSolicitationGroup")
data <- data %>%
left_join(df_groups, by = "codeProductService") %>%
distinct()
data <- data %>%
mutate(groupProductService = case_when(
nchar(codeProductService) > 2 ~ codeProductService %>% substr(1, 3),
TRUE ~ str_c(codeProductService %>% substr(1, 2), "0")
)) %>%
select(
isParentPSC,
isActivePSC,
typePSC,
idSolicitationGroup,
nameSolicitationGroup,
groupProductService,
codeProductService,
everything()
)
if (snake_names) {
data <- data %>% janitor::clean_names()
}
data
})
# account_symbols ---------------------------------------------------------
# https://datalab.usaspending.gov/assets/analyst-guide-1-2.pdf
function() {
data <-
"https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/dev/usaspending_api/data/program_activity.csv" %>%
read_csv()
}
# assitance files ---------------------------------------------------------
.parse_usa_assitance <-
function(url = "https://files.usaspending.gov/agency_submissions/Raw%20Financial%20Assistance%20Files/index.html") {
}
.parse_usa_data_act_files <-
function(url = "https://files.usaspending.gov/agency_submissions/Raw%20Quarterly%20DATA%20Act%20Files/index.html") {
}
# bulk --------------------------------------------------------------------
#' Dictionary of USA Spending Database URLs
#'
#' Returns most recent monthly bulk data base URLs from
#' USA spending
#'
#' @return
#' @export
#'
#' @examples
dictionary_usa_spending_database_urls <-
memoise::memoise(function() {
page <-
"https://files.usaspending.gov/database_download/" %>%
read_html()
nameFile <- page %>%
html_nodes("td:nth-child(1)") %>%
html_text() %>%
str_to_upper()
typeDatabase <-
page %>%
html_nodes("td:nth-child(2)") %>%
html_text() %>%
str_to_upper()
urlZIP <-
page %>% html_nodes("td:nth-child(3) a") %>%
html_attr("href") %>%
map_chr(function(x) {
x %>% substr(2, nchar(x))
})
dateData <-
urlZIP %>% str_split("\\_") %>% map(2) %>% flatten_chr() %>% str_remove_all(".zip") %>% parse_number() %>% ymd()
sizeGB <-
page %>%
html_nodes("td:nth-child(4)") %>%
html_text() %>%
parse_number()
urlZIP <-
str_c("https://files.usaspending.gov/database_download", urlZIP)
tibble(dateData, typeDatabase, nameFile, sizeGB, urlZIP)
})
# budget_authority --------------------------------------------------------
#' Agency Budget Authority
#'
#' Returns information about the most recent
#' budget requests for each of the parent government agencies and
#' departments
#'
#' @return
#' @export
#'
#' @examples
#' usa_spending_agency_budget_authority()
usa_spending_agency_budget_authority <-
memoise::memoise(function() {
data <-
"https://api.usaspending.gov/api/v2/references/toptier_agencies/?sort=percentage_of_total_budget_authority&order=desc" %>%
fromJSON()
data <-
data$results %>% as_tibble()
data <-
data %>%
munge_usa_spending_names()
data <-
data %>%
.munge_data() %>%
mutate_at(c("yearBudget", "quarterMostRecent"),
as.numeric)
data <- data %>%
mutate(
amountRemainingVsBudget = amountBudgeted - amountSpent,
amountRemainingVsObligation = amountObligated - amountSpent,
amountUnObligated = amountBudgeted - amountObligated,
)
data
})
function() {
url <- "https://datalab.usaspending.gov/americas-finance-guide/"
page <- read_html(url)
slugs <-
html_nodes(page, ".afg__download--div a") %>% html_attr("href") %>%
str_split("/") %>%
map_chr(function(x) {
x[length(x)]
})
urls <-
glue("https://datalab.usaspending.gov/assets/ffg/data/{slugs}") %>% as.character()
data <-
tibble(slugFile = slugs, urls) %>%
mutate(
nameFile = case_when(
slugFile %>% str_detect("revenue") ~ "FEDERAL REVENUE",
slugFile %>% str_detect("spending") ~ "FEDERAL SPENDING",
slugFile %>% str_detect("deficit") ~ "FEDERAL DEFICIT",
slugFile %>% str_detect("debt") ~ "FEDERAL DEBT"
)
) %>%
select(nameFile, urlCSV = urls)
data$urlCSV %>%
map_dfr(function(x) {
x %>% message()
df <- fread(x, showProgress = FALSE) %>% as_tibble()
if (x == "https://datalab.usaspending.gov/assets/ffg/data/overview_federal_deficit.csv") {
df <- df %>%
setNames(c(
"yearBudget",
"categorySpending",
"amount",
"urlSource",
"nameSource"
)) %>%
mutate(urlCSV = x)
return(df)
}
df <- df %>%
setNames(c("yearBudget", "amount", "urlSource", "nameSource")) %>%
mutate(urlCSV = x)
df
})
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.