govtrackR: Federal government tracking tools

Documented in bulk_usa_spending bulk_usa_spending_urls dictionary_government_agencies dictionary_government_terms dictionary_tas dictionary_usa_spending_names dictionary_usa_spending_schema dictionary_us_spending download_usa_spending fix_usa_spending_downloads munge_usa_spending_names parse_usa_spending_bulk_urls

# dictionaries ------------------------------------------------------------

#' USA Spending name dictionary
#'
#' Dictionary of names from USA Spending to
#' govtrackR
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_usa_spending_names()
dictionary_usa_spending_names <-
  function() {
    tibble(
      nameUSASpending = c(
        "contract_transaction_unique_key",
        "contract_award_unique_key",
        "award_id_piid",
        "modification_number",
        "transaction_number",
        "parent_award_agency_id",
        "parent_award_agency_name",
        "parent_award_id",
        "parent_award_modification_number",
        "federal_action_obligation",
        "total_dollars_obligated",
        "base_and_exercised_options_value",
        "current_total_value_of_award",
        "base_and_all_options_value",
        "potential_total_value_of_award",
        "action_date",
        "action_date_fiscal_year",
        "period_of_performance_start_date",
        "period_of_performance_current_end_date",
        "period_of_performance_potential_end_date",
        "ordering_period_end_date",
        "solicitation_date",
        "awarding_agency_code",
        "awarding_agency_name",
        "awarding_sub_agency_code",
        "awarding_sub_agency_name",
        "awarding_office_code",
        "awarding_office_name",
        "funding_agency_code",
        "funding_agency_name",
        "funding_sub_agency_code",
        "funding_sub_agency_name",
        "funding_office_code",
        "funding_office_name",
        "treasury_accounts_funding_this_award",
        "federal_accounts_funding_this_award",
        "foreign_funding",
        "foreign_funding_description",
        "sam_exception",
        "sam_exception_description",
        "recipient_duns",
        "recipient_name",
        "recipient_doing_business_as_name",
        "cage_code",
        "recipient_parent_name",
        "recipient_parent_duns",
        "recipient_country_code",
        "recipient_country_name",
        "recipient_address_line_1",
        "recipient_address_line_2",
        "recipient_city_name",
        "recipient_state_code",
        "recipient_state_name",
        "recipient_zip_4_code",
        "recipient_congressional_district",
        "recipient_phone_number",
        "recipient_fax_number",
        "primary_place_of_performance_country_code",
        "primary_place_of_performance_country_name",
        "primary_place_of_performance_city_name",
        "primary_place_of_performance_county_name",
        "primary_place_of_performance_state_code",
        "primary_place_of_performance_state_name",
        "primary_place_of_performance_zip_4",
        "primary_place_of_performance_congressional_district",
        "award_or_idv_flag",
        "award_type_code",
        "award_type",
        "idv_type_code",
        "idv_type",
        "multiple_or_single_award_idv_code",
        "multiple_or_single_award_idv",
        "type_of_idc_code",
        "type_of_idc",
        "type_of_contract_pricing_code",
        "type_of_contract_pricing",
        "award_description",
        "action_type_code",
        "action_type",
        "solicitation_identifier",
        "number_of_actions",
        "inherently_governmental_functions",
        "inherently_governmental_functions_description",
        "product_or_service_code",
        "product_or_service_code_description",
        "contract_bundling_code",
        "contract_bundling",
        "dod_claimant_program_code",
        "dod_claimant_program_description",
        "naics_code",
        "naics_description",
        "recovered_materials_sustainability_code",
        "recovered_materials_sustainability",
        "domestic_or_foreign_entity_code",
        "domestic_or_foreign_entity",
        "dod_acquisition_program_code",
        "dod_acquisition_program_description",
        "information_technology_commercial_item_category_code",
        "information_technology_commercial_item_category",
        "epa_designated_product_code",
        "epa_designated_product",
        "country_of_product_or_service_origin_code",
        "country_of_product_or_service_origin",
        "place_of_manufacture_code",
        "place_of_manufacture",
        "subcontracting_plan_code",
        "subcontracting_plan",
        "extent_competed_code",
        "extent_competed",
        "solicitation_procedures_code",
        "solicitation_procedures",
        "type_of_set_aside_code",
        "type_of_set_aside",
        "evaluated_preference_code",
        "evaluated_preference",
        "research_code",
        "research",
        "fair_opportunity_limited_sources_code",
        "fair_opportunity_limited_sources",
        "other_than_full_and_open_competition_code",
        "other_than_full_and_open_competition",
        "number_of_offers_received",
        "commercial_item_acquisition_procedures_code",
        "commercial_item_acquisition_procedures",
        "small_business_competitiveness_demonstration_program",
        "simplified_procedures_for_certain_commercial_items_code",
        "simplified_procedures_for_certain_commercial_items",
        "a76_fair_act_action_code",
        "a76_fair_act_action",
        "fed_biz_opps_code",
        "fed_biz_opps",
        "local_area_set_aside_code",
        "local_area_set_aside",
        "price_evaluation_adjustment_preference_percent_difference",
        "clinger_cohen_act_planning_code",
        "clinger_cohen_act_planning",
        "materials_supplies_articles_equipment_code",
        "materials_supplies_articles_equipment",
        "labor_standards_code",
        "labor_standards",
        "construction_wage_rate_requirements_code",
        "construction_wage_rate_requirements",
        "interagency_contracting_authority_code",
        "interagency_contracting_authority",
        "other_statutory_authority",
        "program_acronym",
        "parent_award_type_code",
        "parent_award_type",
        "parent_award_single_or_multiple_code",
        "parent_award_single_or_multiple",
        "major_program",
        "national_interest_action_code",
        "national_interest_action",
        "cost_or_pricing_data_code",
        "cost_or_pricing_data",
        "cost_accounting_standards_clause_code",
        "cost_accounting_standards_clause",
        "gfe_gfp_code",
        "gfe_gfp",
        "sea_transportation_code",
        "sea_transportation",
        "undefinitized_action_code",
        "undefinitized_action",
        "consolidated_contract_code",
        "consolidated_contract",
        "performance_based_service_acquisition_code",
        "performance_based_service_acquisition",
        "multi_year_contract_code",
        "multi_year_contract",
        "contract_financing_code",
        "contract_financing",
        "purchase_card_as_payment_method_code",
        "purchase_card_as_payment_method",
        "contingency_humanitarian_or_peacekeeping_operation_code",
        "contingency_humanitarian_or_peacekeeping_operation",
        "alaskan_native_owned_corporation_or_firm",
        "american_indian_owned_business",
        "indian_tribe_federally_recognized",
        "native_hawaiian_owned_business",
        "tribally_owned_business",
        "veteran_owned_business",
        "service_disabled_veteran_owned_business",
        "woman_owned_business",
        "women_owned_small_business",
        "economically_disadvantaged_women_owned_small_business",
        "joint_venture_women_owned_small_business",
        "joint_venture_economic_disadvantaged_women_owned_small_bus",
        "minority_owned_business",
        "subcontinent_asian_asian_indian_american_owned_business",
        "asian_pacific_american_owned_business",
        "black_american_owned_business",
        "hispanic_american_owned_business",
        "native_american_owned_business",
        "other_minority_owned_business",
        "contracting_officers_determination_of_business_size",
        "contracting_officers_determination_of_business_size_code",
        "emerging_small_business",
        "community_developed_corporation_owned_firm",
        "labor_surplus_area_firm",
        "us_federal_government",
        "federally_funded_research_and_development_corp",
        "federal_agency",
        "us_state_government",
        "us_local_government",
        "city_local_government",
        "county_local_government",
        "inter_municipal_local_government",
        "local_government_owned",
        "municipality_local_government",
        "school_district_local_government",
        "township_local_government",
        "us_tribal_government",
        "foreign_government",
        "organizational_type",
        "corporate_entity_not_tax_exempt",
        "corporate_entity_tax_exempt",
        "partnership_or_limited_liability_partnership",
        "sole_proprietorship",
        "small_agricultural_cooperative",
        "international_organization",
        "us_government_entity",
        "community_development_corporation",
        "domestic_shelter",
        "educational_institution",
        "foundation",
        "hospital_flag",
        "manufacturer_of_goods",
        "veterinary_hospital",
        "hispanic_servicing_institution",
        "receives_contracts",
        "receives_grants",
        "receives_contracts_and_grants",
        "airport_authority",
        "council_of_governments",
        "housing_authorities_public_tribal",
        "interstate_entity",
        "planning_commission",
        "port_authority",
        "transit_authority",
        "subchapter_scorporation",
        "limited_liability_corporation",
        "foreign_owned_and_located",
        "for_profit_organization",
        "nonprofit_organization",
        "other_not_for_profit_organization",
        "the_ability_one_program",
        "number_of_employees",
        "annual_revenue",
        "private_university_or_college",
        "state_controlled_institution_of_higher_learning",
        "1862_land_grant_college",
        "1890_land_grant_college",
        "1994_land_grant_college",
        "minority_institution",
        "historically_black_college",
        "tribal_college",
        "alaskan_native_servicing_institution",
        "native_hawaiian_servicing_institution",
        "school_of_forestry",
        "veterinary_college",
        "dot_certified_disadvantage",
        "self_certified_small_disadvantaged_business",
        "small_disadvantaged_business",
        "c8a_program_participant",
        "historically_underutilized_business_zone_hubzone_firm",
        "sba_certified_8a_joint_venture",
        "highly_compensated_officer_1_name",
        "highly_compensated_officer_1_amount",
        "highly_compensated_officer_2_name",
        "highly_compensated_officer_2_amount",
        "highly_compensated_officer_3_name",
        "highly_compensated_officer_3_amount",
        "highly_compensated_officer_4_name",
        "highly_compensated_officer_4_amount",
        "highly_compensated_officer_5_name",
        "highly_compensated_officer_5_amount",
        "last_modified_date",
        "assistance_transaction_unique_key",
        "assistance_award_unique_key",
        "award_id_fain",
        "award_id_uri",
        "sai_number",
        "non_federal_funding_amount",
        "total_funding_amount",
        "face_value_of_loan",
        "original_subsidy_cost",
        "total_subsidy_cost",
        "total_loan_value",
        "recipient_city_code",
        "recipient_county_code",
        "recipient_county_name",
        "recipient_zip_code",
        "recipient_zip_last_4_code",
        "recipient_foreign_city_name",
        "recipient_foreign_province_name",
        "recipient_foreign_postal_code",
        "primary_place_of_performance_code",
        "primary_place_of_performance_county_code",
        "primary_place_of_performance_foreign_location",
        "cfda_number",
        "cfda_title",
        "assistance_type_code",
        "assistance_type_description",
        "business_funds_indicator_code",
        "business_funds_indicator_description",
        "business_types_code",
        "business_types_description",
        "correction_delete_indicator_code",
        "correction_delete_indicator_description",
        "action_type_description",
        "record_type_code",
        "record_type_description",
        "agency_id",
        "abbreviation",
        "agency_name",
        "congressional_justification_url",
        "active_fy",
        "active_fq",
        "outlay_amount",
        "obligated_amount",
        "budget_authority_amount",
        "current_total_budget_authority_amount",
        "percentage_of_total_budget_authority",
        "parent_award_id_piid",
        "usaspending_permalink",

        "total_obligated_amount",
        "total_non_federal_funding_amount",
        "original_loan_subsidy_cost",
        "total_face_value_of_loan",
        "total_loan_subsidy_cost",
        "primary_place_of_performance_scope",
        "government_furnished_property_code",
        "government_furnished_property",
        "alaskan_native_corporation_owned_firm",
        "native_hawaiian_organization_owned_firm",
        "tribally_owned_firm",
        "receives_financial_assistance",
        "receives_contracts_and_financial_assistance",
        "foreign_owned"
      ),

      nameActual =
        c(
          "keyContract",
          "keyAwardcontract_award_unique_key",
          "idContract",
          "codeModification",
          "numberTransaction",
          "idAgencyAwardIDV",
          "nameAgencyAwardIDV",
          "idContractIDV",
          "codeModificationIDV",
          "amountObligation",
          "amountObligationTotal",
          "amountBaseAndExercisedOption",
          "amountAwardCurrentTotalValue",
          "amountBaseAllOptions",
          "amountAwardPotentialTotal",
          "dateObligation",
          "yearFiscalObligation",
          "datePerformanceStart",
          "datePerformanceEnd",
          "datePerformanceEndPotential",
          "dateOrderingEnd",
          "dateSolicitation",
          "idDepartmentAward",
          "nameDepartmentAward",
          "idAgencyAward",
          "nameAgencyAward",
          "idOfficeAward",
          "nameOfficeAward",
          "idDepartmentFunding",
          "nameDepartmentFunding",
          "idAgencyFunding",
          "nameAgencyFunding",
          "idOfficeFunding",
          "nameOfficeFunding",
          "idTreasuryAccounts",
          "idFederalAccounts",
          "codeForeignFunding",
          "typeForeignFunding",
          "idSAMException",
          "typeSAMException",
          "idDUNS",
          "nameVendor",
          "nameVendorDBA",
          "cageVendor",
          "nameVendorParent",
          "idDUNSParent",
          "codeCountryVendor",
          "nameCountryVendor",
          "addressStreet1Vendor",
          "addressStreet2Vendor",
          "cityVendor",
          "codeStateVendor",
          "nameStateVendor",
          "zipcodeVendor",
          "codeCongressionalDistrictVendor",
          "telephoneVendor",
          "faxVendor",
          "codeCountryPerformance",
          "nameCountryPerformance",
          "cityPerformance",
          "nameCountyPerformance",
          "codeStatePerformance",
          "nameStatePerformance",
          "zipcodePerformance",
          "codeCongressionalDistrictPerformance",
          "typeAwardFlag",
          "codeAward",
          "typeAward",
          "codeIDV",
          "typeIDV",
          "codeSingleMultipleIDV",
          "typeSingleMultipleIDV",
          "codeIDC",
          "typeIDC",
          "codeContractPricing",
          "typeContractPricing",
          "descriptionAward",
          "codeActionType",
          "typeAction",
          "idSolicitation",
          "countActions",
          "codeInherentlyGovernmentFunction",
          "typeInherentlyGovernmentFunction",
          "codeProductService",
          "nameProductService",
          "codeContractBundling",
          "typeContractBundling",
          "codeDODClaimant",
          "typeDODClaimant",
          "idNAICS",
          "nameNAICS",
          "codeRecoveredMaterials",
          "typeRecoveredMaterials",
          "codeDomesticForeignEntity",
          "typeDomesticForeignEntity",
          "codeDODAcquisition",
          "typeDODAcquisition",
          "codeTechnologyItem",
          "typeTechnologyItem",
          "codeEPADesignatedProduct",
          "typeEPADesignatedProduct",
          "codeCountryOriginProductService",
          "nameCountryOriginProductService",
          "codePlaceOfManufacture",
          "typePlaceOfManufacture",
          "codeSubContractingPlan",
          "typeSubContractingPlan",
          "codeContractCompetition",
          "typeContractCompetition",
          "codeSolicitationProcedures",
          "typeSolicitationProcedures",
          "codeSetAside",
          "typeSetAside",
          "codeEvaluatedPreference",
          "typeEvaluatedPreference",
          "codeResearch",
          "typeResearch",
          "codeFairOpportunityException",
          "typeFairOpportunityException",
          "codeOtherThanFullOpenCompetition",
          "typeOtherThanFullOpenCompetition",
          "countOffersReceived",
          "codeCommercialAcquisitionProcedures",
          "typeCommercialAcquisitionProcedures",
          "isSmallBusinessCompetitivenessDemonstration",
          "hasSimplifiedProcedures",
          "removeSimplifiedProcedures",
          "hasA76FairActAction",
          "removeA76FairActAction",
          "isFBOSourced",
          "removeFBO",
          "hasLocalAreaSetAside",
          "removeLocalAreaSetAside",
          "priceEvaluationAdjustmentPreferenceDifference",
          "hasClingerCohenActPlanning",
          "removeClinger",
          "hasMaterialsSuppliesArticles",
          "removeMaterialsSuppliesArticles",
          "hasLaborStandardsClause",
          "removeLabor",
          "hasConstructionWageRequirements",
          "removeConstruction",
          "codeInterAgencyContractingAuthority",
          "typeInterAgencyContractingAuthority",
          "typeStatutoryAuthority",
          "codeProgram",
          "codeParentAward",
          "typeParentAward",
          "codeMultipleSingleAward",
          "typeMultipleSingleAward",
          "nameMajorProgram",
          "codeNationalInterestAction",
          "nameNationalInterestAction",
          "codeCostPricingData",
          "typeCostPricingData",
          "codeCostAccountingClause",
          "typeCostAccountingClause",
          "hasGFEOrGFP",
          "removeGFE",
          "codeSeaTransported",
          "removeSeaTransportation",
          "codeUndefinitizedAction",
          "typeUndefinitizedAction",
          "codeConsolidatedContract",
          "typeConsolidatedContract",
          "codePerformanceBasedServiceContract",
          "typePerformanceBasedServiceContract",
          "isMultiYearContract",
          "removeMultiYearContract",
          "codeContractFinancing",
          "typeContractFinancing",
          "hasCreditCardPurchaseMethod",
          "removeCreditCardPurchaseMethod",
          "codeHumanitarianPeackeepingOperation",
          "typeHumanitarianPeackeepingOperation",
          "isAlaskanNativeOwnedBusiness",
          "isAmericanIndianOwnedBusiness",
          "isIndianTribeFederallyRecognized",
          "isNativeHawaiianOwnedBusiness",
          "isTriballyOwnedBusiness",
          "isVeteranOwnedBusiness",
          "isServiceDisabledVeteranOwnedBusiness",
          "isWomanOwnedBusiness",
          "isWomanOwnedSmallBusiness",
          "isEconomicallyDisadvantagedWomanOwnedSmallBusiness",
          "isJointVentureWomanOwnedSmallBusiness",
          "isJointVentureEconomicallyDisadvantagedWomanOwnedSmallBusiness",
          "isMinorityOwnedBusiness",
          "isSubcontinentAsianIndianAmericanOwnedBusiness",
          "isAsianPacificAmericanOwnedBusiness",
          "isBlackAmericanOwnedBusiness",
          "isHispanicAmericanOwnedBusiness",
          "isNativeAmericanOwnedBusiness",
          "isOtherMinorityOwnedBusiness",
          "typeContractingOfficerBusinessSize",
          "codeContractingOfficerBusinessSize",
          "isEmergingSmallBusiness",
          "isCommunityDevelopedCorporationOwnedFirm",
          "isLaborSurplusAreaFirm",
          "isUSFederalGovernment",
          "isFederallyFundedResearchAndDevelopmentCorp",
          "isFederalAgency",
          "isStateGovernment",
          "isLocalGovernment",
          "isCityGovernment",
          "isCountyGovernment",
          "isInterMunicipalLocalGovernment",
          "isLocalGovernmentOwned",
          "isMunicipalityLocalGovernment",
          "isSchoolDistrictLocalGovernment",
          "isTownshipLocalGovernment",
          "isTribalGovernment",
          "isForeignGovernment",
          "typeOrganization",
          "isCorporateEntityNonTaxExempt",
          "isCorporateEntityTaxExempt",
          "isPartnershipOrLLP",
          "isSoleProprietorship",
          "isSmallAgriculturalCooperative",
          "isInternationalOrganization",
          "isUSGovernmentEntity",
          "isCommunityDevelopmentCorporation",
          "isDomesticShelter",
          "isEducationalInstitution",
          "isFoundation",
          "isHospitalFlag",
          "isManufacturerOfGoods",
          "isVeterinaryHospital",
          "isHispanicServicingInstitution",
          "hasContracts",
          "hasGrants",
          "hasContractsAndGrants",
          "isAirportAuthority",
          "isCouncilOfGovernments",
          "isHousingAuthoritiesPublicOrTribal",
          "isInterstateEntity",
          "isPlanningCommission",
          "isPortAuthority",
          "isTransitAuthority",
          "isSubchapterSCorporation",
          "isLimitedLiabilityCorporation",
          "isForeignOwnedAndLocated",
          "isForProfitOrganization",
          "isNotForProfitOrganization",
          "isOtherNotForProfitOrganization",
          "isAbilityOneProgram",
          "countEmployeesVendor",
          "amountRevenueAnnualVendor",
          "isPrivateUniversityOrCollege",
          "isStateControlledInstitutionOfHigherLearning",
          "is1862LandGrantCollege",
          "is1890LandGrantCollege",
          "is1994LandGrantCollege",
          "isMinorityInstitution",
          "isHistoricallyBlackCollegeOrUniversity",
          "isTribalCollege",
          "isAlaskanNativeServicingInstitution",
          "isNativeHawaiianServicingInstitution",
          "isSchoolOfForestry",
          "isVeterinaryCollege",
          "isDOTCertifiedDisadvantagedBusinessEnterprise",
          "isSelfCertifiedSmallDisadvantagedBusiness",
          "isSBACertifiedSmallDisadvantagedBusiness",
          "isC8AParticipant",
          "isUnderUtilizedHUBZone",
          "isSBACertified8AJointVenture",
          "nameOfficerHighCompensated1",
          "amountOfficerHighCompensated1",
          "nameOfficerHighCompensated2",
          "amountOfficerHighCompensated2",
          "nameOfficerHighCompensated3",
          "amountOfficerHighCompensated3",
          "nameOfficerHighCompensated4",
          "amountOfficerHighCompensated4",
          "nameOfficerHighCompensated5",
          "amountOfficerHighCompensated5",
          "datetimeLastModified",
          "keyAssistanceTransaction",
          "keyAssistanceAward",
          "idFAIN",
          "uriAward",
          "codeSAI",
          "amountNonFederalFunding",
          "amountFundingTotal",
          "amountLoanFaceValue",
          "amountSubsidyOriginal",
          "amountSubsidyTotal",
          "amountLoanTotal",
          "codeCityVendor",
          "idCountyVendor",
          "nameCountyVendor",
          "zipcodeVendor",
          "zip4Vendor",
          "nameCityForeignVendor",
          "nameProvinceForeignVendor",
          "zipcodeForeignVendor",
          "codePrimaryPerformance",
          "idCountyPrimaryPerformance",
          "typeLocationForeignPerformance",
          "idCFDA",
          "nameProgram",
          "idAssistance",
          "typeAssistance",
          "codeBusinessFunds",
          "typeBusinessFunds",
          "codeBusinessType",
          "typeBusiness",
          "codeDeleteIndicator",
          "typeDeleteIndicator",
          "typeAction",
          "idRecord",
          "typeRecord",

          "idAgency",
          "slugAgency",
          "nameAgency",
          "urlCongressionalJustification",
          "yearBudget",
          "quarterMostRecent",
          "amountSpent",
          "amountObligated",
          "amountBudgeted",
          "amountBudgetTotal",
          "pctBudgetTotal",
          "idContractIDV",
          "urlUSASpendingPermalink",
          "amountObligationTotal",
          "amountNonFederalFundingTotal",
          "amountSubsidyOriginal",
          "amountLoanFaceValueTotal",
          "amountSubsidyTotal",
          "typePrimaryPerformanceScope",

          "codeGovernmentFurnishedProperty",
          "typeGovernmentFurnishedProperty",
          "isAlaskanNativeOwnedBusiness",
          "isNativeHawaiianOwnedBusiness",
          "isTriballyOwnedBusiness",
          "hasAssistance",
          "hasContractsAndGrants",
          "isForeignOwned"
        )

    )
  }

#' USA Spending Departments
#'
#' Returns searchable USA Spending
#' departments
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' usa_spending_departments()
usa_spending_departments <-
  memoise::memoise(function() {
    headers = c(
      `Connection` = 'close',
      `Accept` = 'application/json, text/plain, */*',
      `Origin` = 'https://www.usaspending.gov',
      `User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
      `Content-Type` = 'application/json;charset=UTF-8',
      `Sec-Fetch-Site` = 'same-site',
      `Sec-Fetch-Mode` = 'cors',
      `Referer` = 'https://www.usaspending.gov/',
      `Accept-Encoding` = 'gzip, deflate, br',
      `Accept-Language` = 'en-US,en;q=0.9'
    )

    data = '{"agency":0}'

    res <-
      httr::POST(url = 'https://api.usaspending.gov/api/v2/bulk_download/list_agencies/',
        httr::add_headers(.headers = headers),
        body = data)

    .res_to_data(res = res) %>%
      as_tibble() %>%
      setNames(c("nameDepartment", "idDepartment", "idAgency")) %>%
      .munge_data()

  })


# utils -------------------------------------------------------------------

.clean_org_usa <-
  function(data,
    col = "nameAgencyAward",
    clean_entity_column = T) {
    if (!data %>% hasName(col)) {
      return(data)
    }

    id_col <-
      col %>% str_replace_all("name", "id")

    id_col <- case_when(
      col %in% c("nameVendor") ~ c("idDUNS"),
      col %in% c("nameVendorParent") ~ "idDUNSParent",
      TRUE ~ id_col
    )

    if (!data %>% hasName(id_col)) {
      return(data)
    }

    df_count <-
      data %>%
      count(!!sym(id_col), !!sym(col), sort = T, name = "count") %>%
      .clean_usg_organizations(column = col)

    if (clean_entity_column) {
      df_count <- df_count %>%
        clean_entity_data(entity_column = col)
      df_count <- df_count %>% select(-col)
      names(df_count) <- names(df_count) %>% str_remove_all("Clean")
    }

    df_count <- df_count %>%
      filter(!is.na(!!sym(col))) %>%
      arrange(!!sym(id_col)) %>%
      group_by(!!sym(id_col)) %>%
      filter(count == max(count)) %>%
      dplyr::slice(1) %>%
      ungroup() %>%
      select(-count)


    data <-
      data %>%
      select(-one_of(col)) %>%
      left_join(df_count, by = id_col) %>%
      select(names(data), everything())

    data
  }

.clean_organization <-
  function(data,
    col = "nameVendor",
    clean_entity_column = T) {
    if (!data %>% hasName(col)) {
      return(data)
    }

    new_name <-
      col %>% str_c("Clean")


    df_count <-
      data %>%
      count(!!sym(col), sort = T, name = "count")

    new_values <- df_count %>% pull(col)
    df_count <-
      df_count %>%
      mutate(
        UQ(new_name) :=
          new_values %>%
          str_remove_all('\\"') %>%
          str_remove_all("\\(") %>%
          str_remove_all("\\)") %>%
          str_replace_all('\\, INC.|\\, INC$', ' INC') %>%
          str_replace_all("\\, INCORPORATED$", " INCORPORATED") %>%
          str_replace_all("\\, CORP.$|\\, CORP$", " CORP") %>%
          str_replace_all("\\, G.P.$|\\, GP$", " GP") %>%
          str_replace_all("\\, P.C.$|\\, PC$", " PC") %>%
          str_replace_all("\\, LTD$|\\, LTD.$", " LTD") %>%
          str_replace_all("\\, A ", "\\ A ") %>%
          str_replace_all("\\, AN ", "\\ AN ") %>%
          str_replace_all("\\, OF ", "\\ OF ") %>%
          str_replace_all("\\, LIMITED$", "\\ LIMITED") %>%
          str_replace_all("\\, LIMITED PARTNERSHIP$", "\\ LIMITED PARTNERSHIP") %>%
          str_replace_all("\\, PLLC$|\\, PLLC", " PLLC") %>%
          str_replace_all("\\, LLC$|\\, LLC|\\, L.L.C|\\, L.L.C$", " LLC") %>%
          str_replace_all("\\, LLP$|\\, LLP|\\, L.L.P|\\, L.L.P$", " LLP") %>%
          str_replace_all("\\, LLLP$|\\, LLLP|\\, L.L.L.P|\\, L.L.L.P$", " LLLP") %>%

          str_replace_all("\\, LP$|\\, L.P.$|\\, L.P$", " LP") %>%
          str_remove_all("\\, ,$| ,$|^%|^ - |\\/ |\\& ") %>%
          str_remove_all("^-") %>%
          str_squish()
      )



    df_count <-
      df_count %>%
      filter(!is.na(!!sym(new_name))) %>%
      .clean_usg_organizations(column = new_name) %>%
      mutate(UQ(new_name) := !!sym(new_name) %>% str_remove_all("\\,") %>% str_squish())

    if (clean_entity_column) {
      col_order <- names(data)
      df_count <-
        df_count %>%
        clean_entity_data(
          entity_column = new_name,
          use_business_suffix = T,
          ignore_words = NULL,
          use_n_gram_merge = F
        )

      df_count <-
        df_count %>%
        select(-new_name) %>%
        select(-count)

      data <-
        data %>%
        left_join(df_count, by = col)

      data <-
        data %>%
        select(-col)

      names(data) <-
        names(data) %>% str_remove_all("Clean")
      df_names <-
        tibble(name = names(data)) %>% mutate(column = 1:n())

      cols <-
        df_names %>% group_by(name) %>% filter(column == min(column)) %>% ungroup() %>% pull(column)

      data <- data[, cols]

      data <-
        data %>%
        select(one_of(col_order), everything())
    } else {
      col_order <- names(data)

      data <-
        data %>%
        left_join(df_count %>%
            select(-count)
          , by = col)
      data <-
        data %>%
        select(-col)

      names(data) <- names(data) %>% str_remove_all("Clean")

      data <- data %>%
        select(one_of(col_order), everything())
    }

    data
  }

# endpoints ---------------------------------------------------------------

# https://api.usaspending.gov/docs/endpoints
# schemas -----------------------------------------------------------------

# https://fiscal.treasury.gov/data-transparency/DAIMS-current.html

.res_to_data <-
  function(res) {
    res %>%
      content(as = "text", encoding = "UTF-8") %>%
      fromJSON(simplifyDataFrame = T) %>%
      flatten_df()

  }

# bulk --------------------------------------------------------------------

## https://www.usaspending.gov/#/download_center/award_data_archive

.us_bulk_urls <- function(year = 2008,
  agency = NULL,
  api_version = 2,
  type = "contracts") {
  slug_type <-
    case_when(type %>% str_to_lower() %>% str_detect("contract") ~ "contracts",
      TRUE ~ "assistance")

  agency_slug <-
    ifelse(length(agency) == 0, "all", str_to_lower(agency))

  headers  <-
    c(
      `Connection` = 'close',
      `Pragma` = 'no-cache',
      `Cache-Control` = 'no-cache',
      `Accept` = 'application/json, text/plain, */*',
      `Origin` = 'https://www.usaspending.gov',
      `User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
      `Content-Type` = 'application/json;charset=UTF-8',
      `Sec-Fetch-Site` = 'same-site',
      `Sec-Fetch-Mode` = 'cors',
      `Referer` = 'https://www.usaspending.gov/',
      `Accept-Encoding` = 'gzip, deflate, br',
      `Accept-Language` = 'en-US,en;q=0.9'
    )

  data <-
    list(agency = agency_slug,
      fiscal_year = year,
      type = slug_type) %>%
    toJSON(auto_unbox = T)

  url <-
    glue(
      'https://api.usaspending.gov/api/v{api_version}/bulk_download/list_monthly_files/'
    ) %>% as.character()

  res <-
    httr::POST(url = url,
      httr::add_headers(.headers = headers),
      body = data)

  data <-
    res %>%
    .res_to_data()

  data <-
    data %>%
    filter(!str_to_upper(file_name) %>% str_detect("DELTA")) %>%
    mutate(fiscal_year = year) %>%
    select(
      yearFiscal = fiscal_year,
      nameAgency = agency_name,
      typeAward = type,
      dateUpdated = updated_date,
      urlBulkZip = url
    ) %>%
    mutate(dateUpdated = ymd(dateUpdated)) %>%
    mutate_at(c("typeAward", "nameAgency"), str_to_upper)

  data

}

#' USA Spending Bulk URLs
#'
#' Generates a tibble containing
#' zip URLs for specified inputs
#'
#' @param years vector of years starting in 2001
#' @param types type of award \itemize{
#' \item contracts - contract spending
#' \item assistance - financial assistance awards
#' }
#' @param api_version API version, as of December 2019 it is 2
#' @param agencies if not \code{NA} vector of agency names, \code{NA} returns
#' all agencies
#' @param return_message if \code{TRUE} returns a message
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' bulk_usa_spending_urls(years = 2001:2020, types = c('contracts', 'assistance'), agencies = NA)
bulk_usa_spending_urls <-
  function(years = 2001:2020,
    types = c("contracts", "assistance"),
    agencies = NA,
    api_version = 2,
    return_message = T) {
    df_rows <-
      tidyr::expand_grid(year  = years,
        type = types,
        agency = agencies)
    .us_bulk_urls_safe <-
      possibly(.us_bulk_urls, tibble())
    all_data <-
      1:nrow(df_rows) %>%
      map_dfr(function(x) {
        df_row <- df_rows %>% dplyr::slice(x)
        year_row <- df_row$year
        type_row <- df_row$type
        if (return_message) {
          glue("Finding {type_row} for {year_row}") %>% message()
        }
        if (df_row$agency %>% is.na()) {
          agency_slug <- NULL
        } else {
          agency_slug <- df_row$agency
        }
        .us_bulk_urls_safe(
          year = year_row,
          agency = agency_slug,
          api_version = api_version,
          type = type_row
        )
      })

    all_data
  }

#' Munge USA Spending names
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
munge_usa_spending_names <-
  function(data) {
    dict_names <- dictionary_usa_spending_names()
    usa_names <-
      names(data)

    actual_names <-
      usa_names %>%
      map_chr(function(name) {
        df_row <-
          dict_names %>% filter(nameUSASpending == name)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {name}") %>% message()
          return(name)
        }

        df_row$nameActual
      })

    data %>%
      set_names(actual_names)
  }

.munge_usa_bulk <-
  function(data, clean_entity_column = T) {
    start_rows <- nrow(data)
    data <- data %>%
      select(-matches("^remove[A-Z]")) %>%
      .remove_na()

    date_time_names <-
      data %>% select(matches("^datetime[A-Z]|^date[A-Z]")) %>%
      select_if(is.character) %>%
      names()

    data <- data %>%
      select(-matches("^slug"))

    if (length(date_time_names) > 0) {
      data <-
        data %>%
        mutate_at(date_time_names,
          list(function(x) {
            case_when(nchar(x) == 10 ~ ymd(x),
              TRUE ~ ymd_hms(x) %>% as.Date())
          }))
    }

    logical_names <-
      data %>% select(matches("^is[A-Z]|^has[A-Z]")) %>%
      select_if(is.character) %>%
      names()

    if (length(logical_names)) {
      data <-
        data %>%
        mutate_at(logical_names,
          list(function(x) {
            x %>% str_to_upper()
          })) %>%
        mutate_at(logical_names,
          list(function(x) {
            case_when(
              x %in% c("T", "Y", "Y:", "TRUE", "YES",
                "Y: YES") ~ TRUE,
              x %in% c("F", "N", "FALSE",
                "N: NO", "N:") ~ FALSE,
              TRUE ~ NA
            )
          }))
    }

    code_cols <- data %>% select(matches("^code")) %>% names()

    if (length(code_cols) > 0) {
      data <- data %>%
        mutate_at(code_cols, as.character)
    }


    data <-
      .clean_org_usa(data = data,
                     col = "nameAgencyAwardIDV",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameDepartmentAward",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameAgencyAward",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameOfficeAward",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameDepartmentFunding",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameAgencyFunding",
                     clean_entity_column = clean_entity_column)

    data <-
      .clean_org_usa(data = data,
                     col = "nameOfficeFunding",
                     clean_entity_column = clean_entity_column)

    if (clean_entity_column) {
      data <-
        .clean_organization(data = data,
          clean_entity_column = clean_entity_column,
          col = "nameVendor") %>%
        distinct()

      data <-
        .clean_organization(data = data,
          clean_entity_column = clean_entity_column,
          col = "nameVendorParent") %>% distinct()
    }

    end_rows <-
      nrow(data)

    if (start_rows != end_rows) {
      stop("MERGE GOT SCREWED UP")
    }
    data

  }


.dl_year_zip <-
  function(url = "https://files.usaspending.gov/award_data_archive/2012_all_Contracts_Full_20191210.zip",
    path = "Desktop/data/usa_spending/") {
    oldwd <- getwd()
    year <-
      url %>% str_remove_all("https://files.usaspending.gov/award_data_archive/") %>%
      str_remove_all("^FY") %>%
      substr(1, 4)
    type <-
      case_when(url %>% str_to_lower() %>% str_detect("contracts") ~ "contracts",
                TRUE ~ "assistance")
    setwd("~")
    setwd(dir = path)
    outfile <- tempfile("download", fileext = ".zip")

    file <- curl::curl_download(url, outfile)
    folder <- glue("{type}_{year}") %>% as.character()
    unz_files <- unzip(file, exdir = folder)
    file %>% unlink()
    gc()
    return(invisible())
  }



#' Download USA Spending URLs
#'
#' Download basic USA spending urls
#'
#' @param years
#' @param types
#' @param path
#'
#' @return
#' @export
#'
#' @examples
download_usa_spending <-
  function(years = 2013:2020,
    types = "contracts",
    path = "Desktop/usa_contracts/") {
    oldwd <- getwd()
    df_urls <-
      bulk_usa_spending_urls(years = years, types = types)

    df_urls$urlBulkZip %>%
      walk(function(url) {
        url %>% message()
        .dl_year_zip(url = url, path = path)
      })

    if (oldwd != getwd()) {
      setwd(oldwd)
    }
    "FINISHED" %>% message()
    return(invisible())
  }

.dl_usa_spending_bulk <-
  function(url = "https://files.usaspending.gov/award_data_archive/2012_all_Contracts_Full_20191210.zip",
    clean_entity_column = F,
    return_message = TRUE) {
    if (return_message) {
      glue("Downloading {url}") %>% message()
    }
    outfile <- tempfile("download", fileext = ".zip")

    file <- curl::curl_download(url, outfile)
    unz_files <- unzip(file, exdir = "xml")
    glue::glue("Has {length(unz_files)} files") %>% cat()

    data <-
      unz_files %>%
      map_dfr(function(x) {
        x %>% message()
        data <- fread(x, showProgress = FALSE)

        data <-
          data %>%
          munge_usa_spending_names() %>%
          as_tibble()

        if (data %>% hasName("cageVendor")) {
          data <- data %>%
            mutate(cageVendor = as.character(cageVendor))
        }

        if (data %>% hasName("idAssistance")) {
          data <- data %>%
            mutate(
              codeAssistance = case_when(
                idAssistance == 2 ~ "A",
                idAssistance == 3 ~ "A",
                idAssistance == 4 ~ "B",
                idAssistance == 5 ~ "B",
                idAssistance == 6 ~ "C",
                idAssistance == 7 ~ "E",
                idAssistance == 8 ~ "F",
                idAssistance == 9 ~ "G",
                idAssistance == 10 ~ "D",
                idAssistance == 11 ~ NA_character_
              ),
              typeAssistance = case_when(
                idAssistance == 2 ~ "BLOCK GRANT",
                idAssistance == 3 ~ "FORMULA GRANT",
                idAssistance == 4 ~ "PROJECT GRANT",
                idAssistance == 5 ~ "COOPERATIVE AGREEMENT",
                idAssistance == 6 ~ "DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY OR OTHER NON-REIMBURSABLE DIRECT FINANCIAL AID",
                idAssistance == 7 ~ "DIRECT LOAN",
                idAssistance == 8 ~ "GUARANTEED/INSURED LOAN",
                idAssistance == 9 ~ "INSURANCE",
                idAssistance == 10 ~ "DIRECT PAYMENT WITH UNRESTRICTED USE (RETIREMENT, PENSION, VETERANS BENEFITS, ETC.)",
                idAssistance == 11 ~ "OTHER REIMBURSABLE, CONTINGENT, INTANGIBLE, OR INDIRECT FINANCIAL ASSISTANCE"
              )
            )
        }


        data <-
          data %>%
          mutate_if(is.character,
                    list(function(x) {
                      ifelse(x == "", NA_character_, x) %>% str_squish()
                    })) %>%
          .remove_na()


        data <- data %>%
          mutate_if(is.character,
                    list(function(x) {
                      case_when(x %in% c("NAN", "N/A", "NA") ~ NA_character_,
                                TRUE ~ x)
                    })) %>%
          .remove_na()

        num_col <-
          data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()

        data <-
          data %>%
          mutate_at(num_col,
                    list(function(x) {
                      x %>% as.character() %>% parse_number()
                    })) %>%
          .remove_na()

        char_col <-
          data %>% select(
            matches(
              "idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
            )
          ) %>% names()

        data <-
          data %>%
          mutate_at(char_col,
                    list(function(x) {
                      x %>% as.character()
                    })) %>%
          .remove_na()

        data <-
          .munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)


        data
      })

    file %>% unlink()
    unlink("xml", recursive = T)
    unz_files %>% unlink()
    gc()
    rm(unz_files)
    data <- data %>%
      mutate(
        nameVendor = case_when(
          is.na(nameVendor) & !is.na(nameVendorParent) ~ nameVendorParent,
          is.na(nameVendor)  &
            is.na(nameVendor) ~ "UNDISCLOSED VENDOR",
          TRUE ~ nameVendor
        ),
        nameVendor = case_when(nameVendor == "0"  ~ "UNDISCLOSED VENDOR",
                               TRUE ~ nameVendor),
        nameVendorParent = case_when(
          is.na(nameVendorParent) & !is.na(nameVendor) ~ nameVendorParent,
          is.na(nameVendor)  &
            is.na(nameVendor) ~ "UNDISCLOSED VENDOR PARENT",
          TRUE ~ nameVendorParent
        ),
        nameVendorParent = case_when(
          nameVendorParent == "0"  ~ "UNDISCLOSED VENDOR PARENT",
          TRUE ~ nameVendorParent
        ),
      ) %>%
      mutate_at(c("idDUNS", "idDUNSParent"),
                list(function(x) {
                  ifelse(x == 0, NA_integer_, x)
                })) %>%
      mutate(hasParent = idDUNSParent != idDUNS)


    data <-
      data %>%
      .munge_usa_bulk(clean_entity_column = clean_entity_column)

    data <- data %>%
      mutate_if(is.character, str_squish)

    data <- data %>%
      mutate_at(c("nameVendor", "nameVendorParent"),
                list(function(x) {
                  x %>% str_remove_all("^/|^-|^*") %>% str_squish()
                }))

    data <-
      data %>%
      mutate(urlBulkZip = url)

    data
  }

.fix_dl <-
  function(folder = "Desktop/usa_contracts/contracts_2011/",
    clean_entity_column = T) {
    oldwd <- getwd()

    if (getwd != "/Users/alexbresler") {
      setwd("~")
    }

    setwd(folder)
    files <-
      list.files() %>%
      discard(function(x) {
        x %>% str_detect("tsv")
      })
    files %>%
      map_dfr(function(x) {
        x %>% message()
        data <- fread(x, showProgress = FALSE)

        data <-
          data %>%
          munge_usa_spending_names() %>%
          as_tibble()

        data <-
          data %>%
          add_solicitation_group()


        data <-
          data %>%
          mutate_if(is.character, str_squish) %>%
          .remove_na()

        data <-
          data %>%
          mutate_if(is.character,
            list(function(x) {
              case_when(x %in% c("", "NAN", "N/A", "NA") ~ NA_character_,
                TRUE ~ x) %>% str_squish()
            })) %>%
          .remove_na()

        num_col <-
          data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()

        data <-
          data %>%
          mutate_at(num_col,
            list(function(x) {
              x %>% as.character() %>% parse_number()
            })) %>%
          .remove_na()

        char_col <-
          data %>% select(
            matches(
              "idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
            )
          ) %>% names()

        data <-
          data %>%
          mutate_at(char_col,
            list(function(x) {
              x %>% as.character()
            })) %>%
          .remove_na()

        data <-
          .munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)

        new_file <-
          x %>% str_replace_all(".csv", ".tsv.gz")

        data %>%
          data.table::fwrite(new_file)
        unlink(x)
        rm(data)
        gc()
        return(invisible())
      })

    "FINISHED" %>%
      message()
    return(invisible)
  }

.fix_dls <-
  function(folder = "Desktop/usa_contracts/",
    type = "assistance",
    clean_entity_column = T) {
    oldwd <- getwd()
    setwd("~")
    setwd(folder)
    folders <-
      list.files() %>%
      discard(function(x) {
        !x %>% str_detect(type)
      })
    folders %>%
      walk(function(f) {
        folder <- glue("Desktop/usa_contracts/{f}") %>% as.character()
        setwd("~")
        setwd(folder)
        files <-
          list.files() %>%
          discard(function(x) {
            x %>% str_detect("tsv")
          })

        files %>%
          map_dfr(function(x) {
            x %>% message()
            data <- fread(x, showProgress = FALSE)

            data <-
              data %>%
              munge_usa_spending_names() %>%
              as_tibble()

            data <-
              data %>%
              add_solicitation_group()


            data <-
              data %>%
              mutate_if(is.character, str_squish) %>%
              .remove_na()

            data <-
              data %>%
              mutate_if(is.character,
                        list(function(x) {
                          case_when(x %in% c("", "NAN", "N/A", "NA") ~ NA_character_,
                                    TRUE ~ x) %>% str_squish()
                        })) %>%
              .remove_na()

            num_col <-
              data %>% select(matches("idDUNS|codeCongressionalDistrict")) %>% names()

            data <-
              data %>%
              mutate_at(num_col,
                        list(function(x) {
                          x %>% as.character() %>% parse_number()
                        })) %>%
              .remove_na()

            char_col <-
              data %>% select(
                matches(
                  "idCFDA|zipcode|nameOfficeAward|^code[A-Z]|^telephone|^fax|idAgency|idFederalAccounts|idOffice"
                )
              ) %>% names()

            data <-
              data %>%
              mutate_at(char_col,
                        list(function(x) {
                          x %>% as.character()
                        })) %>%
              .remove_na()

            data <-
              .munge_usa_bulk(data = data, clean_entity_column = clean_entity_column)

            new_file <-
              x %>% str_replace_all(".csv", ".tsv.gz")

            data %>%
              data.table::fwrite(new_file)
            unlink(x)
            rm(data)
            gc()
            return(invisible())
          })

        "FINISHED" %>%
          message()
        return(invisible)
      })

    if (oldwd != getwd()) {
      setwd(oldwd)
    }
    return(invisible())
  }


.parse_dl_files <-
  function(data_path = "Desktop/usa_contracts/",
    final_path =  "Desktop/data/usa_spending") {
    oldwd <- getwd()
    setwd("~")
    setwd(data_path)
    types <-
      list.files() %>% str_remove_all("\\_|[0-9]") %>% unique()
    types %>%
      walk(function(type) {
        folders <-
          list.files()[list.files() %>%
              str_detect(type)]

        folders %>%
          walk(function(folder) {
            folder %>% message()
            setwd(folder)
            year <-
              folder %>% str_remove_all("contracts|assistance|_") %>% as.numeric()
            files <- list.files()
            data <-
              files %>%
              map_dfr(function(x) {
                df <- x %>%
                  fread(showProgress = FALSE) %>%
                  as_tibble() %>%
                  mutate_if(is.character, list(function(x) {
                    if_else(x == "", NA_character_, x)
                  }))
                if (df %>% hasName("type_of_idc")) {
                  df <- df %>%
                    rename(typeIDC = type_of_idc)

                }

                if (df %>% hasName("usaspending_permalink")) {
                  df <- df %>%
                    rename(urlUSASpending = usaspending_permalink)
                }

                char_cols <-
                  df %>% select(
                    matches(
                      "codeCity|zipcode|idAgency|idOffice|idCFDA|slug|code|telephone|fax|type|cageVendor"
                    )
                  ) %>% names()
                num_cols <-
                  df %>% select(matches("amount")) %>% names()

                df <- df %>% mutate_at(char_cols, as.character)

                df <- df %>% mutate_at(num_cols, as.numeric)

                df
              })

            final_data <-
              glue("{final_path}/{type}/{year}.tsv.gz")


            if (data %>% hasName("amountObligation")) {
              amt <-
                data %>% pull(amountObligation) %>% sum(na.rm = T) %>% currency(digits = 0)
              glue(
                "{nrow(data) %>% comma(digits = 0)} {type} transactions in {year} totaling {amt}"
              ) %>% message()
            }
            setwd("~")
            to_logical_cols <-
              data %>% select(matches("^is|^has")) %>% select_if(is.character) %>% names()

            if (length(to_logical_cols) > 0) {
              data <- data %>%
                mutate_at(to_logical_cols,
                  list(function(x) {
                    case_when(x %in% c("f", "F")  ~ F,
                      TRUE ~ T)
                  }))
            }

            data <-
              data %>% .add_budget_year()

            data <-
              data %>% add_department_codes()

            data <-
              data %>% .add_dod_type()

            data <-
              data %>% .fix_foreign_reference()

            data <-
              data %>% .add_analysis_contract()

            data <-
              data %>% .add_original_dates()

            data <-
              data %>% .allocate_federal_accounts()

            data <-
              data %>% .guess_duns_type()

            data <-
              data %>% .add_agency_cgacs()
            data %>% write_csv(final_data)
            rm(data)
            setwd(data_path)
            return(invisible())
          })
      })
  }

#' Fix USA Spending Downloads
#'
#' @param data_path
#' @param final_path
#' @param clean_entity_column
#'
#' @return
#' @export
#'
#' @examples
fix_usa_spending_downloads <-
  function(data_path = "Desktop/usa_contracts/",
    final_path =  "Desktop/data/usa_spending",
    clean_entity_column = F) {
    oldwd <- getwd()
    setwd("~")
    if (oldwd != getwd()) {
      setwd(data_path)
    }
    types <-
      list.files() %>%
      str_to_lower() %>%
      str_remove_all("\\_") %>%
      str_remove_all("[0-9]") %>%
      unique()

    types %>%
      walk(function(type) {
        search_type <-
          case_when(
            type %>% str_detect("contract") ~ "contract",
            type %>% str_detect("assistance") ~ "assistance"
          )

        .fix_dls(folder = data_path,
          type = search_type,
          clean_entity_column = clean_entity_column)
      })

    .parse_dl_files(data_path = data_path,
                    final_path = final_path)
  }

#' Parse USA Spending Bulk URLs
#'
#' Downloads and parses vector of
#' USA spending bulk urls.
#'
#' @param urls vector of USA Spending monthly zip files
#' @param clean_entity_column if \code{TRUE} cleans entity columns
#' @param return_message  if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
parse_usa_spending_bulk_urls <-
  function(urls,
    clean_entity_column = F,
    return_message = T) {
    .dl_usa_spending_bulk_safe <-
      possibly(.dl_usa_spending_bulk, tibble())

    data <-
      urls %>%
      map_dfr(function(url) {
        .dl_usa_spending_bulk_safe(
          url = url,
          clean_entity_column = clean_entity_column,
          return_message = return_message
        )
      })

    data

  }


#' Bulk US Spending Download
#'
#' Downloads bulk monthly files
#' for specified spending types and
#' years
#'
#' @param years vector of years from 2008 to current year
#' @param types type of spending \itemize{
#' \item contract - contract spending that gets recorded in FPDS
#' \item assistance - non FPDS financial assistance spending
#' }
#' @param agencies specific agency if applicable, defaults to all which includes
#' all agencies
#' @param api_version 2
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
bulk_usa_spending <-
  function(years = 2001,
    clean_entity_column = F,
    types = "contracts",
    agencies = NA,
    api_version = 2,
    return_message = T,
    ...) {
    df_urls <-
      bulk_usa_spending_urls(
        years = years,
        types = types,
        agencies = agencies,
        api_version = api_version,
        return_message = return_message
      )

    data <-
      parse_usa_spending_bulk_urls(urls = df_urls$urlBulkZip,
        clean_entity_column = clean_entity_column)

    data

  }

#' Treasury Account Table Dictionary
#'
#' Dictionary of Treasury account
#' symbols for budget search
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_tas()
dictionary_tas <-
  function() {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/dev/usaspending_api/data/tas_list.csv" %>%
      fread(verbose = F, showProgress = FALSE) %>%
      as_tibble()

    data <-
      .munge_fpds_names(data = data)

    data <- data %>%
      rename(dateEstablished = datetimeEstablished,
        dateEnd = datetimeEnd) %>%
      mutate_at(c("dateEstablished",
        "dateEnd"),
        list(function(x) {
          x %>% substr(1, 10) %>% ymd()
        })) %>%
      .munge_data()

    data
  }


# data_dictioanry ---------------------------------------------------------

#' USA Spending Schema
#'
#' Includes all the relevent fields and schema
#' descriptions for USASpending.gov
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
#' dictionary_usa_spending_schema()
dictionary_usa_spending_schema <-
  function() {
    data <-
      "https://files.usaspending.gov/docs/Data_Dictionary_Crosswalk.xlsx" %>%
      download_excel_file()

    df_headers <-
      tibble(
        col1 = data %>% dplyr::slice(1) %>% as.character(),
        col2 =  data %>% dplyr::slice(2) %>% as.character()
      )

    actual_names <- df_headers %>%
      mutate(
        col1 = case_when(
          col1 == "Schema Data Label & Description" ~ "schema",
          col1 == "USA Spending Downloads" ~ "usaspending",
          col1 == "Legacy USA Spending" ~ "usaspendingLegacy",
          TRUE ~ col1
        )
      ) %>%
      fill(col1) %>%
      mutate(col2 = col2 %>% str_remove_all("\\ ")) %>%
      unite(name, col1, col2, sep = "") %>%
      pull(name)

    data <- data %>% dplyr::slice(3:nrow(data)) %>%
      mutate_all(list(function(x) {
        ifelse(x == "N/A", NA, x)
      })) %>%
      setNames(actual_names)

    data
  }

# dictionaries ------------------------------------------------------------

# https://api.usaspending.gov/api/v1/references/glossary/?limit=500000

# https://github.com/fedspendingtransparency/usaspending-api/tree/dev/usaspending_api/data

#' U.S. Government Term Dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_government_terms()
dictionary_government_terms <-
  function() {
    json_data <-
      "https://api.usaspending.gov/api/v2/references/data_dictionary/" %>%
      jsonlite::fromJSON(simplifyDataFrame = T)

    headers <- json_data$document$headers$raw
    data <-
      json_data$document$rows %>% as_tibble() %>%
      set_names(headers)

    data
  }




# database_dumps ----------------------------------------------------------

# https://files.usaspending.gov/database_download/


# parse -------------------------------------------------------------------

.parse_spending_url <-
  function(url = "https://api.usaspending.gov/api/v1/awards/27217896/") {
    json_data <-
      url %>% jsonlite::fromJSON(simplifyDataFrame = T, flatten = T)
    json_data %>% flatten_df()
  }

.parse_agency_url <- function() {

}



#' USA Spending term dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_us_spending()
dictionary_us_spending <-
  function() {
    data <-
      "https://github.com/fedspendingtransparency/usaspending-api/blob/master/usaspending_api/data/USAspendingGlossary.xlsx?raw=true" %>%
      rio::import() %>%
      as_tibble() %>%
      set_names(
        c(
          "nameTerm",
          "descriptionTerm",
          "nameSchema",
          "descriptionSchema",
          "resourcesMore",
          "markdownResources"
        )
      )
    data
  }

function() {
  data <-
    "https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/master/usaspending_api/data/tas_list.csv" %>% read_csv()
}

#' U.S. Federal Budgets
#'
#' United States federal budgets by category since 1976 with
#' projections going out 4 years.
#'
#'
#' @return
#' @export
#'
#' @examples
#' us_budgets()
us_budgets <-
  memoise::memoise(function() {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/master/usaspending_api/data/budget_authority/budget_authority.csv" %>%
      fread(verbose = F, showProgress = FALSE) %>%
      as_tibble()

    data <-
      data %>%
      .munge_fpds_names()

    gather_cols <-
      data %>% select(-one_of(data %>% select(matches("fy")) %>% names())) %>% names()

    data <-
      data %>%
      gather(period, amount, -gather_cols)

    data <-
      data %>%
      rename(isOnBudget = typeBudget) %>%
      mutate(
        isOnBudget = isOnBudget == "On-budget",
        amount = formattable::currency(amount, digits = 0) * 1000
      ) %>%
      .munge_data(clean_address = F)

    current_year <- year(Sys.Date())
    data <-
      data %>%
      mutate(
        yearBudget = readr::parse_number(period),
        isCurrentYear = yearBudget == current_year,
        isProjection = yearBudget > current_year,
        isActualBudget = yearBudget < current_year,
        dateBudget = ifelse(
          !is.na(yearBudget),
          glue("{yearBudget}-09-30") %>% as.character(),
          NA
        )
      ) %>%
      suppressWarnings() %>%
      mutate(dateBudget = ymd(dateBudget)) %>%
      select(yearBudget,
        isProjection,
        isActualBudget,
        isCurrentYear,
        dateBudget,
        everything())


    data
  })

#' Dictionary of Government Agencies and Offices
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_government_agencies()
dictionary_government_agencies <-
  function() {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/c83b9cdb8978eb3445b6771f3fec9277c00de0b9/usaspending_api/data/authoritative_agency_list.csv" %>%
      vroom::vroom()

    data <-
      data %>%
      .munge_fpds_names() %>%
      rename(slugAgency1 = idAgency) %>%
      .munge_data() %>%
      rename(idAgency = slugAgency1) %>%
      select(-matches("Admin|ADMIN"))

    data <-
      data %>%
      select(idAgency, nameAgency, nameOffice, everything())

    data <- data %>%
      mutate(icon = ifelse(
        is.na(icon),
        NA,
        glue("https://www.usaspending.gov/graphics/agency/{icon}.jpg") %>% as.character()
      )) %>%
      mutate_at(c("idAgency"),
                as.numeric) %>%
      select(-matches("REGISTERED"))

    data
  }


#' 2019 Product Service Code Dictionary
#'
#' Returns 2019 product service code dictionary
#'
#' @param only_active if \code{TRUE} returns only active PSCs
#' @param url
#' @param snake_names
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' dictionary_psc_active()

dictionary_psc_active <-
  memoise::memoise(function(url = "https://www.fpds.gov/downloads/PSC_Data_March_2020.xls", only_active = T, snake_names = F) {
    options(warn = -1)
    tmp <-
      tempfile()
    curl_download(url, tmp)

    data <-
      tmp %>% read_excel(sheet = 2)

     unlink(tmp)

    data <-
      data %>%
      setNames(
        c(
          "codeProductService",
          "nameProductService",
          "dateStart",
          "dateEnd",
          "nameProductServiceFull",
          "detailsProductServiceExcludes",
          "detailsProductServiceNotes",
          "detailsProductServiceIncludes"
        )
      )


    data <-
      data %>%
      mutate_at(c("dateStart", "dateEnd"),
        list(function(x) {
          as.Date(x)
        }))

    data <- data %>%
      mutate_if(is.character, list(function(x){
        x %>% str_replace_all("—", ", ")
      }))


    data <- data %>%
      .munge_data(clean_address = F, unformat = T)

    data <- data %>%
      mutate_if(is.character,
        list(function(x) {
          str_remove_all(x, "\\.")
        }))

    data <-
      data %>%
      mutate(
        isParentPSC = nchar(codeProductService) %in% c(1:2),
        isActivePSC = is.na(dateEnd)
      ) %>%
      mutate(
        dateStart = dateStart %>% as.character() %>% str_replace_all("2105", "2015") %>% ymd(),
        typePSC = case_when(
          codeProductService %>% substr(1, 1) %in% c("1", "2", "3", "4", "5", "6", "7", "8", "9") ~ "PRODUCT",
          TRUE ~ "SERVICE"
        ),
        idSolicitationGroup =
          case_when(
            typePSC == "SERVICE" ~ substr(codeProductService, 1, 1),
            TRUE ~ substr(codeProductService, 1, 2)
          )
      ) %>%
      select(isParentPSC,
        isActivePSC,
        typePSC,
        idSolicitationGroup,
        everything())

    df_groups <-
      data %>% distinct(idSolicitationGroup) %>%
      left_join(
        data %>% select(
          idSolicitationGroup = codeProductService,
          nameSolicitationGroup = nameProductService,
          dateStart
        ),
        by = "idSolicitationGroup"
      ) %>% distinct() %>%
      group_by(idSolicitationGroup) %>%
      filter(dateStart == max(dateStart)) %>%
      ungroup() %>%
      select(-dateStart)

    data <- data %>%
      left_join(df_groups,
        by = "idSolicitationGroup")

    data <-
      data %>%
      select(
        isParentPSC,
        isActivePSC,
        typePSC,
        idSolicitationGroup,
        nameSolicitationGroup,
        everything()
      ) %>%
      distinct()

    data <-
      data %>%
      mutate(
        detailsProductServiceIncludes = case_when(
          detailsProductServiceIncludes == "" ~ NA_character_,
          TRUE ~ detailsProductServiceIncludes
        ),
        countItemsIncluded = str_count(detailsProductServiceIncludes, pattern = ";")  %>%  coalesce(0L),
        countItemsExcluded = str_count(detailsProductServiceExcludes, pattern = ";") %>% coalesce(0L),
        countDaysActive = case_when(
          !is.na(dateEnd) ~ (dateEnd - dateStart) %>% as.integer(),
          TRUE ~ (Sys.Date() - dateStart) %>% as.integer()
        )
      )

    data <- data %>%
      separate(
        nameProductService,
        into = c("namePSC", "detailsPSC"),
        sep = " - ",
        extra = "merge",
        fill = "right",
        remove = F
      )

    data <- data %>%
      mutate(groupProductService = case_when(
        nchar(codeProductService) > 2 ~ codeProductService %>% substr(1, 3),
        TRUE ~  str_c(codeProductService %>% substr(1, 2), "0")
      )) %>%
      select(
        isParentPSC,
        isActivePSC,
        typePSC,
        idSolicitationGroup,
        nameSolicitationGroup,
        groupProductService,
        codeProductService,
        everything()
      )

    if (only_active) {
      data <-
        data %>%
        filter(isActivePSC)
    }

    if (snake_names) {
      data <- data %>% janitor::clean_names()
    }

    data
  })

#' Historic Product Service Code Dictionary
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_psc_historic()

dictionary_psc_historic <-
  memoise::memoise(function(snake_names = F) {
    data <-
      "https://gist.githubusercontent.com/abresler/beb09e6957a4f64cb03c5c5778a96247/raw/5b108ec0bd5348b1681d54bbe1486bc66bb8c524/dict_psc.csv" %>%
      read_csv() %>%
      suppressWarnings() %>%
      suppressMessages() %>%
      .munge_data()

    names(data)[1:2] <- c("codeProductService", "nameFullPSC")

    df_groups <-
      data %>%
      select(codeProductService) %>%
      mutate(
        letternumber = codeProductService %>% substr(1, 1),
        isNumber = !is.na(letternumber %>% readr::parse_number())
      ) %>%
      mutate(
        idSolicitationGroup = case_when(
          isNumber ~ codeProductService %>% substr(1, 2),
          TRUE ~ codeProductService %>% substr(1, 1)
        )
      ) %>%
      select(codeProductService, idSolicitationGroup) %>%
      left_join(dictionary_solicitation_groups(), by = "idSolicitationGroup")

    data <- data %>%
      left_join(df_groups, by = "codeProductService") %>%
      distinct()

    data <- data %>%
      mutate(groupProductService = case_when(
        nchar(codeProductService) > 2 ~ codeProductService %>% substr(1, 3),
        TRUE ~  str_c(codeProductService %>% substr(1, 2), "0")
      )) %>%
      select(
        isParentPSC,
        isActivePSC,
        typePSC,
        idSolicitationGroup,
        nameSolicitationGroup,
        groupProductService,
        codeProductService,
        everything()
      )

    if (snake_names) {
      data <- data %>% janitor::clean_names()
    }

    data
  })

# account_symbols ---------------------------------------------------------

# https://datalab.usaspending.gov/assets/analyst-guide-1-2.pdf


function() {
  data <-
    "https://raw.githubusercontent.com/fedspendingtransparency/usaspending-api/dev/usaspending_api/data/program_activity.csv" %>%
    read_csv()
}



# assitance files ---------------------------------------------------------


.parse_usa_assitance <-
  function(url = "https://files.usaspending.gov/agency_submissions/Raw%20Financial%20Assistance%20Files/index.html") {

  }


.parse_usa_data_act_files <-
  function(url = "https://files.usaspending.gov/agency_submissions/Raw%20Quarterly%20DATA%20Act%20Files/index.html") {

  }




# bulk --------------------------------------------------------------------


#' Dictionary of USA Spending Database URLs
#'
#' Returns most recent monthly bulk data base URLs from
#' USA spending
#'
#' @return
#' @export
#'
#' @examples
dictionary_usa_spending_database_urls <-
  memoise::memoise(function() {
    page <-
      "https://files.usaspending.gov/database_download/" %>%
      read_html()


    nameFile <- page %>%
      html_nodes("td:nth-child(1)") %>%
      html_text() %>%
      str_to_upper()

    typeDatabase <-
      page %>%
      html_nodes("td:nth-child(2)") %>%
      html_text() %>%
      str_to_upper()

    urlZIP <-
      page %>% html_nodes("td:nth-child(3) a") %>%
      html_attr("href") %>%
      map_chr(function(x) {
        x %>% substr(2, nchar(x))
      })

    dateData <-
      urlZIP %>% str_split("\\_") %>% map(2) %>% flatten_chr() %>% str_remove_all(".zip") %>% parse_number() %>% ymd()

    sizeGB <-
      page %>%
      html_nodes("td:nth-child(4)") %>%
      html_text() %>%
      parse_number()


    urlZIP <-
      str_c("https://files.usaspending.gov/database_download", urlZIP)

    tibble(dateData, typeDatabase, nameFile, sizeGB, urlZIP)

  })


# budget_authority --------------------------------------------------------

#' Agency Budget Authority
#'
#' Returns information about the most recent
#' budget requests for each of the parent government agencies and
#' departments
#'
#' @return
#' @export
#'
#' @examples
#' usa_spending_agency_budget_authority()
usa_spending_agency_budget_authority <-
  memoise::memoise(function() {
    data <-
      "https://api.usaspending.gov/api/v2/references/toptier_agencies/?sort=percentage_of_total_budget_authority&order=desc" %>%
      fromJSON()

    data <-
      data$results %>% as_tibble()

    data <-
      data %>%
      munge_usa_spending_names()

    data <-
      data %>%
      .munge_data() %>%
      mutate_at(c("yearBudget", "quarterMostRecent"),
        as.numeric)

    data <- data %>%
      mutate(
        amountRemainingVsBudget = amountBudgeted - amountSpent,
        amountRemainingVsObligation = amountObligated - amountSpent,
        amountUnObligated = amountBudgeted - amountObligated,
      )

    data

  })


function() {
  url <- "https://datalab.usaspending.gov/americas-finance-guide/"
  page <- read_html(url)
  slugs <-
    html_nodes(page, ".afg__download--div a") %>% html_attr("href") %>%
    str_split("/") %>%
    map_chr(function(x) {
      x[length(x)]
    })

  urls <-
    glue("https://datalab.usaspending.gov/assets/ffg/data/{slugs}") %>% as.character()

  data <-
    tibble(slugFile = slugs, urls) %>%
    mutate(
      nameFile = case_when(
        slugFile %>% str_detect("revenue") ~ "FEDERAL REVENUE",
        slugFile %>% str_detect("spending") ~ "FEDERAL SPENDING",
        slugFile %>% str_detect("deficit") ~ "FEDERAL DEFICIT",
        slugFile %>% str_detect("debt") ~ "FEDERAL DEBT"
      )
    ) %>%
    select(nameFile, urlCSV = urls)

  data$urlCSV %>%
    map_dfr(function(x) {
      x %>% message()
      df <- fread(x, showProgress = FALSE) %>% as_tibble()

      if (x == "https://datalab.usaspending.gov/assets/ffg/data/overview_federal_deficit.csv") {
        df <- df %>%
          setNames(c(
            "yearBudget",
            "categorySpending",
            "amount",
            "urlSource",
            "nameSource"
          )) %>%
          mutate(urlCSV = x)
        return(df)
      }

      df <- df %>%
        setNames(c("yearBudget", "amount", "urlSource", "nameSource")) %>%
        mutate(urlCSV = x)

      df
    })

  data
}