R/far.R

Defines functions .generate_federal_account_ids tbl_decode_federal_contract_ids decode_federal_contract_ids .decode_id_current dictionary_far_acquisition_codes dictionary_federal_program_activities dictionary_far_names .munge_far_names tbl_fpds_office_ids federal_office_information .join_offices_names_sam .join_offices_names_no_sam

Documented in decode_federal_contract_ids dictionary_far_acquisition_codes dictionary_federal_program_activities federal_office_information tbl_decode_federal_contract_ids tbl_fpds_office_ids

.join_offices_names_no_sam <-
  function(office_ids =  "W81K02",
           try_sam = T) {
    office_ids <-
      office_ids %>%
      discard(list(function(x) {
        is.na(x)
      }))
    data <- dictionary_fpds_contracting_offices()
    base_names <- names(data)
    data <-
      data %>%
      filter(idOffice %in% office_ids)

    if (nrow(data) == 0 && !try_sam) {
      "No matches"
      return(data)
    }

    if (nrow(data) == 0) {
      data <- .join_offices_names_sam(office_ids = office_ids)
    }

    if (nrow(data) == 0) {
      "No matches"
      return(data)
    }



    data
  }

.join_offices_names_sam <-
  function(office_ids =  "W81K02",
           select_columns =
             c(
               "isActive",
               "idCGACAgency",
               "nameAgencyCGAC",
               "isDefenseAccount",
               "idOrganizationLevel",
               "idOrganizationSAMDepartment",
               "idOffice",
               "idOfficeFPDS",
               "locationOrganization",
               "cityStateOrganization",
               "idOrganizationSAM",
               "nameDepartment",
               "slugDepartment",
               "nameAgency",
               "nameCommandMajor",
               "typeOrganization",
               "nameOrganization",
               "nameOrganizationParent",
               "nameCommandSub",
               "idAgencyFPDS",
               "idOrganizationParent",
               "idOrganizationSAMAgency",
               "idOfficeAAC",
               "idAddressSAM",
               "dateOrganizationStart",
               "slugCGAC",
               "isSourceFPDS",
               "codeAgencyOMB",
               "descriptionOffice",
               "addressStreet1Organization",
               "addressStreet2Organization",
               "cityOrganization",
               "stateOrganization",
               "zipcodeOrganization",
               "codeCountryOrganization",
               "hasContractAwards",
               "hasContractFunding",
               "hasFinancialAssistanceFunding",
               "hasFinancialAssistanceAwards"
             )) {
    data <-
      sam_federal_organizations(
        levels = 1:5,
        snake_names = F,
        join_addresses = T
      )
    office_ids <-
      office_ids %>%
      discard(list(function(x) {
        is.na(x)
      }))

    data <-
      data %>%
      filter(idOffice %in% office_ids |
               idOfficeAAC %in% office_ids |
               idOfficeFPDS %in% office_ids) %>%
      .remove_na()

    if (nrow(data) == 0) {
      "No Matches"
      return(invisible())
    }

    data <- data %>% select(one_of(select_columns))

    if (!data %>% hasName("nameOffice") &
        data %>% hasName("nameOrganization")) {
      data <- data %>%
        mutate(nameOffice = nameOrganization)
    }


    data
  }


#' Acquire Federal Office Information
#'
#' @param ids vector of FPDS office IDs
#' @param use_sam if \code{TRUE} uses full SAM heirarchy - takes longer
#' @param select_columns  vector of select columns
#'
#' @return
#' @export
#'
#' @examples
federal_office_information <-
  function(fpds_office_ids = NULL,
           use_sam = F,
           select_columns =
             c(
               "isActive",
               "idCGACAgency",
               "nameAgencyCGAC",
               "isDefenseAccount",
               "idOrganizationLevel",
               "idOrganizationSAMDepartment",
               "idOffice",
               "idOfficeFPDS",
               "locationOrganization",
               "cityStateOrganization",
               "idOrganizationSAM",
               "nameDepartment",
               "slugDepartment",
               "nameAgency",
               "nameCommandMajor",
               "typeOrganization",
               "nameOrganization",
               "nameOrganizationParent",
               "nameCommandSub",
               "idAgencyFPDS",
               "idOrganizationParent",
               "idOrganizationSAMAgency",
               "idOfficeAAC",
               "idAddressSAM",
               "dateOrganizationStart",
               "slugCGAC",
               "isSourceFPDS",
               "codeAgencyOMB",
               "descriptionOffice",
               "addressStreet1Organization",
               "addressStreet2Organization",
               "cityOrganization",
               "stateOrganization",
               "zipcodeOrganization",
               "codeCountryOrganization",
               "hasContractAwards",
               "hasContractFunding",
               "hasFinancialAssistanceFunding",
               "hasFinancialAssistanceAwards"
             )) {
    if (use_sam) {
      data <-
        .join_offices_names_sam(office_ids = fpds_office_ids, select_columns =  select_columns)
    } else {
      data <-
        .join_offices_names_no_sam(office_ids = fpds_office_ids, try_sam = T)
    }

    data
  }

#' Return Federal Office Information from a tibble
#'
#' @param data
#' @param column office id column name
#' @param use_sam if \code{TRUE} uses full SAM heirarchy - takes longer
#' @param select_columns  vector of select columns
#'
#' @return
#' @export
#'
#' @examples
tbl_fpds_office_ids <-
  function(data,
           column = "idOfficeContracting",
           use_sam = F,
           select_columns =
             c(
               "isActive",
               "idCGACAgency",
               "nameAgencyCGAC",
               "isDefenseAccount",
               "idOrganizationLevel",
               "idOrganizationSAMDepartment",
               "idOffice",
               "idOfficeFPDS",
               "locationOrganization",
               "cityStateOrganization",
               "idOrganizationSAM",
               "nameDepartment",
               "slugDepartment",
               "nameAgency",
               "nameCommandMajor",
               "typeOrganization",
               "nameOrganization",
               "nameOrganizationParent",
               "nameCommandSub",
               "idAgencyFPDS",
               "idOrganizationParent",
               "idOrganizationSAMAgency",
               "idOfficeAAC",
               "idAddressSAM",
               "dateOrganizationStart",
               "slugCGAC",
               "isSourceFPDS",
               "codeAgencyOMB",
               "descriptionOffice",
               "addressStreet1Organization",
               "addressStreet2Organization",
               "cityOrganization",
               "stateOrganization",
               "zipcodeOrganization",
               "codeCountryOrganization",
               "hasContractAwards",
               "hasContractFunding",
               "hasFinancialAssistanceFunding",
               "hasFinancialAssistanceAwards"
             )) {
    df_ids <-
      data %>%
      select(one_of(column)) %>%
      distinct()
    ids <- df_ids %>% pull()
    df_offices <-
      federal_office_information(
        fpds_office_ids = ids,
        use_sam = use_sam,
        select_columns = select_columns
      )

    df_offices <-
      df_offices %>%
      rename(UQ(column) := idOffice)

    df_ids <-
      df_ids %>%
      left_join(df_offices, by = column) %>%
      distinct() %>%
      group_by(!!sym(column)) %>%
      slice(1) %>%
      ungroup()

    data <- data %>%
      mutate(id = 1:n()) %>%
      left_join(df_ids, by = column) %>%
      group_by(id) %>%
      slice(1) %>%
      ungroup() %>%
      select(-id)

    data
  }

.munge_far_names <-
  function(data) {
    names_dict <- names(data)

    dict <- dictionary_far_names()
    actual_names <-
      names_dict %>%
      map_chr(function(name) {
        df_row <-
          dict %>% filter(nameFAR == name)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {name}") %>% message()
          return(name)
        }

        df_row$nameActual
      })

    data %>%
      set_names(actual_names)
  }


#' Dictionary of Government-wide Accounting Classification Codes
#'
#' @return
#' @export
#'
#' @examples
dictionary_omb_cgac_accounts <-
  memoise::memoise(function() {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/data-act-broker-backend/d6966408734cf8863e210a1f00a26070f89bb7dc/dataactvalidator/config/example_cgac.csv" %>%
      read_csv() %>%
      set_names("idCGAC", "nameAgency") %>%
      mutate(idCGAC = as.numeric(idCGAC)) %>%
      mutate(
        slugCGAC = case_when(
          nchar(idCGAC) == 1 ~ str_c("00", idCGAC, sep = ""),
          nchar(idCGAC) == 2 ~ str_c("0", idCGAC, sep = ""),
          nchar(idCGAC) == 3 ~ as.character(idCGAC),
          is.na(idCGAC) ~ NA_character_
        )
      ) %>%
      fix_usg_organization_col(org_col = "nameAgency") %>%
      distinct()

    data <-
      data %>%
      mutate(nameAgency = nameAgency %>% str_remove_all("[0-9]|\\(|\\)") %>% str_squish()) %>%
      mutate(
        idCGAC = case_when(nameAgency == "THE LEGISLATIVE BRANCH" ~ 0,
                           TRUE ~ idCGAC),
        slugCGAC = case_when(nameAgency == "THE LEGISLATIVE BRANCH" ~ "000",
                             TRUE ~ slugCGAC)
      ) %>%
      rename(nameAgencyCGAC = nameAgency) %>%
      mutate(isDefenseAccount = nameAgencyCGAC %>% str_detect("DEPARTMENT OF DEFENSE|NAVY|AIR FORCE|ARMY"))

    data
  })

dictionary_far_names <-
  function() {
    tibble(
      nameFAR  = c(
        "FEATURE_ID",
        "FEATURE_NAME",
        "FEATURE_CLASS",
        "CENSUS_CODE",
        "CENSUS_CLASS_CODE",
        "GSA_CODE",
        "OPM_CODE",
        "STATE_NUMERIC",
        "STATE_ALPHA",
        "COUNTY_SEQUENCE",
        "COUNTY_NUMERIC",
        "COUNTY_NAME",
        "PRIMARY_LATITUDE",
        "PRIMARY_LONGITUDE",
        "DATE_CREATED",
        "DATE_EDITED",
        "PA_CODE",
        "FYQ",
        "OMB_BUREAU_TITLE_OPTNL",
        "OMB_ACCOUNT_TITLE_OPTNL",
        "AGENCY_CODE",
        "ALLOCATION_ID",
        "ACCOUNT_CODE",
        "PA_TITLE",
        "DEPARTMENT_ID",
        "DEPARTMENT_NAME",
        "AGENCY_NAME",
        "CONTRACTING_OFFICE_CODE",
        "CONTRACTING_OFFICE_NAME",
        "START_DATE",
        "END_DATE",
        "ADDRESS_CITY",
        "ADDRESS_STATE",
        "ZIP_CODE",
        "COUNTRY_CODE",
        "ACCT_NUM",
        "ATA",
        "AID",
        "BPOA",
        "EPOA",
        "A",
        "MAIN",
        "SUB",
        "GWA_TAS",
        "GWA_TAS NAME",
        "Agency AID",
        "Agency Name",
        "ADMIN_ORG",
        "Admin Org Name",
        "FR Entity Type",
        "FR Entity Description",
        "Financial Indicator Type2",
        "FIN_IND_TYP2 description",
        "Function Code",
        "Function Description",
        "Sub Function Code",
        "Sub Function Description",
        "DT_TM_ESTAB",
        "DT_END",
        "ADDRESS_LINE_1",
        "ADDRESS_LINE_2",
        "ADDRESS_LINE_3"
      ),
      nameActual = c(
        "idFeature",
        "nameFeature",
        "typeFeature",
        "codeCensus",
        "classCensus",
        "codeGSA",
        "idOPM",
        "idState",
        "codeState",
        "idCountySequence",
        "idCounty",
        "nameCounty",
        "latitude",
        "longitude",
        "dateCreated",
        "dateEdited",

        "idCodePA",
        "slugQuarterMostRecent",
        "nameBureauOMB",
        "nameAccountOMB",
        "idCGAC",
        "idAllocationOMB",
        "idAccountOMB",
        "titleAccountOMB",
        "idDepartment",
        "nameDepartment",
        "nameAgency",
        "idOffice",
        "nameOffice",
        "datetimeStart",
        "datetimeEnd",
        "cityOrganization",
        "stateOrganization",
        "zipcodeOrganization",
        "codeCountryOrganization",

        "numberAccount",
        "idAllocationOMB",
        "idAgency",
        "periodAvailableBegining",
        "periodAvailableEnd",
        "typeObligationPeriod",
        "idAccountOMB",
        "idSubAccountOMB",
        "slugAccount",
        "nameAccount",
        "idCGAC",
        "nameAgency",
        "codeBureauOMB",
        "nameBureauOMB",
        "codeFinancialReportingEntity",
        "nameFinancialReportingEntity",
        "idFinancialIndicator",
        "typeFinancialIndicator",
        "codeFunctionBudget",
        "nameFunctionBudget",
        "codeSubFunction",
        "nameSubFunction",
        "datetimeProgramEstablished",
        "datetimeProgramEnded",
        "addressStreet1",
        "addressStreet2",
        "addressStreet3"
      )

    )
  }


#' Tibble of National Location Codes
#'
#' @return
#' @export
#'
#' @examples
dictionary_omb_national_location_codes <-
  memoise::memoise(function() {
    data <-
      fread(
        "https://raw.githubusercontent.com/fedspendingtransparency/data-act-broker-backend/development/dataactvalidator/config/NationalFedCodes.txt",
        showProgress = FALSE
      ) %>%
      as_tibble()

    data <-
      data %>%
      .munge_far_names() %>%
      mutate_at(c("dateCreated",
                  "dateEdited"), mdy) %>%
      .munge_data()

    data
  })


#' Dictionary of all active federal program activities
#'
#' Returns all accounts and parents relating to the OMB
#' budget schema
#'
#' @param remove_quarter if \code{TRUE} removes most recent budget budget quarter column
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_federal_program_activities()
dictionary_federal_program_activities <-
  function(remove_quarter = T) {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/data-act-broker-backend/development/dataactvalidator/config/DATA%20Act%20Program%20Activity%20List%20for%20Treas.csv" %>%
      fread(showProgress = FALSE) %>%
      as_tibble()

    data <-
      data %>%
      .munge_data() %>%
      .munge_far_names()

    data <-
      data %>%
      .generate_federal_account_ids(cgac_column = "idCGAC",
                                    account_column = "idAccountOMB")

    data <- data %>%
      rename(nameAccountOMBOther = nameAccountOMB,
             nameAccountOMB = titleAccountOMB)

    if (remove_quarter) {
      data <-
        data %>%
        select(-slugQuarterMostRecent)
    }

    data <-
      data %>%
      distinct()

    data
  }

#' Active FPDS Contracting Offices
#'
#' Information about active FPDS offices
#'
#' @param join_addresses if `TRUE` builds an address
#' @param snake_names  if `TRUE` returns snake case names
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
dictionary_fpds_contracting_offices <-
  memoise::memoise(function(join_addresses = T,
                            snake_names = F) {
    data <-
      suppressMessages(.download_excel_file(url  = "https://www.fpds.gov/downloads/top_requests/FPDSNG_Contracting_Offices.xls") %>%
      clean_names())

    col_names <- data %>% slice(5) %>% as.character()

    data <-
      data %>% slice(6:nrow(data)) %>%
      setNames(col_names)

    data <-
      data %>% .munge_far_names() %>%
      rename(idAgency = idCGAC) %>%
      mutate(idCGAC = idDepartment %>% substr(1, 2) %>% as.numeric()) %>%
      .remove_na() %>%
      select(-one_of("datetimeEnd"))

    data <- data %>%
      mutate(datetimeStart = as.numeric(datetimeStart) %>%  excel_numeric_to_date()) %>%
      rename(dateOrganizationStart = datetimeStart) %>%
      select(idCGAC, everything()) %>%
      mutate_if(is.character, list(str_squish))

    data <- data %>%
      fix_usg_organization_col(org_col = "nameDepartment") %>%
      fix_usg_organization_col(org_col = "nameAgency") %>%
      fix_usg_organization_col(org_col = "nameOffice")

    data <- data %>%
      bind_cols(
        data %>%
          select(cityOrganization:codeCountryOrganization) %>%
          mutate_all(list(function(x) {
            x %>% coalesce("")
          })) %>%
          unite(cityState, cityOrganization, stateOrganization, sep = ", ") %>%
          unite(cityState, cityState, zipcodeOrganization, sep = " ") %>%
          unite(
            locationOrganization,
            cityState,
            codeCountryOrganization,
            sep = ", "
          )
      )

    data <- data %>%
      rename(idCGACDepartment = idCGAC) %>%
      mutate(idCGACAgency = idAgency %>% substr(1, 2) %>% as.numeric()) %>%
      select(idCGACDepartment,
             nameDepartment,
             idCGACAgency,
             everything())

    tbl_cgac <- dictionary_omb_cgac_accounts()

    data <- data %>%
      left_join(tbl_cgac %>% select(idCGACAgency = idCGAC,
                                    isDefenseAccount, nameAgencyCGAC),
                by = "idCGACAgency") %>%
      select(idCGACDepartment,
             nameDepartment,
             idCGACAgency,
             nameAgencyCGAC,
             everything())

    names(data) <- names(data) %>% str_replace_all("Organization", "OfficeContracting")

    if (join_addresses) {
      data <-
        data %>%
        build_address()
    }

    if (snake_names) {
      data <- data %>% clean_names()
    }

    data
  })

#' End Dated FPDS Contracting Offices
#'
#' Information about historic FPDS offices
#'
#' @param join_addresses if `TRUE` builds an address
#' @param snake_names  if `TRUE` returns snake case names
#'
#' @return
#' @export
#'
#' @examples
dictionary_fpds_end_dated_contracting_offices <-
  memoise::memoise(function(join_addresses = T,
           snake_names = F) {
    data <-
      .download_excel_file(url  = "https://www.fpds.gov/downloads/top_requests/FPDSNG_End_Dated_Civilian_Offices.xls") %>%
      clean_names()

    col_names <- data %>% slice(1) %>% as.character()

    data <-
      data %>% slice(2:nrow(data)) %>%
      setNames(col_names)

    data <-
      data %>% .munge_far_names() %>%
      rename(idAgency = idCGAC) %>%
      mutate(idCGAC = idDepartment %>% substr(1, 2) %>% as.numeric()) %>%
      .remove_na()

    data <- data %>%
      mutate(
        datetimeStart = as.numeric(datetimeStart) %>%  excel_numeric_to_date(),
        datetimeEnd = as.numeric(datetimeEnd) %>% excel_numeric_to_date()
      ) %>%
      rename(dateOrganizationStart = datetimeStart,
             dateOrganizationEnd = datetimeEnd) %>%
      select(idCGAC, everything()) %>%
      mutate_if(is.character, list(str_squish))

    data <- data %>%
      fix_usg_organization_col(org_col = "nameDepartment") %>%
      fix_usg_organization_col(org_col = "nameAgency") %>%
      fix_usg_organization_col(org_col = "nameOffice")

    data <- data %>%
      bind_cols(
        data %>%
          select(cityOrganization:codeCountryOrganization) %>%
          mutate_all(list(function(x) {
            x %>% coalesce("")
          })) %>%
          unite(cityState, cityOrganization, stateOrganization, sep = ", ") %>%
          unite(cityState, cityState, zipcodeOrganization, sep = " ") %>%
          unite(
            locationOrganization,
            cityState,
            codeCountryOrganization,
            sep = ", "
          )
      )

    data <- data %>%
      rename(idCGACDepartment = idCGAC) %>%
      mutate(idCGACAgency = idAgency %>% substr(1, 2) %>% as.numeric()) %>%
      select(idCGACDepartment,
             nameDepartment,
             idCGACAgency,
             everything())

    tbl_cgac <- dictionary_omb_cgac_accounts()

    data <- data %>%
      left_join(tbl_cgac %>% select(idCGACAgency = idCGAC,
                                    isDefenseAccount, nameAgencyCGAC),
                by = "idCGACAgency") %>%
      select(idCGACDepartment,
             nameDepartment,
             idCGACAgency,
             nameAgencyCGAC,
             everything())

    names(data) <- names(data) %>% str_replace_all("Organization", "OfficeContracting")


    if (join_addresses) {
      data <-
        data %>%
        build_address()
    }

    if (snake_names) {
      data <- data %>% clean_names()
    }

    data
  })

#' Treasury Account Symbol Dictionary
#'
#' Returns account information for all treasury
#' accounts
#'
#' @return
#' @export
#'
#' @examples
#' dictionary_treasury_account_symbols()
dictionary_treasury_account_symbols <-
  memoise::memoise(function(include_groups = F,
                            snake_names = F) {
    data <-
      "https://raw.githubusercontent.com/fedspendingtransparency/data-act-broker-backend/development/dataactvalidator/config/example_cars_tas.csv" %>%
      fread(showProgress = FALSE) %>%
      as_tibble()

    data <-
      data %>%
      .munge_far_names()

    data <- data %>%
      mutate_at(data %>% select(matches("datetime")) %>% names(),
                mdy_hms)

    data <- data %>%
      .generate_federal_account_ids(cgac_column = "idCGAC",
                                    account_column = "idAccountOMB") %>%
      mutate(isActive = is.na(datetimeProgramEnded)) %>%
      select(
        isActive,
        idFederalAccount,
        nameBureauOMB,
        nameAgency,
        nameAccount,
        nameFunctionBudget,
        nameSubFunction,
        everything()
      )

    if (include_groups) {
      data <-
        data %>%
        left_join(dictionary_omb_cgac_accounts(), by = "idCGAC") %>%
        distinct()

      data <-
        data %>%
        rename(nameAccountOMB  = nameAccount,
               nameSubFunctionOMB = nameSubFunction) %>%
        .add_omb_account_group() %>%
        munge_data(unformat = T)

      data <- data %>% add_dod_omb_group()
    }

    data <-
      data %>%
      .munge_data(snake_names = snake_names)

    data

  })


## https://www.acquisition.gov/sites/default/files/archives/pdf/FAR_46.pdf
## # page 158


#' FAR PIID Codes Dictionary
#'
#' Returns instrument IDs to decode FAR codes
#' from PIIDs
#'
#' @return
#' @export
#'
#' @examples
dictionary_far_acquisition_codes <-
  function() {
    tibble::tibble(
      codeFARAcquisition = LETTERS,
      descriptionFAR = c(
        "Blanket purchase agreements",
        "invitations for bids",
        "Contracts of all types except indefinite-delivery contracts",
        "Indefinite-delivery contracts (including Federal Supply Schedules, Governmentwide acquisition contracts (GWACs), and multi-agency contracts",
        "Reserved for future Federal Governmentwide use",
        "Task orders, delivery orders or calls under– Indefinite-delivery contracts (including Federal Supply Schedules, Governmentwide acquisition contracts (GWACs), and multi-agency contracts); Blanket purchase agreements; or Basic ordering agreements",
        "Basic ordering agreements",
        "Agreements, including basic agreements and loan agreements, but excluding blanket purchase agreements, basic ordering agreements, and leases. Do not use this code for contracts or agreements with provisions for orders or calls",
        "Do not use this letter",
        "Reserved for future Federal Governmentwide use",
        "Reserved for departmental or agency use",
        "Lease agreements",
        "Reserved for departmental or agency use",
        "Reserved for departmental or agency use",
        "Do not use this letter",
        "Purchase orders",
        "Requests for quotations",
        "Requests for proposals",
        "Reserved for departmental or agency use",
        "Reserved for departmental or agency use",
        "Requests for quotations, numbering capacity of Q is exhausted",
        "Purchase orders, numbering capacity of P is exhausted",
        "Reserved for future Federal Governmentwide use",
        "Reserved for future Federal Governmentwide use",
        "Imprest fund",
        "Reserved for future Federal Governmentwide use"
      ) %>% str_to_upper()
    )
  }


.decode_id_current <-
  function(x = "W81K0220Q0055",
           id_name = "idContract",
           type = "Award",
           keywords = NULL,
           numeric_threshold = 1,
           remove_start_characters = NULL,
           office_id_chars = c(1, 6),
           budget_year_chars = c(7, 8),
           contract_letter_char = 9) {
    id <-
      x %>% str_remove_all("\\-|\\(|\\)|\\_")

    if (length(remove_start_characters) > 0) {

    }

    x_chars <- nchar(x)
    num_ratio <- x %>% str_count("[0-10]") / x_chars

    if (num_ratio >= numeric_threshold) {
      glue("{idName} exceeded numeric threshold of {numeric_thresold}") %>% message()
    }

    office_id <-
      id %>% substr(office_id_chars[[1]], office_id_chars[[2]])
    budget_year_slug <-
      id %>%
      substr(budget_year_chars[[1]], budget_year_chars[[2]])

    year_budget <-
      glue("20{budget_year_slug}") %>% as.numeric()

    contract_char_id <-
      id %>%
      substr(contract_letter_char[[1]], contract_letter_char[[1]])

    piid <-
      id %>% substr(contract_letter_char + 1, nchar(id))
    clean_id <-
      id_name %>% str_c("Clean")

    office_type <- str_c("idOffice", type)

    data <-
      tibble(!!sym(id_name) := x, !!sym(clean_id) := id) %>%
      mutate(
        !!sym(office_type) := office_id,
        yearBudgetFiscal = year_budget,
        codeFARAcquisition = contract_char_id,
        numberAction = piid
      )

    data

  }



#' Decode contract IDs
#'
#' @param x
#' @param id_name
#' @param type
#' @param keywords
#' @param numeric_threshold
#' @param remove_start_characters
#' @param office_id_chars
#' @param budget_year_chars
#' @param contract_letter_char
#'
#' @return
#' @export
#'
#' @examples
decode_federal_contract_ids <-
  function(contract_ids = "W81K0220Q0055",
           id_name = "idContractAnalysis",
           type = "Award",
           keywords = NULL,
           numeric_threshold = 1,
           remove_start_characters = NULL,
           office_id_chars = c(1, 6),
           budget_year_chars = c(7, 8),
           contract_letter_char = 9,
           join_office_names = T,
           use_sam = F,
           select_columns =
             c(
               "isActive",
               "idCGACAgency",
               "nameAgencyCGAC",
               "isDefenseAccount",
               "idOrganizationLevel",
               "idOrganizationSAMDepartment",
               "idOffice",
               "idOfficeFPDS",
               "locationOrganization",
               "cityStateOrganization",
               "idOrganizationSAM",
               "nameDepartment",
               "slugDepartment",
               "nameAgency",
               "nameCommandMajor",
               "typeOrganization",
               "nameOrganization",
               "nameOrganizationParent",
               "nameCommandSub",
               "idAgencyFPDS",
               "idOrganizationParent",
               "idOrganizationSAMAgency",
               "idOfficeAAC",
               "idAddressSAM",
               "dateOrganizationStart",
               "slugCGAC",
               "isSourceFPDS",
               "codeAgencyOMB",
               "descriptionOffice",
               "addressStreet1Organization",
               "addressStreet2Organization",
               "cityOrganization",
               "stateOrganization",
               "zipcodeOrganization",
               "codeCountryOrganization",
               "hasContractAwards",
               "hasContractFunding",
               "hasFinancialAssistanceFunding",
               "hasFinancialAssistanceAwards"
             )
           ) {
    if (length(contract_ids) == 0) {
      "Enter vector of ids" %>% message()
      return(invisible())
    }

    .decode_id_current_safe <-
      possibly(.decode_id_current, tibble())

    data <-
      contract_ids %>%
      map_dfr(function(x) {
        .decode_id_current_safe(
          x = x,
          id_name = id_name,
          type = type,
          keywords = keywords,
          numeric_threshold = numeric_threshold,
          remove_start_characters = remove_start_characters,
          office_id_chars = office_id_chars,
          budget_year_chars = budget_year_chars,
          contract_letter_char = contract_letter_char
        )
      }) %>%
      select(-matches("Clean"))

    office_name <- data %>% select(matches("idOffice")) %>% names()
    offices <- data %>% select(one_of(office_name)) %>% pull()
    if (join_office_names) {
      "Joining office information" %>% message()
      type_slug <-
        str_c(type %>% substr(1, 1) %>% str_to_upper()
              ,
              type %>%  substr(2, nchar(type)))
      office_column_name <-
        glue("idOffice{type_slug}") %>% as.character()
      df_offices <-
        tbl_fpds_office_ids(
        data = data,
        column = office_column_name,
        select_columns = select_columns,
        use_sam = use_sam
      )


      if (nrow(df_offices) == 0) {
        return(data)
      }



      return(df_offices)
    }

    data
  }

#' Decode Federal Contract IDs from a tibble
#'
#' @param data
#' @param id_column
#' @param type
#' @param keywords
#' @param numeric_threshold
#' @param remove_start_characters
#' @param office_id_chars
#' @param budget_year_chars
#' @param contract_letter_char
#' @param join_office_names
#' @param use_sam
#' @param select_columns
#'
#' @return
#' @export
#'
#' @examples
tbl_decode_federal_contract_ids <-
  function(data,
           id_column = "idContractAnalysis",
           type = "Award",
           keywords = NULL,
           numeric_threshold = 1,
           remove_start_characters = NULL,
           office_id_chars = c(1, 6),
           budget_year_chars = c(7, 8),
           contract_letter_char = 9,
           join_office_names = T,
           use_sam = F,
           select_columns =
             c(
               "isActive",
               "idCGACAgency",
               "nameAgencyCGAC",
               "isDefenseAccount",
               "idOrganizationLevel",
               "idOrganizationSAMDepartment",
               "idOffice",
               "idOfficeFPDS",
               "locationOrganization",
               "cityStateOrganization",
               "idOrganizationSAM",
               "nameDepartment",
               "slugDepartment",
               "nameAgency",
               "nameCommandMajor",
               "typeOrganization",
               "nameOrganization",
               "nameOrganizationParent",
               "nameCommandSub",
               "idAgencyFPDS",
               "idOrganizationParent",
               "idOrganizationSAMAgency",
               "idOfficeAAC",
               "idAddressSAM",
               "dateOrganizationStart",
               "slugCGAC",
               "isSourceFPDS",
               "codeAgencyOMB",
               "descriptionOffice",
               "addressStreet1Organization",
               "addressStreet2Organization",
               "cityOrganization",
               "stateOrganization",
               "zipcodeOrganization",
               "codeCountryOrganization",
               "hasContractAwards",
               "hasContractFunding",
               "hasFinancialAssistanceFunding",
               "hasFinancialAssistanceAwards"
             )
  ) {
    df_ids <-
      data %>%
      select(one_of(id_column)) %>%
      distinct() %>%
      filter(!is.na(!!sym(id_column)))
    ids <-
      df_ids %>% pull()

    df_ids <-
      decode_federal_contract_ids(
        contract_ids = ids,
        id_name = id_column,
        type = type,
        keywords = keywords,
        numeric_threshold = numeric_threshold,
        remove_start_characters = remove_start_characters,
        office_id_chars = office_id_chars,
        budget_year_chars = budget_year_chars,
        contract_letter_char = contract_letter_char,
        join_office_names = join_office_names,
        use_sam = use_sam,
        select_columns = select_columns
      )

    if (nrow(df_ids) == 0) {
      return(data)
    }

    remove_cols <- names(df_ids)[names(df_ids) %in% names(data)] %>%
      discard(function(x){
        x == id_column
      })

    if (length(id_column) > 0) {
      data <-
        data %>%
        select(-one_of(remove_cols)) %>%
        left_join(df_ids, by = id_column)
    } else {
      data <-
        data %>%
        left_join(df_ids, by = id_column)
    }
   data
  }


# accounts ----------------------------------------------------------------


.generate_federal_account_ids <-
  function(data,
           cgac_column = "idCGAC",
           account_column = "codeAccount",
           federal_account_column = "idFederalAccount") {
    if (!data %>% hasName(cgac_column) &
        !data %>% hasName(account_column)) {
      "No appropriate codes"
    }

    df_ids <- data %>%
      select(one_of(cgac_column, account_column)) %>%
      filter(!is.na((!!sym(cgac_column)))) %>%
      mutate(cgac = as.numeric(!!sym(cgac_column)),
             account = as.numeric(!!sym(account_column)))

    df_ids <-
      df_ids %>%
      mutate(slugCGAC = case_when(
        nchar(cgac) == 1 ~ str_c("00", cgac, sep = ""),
        nchar(cgac) == 2 ~ str_c("0", cgac, sep = ""),
        nchar(cgac) == 3 ~ as.character(cgac)
      )) %>%
      mutate(
        slugAccount = case_when(
          nchar(account) == 1 ~ str_c("000", account, sep = ""),
          nchar(account) == 2 ~ str_c("00", account, sep = ""),
          nchar(account) == 3 ~ str_c("0", account, sep = ""),
          nchar(account) == 4 ~ str_c(account, sep = ""),
          nchar(account) > 4 ~ account %>% substr(1, 4) %>% as.character()
        )
      ) %>%
      unite(!!sym(federal_account_column), slugCGAC, slugAccount, sep = "-") %>%
      select(one_of(federal_account_column), everything()) %>%
      select(-c(cgac, account))

    data <-
      data %>%
      left_join(df_ids, by = c(cgac_column, account_column)) %>%
      select(one_of(federal_account_column), everything()) %>%
      distinct()

    data

  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.