govtrackR: Federal government tracking tools

Documented in us_government_grant_keywords

.munge_grant_names <-
  function(data) {
    dict_names <- .dictionary_grant_names()

    grant_names <-
      names(data)

    actual_names <-
      grant_names %>%
      map_chr(function(name) {
        df_row <-
          dict_names %>% filter(nameGrant == name)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {name}") %>% message()
          return(name)
        }

        df_row$nameActual
      })

    data %>%
      set_names(actual_names)
  }

.dictionary_grant_names <-
  function() {
    tibble(
      nameGrant = c(
        "OPPORTUNITY NUMBER",
        "OPPORTUNITY TITLE",
        "AGENCY CODE",
        "AGENCY NAME",
        "ESTIMATED FUNDING",
        "EXPECTED NUMBER OF AWARDS",
        "GRANTOR CONTACT",
        "AGENCY CONTACT PHONE",
        "AGENCY CONTACT EMAIL",
        "ESTIMATED POST DATE",
        "ESTIMATED APPLICATION DUE DATE",
        "POSTED DATE",
        "CLOSE DATE",
        "LAST UPDATED DATE/TIME",
        "VERSION",
        "referenceProgramElement",
        "id",
        "revision",
        "opportunityNumber",
        "opportunityTitle",
        "owningAgencyCode",
        "listed",
        "publisherUid",
        "flag2006",
        "originalDueDate",
        "originalDueDateDesc",
        "synPostDateInPast",
        "docType",
        "forecastHistCount",
        "synopsisHistCount",
        "assistCompatible",
        "assistURL",
        "draftMode",
        "slugKey",
        "alternativeNames",
        "archived",
        "description",
        "fiscalYear",
        "fiscalYearLatest",
        "latest",
        "modifiedDate",
        "objective",
        "organizationId",
        "parentProgramId",
        "programNumber",
        "publishedDate",
        "submittedDate",
        "title",
        "website",
        "city",
        "contactId",
        "country",
        "email",
        "fax",
        "flag",
        "fullName",
        "phone",
        "state",
        "streetAddress",
        "zip",
        "assistanceType",
        "code",
        "isApplicable",
        "isFundedCurrentFY",
        "isRecoveryAct",
        "obligationId",
        "appeal.description",
        "appeal.interval",
        "applicationProcedure.description",
        "applicationProcedure.isApplicable",
        "approval.description",
        "approval.interval",
        "awardProcedure.description",
        "deadlines.description",
        "deadlines.flag",
        "deadlines.list.description",
        "deadlines.list.end",
        "deadlines.list.start",
        "preApplicationCoordination.description",
        "preApplicationCoordination.environmentalImpact.reports.isSelected",
        "preApplicationCoordination.environmentalImpact.reports.reportCode",
        "renewal.description",
        "renewal.interval",
        "selectionCriteria.description",
        "selectionCriteria.isApplicable",
        "chapterMatching",
        "descriptionAudit",
        "descriptionCFR200",
        "descriptionDocuments",
        "descriptionMatching",
        "descriptionRecords",
        "formulaMatching",
        "isApplicableAudit",
        "isApplicableDocuments",
        "matchingMatching",
        "moeMatching",
        "partMatching",
        "publicLawMatching",
        "subPartMatching",
        "titleMatching",
        "subpartB",
        "subpartC",
        "subpartD",
        "subpartE",
        "subpartF",
        "matching.description",
        "matching.percent",
        "matching.requirementFlag",
        "moe.description",
        "cash",
        "expenditure",
        "performanceMonitoring",
        "program",
        "progress",
        "awardedDescriptionlimitation",
        "awardedlimitation",
        "descriptionapplicant",
        "descriptionassistanceUsage",
        "descriptionbeneficiary",
        "descriptiondocumentation",
        "descriptionlimitation",
        "discretionaryFund.descriptionusage",
        "discretionaryFund.isApplicableusage",
        "isApplicabledocumentation",
        "isSameAsApplicantbeneficiary",
        "loanTerms.descriptionusage",
        "loanTerms.isApplicableusage",
        "restrictions.descriptionusage",
        "restrictions.isApplicableusage",
        "typesapplicant",
        "typesassistanceUsage",
        "typesbeneficiary",
        "dataBeneficiary",
        "hasBeneficiary",
        "countBeneficiary",
        "dataAssistanceUsage",
        "hasAssistanceUsage",
        "countAssistanceUsage",
        "dataApplicantTypes",
        "hasApplicantTypes",
        "countApplicantTypes",
        "numberItem",
        "actauthorizationId",
        "actdescription",
        "actparentAuthorizationId",
        "actpart",
        "actsection",
        "acttitle",
        "authorizationTypesact",
        "authorizationTypesexecutiveOrder",
        "authorizationTypespublicLaw",
        "authorizationTypesstatute",
        "authorizationTypesUSC",
        "executiveOrderdescription",
        "executiveOrderpart",
        "executiveOrdersection",
        "executiveOrdertitle",
        "publicLawcongressCode",
        "publicLawnumber",
        "statutepage",
        "statutevolume",
        "USCsection",
        "USCtitle",
        "descriptionbase",
        "isFundedCurrentFYbase",
        "accounts.code",
        "accounts.description",
        "descriptionFinancial",
        "isFundedCurrentYear",
        "titleGrantPopular",
        "isArchived",
        "descriptionProgram",
        "yearFiscal",
        "isFiscalYearLatest",
        "isLatest",
        "datetimeModified",
        "descriptionProgramObjective",
        "idOrganizationSAM",
        "slugKeyParentProgram",
        "idCFDA",
        "datetimePublished",
        "isRevision",
        "datetimeSubmitted",
        "titleGrant",
        "urlProgram"
      ),
      nameActual = c(
        "urlGrant",
        "titleGrant",
        "codeAgency",
        "nameOfficeFull",
        "amountFunding",
        "countAwards",
        "contactGrantor",
        "telephoneAgency",
        "emailGrantor",
        "datePostedEstimate",
        "dateApplicationDueEstimate",
        "datePosted",
        "dateClosed",
        "datetimeUpdated",
        "descriptionVersion",
        "descriptionProgramElement",

        "id",
        "isRevision",
        "opportunityNumber",
        "titleGrant",
        "owningAgencyCode",
        "listed",
        "publisherUid",
        "flag2006",
        "originalDueDate",
        "originalDueDateDesc",
        "synPostDateInPast",
        "docType",
        "forecastHistCount",
        "synopsisHistCount",
        "assistCompatible",
        "assistURL",
        "draftMode",

        "slugKey",
        "titleGrantPopular",
        "isArchived",
        "descriptionProgram",
        "yearFiscal",
        "isFiscalYearLatest",
        "isLatest",
        "datetimeModified",
        "descriptionProgramObjective",
        "idOrganizationSAM",
        "slugKeyParentProgram",
        "idCFDA",
        "datetimePublished",
        "datetimeSubmitted",
        "titleGrant",
        "urlProgram",
        "cityContact",
        "slugKeyContact",
        "countryContact",
        "emailContact",
        "faxContact",
        "flagContact",
        "nameContact",
        "phoneContact",
        "stateContact",
        "addressStreetContact",
        "zipcodeContact",
        "codesAssistanceTypes",
        "idTreasuryAccount",
        "isApplicable",
        "isFundedCurrentFY",
        "isRecoveryAct",
        "slugKeyObligation",

        "descriptionAppealProcess",
        "idAppealInterval",
        "descriptionApplicationProcedure",
        "hasApplicationProcedure",
        "descriptionApproval",
        "idApprovalInterval",
        "descriptionAwardProcedure",
        "descriptionDeadlines",
        "slugDeadlinesFlag",
        "descriptionDeadlinesList",
        "dateDeadLineEnd",
        "dateDeadlineStart",
        "descriptionPreApplicationCoordination",
        "hasPreApplicationCoordination",
        "codePreApplicationCoordinationEnvironmentalImpact",
        "descriptionRenewal",
        "idRenewalInterval",
        "descriptionSelectionCriteria",
        "hasSelectionCriteria",

        "descriptionChapterMatching",
        "descriptionAudit",
        "descriptionCFR200",
        "descriptionDocuments",
        "descriptionMatching",
        "descriptionRecords",
        "formulaMatching",
        "isApplicableAudit",
        "isApplicableDocuments",
        "hasMatching",
        "hasMOEMatching",
        "hasPartMatching",
        "hasPublicLawMatching",
        "hasSubPartMatching",
        "hasTitleMatching",
        "hasSubpartB",
        "hasSubpartC",
        "hasSubpartD",
        "hasSubpartE",
        "hasSubpartF",
        "descriptionMatchingProgram",
        "pctMatching",
        "slugMatchingFlag",
        "descriptionMOE",
        "hasCashMonitoring",
        "hasExpenditureMonitoring",
        "hasPerformanceMonitoring",
        "hasProgramMonitoring",
        "hasProgressMonitoring",

        "descriptionAwardedLimitation",
        "typeAwardedLimitation",
        "descriptionApplicant",
        "descriptionAssistanceUsage",
        "descriptionBeneficiary",
        "descriptionDocumentation",
        "descriptionLimitation",
        "descriptionDiscretionaryFundUsage",
        "hasDiscretionaryFundUsage",
        "isApplicabledocumentation",
        "isSameAsApplicantBeneficiary",
        "descriptionLoanTerms",
        "hasLoanTerms",
        "descriptionRestrictions",
        "hasRestrictions",
        "idApplicantTypes",
        "idAssistantUsage",
        "idBeneficiary",
        "dataBeneficiary",
        "hasBeneficiary",
        "countBeneficiary",
        "dataAssistanceUsage",
        "hasAssistanceUsage",
        "countAssistanceUsage",
        "dataApplicantTypes",
        "hasApplicantTypes",
        "countApplicantTypes",

        "numberItem",
        "slugKeyAuthorization",
        "descriptionAct",
        "slugKeyActParentAuthorization",
        "partAct",
        "sectionAct",
        "titleAct",
        "hasAuthorizationTypes",
        "isExecutiveOrder",
        "isPublicLaw",
        "isStatute",
        "isUnitedStatesCode",
        "descriptionExecutiveOrder",
        "partExecutiveOrder",
        "sectionExecutiveOrder",
        "titleExecutiveOrder",
        "codeCongressLaw",
        "numberLawCongress",
        "pageStatute",
        "volumeStatute",
        "sectionUnitedStatesCode",
        "titleUnitedStatesCode",
        "descriptionFinancial",
        "isFundedCurrentYear",
        "idTreasuryAccountSymbol",
        "descriptionTreasurySymbol",
        "descriptionFinancial",
        "isFundedCurrentYear",
        "titleGrantPopular",
        "isArchived",
        "descriptionProgram",
        "yearFiscal",
        "isFiscalYearLatest",
        "isLatest",
        "datetimeModified",
        "descriptionProgramObjective",
        "idOrganizationSAM",
        "slugKeyParentProgram",
        "idCFDA",
        "datetimePublished",
        "isRevision",
        "datetimeSubmitted",
        "titleGrant",
        "urlProgram"
      )

    )
  }

.parse_hyperlink <-
  function() {

  }

.us_government_grants <-
  function(exclude_nsf = T, other_agencies_to_exclude = NULL, return_message = T) {
    data <-
      "https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:%22%22,%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22sortBy%22:%22openDate|desc%22,%22rows%22:56770001}" %>%
      read_csv()



    data <-
      .munge_grant_names(data = data) %>%
      .remove_na()

    data <- data %>%
      separate(
        urlGrant,
        sep = "\\,",
        extra = "merge",
        fill = "right",
        into = c("urlGrant", "idGrant")
      ) %>%
      mutate(
        urlGrant = urlGrant %>% str_remove_all("\\=HYPERLINK|\\)|\\("),
        idGrant = idGrant %>% str_remove_all("\\)|\\-")
      ) %>%
      mutate_all(list(function(x) {
        x %>%  noquote() %>%
          str_replace_all('[\"]', '')
      })) %>%
      separate(
        urlGrant,
        into = c("remove", "idOpportunity"),
        sep = "oppId=",
        fill = "right",
        remove = F,
        extra = "merge",
        convert = T
      ) %>%
      select(-remove) %>%
      select(idOpportunity, everything())

    date_cols <-
      data %>%
      select(-datetimeUpdated) %>%
      select(matches("date[A-Z]")) %>% names()

    data <- data %>% mutate_at(date_cols, mdy)

    data <-
      data %>% mutate(datetimeUpdated = mdy_hms(datetimeUpdated))

    data <-
      data %>%
      munge_data(unformat = T)

    data <- data %>%
      mutate(
        isActive = case_when(!is.na(datePosted) &
                               dateClosed >= Sys.Date() ~ T,
                             TRUE ~ F),
        isForecasted = case_when(!is.na(datePostedEstimate) ~ T,
                                 T ~ F),
        isArchived = case_when(!is.na(datePosted) &
                                 dateClosed < Sys.Date() ~ T,
                               TRUE ~ F)
      )

    data <-
      data %>%
      separate(
        codeAgency,
        into = c("slugAgency", "slugOfficeFull"),
        sep = "\\-",
        extra = "merge",
        fill = "right",
        remove = F
      ) %>%
      separate(
        slugOfficeFull,
        into  = c("slugOffice", "slugOfficeDetail"),
        fill = "right",
        extra = "merge",
        remove = F
      ) %>%
      separate(
        nameOfficeFull,
        into = c("nameOffice", "nameOfficeDetail"),
        sep = "\\-",
        extra = "merge",
        fill = "right",
        remove = F
      ) %>%
      mutate_if(is.character, str_squish)

    df_agencies <-
      data %>%
      distinct(slugAgency)

    dict_agencies <- dictionary_government_agencies()

    df_agencies <-
      df_agencies %>%
      left_join(dict_agencies %>% distinct(slugAgency, nameAgency), by = "slugAgency") %>%
      mutate(
        nameAgency =
          case_when(
            slugAgency == "USDOJ" ~ "DEPARTMENT OF JUSTICE",
            slugAgency == "PAMS" ~ "DEPARTMENT OF ENERGY",
            slugAgency == "ONDCP" ~ "EXECUTIVE OFFICE OF THE PRESIDENT",
            slugAgency == "USDOT" ~ "DEPARTMENT OF TRANSPORTATION",
            slugAgency == "IVV" ~ "NATIONAL AERONAUTICS AND SPACE ADMINISTRATION",
            slugAgency == "GCERC" ~ "GULF COAST ECOSYSTEM RESTORATION COUNCIL",
            slugAgency == "DC" ~ "DENALI COMMISSION",
            slugAgency == "GDIT" ~ "UNKNOWN",
            slugAgency == "USEAC" ~ "ELECTION ASSISTANCE COMMISSION",
            slugAgency == "WWC" ~ "SMITHSONIAN INSTITUTION",
            slugAgency == "ECP" ~ "DEPARTMENT OF COMMERCE",
            TRUE ~ nameAgency
          )
      )



    data <-
      data %>%
      left_join(df_agencies, by = "slugAgency") %>%
      group_by(idOpportunity) %>%
      dplyr::slice(1) %>%
      ungroup()

    data <-
      data %>%
      mutate(nameAgencyParent = nameAgency) %>%
      select(idOpportunity, matches("nameAgency"), everything())


    if (exclude_nsf) {
      data <-
        data %>%
        filter(slugAgency != "NSF")
    }

    if (length(other_agencies_to_exclude) > 0) {
      if (return_message) {
        "Excluding other agencies" %>% message()
      }
      agency_slugs <-
        other_agencies_to_exclude %>% str_to_upper() %>% str_c(collapse = "|")
      data <-
        data %>%
        filter(!nameOfficeFull %>% str_detect(agency_slugs))
    }



    data <- data %>%
      mutate(
        haSBIRSTTRMention = titleGrant %>% str_detect(
          "SBIR|STTR|SMALL BUSINESS PHASE|SMALL BUSINESS INNOVATION RESEARCH|SMALL BUSINESS TECHNOLOGY TRANSFER"
        )
      )

    name_cols <- data %>% select(matches("name")) %>%
      select(-matches("nameOfficeFull")) %>%
      names()

    data <- data %>%
      mutate_at(name_cols, list(function(x) {
        x %>% str_remove_all("\\-") %>% str_squish()
      }))

    data <- data %>%
      rename(nameDepartment = nameAgency,
             slugDepartment = slugAgency) %>%
      select(-nameAgencyParent)

    if (return_message) {
      amt <- data$amountFunding %>% sum(na.rm = T) %>% currency(digits = 0)
      from_date <-
        data$datePosted %>% min(na.rm = T)
      grant_count <- nrow(data) %>% comma(digits = 0)
      to_date <-
        data$datePosted %>% max(na.rm = T)
      glue(
        "\n\nFound {blue({grant_count})} CFDA grant programs amount {green({amt})} between {red({from_date})} and {red({to_date})}\n\n"
      ) %>% cat(fill = T)
    }

    data
  }

.parse_export_url <-
  memoise::memoise(function(url = "https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:%22DEEP%20LEARNING%22,%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22rows%22:60}"){
  data <-
    url %>%
    fread(quote="", verbose = F,showProgress = FALSE)


  data <-
    data %>%
    select(1:(ncol(data) - 1)) %>%
    setNames(names(data)[2:ncol(data)]) %>%
    setNames(.dictionary_grant_names() %>% pull(nameActual)) %>%
    as_tibble()


  data <- data %>%
    separate(urlGrant,
             sep = "\\,",
             fill = "right",
             into = c("urlGrant", "idGrant")) %>%
    mutate(
      urlGrant = urlGrant %>% str_remove_all("\\=HYPERLINK|\\)|\\("),
      idGrant = idGrant %>% str_remove_all("\\)")
    ) %>%
    mutate_all(list(function(x) {
      x %>%  noquote() %>%
        str_replace_all('[\"]', '')
    })) %>%
    separate(
      urlGrant,
      into = c("remove", "idOpportunity"),
      sep = "oppId=",
      remove = F,
      extra = "merge",
      convert = T,
      fill = "right"
    ) %>%
    select(-remove) %>%
    select(idOpportunity, everything())

  date_cols <-
    data %>%
    select(-datetimeUpdated) %>%
    select(matches("date[A-Z]")) %>% names()

  data <- data %>% mutate_at(date_cols, mdy)

  data <-
    data %>% mutate(datetimeUpdated = mdy_hms(datetimeUpdated))

  data <-
    data %>%
    .munge_data(clean_address = F)

  data <-
    data %>%
    mutate(
      isActive = case_when(!is.na(datePosted) &
                             dateClosed >= Sys.Date() ~ T,
                           TRUE ~ F),
      isForecasted = case_when(!is.na(datePostedEstimate) ~ T,
                               T ~ F),
      isArchived = case_when(!is.na(datePosted) &
                               dateClosed < Sys.Date() ~ T,
                             TRUE ~ F)
    )

  data
  })

.term_df <-
  function(keywords = c("DEEP LEARNING", "MACHINE LEARNING")){


    keywords %>%
      map_dfr(function(keyword){
        slug <-
          glue('"/{keyword}/"') %>% as.character() %>% URLencode()

        url <- str_c("https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:",slug,",%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22sortBy%22:%22openDate|desc%22,%22rows%22:56770001}")

        tibble(termSearch = keyword, urlUSAGrant = url)
      })

  }


#' US Government Grants for Keywords
#'
#' @param keywords vector of keywords
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
us_government_grant_keywords <-
  function(keywords = c("DEEP LEARNING", "MACHINE LEARNING"), return_message = T) {
    df_urls <- .term_df(keywords = keywords)
    .parse_export_url_safe <- possibly(.parse_export_url, tibble())
    all_data <-
      df_urls$urlUSAGrant %>%
      map_dfr(function(url) {
        if (return_message) {
          glue("Parsing {url}") %>% message()
        }
        .parse_export_url_safe(url = url) %>%
          mutate(urlUSAGrant = url)
      })


    all_data <-
      all_data %>%
      left_join(df_urls, by = "urlUSAGrant") %>%
      select(termSearch, everything())


    df_counts <-
      all_data %>%
      group_by(idOpportunity, titleGrant) %>%
      summarise(termsSearch = unique(str_c(termSearch, collapse = " | ")),
                countMatchTerms = length(unique(termSearch))) %>%
      ungroup()

    all_data <-
      all_data %>%
      select(-c(termSearch, urlUSAGrant)) %>% distinct() %>%
      distinct() %>%
      left_join(df_counts, by = c("idOpportunity", "titleGrant")) %>%
      select(termsSearch, countMatchTerms, everything()) %>%
      mutate(amountFunding = formattable::currency(amountFunding, digits = 0))


    all_data <-
      all_data %>%
      mutate(
        isSBIRSTTR = titleGrant %>% str_detect(
          "SBIR|STTR|SMALL BUSINESS PHASE|SMALL BUSINESS INNOVATION RESEARCH"
        )
      )


    all_data
  }


#' US Government Grants
#'
#' Returns all grants listed
#' on grants.gov
#'
#' @param exclude_nsf if \code{TRUE} exclude National Science Foundation Grants
#' @param other_agencies_to_exclude Vector of other Agencies to exclude
#' @param return_message if TRUE returns a message
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples

us_government_grants <-
  memoise::memoise(function(exclude_nsf = F, other_agencies_to_exclude = NULL, return_message = T) {
    data <-
      .us_government_grants(exclude_nsf = exclude_nsf, other_agencies_to_exclude = other_agencies_to_exclude, return_message = return_message)

    data
  })



### https://github.com/jakewins/grantmachine/tree/dfe95cf52a7d7076a46b653d634cac66380824ce

#
.parse_grant_json <-
  function(json) {
    df_cols <-
      json %>% map_df(class) %>% gather(column, class) %>%
      mutate(idColumn = 1:n()) %>%
      select(idColumn, everything())

    base_cols <-
      df_cols %>% filter(!class %>% str_detect("list|data")) %>% pull(idColumn)

    df_base <- json[base_cols] %>% flatten_df() %>% as_tibble() %>% .munge_grant_names()
  }