.munge_grant_names <-
function(data) {
dict_names <- .dictionary_grant_names()
grant_names <-
names(data)
actual_names <-
grant_names %>%
map_chr(function(name) {
df_row <-
dict_names %>% filter(nameGrant == name)
if (nrow(df_row) == 0) {
glue::glue("Missing {name}") %>% message()
return(name)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
.dictionary_grant_names <-
function() {
tibble(
nameGrant = c(
"OPPORTUNITY NUMBER",
"OPPORTUNITY TITLE",
"AGENCY CODE",
"AGENCY NAME",
"ESTIMATED FUNDING",
"EXPECTED NUMBER OF AWARDS",
"GRANTOR CONTACT",
"AGENCY CONTACT PHONE",
"AGENCY CONTACT EMAIL",
"ESTIMATED POST DATE",
"ESTIMATED APPLICATION DUE DATE",
"POSTED DATE",
"CLOSE DATE",
"LAST UPDATED DATE/TIME",
"VERSION",
"referenceProgramElement",
"id",
"revision",
"opportunityNumber",
"opportunityTitle",
"owningAgencyCode",
"listed",
"publisherUid",
"flag2006",
"originalDueDate",
"originalDueDateDesc",
"synPostDateInPast",
"docType",
"forecastHistCount",
"synopsisHistCount",
"assistCompatible",
"assistURL",
"draftMode",
"slugKey",
"alternativeNames",
"archived",
"description",
"fiscalYear",
"fiscalYearLatest",
"latest",
"modifiedDate",
"objective",
"organizationId",
"parentProgramId",
"programNumber",
"publishedDate",
"submittedDate",
"title",
"website",
"city",
"contactId",
"country",
"email",
"fax",
"flag",
"fullName",
"phone",
"state",
"streetAddress",
"zip",
"assistanceType",
"code",
"isApplicable",
"isFundedCurrentFY",
"isRecoveryAct",
"obligationId",
"appeal.description",
"appeal.interval",
"applicationProcedure.description",
"applicationProcedure.isApplicable",
"approval.description",
"approval.interval",
"awardProcedure.description",
"deadlines.description",
"deadlines.flag",
"deadlines.list.description",
"deadlines.list.end",
"deadlines.list.start",
"preApplicationCoordination.description",
"preApplicationCoordination.environmentalImpact.reports.isSelected",
"preApplicationCoordination.environmentalImpact.reports.reportCode",
"renewal.description",
"renewal.interval",
"selectionCriteria.description",
"selectionCriteria.isApplicable",
"chapterMatching",
"descriptionAudit",
"descriptionCFR200",
"descriptionDocuments",
"descriptionMatching",
"descriptionRecords",
"formulaMatching",
"isApplicableAudit",
"isApplicableDocuments",
"matchingMatching",
"moeMatching",
"partMatching",
"publicLawMatching",
"subPartMatching",
"titleMatching",
"subpartB",
"subpartC",
"subpartD",
"subpartE",
"subpartF",
"matching.description",
"matching.percent",
"matching.requirementFlag",
"moe.description",
"cash",
"expenditure",
"performanceMonitoring",
"program",
"progress",
"awardedDescriptionlimitation",
"awardedlimitation",
"descriptionapplicant",
"descriptionassistanceUsage",
"descriptionbeneficiary",
"descriptiondocumentation",
"descriptionlimitation",
"discretionaryFund.descriptionusage",
"discretionaryFund.isApplicableusage",
"isApplicabledocumentation",
"isSameAsApplicantbeneficiary",
"loanTerms.descriptionusage",
"loanTerms.isApplicableusage",
"restrictions.descriptionusage",
"restrictions.isApplicableusage",
"typesapplicant",
"typesassistanceUsage",
"typesbeneficiary",
"dataBeneficiary",
"hasBeneficiary",
"countBeneficiary",
"dataAssistanceUsage",
"hasAssistanceUsage",
"countAssistanceUsage",
"dataApplicantTypes",
"hasApplicantTypes",
"countApplicantTypes",
"numberItem",
"actauthorizationId",
"actdescription",
"actparentAuthorizationId",
"actpart",
"actsection",
"acttitle",
"authorizationTypesact",
"authorizationTypesexecutiveOrder",
"authorizationTypespublicLaw",
"authorizationTypesstatute",
"authorizationTypesUSC",
"executiveOrderdescription",
"executiveOrderpart",
"executiveOrdersection",
"executiveOrdertitle",
"publicLawcongressCode",
"publicLawnumber",
"statutepage",
"statutevolume",
"USCsection",
"USCtitle",
"descriptionbase",
"isFundedCurrentFYbase",
"accounts.code",
"accounts.description",
"descriptionFinancial",
"isFundedCurrentYear",
"titleGrantPopular",
"isArchived",
"descriptionProgram",
"yearFiscal",
"isFiscalYearLatest",
"isLatest",
"datetimeModified",
"descriptionProgramObjective",
"idOrganizationSAM",
"slugKeyParentProgram",
"idCFDA",
"datetimePublished",
"isRevision",
"datetimeSubmitted",
"titleGrant",
"urlProgram"
),
nameActual = c(
"urlGrant",
"titleGrant",
"codeAgency",
"nameOfficeFull",
"amountFunding",
"countAwards",
"contactGrantor",
"telephoneAgency",
"emailGrantor",
"datePostedEstimate",
"dateApplicationDueEstimate",
"datePosted",
"dateClosed",
"datetimeUpdated",
"descriptionVersion",
"descriptionProgramElement",
"id",
"isRevision",
"opportunityNumber",
"titleGrant",
"owningAgencyCode",
"listed",
"publisherUid",
"flag2006",
"originalDueDate",
"originalDueDateDesc",
"synPostDateInPast",
"docType",
"forecastHistCount",
"synopsisHistCount",
"assistCompatible",
"assistURL",
"draftMode",
"slugKey",
"titleGrantPopular",
"isArchived",
"descriptionProgram",
"yearFiscal",
"isFiscalYearLatest",
"isLatest",
"datetimeModified",
"descriptionProgramObjective",
"idOrganizationSAM",
"slugKeyParentProgram",
"idCFDA",
"datetimePublished",
"datetimeSubmitted",
"titleGrant",
"urlProgram",
"cityContact",
"slugKeyContact",
"countryContact",
"emailContact",
"faxContact",
"flagContact",
"nameContact",
"phoneContact",
"stateContact",
"addressStreetContact",
"zipcodeContact",
"codesAssistanceTypes",
"idTreasuryAccount",
"isApplicable",
"isFundedCurrentFY",
"isRecoveryAct",
"slugKeyObligation",
"descriptionAppealProcess",
"idAppealInterval",
"descriptionApplicationProcedure",
"hasApplicationProcedure",
"descriptionApproval",
"idApprovalInterval",
"descriptionAwardProcedure",
"descriptionDeadlines",
"slugDeadlinesFlag",
"descriptionDeadlinesList",
"dateDeadLineEnd",
"dateDeadlineStart",
"descriptionPreApplicationCoordination",
"hasPreApplicationCoordination",
"codePreApplicationCoordinationEnvironmentalImpact",
"descriptionRenewal",
"idRenewalInterval",
"descriptionSelectionCriteria",
"hasSelectionCriteria",
"descriptionChapterMatching",
"descriptionAudit",
"descriptionCFR200",
"descriptionDocuments",
"descriptionMatching",
"descriptionRecords",
"formulaMatching",
"isApplicableAudit",
"isApplicableDocuments",
"hasMatching",
"hasMOEMatching",
"hasPartMatching",
"hasPublicLawMatching",
"hasSubPartMatching",
"hasTitleMatching",
"hasSubpartB",
"hasSubpartC",
"hasSubpartD",
"hasSubpartE",
"hasSubpartF",
"descriptionMatchingProgram",
"pctMatching",
"slugMatchingFlag",
"descriptionMOE",
"hasCashMonitoring",
"hasExpenditureMonitoring",
"hasPerformanceMonitoring",
"hasProgramMonitoring",
"hasProgressMonitoring",
"descriptionAwardedLimitation",
"typeAwardedLimitation",
"descriptionApplicant",
"descriptionAssistanceUsage",
"descriptionBeneficiary",
"descriptionDocumentation",
"descriptionLimitation",
"descriptionDiscretionaryFundUsage",
"hasDiscretionaryFundUsage",
"isApplicabledocumentation",
"isSameAsApplicantBeneficiary",
"descriptionLoanTerms",
"hasLoanTerms",
"descriptionRestrictions",
"hasRestrictions",
"idApplicantTypes",
"idAssistantUsage",
"idBeneficiary",
"dataBeneficiary",
"hasBeneficiary",
"countBeneficiary",
"dataAssistanceUsage",
"hasAssistanceUsage",
"countAssistanceUsage",
"dataApplicantTypes",
"hasApplicantTypes",
"countApplicantTypes",
"numberItem",
"slugKeyAuthorization",
"descriptionAct",
"slugKeyActParentAuthorization",
"partAct",
"sectionAct",
"titleAct",
"hasAuthorizationTypes",
"isExecutiveOrder",
"isPublicLaw",
"isStatute",
"isUnitedStatesCode",
"descriptionExecutiveOrder",
"partExecutiveOrder",
"sectionExecutiveOrder",
"titleExecutiveOrder",
"codeCongressLaw",
"numberLawCongress",
"pageStatute",
"volumeStatute",
"sectionUnitedStatesCode",
"titleUnitedStatesCode",
"descriptionFinancial",
"isFundedCurrentYear",
"idTreasuryAccountSymbol",
"descriptionTreasurySymbol",
"descriptionFinancial",
"isFundedCurrentYear",
"titleGrantPopular",
"isArchived",
"descriptionProgram",
"yearFiscal",
"isFiscalYearLatest",
"isLatest",
"datetimeModified",
"descriptionProgramObjective",
"idOrganizationSAM",
"slugKeyParentProgram",
"idCFDA",
"datetimePublished",
"isRevision",
"datetimeSubmitted",
"titleGrant",
"urlProgram"
)
)
}
.parse_hyperlink <-
function() {
}
.us_government_grants <-
function(exclude_nsf = T, other_agencies_to_exclude = NULL, return_message = T) {
data <-
"https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:%22%22,%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22sortBy%22:%22openDate|desc%22,%22rows%22:56770001}" %>%
read_csv()
data <-
.munge_grant_names(data = data) %>%
.remove_na()
data <- data %>%
separate(
urlGrant,
sep = "\\,",
extra = "merge",
fill = "right",
into = c("urlGrant", "idGrant")
) %>%
mutate(
urlGrant = urlGrant %>% str_remove_all("\\=HYPERLINK|\\)|\\("),
idGrant = idGrant %>% str_remove_all("\\)|\\-")
) %>%
mutate_all(list(function(x) {
x %>% noquote() %>%
str_replace_all('[\"]', '')
})) %>%
separate(
urlGrant,
into = c("remove", "idOpportunity"),
sep = "oppId=",
fill = "right",
remove = F,
extra = "merge",
convert = T
) %>%
select(-remove) %>%
select(idOpportunity, everything())
date_cols <-
data %>%
select(-datetimeUpdated) %>%
select(matches("date[A-Z]")) %>% names()
data <- data %>% mutate_at(date_cols, mdy)
data <-
data %>% mutate(datetimeUpdated = mdy_hms(datetimeUpdated))
data <-
data %>%
munge_data(unformat = T)
data <- data %>%
mutate(
isActive = case_when(!is.na(datePosted) &
dateClosed >= Sys.Date() ~ T,
TRUE ~ F),
isForecasted = case_when(!is.na(datePostedEstimate) ~ T,
T ~ F),
isArchived = case_when(!is.na(datePosted) &
dateClosed < Sys.Date() ~ T,
TRUE ~ F)
)
data <-
data %>%
separate(
codeAgency,
into = c("slugAgency", "slugOfficeFull"),
sep = "\\-",
extra = "merge",
fill = "right",
remove = F
) %>%
separate(
slugOfficeFull,
into = c("slugOffice", "slugOfficeDetail"),
fill = "right",
extra = "merge",
remove = F
) %>%
separate(
nameOfficeFull,
into = c("nameOffice", "nameOfficeDetail"),
sep = "\\-",
extra = "merge",
fill = "right",
remove = F
) %>%
mutate_if(is.character, str_squish)
df_agencies <-
data %>%
distinct(slugAgency)
dict_agencies <- dictionary_government_agencies()
df_agencies <-
df_agencies %>%
left_join(dict_agencies %>% distinct(slugAgency, nameAgency), by = "slugAgency") %>%
mutate(
nameAgency =
case_when(
slugAgency == "USDOJ" ~ "DEPARTMENT OF JUSTICE",
slugAgency == "PAMS" ~ "DEPARTMENT OF ENERGY",
slugAgency == "ONDCP" ~ "EXECUTIVE OFFICE OF THE PRESIDENT",
slugAgency == "USDOT" ~ "DEPARTMENT OF TRANSPORTATION",
slugAgency == "IVV" ~ "NATIONAL AERONAUTICS AND SPACE ADMINISTRATION",
slugAgency == "GCERC" ~ "GULF COAST ECOSYSTEM RESTORATION COUNCIL",
slugAgency == "DC" ~ "DENALI COMMISSION",
slugAgency == "GDIT" ~ "UNKNOWN",
slugAgency == "USEAC" ~ "ELECTION ASSISTANCE COMMISSION",
slugAgency == "WWC" ~ "SMITHSONIAN INSTITUTION",
slugAgency == "ECP" ~ "DEPARTMENT OF COMMERCE",
TRUE ~ nameAgency
)
)
data <-
data %>%
left_join(df_agencies, by = "slugAgency") %>%
group_by(idOpportunity) %>%
dplyr::slice(1) %>%
ungroup()
data <-
data %>%
mutate(nameAgencyParent = nameAgency) %>%
select(idOpportunity, matches("nameAgency"), everything())
if (exclude_nsf) {
data <-
data %>%
filter(slugAgency != "NSF")
}
if (length(other_agencies_to_exclude) > 0) {
if (return_message) {
"Excluding other agencies" %>% message()
}
agency_slugs <-
other_agencies_to_exclude %>% str_to_upper() %>% str_c(collapse = "|")
data <-
data %>%
filter(!nameOfficeFull %>% str_detect(agency_slugs))
}
data <- data %>%
mutate(
haSBIRSTTRMention = titleGrant %>% str_detect(
"SBIR|STTR|SMALL BUSINESS PHASE|SMALL BUSINESS INNOVATION RESEARCH|SMALL BUSINESS TECHNOLOGY TRANSFER"
)
)
name_cols <- data %>% select(matches("name")) %>%
select(-matches("nameOfficeFull")) %>%
names()
data <- data %>%
mutate_at(name_cols, list(function(x) {
x %>% str_remove_all("\\-") %>% str_squish()
}))
data <- data %>%
rename(nameDepartment = nameAgency,
slugDepartment = slugAgency) %>%
select(-nameAgencyParent)
if (return_message) {
amt <- data$amountFunding %>% sum(na.rm = T) %>% currency(digits = 0)
from_date <-
data$datePosted %>% min(na.rm = T)
grant_count <- nrow(data) %>% comma(digits = 0)
to_date <-
data$datePosted %>% max(na.rm = T)
glue(
"\n\nFound {blue({grant_count})} CFDA grant programs amount {green({amt})} between {red({from_date})} and {red({to_date})}\n\n"
) %>% cat(fill = T)
}
data
}
.parse_export_url <-
memoise::memoise(function(url = "https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:%22DEEP%20LEARNING%22,%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22rows%22:60}"){
data <-
url %>%
fread(quote="", verbose = F,showProgress = FALSE)
data <-
data %>%
select(1:(ncol(data) - 1)) %>%
setNames(names(data)[2:ncol(data)]) %>%
setNames(.dictionary_grant_names() %>% pull(nameActual)) %>%
as_tibble()
data <- data %>%
separate(urlGrant,
sep = "\\,",
fill = "right",
into = c("urlGrant", "idGrant")) %>%
mutate(
urlGrant = urlGrant %>% str_remove_all("\\=HYPERLINK|\\)|\\("),
idGrant = idGrant %>% str_remove_all("\\)")
) %>%
mutate_all(list(function(x) {
x %>% noquote() %>%
str_replace_all('[\"]', '')
})) %>%
separate(
urlGrant,
into = c("remove", "idOpportunity"),
sep = "oppId=",
remove = F,
extra = "merge",
convert = T,
fill = "right"
) %>%
select(-remove) %>%
select(idOpportunity, everything())
date_cols <-
data %>%
select(-datetimeUpdated) %>%
select(matches("date[A-Z]")) %>% names()
data <- data %>% mutate_at(date_cols, mdy)
data <-
data %>% mutate(datetimeUpdated = mdy_hms(datetimeUpdated))
data <-
data %>%
.munge_data(clean_address = F)
data <-
data %>%
mutate(
isActive = case_when(!is.na(datePosted) &
dateClosed >= Sys.Date() ~ T,
TRUE ~ F),
isForecasted = case_when(!is.na(datePostedEstimate) ~ T,
T ~ F),
isArchived = case_when(!is.na(datePosted) &
dateClosed < Sys.Date() ~ T,
TRUE ~ F)
)
data
})
.term_df <-
function(keywords = c("DEEP LEARNING", "MACHINE LEARNING")){
keywords %>%
map_dfr(function(keyword){
slug <-
glue('"/{keyword}/"') %>% as.character() %>% URLencode()
url <- str_c("https://www.grants.gov/grantsws/rest/opportunities/search/csv/download?osjp={%22startRecordNum%22:0,%22keyword%22:",slug,",%22oppNum%22:%22%22,%22cfda%22:%22%22,%22oppStatuses%22:%22forecasted|posted|closed|archived%22,%22sortBy%22:%22openDate|desc%22,%22rows%22:56770001}")
tibble(termSearch = keyword, urlUSAGrant = url)
})
}
#' US Government Grants for Keywords
#'
#' @param keywords vector of keywords
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
us_government_grant_keywords <-
function(keywords = c("DEEP LEARNING", "MACHINE LEARNING"), return_message = T) {
df_urls <- .term_df(keywords = keywords)
.parse_export_url_safe <- possibly(.parse_export_url, tibble())
all_data <-
df_urls$urlUSAGrant %>%
map_dfr(function(url) {
if (return_message) {
glue("Parsing {url}") %>% message()
}
.parse_export_url_safe(url = url) %>%
mutate(urlUSAGrant = url)
})
all_data <-
all_data %>%
left_join(df_urls, by = "urlUSAGrant") %>%
select(termSearch, everything())
df_counts <-
all_data %>%
group_by(idOpportunity, titleGrant) %>%
summarise(termsSearch = unique(str_c(termSearch, collapse = " | ")),
countMatchTerms = length(unique(termSearch))) %>%
ungroup()
all_data <-
all_data %>%
select(-c(termSearch, urlUSAGrant)) %>% distinct() %>%
distinct() %>%
left_join(df_counts, by = c("idOpportunity", "titleGrant")) %>%
select(termsSearch, countMatchTerms, everything()) %>%
mutate(amountFunding = formattable::currency(amountFunding, digits = 0))
all_data <-
all_data %>%
mutate(
isSBIRSTTR = titleGrant %>% str_detect(
"SBIR|STTR|SMALL BUSINESS PHASE|SMALL BUSINESS INNOVATION RESEARCH"
)
)
all_data
}
#' US Government Grants
#'
#' Returns all grants listed
#' on grants.gov
#'
#' @param exclude_nsf if \code{TRUE} exclude National Science Foundation Grants
#' @param other_agencies_to_exclude Vector of other Agencies to exclude
#' @param return_message if TRUE returns a message
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
us_government_grants <-
memoise::memoise(function(exclude_nsf = F, other_agencies_to_exclude = NULL, return_message = T) {
data <-
.us_government_grants(exclude_nsf = exclude_nsf, other_agencies_to_exclude = other_agencies_to_exclude, return_message = return_message)
data
})
### https://github.com/jakewins/grantmachine/tree/dfe95cf52a7d7076a46b653d634cac66380824ce
#
.parse_grant_json <-
function(json) {
df_cols <-
json %>% map_df(class) %>% gather(column, class) %>%
mutate(idColumn = 1:n()) %>%
select(idColumn, everything())
base_cols <-
df_cols %>% filter(!class %>% str_detect("list|data")) %>% pull(idColumn)
df_base <- json[base_cols] %>% flatten_df() %>% as_tibble() %>% .munge_grant_names()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.