Nothing
## ---- include = FALSE------------------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
width = 95,
fig.dpi = 96
)
opts.old <- options()
options(width = 95, cli.unicode = FALSE, cli.width = 95)
## confirm RePORTER is currently available before building this, for CRAN
r <- httr::POST("https://api.reporter.nih.gov/v2/projects/search",
httr::accept("application/json"),
httr::content_type_json(),
body = "{\"criteria\":{\"foa\":[\"RFA-NS-19-036\"]},\"include_fields\":[\"ApplId\"],\"offset\":0,\"limit\":10,\"sort_field\":\"appl_id\",\"sort_order\":\"desc\"}",
encode = "raw"
)
if (r$status_code != 200) knitr::knit_exit()
## ---- echo = FALSE, message = FALSE, fig.width=7, fig.height=4, fig.align='center'-----------
knitr::include_graphics("covid_plot.png")
## ---- eval = FALSE---------------------------------------------------------------------------
# install.packages("repoRter.nih")
## ---- eval = FALSE---------------------------------------------------------------------------
# devtools::install_github('bikeactuary/repoRter.nih@dev')
## ---- message = FALSE------------------------------------------------------------------------
library(tibble)
library(repoRter.nih)
library(ggplot2)
library(ggrepel)
library(dplyr)
library(scales)
library(tufte)
## --------------------------------------------------------------------------------------------
# all projects funded by the Paycheck Protection Act, Coronavirus Response and
# Relief Act, and American Rescue Plan, in fiscal year 2021
req <- make_req(criteria =
list(fiscal_years = 2021,
covid_response = c("C4", "C5", "C6")))
## --------------------------------------------------------------------------------------------
res <- get_nih_data(req)
class(res)
## --------------------------------------------------------------------------------------------
res %>% glimpse(width = getOption("cli.width"))
## --------------------------------------------------------------------------------------------
data("nih_fields")
nih_fields %>% print
## --------------------------------------------------------------------------------------------
req <- make_req()
## --------------------------------------------------------------------------------------------
data("nih_fields")
fields <- nih_fields %>%
filter(response_name %in%
c("appl_id", "subproject_id", "project_title", "fiscal_year",
"award_amount", "is_active", "project_start_date")) %>%
pull(include_name)
req <- make_req(include_fields = fields,
limit = 500,
message = FALSE) # default
res <- get_nih_data(query = req,
max_pages = 1)
res %>% glimpse(width = getOption("cli.width"))
## --------------------------------------------------------------------------------------------
req <- make_req(criteria =
list(
fiscal_years = 2010:2011,
include_active_projects = TRUE,
org_names = c("Yale", "New Haven")
),
include_fields = c("Organization", "FiscalYear", "AwardAmount"),
message = FALSE)
## --------------------------------------------------------------------------------------------
res <- get_nih_data(req, max_pages = 1)
res %>% glimpse(width = getOption("cli.width"))
## --------------------------------------------------------------------------------------------
res <- get_nih_data(req,
max_pages = 1,
flatten_result = TRUE)
res %>% glimpse(width = getOption("cli.width"))
## ---- echo = FALSE---------------------------------------------------------------------------
evl <- class(res)[1] == "tbl_df"
## ---- eval=evl-------------------------------------------------------------------------------
res %>%
group_by(organization_org_name) %>%
summarise(project_count = n())
## --------------------------------------------------------------------------------------------
## A valid request but probably not what we want
req <- make_req(criteria =
list(
fiscal_years = 2010:2011,
include_active_projects = TRUE,
org_cities = "New Haven",
org_states = "WY"
),
include_fields = c("Organization", "FiscalYear", "AwardAmount"),
message = FALSE ## suppress printed message
)
res <- get_nih_data(req,
max_pages = 5,
flatten_result = TRUE)
## --------------------------------------------------------------------------------------------
req <- make_req(criteria =
list(
fiscal_years = 2015,
include_active_projects = TRUE,
org_states = "WY"
),
include_fields = c("ApplId", "Organization", "FiscalYear", "AwardAmount"),
sort_field = "AwardAmount",
sort_order = "desc",
message = FALSE)
res <- get_nih_data(req,
flatten_result = TRUE)
res %>% glimpse(width = getOption("cli.width"))
## --------------------------------------------------------------------------------------------
## all projects funded by the Paycheck Protection Act, Coronavirus Response and Relief Act,
## and American Rescue Plan, over all years
req <- make_req(criteria =
list(covid_response = c("All")),
include_fields = nih_fields %>%
filter(payload_name %in% c("award_amount_range", "covid_response"))
%>% pull(include_name))
res <- get_nih_data(req, max_pages = 1)
## ---- echo=FALSE-----------------------------------------------------------------------------
evl <- class(res)[1] == "tbl_df"
## ---- eval=evl-------------------------------------------------------------------------------
res$covid_response %>% class()
res$covid_response[[1]]
## ----covid, eval=FALSE-----------------------------------------------------------------------
# ## all projects funded by the Paycheck Protection Act, Coronavirus Response and Relief Act,
# ## and American Rescue Plan, in fiscal year 2021
# req <- make_req(criteria =
# list(covid_response = c("All")),
# message = FALSE)
#
# res <- get_nih_data(req,
# flatten_result = TRUE)
## ---- eval=FALSE-----------------------------------------------------------------------------
# unique(res$covid_response)
## ---- echo=FALSE-----------------------------------------------------------------------------
res <- readRDS("cov_res.RDS")
unique(res$covid_response)
## ---- eval=FALSE-----------------------------------------------------------------------------
# library(ggplot2)
#
# res %>%
# left_join(covid_response_codes, by = "covid_response") %>%
# mutate(covid_code_desc = case_when(!is.na(fund_src) ~ paste0(covid_response, ": ", fund_src),
# TRUE ~ paste0(covid_response, " (Multiple)"))) %>%
# group_by(covid_code_desc) %>%
# summarise(total_awards = sum(award_amount) / 1e6) %>%
# ungroup() %>%
# arrange(desc(covid_code_desc)) %>%
# mutate(prop = total_awards / sum(total_awards),
# csum = cumsum(prop),
# ypos = csum - prop/2 ) %>%
# ggplot(aes(x = "", y = prop, fill = covid_code_desc)) +
# geom_bar(stat="identity") +
# geom_text_repel(aes(label =
# paste0(dollar(total_awards,
# accuracy = 1,
# suffix = "M"),
# "\n", percent(prop, accuracy = .01)),
# y = ypos),
# show.legend = FALSE,
# nudge_x = .8,
# size = 3, color = "grey25") +
# coord_polar(theta ="y") +
# theme_void() +
# theme(legend.position = "right",
# legend.title = element_text(colour = "grey25"),
# legend.text = element_text(colour="blue", size=6,
# face="bold"),
# plot.title = element_text(color = "grey25"),
# plot.caption = element_text(size = 6)) +
# labs(caption = "Data Source: NIH RePORTER API v2") +
# ggtitle("Legislative Source for NIH Covid Response Project Funding")
## ---- echo = FALSE, message = FALSE, fig.width=7, fig.height=4, fig.align='center'-----------
knitr::include_graphics("covid_plot.png")
## --------------------------------------------------------------------------------------------
data("covid_response_codes")
covid_response_codes %>% print
## --------------------------------------------------------------------------------------------
## projects funded in 2021 where the principal investigator first name
## is "Michael" or begins with "Jo"
req <- make_req(criteria =
list(fiscal_years = 2021,
pi_names = list(first_name = c("Michael", "Jo*"),
last_name = c(""), # must specify all pi_names elements always
any_name = character(1))),
include_fields = nih_fields %>%
filter(payload_name == "pi_names") %>%
pull(include_name),
message = FALSE)
res <- get_nih_data(req,
max_pages = 1,
flatten_result = TRUE)
res %>% glimpse(width = getOption("cli.width"))
## --------------------------------------------------------------------------------------------
## using advanced_text_search with boolean search string
req <- make_req(criteria =
list(advanced_text_search =
list(operator = "advanced",
search_field = c("terms", "abstract"),
search_text = "(head AND trauma) OR \"brain damage\" AND NOT \"psychological\"")),
include_fields = c("ProjectTitle", "AbstractText", "Terms") )
res <- get_nih_data(req, max_pages = 1)
## ---- echo = FALSE---------------------------------------------------------------------------
evl <- class(res)[1] == "tbl_df"
## ---- eval=evl-------------------------------------------------------------------------------
one_rec <- res %>%
slice(42) %>%
mutate(abstract_text = gsub("[\r\n]", " ", abstract_text))
one_rec %>% pull(project_title) %>% print
## ---- eval=evl-------------------------------------------------------------------------------
## substr to avoid LaTeX error exceeding char limit
one_rec %>% pull(abstract_text) %>% substr(1, 85) %>% print
## ---- eval=evl-------------------------------------------------------------------------------
one_rec %>% pull(terms) %>% substr(1, 85) %>% print
## ---- eval = FALSE---------------------------------------------------------------------------
# all_res <- list()
# for(y in 2017:2021) { ## five years to loop over, each year is ~80K records
# ## We only need the AwardAmount for quantiles
# req_sample <- make_req(criteria = list(fiscal_years = y),
# include_fields = "AwardAmount")
#
# ## get a sample of the result set - 1000 records should be enough
# ## return the metadata
# res_sample <- get_nih_data(req_sample, max_pages = 2, return_meta = TRUE)
#
# paste0("There are ", res_sample$meta$total, " results for fiscal year ", y) %>%
# print()
#
# ## deciles of award amount - each decile should contain ~7,314.2 records, approximately
# qtiles <- res_sample$records %>% pull(award_amount) %>% quantile(na.rm = TRUE, probs = seq(.1, 1, .1))
#
# ## list for qtile results (full year)
# this_res <- list()
# ## for each qtile
# for (i in 1:length(qtiles)) {
# if (i == 1) {
# award_min <- 0
# } else {
# award_min <- ceiling(qtiles[i-1])+.01
# }
# if (i == length(qtiles)) {
# award_max <- 1e9 ## arbitrarily huge
# } else {
# award_max <- ceiling(qtiles[i])
# }
# req <- make_req(criteria = list(fiscal_years = y,
# award_amount_range = list(min_amount = award_min,
# max_amount = award_max)))
# ## result set for quantile
# this_res[[i]] <- get_nih_data(req, flatten_result = FALSE)
# }
# ## list of result sets for each year
# yr_res[[y %>% as.character()]] <- this_res
# }
#
# ## shape it up
# all_res <- unlist(yr_res, recursive = FALSE) %>%
# bind_rows() %>%
# flatten(recursive = FALSE) %>%
# clean_names()
#
# ## pull out everything that is flat
# flat_columns <- all_res %>%
# select_if(is.atomic)
#
# ## everything that isn't
# annoying_columns <- all_res %>%
# select_if(!is.atomic)
## --------------------------------------------------------------------------------------------
sessionInfo()
## ---- include=FALSE-----------------------------------------------------------
options(opts.old)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.