#' Pull data from the InCites API
#'
#' \strong{Important note:} The throttling limits on the InCites API are not
#' documented anywhere and are difficult to determine from experience. As such,
#' whenever \code{pull_incites} receives a throttling error from the server, it
#' uses exponential backoff (with a maximum wait time of 45 minutes) to determine
#' how long to wait before retrying.
#'
#' @param uts A vector of UTs whose InCites data you would like to download.
#' Each UT is a 15-digit identifier for a given publication. You
#' can specify the UT using only these 15 digits or you can prefix the 15 digits
#' with "WOS:" (e.g., "000346263300011" or "WOS:000346263300011").
#' @param key The developer key that the server will use for authentication.
#' @param as_raw Do you want the data frame that is returned by the API to be
#' returned to you in its raw form? This option can be useful if the API has
#' changed the format of the data that it is serving, in which case specifying
#' \code{as_raw = TRUE} may avoid an error that would otherwise occur during
#' \code{pull_incites}'s data processing step.
#' @param ... Arguments passed along to \code{\link[httr]{GET}}.
#'
#' @return A data frame where each row corresponds to a different publication.
#' The definitions for the columns in this data frame can be found online at
#' the API's documentation \href{http://about.incites.thomsonreuters.com/api/#/}{page}
#' (see the \code{DocumentLevelMetricsByUT} method details for definitions).
#' Note that the column names are all converted to lowercase by
#' \code{pull_incites} and the 0/1 flag variables are converted to booleans.
#' Also note that not all publications indexed in WoS are also indexed in
#' InCites, so you may not get data back for some UTs.
#'
#' @examples
#' \dontrun{
#'
#' uts <- c(
#' "WOS:000346263300011", "WOS:000362312600021", "WOS:000279885800004",
#' "WOS:000294667500003", "WOS:000294946900020", "WOS:000412659200006"
#' )
#' pull_incites(uts, key = "some_key")
#'
#' pull_incites(c("000346263300011", "000362312600021"), key = "some_key")
#'}
#' @export
pull_incites <- function(uts,
                         key = Sys.getenv("INCITES_KEY"),
                         as_raw = FALSE,
                         ...) {
  # A usable key is a single, non-missing, non-empty value. Checking length
  # and NA-ness before the emptiness test means a NULL, NA, or multi-element
  # key produces the intended message instead of an opaque error from `if`.
  key_missing <- length(key) != 1L || is.na(key) || !nzchar(key)
  if (key_missing) {
    stop(
      "You need to provide an InCites API key to use `pull_incites()`",
      call. = FALSE
    )
  }

  # One URL per batch of UTs; each batch is requested with retry/backoff.
  urls <- get_urls(trim_uts(uts))
  out_list <- pbapply::pblapply(urls, try_incites_req, key = key, ...)

  # Batches that returned no data are NULL and are dropped by rbind();
  # unique() removes rows duplicated across the combined result.
  unique(process_incites(do.call("rbind", out_list), as_raw))
}
# Build the list of request URLs for a vector of UTs: the UTs are first
# batched by split_uts(), then each batch is turned into a URL by get_url().
get_urls <- function(uts) {
  lapply(split_uts(uts), get_url)
}
# Partition `uts` into consecutive batches of at most 100 elements.
# Returns a list named "1", "2", ... with one element per batch.
split_uts <- function(uts) {
  # Positions 1-100 map to batch 1, 101-200 to batch 2, and so on.
  batch_id <- ceiling(seq_along(uts) / 100)
  split(uts, batch_id)
}
# Build the DocumentLevelMetricsByUT request URL for one batch of UTs.
# The UTs are sent as a single comma-separated `UT` query parameter.
get_url <- function(uts) {
  endpoint <-
    "https://api.clarivate.com/api/incites/DocumentLevelMetricsByUT/json?UT="
  ut_csv <- paste(uts, collapse = ",")
  paste0(endpoint, ut_csv)
}
# Number of minutes to wait before retrying, given the attempt number `try`.
# The wait roughly doubles per attempt (1, 2, 4, 8, 16, 32); any computed
# wait above 32 is replaced by a flat 45 minutes.
backoff_wait <- function(try) {
  longest_doubling_wait <- 32
  capped_wait <- 45
  wait <- ceiling((2^try - 1) / 2)
  ifelse(wait > longest_doubling_wait, capped_wait, wait)
}
# Make one InCites request, retrying when the server reports throttling.
#
# `url`, `key` and `...` are passed straight through to one_incites_req().
# Returns whatever one_incites_req() returns on the first successful attempt.
# Errors whose text does not contain "limit" are re-raised immediately;
# throttling errors are retried up to 10 times, waiting backoff_wait(i)
# minutes between attempts, after which a final error is raised.
try_incites_req <- function(url, key, ...) {
  max_tries <- 10
  for (i in seq_len(max_tries)) {
    maybe_data <- try(one_incites_req(url, key, ...), silent = TRUE)

    if (!inherits(maybe_data, "try-error")) {
      # Short pause after a successful request before the caller issues
      # the next one.
      Sys.sleep(2)
      return(maybe_data)
    }

    # Anything other than a throttling error is not retried.
    if (!grepl("limit", maybe_data[1])) {
      stop(maybe_data[1])
    }

    # Only wait when another attempt will follow; sleeping after the final
    # attempt would just delay the error below by the full backoff time.
    if (i < max_tries) {
      minutes <- backoff_wait(i)
      mins_txt <- if (minutes == 1) " minute." else " minutes."
      message(
        "\nRan into throttling limit. Retrying request in ",
        minutes, mins_txt
      )
      Sys.sleep(60 * minutes)
    }
  }
  stop("\n\nRan into throttling limit 10 times, stopping")
}
# Perform a single GET request against the InCites API.
#
# `url` is the full request URL, `key` is sent in the "X-TR-API-APP-ID"
# header, and `...` is forwarded to httr::GET().
# Returns the first element of `api$rval` in the parsed JSON response when
# that element is a data frame, otherwise NULL.
# Raises an error with message "limit" when the response body reports a
# rate limit quota violation, and an error carrying the HTTP status message
# for any other HTTP error.
one_incites_req <- function(url, key, ...) {
  response <- httr::GET(
    url,
    ua(),
    httr::add_headers(c("X-TR-API-APP-ID" = key)),
    ...
  )
  raw_txt <- httr::content(response, as = "text", encoding = "UTF-8")

  # Checked before the generic HTTP error test so that throttling is always
  # signalled as "limit", which the retry logic looks for.
  if (grepl("rate limit quota violation", raw_txt, ignore.case = TRUE)) {
    stop("limit")
  }

  if (httr::http_error(response)) {
    # http_status() returns a list (category, reason, message). Passing the
    # whole list to stop() pastes the fields together with no separator, so
    # signal only the human-readable message.
    stop(httr::http_status(response)$message)
  }

  json_resp <- jsonlite::fromJSON(raw_txt)
  maybe_data_frame <- json_resp$api$rval[[1]]
  if (is.data.frame(maybe_data_frame)) maybe_data_frame else NULL
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.