#' Iterate a Dimensions DSL query until all requested records are retrieved
#'
#' Takes the arguments of a `dim_request` call together with its first
#' (scoping) response and re-sends the query in chunks, appending
#' `limit <n> skip <m>` to the DSL text on every iteration. At most the first
#' 50,000 records are retrieved.
#'
#' Important: `query` must not contain `limit` or `skip` itself; both are
#' calculated here from the function arguments.
#'
#' Behaviour worth knowing:
#' * HTTP 429 (too many requests) is retried after a 30 second pause, up to
#'   five times per request, before it is treated as a failure.
#' * Any other non-200 status raises a warning describing the problem,
#'   optionally writes an error log (see `logs`) and then stops.
#' * If the API reports that a chunk is too large, the chunk is re-requested
#'   in smaller pieces sized by `lim_shrink()`; the results of that iteration
#'   are then stored as a list of pieces in `data[[i]]`.
#'
#' @param dim_token Token used for the `Authorization: JWT <token>` header.
#' @param endpoint String containing the Dimensions API endpoint URL.
#' @param query Text string of the DSL query, without `limit` / `skip`.
#' @param skip Number of records to offset. Values above 50000 are an error
#'   unless `force` is `TRUE`, in which case 50000 is used.
#' @param limit Maximum number of records; `0` means "no explicit limit".
#'   Values above 50000 are an error unless `force` is `TRUE`, in which case
#'   50000 is used.
#' @param pause Seconds to sleep after each request.
#' @param force Logical. If `TRUE`, requests that exceed the 50k cap are
#'   reduced to the first 50k records instead of failing, and iterations that
#'   return a query error are skipped with a warning instead of ending the
#'   pull.
#' @param logs Logical. If `TRUE`, a text dump of the failing query and
#'   response is written to `./error_logs_<timestamp>.txt` when a request
#'   fails with a non-200 status.
#' @param req The first response generated by `dim_request`; used to read
#'   `_stats$total_count` and `_warnings`.
#' @return A list with elements `data` (list of per-iteration results),
#'   `item` (the data source named in the query), `query` and `total_count`.
#'   If an iteration reports a query error and `force` is `FALSE`, only the
#'   bare list of results collected so far is returned.
#'
iterative_request <- function(dim_token,
                              endpoint = 'https://app.dimensions.ai/api/dsl/v2',
                              query,
                              skip,
                              limit,
                              pause,
                              force,
                              logs,
                              req) {
  max_records <- 50000   # hard cap on records retrievable via skip/limit
  max_retries <- 5L      # how often a single request is retried on HTTP 429
  retry_wait <- 30       # seconds to wait before retrying after HTTP 429
  too_large_msg <- 'The response generated by your query is too large'
  truncated_msg <- 'The result of this query was too large and had to be truncated'

  # ---- local helpers -------------------------------------------------------

  # Build a log file path whose timestamp is valid on every platform
  # (raw Sys.time() contains ':' and spaces, which Windows rejects).
  log_path <- function(prefix) {
    paste0('./', prefix, '_', format(Sys.time(), '%Y%m%d_%H%M%S'), '.txt')
  }

  # TRUE only if `text` exists and its first element contains `pattern`.
  # Guards against NULL / empty input, for which a bare grepl() returns
  # logical(0) and would crash an `if`.
  text_matches <- function(pattern, text) {
    if (is.null(text) || length(text) == 0) {
      return(FALSE)
    }
    any(grepl(pattern, text[[1]], fixed = TRUE))
  }

  # First entry of errors$query$details of a parsed response, or NULL.
  first_detail <- function(parsed) {
    details <- tryCatch(parsed[['errors']][['query']][['details']],
                        error = function(e) NULL)
    if (length(details) == 0) {
      return(NULL)
    }
    details[[1]]
  }

  # POST one DSL query; on HTTP 429 wait and re-send the same query.
  post_query <- function(dsl) {
    attempt <- 0L
    repeat {
      resp <- httr::POST(endpoint,
                         httr::add_headers(Authorization = paste('JWT', dim_token)),
                         body = dsl,
                         encode = 'json')
      if (resp$status_code != 429 || attempt >= max_retries) {
        return(resp)
      }
      attempt <- attempt + 1L
      message('Too many requests. Taking 30 second break then will resume.')
      Sys.sleep(retry_wait)
    }
  }

  # Parse the body of a successful response into a nested list.
  parse_body <- function(resp) {
    jsonlite::fromJSON(httr::content(resp, 'text', encoding = 'UTF-8'),
                       flatten = TRUE)
  }

  # Warn about a non-200 response, optionally dump `entries` to a log file,
  # then stop. `label` identifies the (sub-)iteration in the message.
  abort_on_http_error <- function(resp, label, entries) {
    code <- resp$status_code
    if (code %in% names(error_messages)) {
      msg <- error_messages[as.character(code)]
    } else {
      msg <- paste('Error on iteration', label, '. Please contact Dimensions Support and provide the following error message:', httr::content(resp))
    }
    warning(msg, call. = FALSE)
    if (logs == TRUE) {
      utils::capture.output(entries, file = log_path('error_logs'))
    }
    stop(paste0('Dimensions API request failed with HTTP status ', code, '.'),
         call. = FALSE)
  }

  # ---- scope the request ---------------------------------------------------

  # Data source (e.g. publications, grants) is the second token of the query.
  dsrc <- strsplit(query, ' ')[[1]][2]

  first_resp <- httr::content(req)
  total_count <- first_resp$`_stats`$total_count
  api_warnings <- first_resp$`_warnings`

  if (is.null(total_count)) {
    stop('The initial response does not contain _stats$total_count; cannot iterate.',
         call. = FALSE)
  }

  # Requests beyond the 50k cap are refused unless force = TRUE.
  beyond_cap <- (limit == 0 && total_count > max_records) ||
    limit > max_records ||
    skip > max_records
  if (beyond_cap && force == FALSE) {
    warning('Records requested go beyond the 50k limit. Please review your request and see if there are ways to alter your query to within these limits or set force = TRUE to get the first 50k records. If you must have > 50k records, please contact Dimensions support for additional options.')
    stop('Records requested go beyond the 50k limit.', call. = FALSE)
  }

  # Nothing to iterate over: tell the user (and keep any API warnings).
  if (total_count == 0) {
    resp <- 'No results. Please check your query.'
    if (!is.null(api_warnings)) {
      utils::capture.output(api_warnings, file = log_path('dsl_warnings'))
      resp <- paste(resp, 'Warnings detected -- please check the dsl_warnings output file in the logs directory of the package folder.')
    }
    warning(resp)
    stop(resp, call. = FALSE)
  }

  # ---- plan the iterations -------------------------------------------------

  # With force = TRUE an oversized limit / skip is reduced to the cap.
  if (force == TRUE && limit > max_records) {
    limit <- max_records
  }
  s <- min(skip, max_records)

  # Total records to cover; never beyond the 50k cap (this matters when
  # force = TRUE, limit == 0 and the query has more than 50k hits).
  tr <- if (limit == 0) total_count else min(limit, total_count)
  tr <- min(tr, max_records)

  # Number of records still to fetch.
  # NOTE(review): kept as in the original -- when skip exceeds tr the full tr
  # is used rather than tr - skip. Confirm the intended skip/limit semantics.
  d <- if (s > tr) tr else tr - s

  chunksize <- get_chunksize(tr)

  # Per-request limit: the chunk size, unless a smaller explicit limit was set.
  iter_lim <- if (limit == 0 || limit > chunksize) chunksize else limit

  iterations <- ceiling(d / iter_lim)

  results <- list()
  message(paste('Iterating request over', iterations, 'iteration(s) of up to', iter_lim, 'records each.'))

  # Queries/responses that fail are collected here for troubleshooting.
  error_logs <- list()

  # ---- iterate -------------------------------------------------------------

  # seq_len() so that zero iterations means zero passes (1:0 would run twice).
  for (i in seq_len(iterations)) {
    error_logs[[i]] <- list()
    iterquery <- paste(query, 'limit', iter_lim, 'skip', s, sep = ' ')
    iterreq <- post_query(iterquery)

    if (iterreq$status_code != 200) {
      error_logs[[i]] <- list(query = iterquery, response = iterreq)
      abort_on_http_error(iterreq, i, error_logs)
    }

    iterdata <- parse_body(iterreq)

    # Allow the API to breathe
    Sys.sleep(pause)

    if (!('errors' %in% names(iterdata))) {
      # Clean iteration: store the chunk and move the offset forward.
      results[[i]] <- iterdata[[dsrc]]
      message(paste0('Iteration ', i, ' of ', iterations, ' completed.'))
      s <- s + iter_lim
    } else {
      detail <- first_detail(iterdata)
      detail_text <- if (is.null(detail)) 'No error details returned by the API.' else detail

      if (text_matches(too_large_msg, detail)) {
        # The chunk is too large for one response: split it into pieces.
        message(paste('Iteration', i, 'too large. Chunking...', sep = ' '))
        sub_lim <- lim_shrink(query, iter_lim, s, endpoint, dim_token)
        if (!is.numeric(sub_lim) || length(sub_lim) != 1 || is.na(sub_lim) || sub_lim < 1) {
          stop('Could not determine a smaller chunk size for iteration ', i, '.',
               call. = FALSE)
        }

        chunk_end <- s + iter_lim
        pieces <- list()
        l <- 0L
        # Walk the whole chunk, including a final partial piece when iter_lim
        # is not a multiple of sub_lim.
        while (s < chunk_end) {
          l <- l + 1L
          piece_lim <- min(sub_lim, chunk_end - s)
          subquery <- paste(query, 'limit', piece_lim, 'skip', s, sep = ' ')
          subreq <- post_query(subquery)

          if (subreq$status_code != 200) {
            error_logs[[i]][[l]] <- list(query = subquery, response = subreq)
            abort_on_http_error(subreq, paste(i, '-', l), error_logs)
          }

          subdata <- parse_body(subreq)
          Sys.sleep(pause)

          if ('errors' %in% names(subdata)) {
            warning(paste('Error in iteration', i, '-', l, 'results', s, 'through', s + piece_lim, 'not processed.', sep = ' '))
          } else {
            pieces[[length(pieces) + 1L]] <- subdata[[dsrc]]
          }
          s <- s + piece_lim
        }
        results[[i]] <- pieces
      } else if (text_matches(truncated_msg, iterdata[['_warnings']])) {
        # NOTE(review): the original takes no action here -- nothing is stored
        # and the offset is not advanced. Kept as is; confirm the intent.
      } else if (force == TRUE) {
        # Some other query error: skip this chunk and carry on.
        warning(paste('Error in iteration', i, 'results', s, 'through', s + iter_lim, 'not processed.', sep = ' '))
        warning(detail_text)
        s <- s + iter_lim
      } else {
        # Some other query error and force = FALSE: hand back what we have.
        # NOTE(review): this returns the bare results list, not the
        # list(data, item, query, total_count) structure used on success.
        warning(paste('Failure at iteration ', i, 'on query:', iterquery, '. Results incomplete. To skip this iteration try again with force = TRUE.'))
        warning(detail_text)
        return(results)
      }
    }

    # Shrink the next request so it does not run past the last record.
    if (s + iter_lim > tr) {
      iter_lim <- abs(tr - s)
    }
  }

  list(data = results, item = dsrc, query = query, total_count = total_count)
}
#' Unpack the response of a Dimensions DSL function call
#'
#' Looks at the first space-delimited token of `query` to determine which DSL
#' function was called and parses the response accordingly.
#'
#' @param req The first response generated by `dim_request`.
#' @param query A properly formatted text string of the DSL query.
#'
#' @return For `classify`, `extract_concepts` and `extract_grants`, the first
#'   element of the parsed JSON response. For `extract_affiliations`, a data
#'   frame with one row per matched institute (or a row of `NA`s for an empty
#'   institute entry). `NULL` (invisibly) if the query starts with none of
#'   these function names.
#'
function_request <- function(req, query) {
  # The DSL function name is the first token of the query.
  fn_name <- strsplit(query, ' ')[[1]][1]

  # These functions return their payload as the first item of the response.
  if (grepl('classify|extract_concepts|extract_grants', fn_name)) {
    parsed <- jsonlite::fromJSON(httr::content(req, 'text', encoding = 'UTF-8'),
                                 simplifyVector = TRUE)
    return(parsed[[1]])
  }

  # Anything other than extract_affiliations is not handled here.
  if (!grepl('extract_affiliations', fn_name)) {
    return(invisible(NULL))
  }

  resp <- jsonlite::fromJSON(httr::content(req, 'text', encoding = 'UTF-8'),
                             flatten = TRUE)

  # Zero-row frame that fixes the column names and types of the result.
  template <- data.frame(m.affiliation_part = character(),
                         institute.id = character(),
                         institute.name = character(),
                         institute.city = character(),
                         institute.state = character(),
                         institute.country = character(),
                         metadata.requires_manual_review = logical())

  # Collect one small data frame per institute and bind once at the end
  # instead of growing the result inside the loop.
  rows <- list()
  for (m in resp[['results']][['matches']]) {
    for (inst in m[['institutes']]) {
      if (length(inst) == 0) {
        # Empty institute entry: keep the affiliation text, blank the rest.
        row <- data.frame(m.affiliation_part = m$affiliation_part,
                          institute.id = NA,
                          institute.name = NA,
                          institute.city = NA,
                          institute.state = NA,
                          institute.country = NA,
                          metadata.requires_manual_review = FALSE)
      } else {
        # data.frame(m$affiliation_part) is deliberately left unnamed: the
        # column name `m.affiliation_part` is derived from this expression.
        row <- dplyr::bind_cols(data.frame(m$affiliation_part), data.frame(inst))
      }
      rows[[length(rows) + 1L]] <- row
    }
  }

  do.call(dplyr::bind_rows, c(list(template), rows))
}
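# NOTE: the two lines below appear to be web-page residue rather than R source;
# they are kept as comments so that the file still parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.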