R/dim_request.R

Defines functions dim_request

Documented in dim_request

#'Send a DSL query to the Dimensions API and return the results as a list
#'
#'Validates the query, sends an initial request to the API and then hands over
#'either to the function-call handler (for queries starting with `classify`,
#'`extract_affiliations`, `extract_concepts` or `extract_grants`) or to the
#'iterative query protocol (for every other query).
#'
#'Important Note: Queries should not contain "limit" or "skip" as these are
#'calculated dynamically based on function arguments. A query that contains
#'either of them, or that ends with the "[all]" fieldset, is rejected before
#'any request is sent. A non-function query without a return clause gets
#'`return <source> [basics]` appended automatically.
#'
#'See documentation on the Dimensions Search Language (DSL) at
#'https://docs.dimensions.ai/dsl/ to learn more about how to write queries for
#'the Dimensions API.
#'
#'@param dim_token Is the token object created by dim_login in the Global environment.
#'@param endpoint Is a string containing the Dimensions API endpoint URL.
#'@param query Text string of the DSL query (a single character string). Please consult
#'the API Documentation for how to write DSL queries.
#'@param skip Specifies the number of records to offset (0 by default, max 50000).
#'@param limit Is the maximum returned documents (0 by default, max 50000).
#'@param pause Is the time dim_request waits in between requests. Default is 1.5s.
#'@param force Is a logical argument to determine whether or not to continue the data pull despite
#'errors that may arise during iterations. By default it is set to FALSE. In situations where the
#'query results are > 50k, force will prompt the first 50k results to be returned.
#'@param logs Is a logical which determines whether or not a text log of a failed initial request
#'(the query and the response) is written to the current working directory as
#'`error_logs_<timestamp>.txt`. It is also passed on to the iterative query protocol.
#'By default set to FALSE.
#'
#'@return A JSON-style nested list object
#'
#'@examples
#'\dontrun{
#'# Only the first 100 documents of the query are expected to be returned,
#'# because limit is set to 100.
#'dim_request(dim_token,
#'            query = 'search publications return publications',
#'            limit = 100,
#'            force = TRUE)
#'}
dim_request <- function(dim_token,
                        endpoint = 'https://app.dimensions.ai/api/dsl/v2',
                        query,
                        skip = 0,
                        limit = 0,
                        pause = 1.5,
                        force = FALSE,
                        logs = FALSE){

  # Fail early with a readable message instead of an opaque strsplit() error.
  if(!is.character(query) || length(query) != 1L || is.na(query)){
    stop('`query` must be a single, non-missing character string.', call. = FALSE)
  }

  # Every failure is signalled the same way: a warning (kept for callers that
  # listen for it) followed by an error that carries the same text, so the
  # error itself is never empty.
  fail <- function(msg){
    warning(msg, call. = FALSE)
    stop(msg, call. = FALSE)
  }

  # Dumps the query and the response of a failed initial request to a text
  # file in the working directory. The timestamp avoids spaces and colons so
  # that the file name is valid on every platform.
  write_error_log <- function(response){
    stamp <- format(Sys.time(), '%Y-%m-%d_%H-%M-%S')
    utils::capture.output(list(query = query, response = response),
                          file = paste0('./error_logs_', stamp, '.txt'))
  }

  # Tokenise on any run of whitespace so that leading blanks, double spaces or
  # line breaks do not shift the first two words of the query.
  tokens <- strsplit(trimws(query), '\\s+')[[1]]

  # Check query for function use (first word of the query)
  fun_pattern <- 'classify|extract_affiliations|extract_concepts|extract_grants'
  fun_check <- grepl(fun_pattern, tokens[1])

  # Data source named in the query (second word), used for the deprecations
  # lookup and for the default return clause.
  dsrc <- tokens[2]

  # Purely local query checks run first so that an invalid query is rejected
  # before any network traffic happens.

  #If query has limit and/or skip arguments, then stop and issue warning.
  if(grepl('limit [0-9]{1,}|skip [0-9]{1,}', query)){
    fail('Query does not need to contain "limit" or "skip". These can be specified in the function call. Please revise your query.')
  }

  #If query is asking for the "all" fieldset, stop and issue warning.
  if(grepl('\\[all\\]$', query)){
    fail('The "all" fieldset has been abandoned. Please specify a different fieldset in your return clause.')
  }

  # Deprecations check
  field_pattern <- '\\[\\w+\\+|\\w+\\+|\\w+\\]|\\[\\w+\\s\\+\\s|\\w+\\s\\+\\s'

  # If query includes field requests
  if(grepl(field_pattern, query)){

    ## Extract return fields from query and strip brackets, blanks and plus signs
    qfields <- stringr::str_replace_all(
      stringr::str_extract_all(query, field_pattern)[[1]],
      '\\[|\\]|\\s|\\+',
      ''
    )

    #Create vector of deprecated fields requested for this data source
    dep_fields <- deprecations[[dsrc]][deprecations[[dsrc]] %in% qfields]

    if(length(dep_fields) > 0){
      fail(paste0('The following fields are deprecated: ',
                  stringr::str_c(dep_fields, collapse = ', '),
                  '. Please reference the deprecated fields list at https://docs.dimensions.ai/dsl/1.31.0/releasenotes.html#deprecations-history for more information.'))
    }
  }
  #If not a function and query is missing return statement, add one.
  else if(!fun_check && !grepl('\\sreturn\\s\\w+', query)){
    query <- paste(query, 'return', dsrc, '[basics]')
  }

  #Endpoint check
  ep_check <- RCurl::url.exists(url = endpoint)

  # NOTE(review): the message below describes an unreachable endpoint, yet the
  # branch fires when url.exists() reports TRUE. The condition is kept exactly
  # as it was, because flipping it may reject the real API endpoint if that
  # endpoint does not answer an unauthenticated probe with success -- confirm
  # against the live service before changing it.
  if(ep_check == TRUE){
    fail('Could not resolve API endpoint URL. Please double-check check your endpoint URL and try again. If the problem persists please contact Dimensions support.')
  }

  #Might as well tell end-users to put on some tea...
  message('Depending on the size of your request, this may take some time...')

  #Query API initially to get total record count
  message('Sending initial request to API server...')

  req <- httr::POST(endpoint,
                    httr::add_headers(Authorization = paste("JWT", dim_token)),
                    body = query,
                    encode = 'json')

  #If initial status code isn't 200, stop and report corresponding message.
  if(req$status_code != 200){

    if(req$status_code %in% names(error_messages)){
      # Status code with a pre-baked error message
      msg <- error_messages[paste(req$status_code)]
    } else {
      # Unknown status code: report whatever the server sent back
      message('Please contact Dimensions support and reference the following warning message:')
      msg <- httr::content(req)
    }

    warning(msg)
    if(isTRUE(logs)){
      write_error_log(req)
    }
    stop(msg)
  }

  if(fun_check){
    #If request is a function call, we have to do things differently...
    message('Request is function call. Returning results.')
    result <- function_request(req, query)
  } else {
    #If not a function call, do the iterative query process
    message('Request is not function call -- running iterative query protocol.')
    result <- iterative_request(dim_token, endpoint, query, skip, limit, pause, force, logs, req)
  }

  message('Done.')
  return(result)

}# Close function
cheneypinata/dslr documentation built on Jan. 6, 2022, 11:27 p.m.