# R/utils.R

# Defines functions: get_chunksize, lim_shrink, blank2na, max_depth_df

# Human-readable messages for HTTP statuses other than 200.
# Named character vector; each name is the status code written as a string.
# NOTE(review): presumably looked up by status code after a failed request -
# confirm against the calling code.
error_messages <- c(
  "301" =
    "Endpoint is no longer valid. Please contact Dimensions for support.",
  "302" =
    "Endpoint is being redirected. Please contact Dimensions support for new endpoint address to use in future requests.",
  "400" =
    "Bad request. Please check your request and try again. If the issue persists, please contact Dimensions Support.",
  "401" =
    "Incorrect credentials. Please double check your credentials and endpoint URL. If the problem persists, please contact Dimensions Support for help.",
  "403" =
    "Request forbidden. Please recheck your endpoint URL and login credentials. If the problem persists contact Dimensions Support for help.",
  "404" =
    "API endpoint URL not found. Please check your API endpoint URL again and contact Dimensions Support if the issue persists.",
  "500" =
    "The API server experienced an internal error. Please try again later or contact Dimensions Support for assistance.",
  "502" =
    "Invalid response from upstream server. Please try again later or contact Dimensions Support if the issue persists.",
  "503" =
    "The API service is currently unavailable. Please try again later or contact Dimensions Support if the issue persists.",
  "504" =
    "Gateway timeout. Please try again or contact Dimensions Support if the issue persists."
)

# Deprecated field names.
# Named list with one character vector of field names per entry; the entry
# names are publications, grants, patents, clinical_trials and policy_docs.
deprecations <- list(
  "publications" = c(
    "author_affiliations", "category_ua", "FOR", "FOR_first",
    "HRCS_HC", "HRCS_RAC", "open_access", "open_access_categories",
    "RCDC", "references", "terms", "researchers"
  ),
  "grants" = c(
    "FOR", "FOR_first", "HRCS_HC", "HRCS_RAC", "project_num", "RCDC",
    "research_org_name", "researcher_details", "resulting_publication_ids",
    "terms", "title_language", "researchers"
  ),
  "patents" = c(
    "assignee_state_names", "filed_date", "filed_year", "FOR", "FOR_first",
    "funder_groups", "HRCS_HC", "HRCS_RAC", "RCDC", "status"
  ),
  "clinical_trials" = c(
    "FOR", "FOR_first", "funder_groups", "HRCS_HC",
    "HRCS_RAC", "investigator_details", "organizations", "RCDC", "researchers"
  ),
  "policy_docs" = c(
    "broad_research_areas", "city", "country", "FOR", "FOR_first", "grid_id",
    "health_research_areas", "HRCS_HC", "HRCS_RAC", "RCDC", "source_name",
    "state"
  )
)

# max_depth_df measures the vector depth of every column of df with
# purrr::vec_depth and returns the largest depth found.
#
# df: a data frame (or other two-dimensional object accepted by apply()).
#
# The columns are visited with apply(), which converts a data frame to a
# matrix before calling the function on each column.
max_depth_df <- function(df) {

  column_depths <- apply(df, 2, purrr::vec_depth)

  max(column_depths)

}

# blank2na replaces blank strings ('') with NA and leaves every other value
# untouched.
#
# x: a vector (typically character) that may contain '' entries.
#
# Returns x with the blank entries set to NA. The type and attributes of x are
# kept: an all-blank or empty character vector stays character, and a factor
# stays a factor. (ifelse() would build the result from the logical test and
# lose both.)
blank2na <- function(x) {

  # Entries that are already NA compare as NA to '', so exclude them explicitly
  # to get a mask that is strictly TRUE/FALSE
  is_blank <- !is.na(x) & x == ""

  x[is_blank] <- NA

  x

}

# lim_shrink re-sends the query with a progressively halved limit until the
# API answers with status 200, then returns the limit that worked so the
# caller can use it for further querying.
#
# query:     query text; ' limit <n> skip <skip>' is appended to it.
# lim:       the limit that failed; the first attempt uses ceiling(lim / 2).
# skip:      skip value appended to every attempt.
# endpoint:  URL the query is POSTed to.
# dim_token: token sent in the Authorization header as 'JWT <token>'.
#
# Returns the first (largest) halved limit that produced status 200.
# Stops with an error if the request still fails once the limit has been
# reduced to 1, because halving cannot shrink it any further.
lim_shrink <- function(query, lim, skip, endpoint, dim_token){

  repeat {

    lim_split <- ceiling(lim / 2)

    q <- paste(query, "limit", lim_split, "skip", skip, sep = " ")

    r <- httr::POST(endpoint,
                    httr::add_headers(Authorization = paste0("JWT ", dim_token)),
                    body = q,
                    encode = "json")

    status <- r$status_code

    if (status == 200) {
      return(lim_split)
    }

    # ceiling(1 / 2) is 1 again, so without this guard the same request would
    # be repeated forever
    if (lim_split <= 1) {
      stop("lim_shrink: request still returns status ", status,
           " with limit ", lim_split, "; the limit cannot be reduced further.",
           call. = FALSE)
    }

    # Halve from the value just tried, not from the original limit
    lim <- lim_split

  }#Close loop

}#Close function

# get_chunksize examines the limit number provided and determines the
# appropriate chunk size from the number of characters needed to write it.
#
# x: a single number (the limit).
#
# Returns 1, 10, 100, 1000 or 10000 for limits written with 1 to 5 characters,
# 1000 for anything longer, and NA when x is NA.
get_chunksize <- function(x){

  # Chunk sizes for limits written with 1, 2, 3, 4 and 5 characters, in order
  places <- c(1, 10, 100, 1000, 10000)

  if (is.na(x)) {
    return(NA)
  }

  # Write the number without scientific notation: as.character(1e5) is
  # "1e+05", which would be miscounted as a 5-character limit
  digits <- format(x, scientific = FALSE, trim = TRUE)

  num_size <- nchar(digits)

  if (num_size <= 5) {
    places[[num_size]]
  } else {
    1000
  }

}#Close function
# cheneypinata/dslr documentation built on Jan. 6, 2022, 11:27 p.m.