R/helpers.R

Defines and documents the internal helper functions internet_check(), api_to_df(), api_link(), clean_api_df(), and skip_if_api_message().

#' Checks whether the user has internet access, and returns a helpful message if not.
#'
#' @return Throws an informative error if not connected to the internet
#' @keywords internal

internet_check <- function() {
  if (!curl::has_internet()) { # nocov start
    stop(paste0(
      "No internet connection detected - ",
      "please connect to the internet and try again"
    ))
  } # nocov end
}
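
A minimal usage sketch (the caller mx_fetch() is hypothetical): guards like this are typically called at the top of user-facing functions, so the user sees a clear error before any network request is attempted.

# Hypothetical caller: fail fast with a clear message when offline
mx_fetch <- function(url) {
  internet_check() # stops with an informative error when offline
  httr::GET(url)
}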

#' Convert API data to data frame
#'
#' @param url API endpoint from which to extract and format data
#'
#' @return Raw API data, parsed from JSON
#' @keywords internal
#'
#' @importFrom dplyr %>%

api_to_df <- function(url) {
  details <- httr::RETRY(
    verb = "GET",
    times = 3,
    url = url,
    httr::timeout(30)
  )

  httr::stop_for_status(
    details,
    task = paste(
      "extract data from API. As this is usually due to current user load,",
      "please try again in a little while, or use the maintained",
      "static daily snapshot (available for medRxiv only)"
    )
  )

  code <- details$status_code

  msg <- httr::content(details, as = "text", encoding = "UTF-8")

  if (code == 200 &&
    msg == "Error : (2002) Connection refused") { # nocov start
    stop(paste(
      "API connection refused.",
      "As this is usually due to current user load,",
      "please try again in a little while, or use the maintained",
      "static daily snapshot (available for medRxiv only)"
    ))
  } # nocov end

  # Extract the cursor from the URL and check whether the first page is empty.
  # This causes an error only if the very first page, indicated by
  # cursor == "0", is empty. This approach allows for elegant handling of the
  # discrepancy between the expected total number of records as per the
  # metadata and the actual number available, while maintaining the
  # informative error if the first page is empty.
  cursor <-
    stringr::str_split(url, "/") %>%
    unlist() %>%
    dplyr::last()

  if (code == 200 && grepl("no posts found", msg) && cursor == "0") {
    stop(paste(
      "No records found. Please double check your date range,",
      "as this is the usual cause of this error."
    ))
  }

  # Return the response body parsed from JSON
  details %>%
    httr::content(as = "text", encoding = "UTF-8") %>%
    jsonlite::fromJSON()
}
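
A usage sketch with a literal endpoint (the same style of URL queried in skip_if_api_message() below). The bioRxiv API wraps the matching records in a collection element, alongside messages metadata.

# Fetch the first page (cursor 0) of medRxiv records for a one-week window
res <- api_to_df("https://api.biorxiv.org/details/medrxiv/2020-01-01/2020-01-07/0")
head(res$collection) # records arrive as a data frame inside the parsed list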

#' Create link for API
#'
#' @param ... Arguments to specify the path to the API endpoint
#'
#' @return Formatted link to API endpoint
#' @keywords internal

api_link <- function(...) {
  path_arg <- c(...)

  httr::modify_url("https://api.biorxiv.org/",
    path = c(
      "details",
      path_arg
    )
  )
}
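
For example, the call below (the date range is illustrative) assembles the same style of endpoint queried elsewhere in this file:

api_link("medrxiv", "2020-01-01", "2020-01-07", "0")
#> [1] "https://api.biorxiv.org/details/medrxiv/2020-01-01/2020-01-07/0"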

#' Helper function to clean data from the API and make it compatible with mx_search()
#'
#' @param df Raw data frame from the API
#'
#' @return Cleaned data frame
#' @keywords internal
#'
#' @importFrom dplyr %>%
#' @importFrom rlang .data

clean_api_df <- function(df) {
  # Add a record counter
  df$node <- seq_len(nrow(df))

  # Drop the API's "type" column
  df <- df %>%
    dplyr::select(-c(.data$type))

  # Build versioned paths to the abstract page and the full-text PDF
  df$link <- paste0("/content/", df$doi, "v", df$version, "?versioned=TRUE")
  df$pdf <- paste0("/content/", df$doi, "v", df$version, ".full.pdf")

  # Normalise the casing of text fields
  df$category <- stringr::str_to_title(df$category)
  df$authors <- stringr::str_to_title(df$authors)
  df$author_corresponding <- stringr::str_to_title(df$author_corresponding)

  # Combine the server name and paths into full URLs
  df$link_page <- paste0("https://www.", df$server, ".org", df$link)
  df$link_pdf <- paste0("https://www.", df$server, ".org", df$pdf)

  # Drop the intermediate columns now that the full URLs are assembled
  df <- df %>%
    dplyr::select(-c(.data$server, .data$link, .data$pdf))

  df
}
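
A sketch with a single hypothetical raw record, assuming the column subset that clean_api_df() actually touches (doi, version, server, type, category, authors, author_corresponding):

raw <- data.frame(
  doi = "10.1101/2020.01.01.000000", version = 1, server = "medrxiv",
  type = "new", category = "epidemiology",
  authors = "doe, j.", author_corresponding = "doe, j.",
  stringsAsFactors = FALSE
)
clean_api_df(raw)[, c("category", "link_page", "link_pdf")]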


#' Skips API tests if the API isn't working correctly
#'
#' @keywords internal

skip_if_api_message <- function() {
  details <- httr::RETRY(
    verb = "GET",
    times = 3,
    url = "https://api.biorxiv.org/details/medrxiv/2020-06-21/2020-08-28/45",
    httr::timeout(30)
  )
  code <- details$status_code

  msg <- httr::content(details, as = "text", encoding = "UTF-8")

  if (code == 200 && msg == "Error : (2002) Connection refused") {
    testthat::skip("API connection refused") # nocov
  }
}
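
A sketch of how this guard slots into a testthat test (the test name and expectation are illustrative):

testthat::test_that("details endpoint returns parsed JSON", {
  skip_if_api_message() # skip, rather than fail, when the API refuses connections
  res <- api_to_df(api_link("medrxiv", "2020-06-21", "2020-08-28", "45"))
  testthat::expect_true(is.list(res))
})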
