# Chunk defaults: merge source and output into one block, prefix output "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

library(infx)
library(rvest)
library(knitr)
# Extract the links listed in the "openbis" section of an Rd object.
#
# Looks for a \section whose title is "openbis" (compared case-insensitively)
# and collects every \href that sits directly inside an \itemize block of that
# section.
#
# x: an Rd object, e.g. one entry of the list returned by tools:::fetchRdDB().
#
# Returns a list with one entry per \href, each holding the \href arguments
# converted with as.character(), or NULL if there is no openbis section or it
# holds no such links. Stops with an error if more than one openbis section is
# present.
get_openbis_items <- function(x) {
  sections <- tools:::RdTags(x) == "\\section"

  if (any(sections)) {
    # The rendered title is collapsed to a single string and compared with
    # identical(), so every section yields exactly one TRUE/FALSE. A title
    # that renders to zero or several lines can therefore no longer turn the
    # logical mask into a list or matrix (as the former sapply() could).
    sections[sections] <- vapply(x[sections], function(sec) {
      title <- tools:::.Rd_get_text(sec[[1L]])
      title <- paste(title[nzchar(title)], collapse = " ")
      identical(tolower(title), "openbis")
    }, logical(1L))
  }

  n_hits <- sum(sections)

  if (n_hits > 1L) {
    stop("expecting one or zero openbis sections")
  }

  if (n_hits == 0L) {
    return(NULL)
  }

  # Second element of a \section is its body; the first one is the title
  obis <- x[[which(sections)]][[2L]]
  is_itemize <- tools:::RdTags(obis) == "\\itemize"

  unlist(
    lapply(obis[is_itemize], function(y) {
      url_hits <- tools:::RdTags(y) == "\\href"
      if (!any(url_hits)) {
        return(NULL)
      }
      # Convert each argument of every \href to character
      lapply(y[url_hits], sapply, as.character)
    }),
    recursive = FALSE
  )
}

# Collect the openBIS links documented in the help pages of the installed
# package: one entry per \href, holding its arguments as character values.
pkg <- "infx"
rd_db <- tools:::fetchRdDB(file.path(find.package(pkg), "help", pkg))
api_calls <- unlist(lapply(rd_db, get_openbis_items), recursive = FALSE)

# vapply() guarantees character vectors here. With sapply(), an empty
# `api_calls` yields list() and the strsplit() call below fails with
# "non-character argument"; entries that are not plain strings now raise an
# error right away instead of silently changing the result type.
urls <- vapply(api_calls, `[[`, character(1L), 1L)

# Second \href argument, split at ":"
api_calls <- strsplit(vapply(api_calls, `[[`, character(1L), 2L), ":")
# Scrape the documentation page behind every distinct API URL and summarise
# which of the methods listed there are covered by the package.
#
# Each entry of `info` is a list with
#   title:       text of the heading selected by the <h2> XPath,
#   description: page description up to its first period, followed by a
#                markdown link to the page,
#   table:       the per-method rows (name, status, description) bound
#                together with rbind(),
#   status:      rounded percentage of rows whose status is "implemented",
#   link:        markdown link to the page, labelled with the title minus a
#                leading "Interface ".
info <- lapply(sort(unique(urls)), function(x) {
  docs <- read_html(x)
  # NOTE(review): absolute XPath, tied to the layout of the scraped pages
  api <- html_nodes(docs,
                    xpath = "/html/body/div[4]/div[2]/ul/li/ul[2]/li/table")

  # Only the first matched table is used; it is walked row by row
  tab <- apply(html_table(api)[[1]], 1, function(y) {
    # Second column of the current row
    y <- y[2]
    # Method name: lazy match from the start up to a "(", with "(" removed
    fun_name <- sub("\\($", "", regmatches(y, regexpr("^.+?\\(", y)))
    # Description: the text following a ")" + newline, with that prefix removed
    desc <- sub("\\)\n", "", regmatches(y, regexpr("\\)\n.+$", y)))

    # Rows whose description starts with "Deprecated" yield NULL
    if (length(desc) && grepl("^Deprecated", desc))
      return(NULL)

    # A method counts as implemented if its name is the second ":"-separated
    # field of an API call documented for this URL
    found <- fun_name %in% sapply(api_calls[urls == x], `[`, 2L)

    # Description clean-up, innermost call first: drop a leading "/** ", drop
    # newlines, drop a "ch.<...>.dto." prefix and turn
    # "IDssServiceRpcScreening." into "IDssServiceRpcScreening "
    list(`Method name` = fun_name,
         Status = ifelse(found, "implemented", "skipped"),
         Description = sub("IDssServiceRpcScreening\\.",
                           "IDssServiceRpcScreening ",
                           sub("ch\\..+\\.dto\\.", "",
                               gsub("\n", "",
                                    sub("^/\\*\\* ", "", desc)))))
  })

  title <- html_text(html_nodes(docs, xpath = "/html/body/div[3]/h2"))
  desc <- html_nodes(docs, xpath = "/html/body/div[4]/div[1]/ul/li/div")
  # Keep only the text before the first period
  desc <- strsplit(html_text(desc), "\\.")[[1]]
  desc <- paste0(desc[1], ". More information available [here](", x, ").\n")
  # One row per list in `tab`
  tabl <- do.call(rbind, tab)
  link <- paste0("[", sub("^Interface ", "", title), "](", x, ")")

  list(title = title,
       description = desc,
       table = tabl,
       # Column 2 of the table is the Status field
       status = round(mean(tabl[, 2] == "implemented") * 100),
       link = link)
})

This vignette serves to document which sections of the openBIS API are implemented by the presented client package and which functionality was omitted. Furthermore, the basic mechanisms for making requests to openBIS are explained such that a user who wishes to add some of the omitted API functions has some information on how to extend infx. Some general documentation on the API, offered by the openBIS developers, is available here.

Creating a REST call

The basic functionality powering all Representational state transfer (REST) calls of infx is provided by do_requests_serial() and do_requests_parallel(). Both functions take roughly the same input and produce identical results, but differ in how curl is called. The former function makes requests in a sequential fashion, iteratively calling curl::curl_fetch_memory() and the latter performs asynchronous requests, using curl::curl_fetch_multi() to assemble all requests and curl::multi_run() to carry them out in asynchronous parallel fashion.

Much of the behavior of do_requests_serial() and do_requests_parallel() can be customized using three functions passed as arguments create_handle, check and finally. Additionally, do_requests_*() takes a vector of urls and an optional object bodies which is expected to be of the same length as urls. Urls can either be passed as a character vector or a list of unevaluated function calls which will be evaluated using base::eval() shortly before being used. Both functions support retrying failed requests up to n_try times.

For constructing the requests, both urls and bodies are iterated together. First, the function passed as create_handle, which receives as input the current entry of the bodies object, is used to construct a curl handle using curl::new_handle(). Together with the current URL entry, curl::curl_fetch_*() is called and the resulting object is passed as first argument to the check function, which receives as second argument the current entry of the bodies object. The function passed as check argument should make sure the request completed successfully and in case of failure return a simpleError object, created by base::simpleError(). In case of success, it should return the part of the curl response object that is of further interest (most likely the content entry). Next the finally function is called on the object returned by check to do some final processing (e.g. parsing JSON or reading a binary file). If the check function signaled a failure and the number of available tries as specified by n_try is not used up, the request is made again. If the allowed number of tries is exceeded, a warning is issued.

Asynchronous requests are implemented by first adding n_con curl handles to a new multi handle and starting the downloads by calling curl::multi_run(). For each successful request, a new handle is added to the pool using the done callback function of curl::multi_add(). In conjunction with passing urls as unevaluated function calls, this helps with urls that have a limited lifetime in that the URL is only created right before being consumed. Instead of adding all requests at the same time and letting curl handle queuing, only n_con requests are handled by curl at any given time.

A very basic use of do_requests_serial() is shown in the following example. The default function of the create_handle argument creates a new clean curl handle with no options set. The default function of the check argument makes sure that the status code is equal to 200 and returns the content entry of the list returned by curl::curl_fetch_memory(). The default function for finally is base::identity(). In order to receive a human-readable result instead of a raw vector, a function pretty_json() is created, which converts the raw vector to a character vector yielding a JSON string.

# Single request against httpbin
urls <- "https://httpbin.org/ip"

# Turn the raw response body into an indented JSON string
pretty_json <- function(x) {
  json_string <- rawToChar(x)
  jsonlite::prettify(json_string)
}

do_requests_serial(urls, finally = pretty_json)

Note that a list of length 1 is returned by do_requests_serial(). This is because the do_requests_*() functions are vectorized over requests and therefore return a list with as many entries as requests. A slightly more involved example for creating a request using do_requests_serial() is as follows.

# Two POST requests, each with its own JSON body
urls <- rep("https://httpbin.org/post", times = 2L)
json <- list(
  list(a = "foo"),
  list(a = "bar")
)

# Build a curl handle that posts `x` as JSON
post_handle <- function(x) {
  payload <- charToRaw(jsonlite::toJSON(x))
  handle <- curl::new_handle(postfields = payload)
  curl::handle_setheaders(handle, "Content-Type" = "application/json")
}

# Parse the raw response and keep its `json` entry
process_json <- function(x) {
  parsed <- jsonlite::fromJSON(rawToChar(x))
  parsed$json
}

res <- do_requests_serial(
  urls,
  json,
  create_handle = post_handle,
  finally = process_json
)
identical(res, json)

In order to customize the curl handle, a create_handle function is supplied, which receives for each request the corresponding entry of the object passed as bodies argument. The finally function in this example parses the returned JSON object into a list. Since for POST requests, httpbin mirrors the POST data, the initial json list is returned.

Creating a JSON-RPC request

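Building on functionality offered by the do_requests_*() functions, make_requests() helps with constructing JSON-RPC calls. As per the JSON-RPC 2.0 specification, a request object is a JSON string with the following members^[JSON-RPC 2.0 Specification Rev. 2013-01-04. JSON-RPC Working Group]:

* jsonrpc: the version of the JSON-RPC protocol, i.e. "2.0"
* method: the name of the method to be invoked
* params: the parameter values passed to the method
* id: an identifier used to match a response to its request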

In order to create a list of such request objects, the arguments methods, params, ids and version, which map to method, params, id and jsonrpc, respectively, are combined and converted to JSON. OpenBIS API endpoint urls are constructed with the helper function api_url() which can receive arguments api_endpoint and host_url, forwarded by make_requests(). The argument n_con is used to specify the maximal number of simultaneous connections made to the server.

As make_requests() is designed to construct several requests at a time, the arguments methods and params are vectorized. If any of the passed objects is of length 1, it is replicated using base::rep() to the required length such that all three objects are of the same length. Care has to be taken with the list passed as params such that its length corresponds to the number of requests. This means that if only a single set of parameters is passed, this list has to be wrapped by another list.

As a last argument, a finally function can be passed, which defaults to process_json(). This function first converts all typed JSON objects to json_class objects using as_json_class(), resolves object references using resolve_references() in order to make all json_class objects self-contained and then creates a json_vec object using as_json_vec(), therefore allowing S3 method dispatch on lists of json_class objects. For more information on the specifics of this, please refer to the vignette "JSON object handling".

For single requests, a wrapper around make_requests() is available as make_request(). This function wraps the params argument in a list such that a list of length 1 is passed to make_requests() and returns the first entry of the list resulting from calling make_requests(). The following example shows how make_request() is used to implement the API method listProjects, which takes an access token passed as a list as its only argument.

# Obtain an access token for the API call below
token <- login_openbis()

# listProjects takes the token, wrapped in a list, as its only parameter
projects <- make_request(
  api_url(api_endpoint = "gis"),
  method = "listProjects",
  params = list(token)
)
print(projects, length = 10L)

logout_openbis(token)

To illustrate the use of make_requests(), the API method getDownloadUrlForFileForDataSet is implemented. This function generates a download URL for a file in a data set and requires a separate API call for each combination of data set and file path. In order to carry out several of these requests asynchronously, a list has to be passed as params argument such that each entry contains a list holding a login token, a dataset code and a file path.

token <- login_openbis()

dataset_codes <- c("20120629093035782-603380", "20121011143734361-1359915")
file_path <- "original/aThresholdedInfectionScoring_bBB01-1I.csv"

# One parameter set (token, data set code, file path) per request
params <- lapply(dataset_codes, function(x) list(token, x, file_path))

str(params)

# The argument is spelled out as `methods` (the name used by make_requests())
# instead of relying on partial matching of `method`.
donwload_urls <- make_requests(api_url(api_endpoint = "dsrg"),
                               methods = "getDownloadUrlForFileForDataSet",
                               params = params)
str(donwload_urls)

logout_openbis(token)

In addition to the JSON-RPC calls enabled by make_request()/make_requests(), the do_requests_*() functions are also used for (asynchronous) file downloads in fetch_files(). Slightly different functions are used as create_handle and check arguments but apart from that the logic remains largely the same.

Any arguments passed as ... to make_requests() will be forwarded to api_url() which creates an API endpoint URL and passes this on to the do_requests_*() functions. As all functions that issue API calls use make_requests(), this makes it possible to not only target the InfectX openBIS instance, but arbitrary openBIS servers that support the v1 JSON-RPC API. The following example serves to illustrate how this mechanism can be used to access the openBIS demo.

# Log in to a different openBIS host by overriding host_url
token <- login_openbis(
  "test_observer",
  "test_observer",
  host_url = "https://openbis-eln-lims.ethz.ch"
)

# Search on attribute "type" = ELN_PREVIEW, narrowed by a sample sub-criteria
gel_data <- search_openbis(
  token,
  search_criteria(
    attribute_clause("type", "ELN_PREVIEW"),
    sub_criteria = search_sub_criteria(
      search_criteria(attribute_clause(value = "WB_LEXA-ER-B112")),
      type = "sample"
    )
  )
)

# Fetch the files of the search result, reading each with magick
gel_img <- fetch_files(token, gel_data, reader = magick::image_read)

logout_openbis(token)

print(gel_img[[1L]])
attributes(gel_img[[1L]])

api_url() can accept arguments api_endpoint, host_url and full_url and ignores any further arguments. The first argument offers a selection of hard-coded API endpoints and in combination with the second argument is used to access one of them on the specified openBIS host. In case a URL that is not supported by the api_endpoint selector is desired, the complete URL can be passed as full_url which will simply be returned by api_url().

Summary of API methods

Currently no methods from the API sections

are implemented as they mostly deal with modifying metadata and creating new aggregation reports. The main focus of this package is retrieving data and therefore only methods from the API sections

# One markdown bullet per entry of `info`. cat() joins its two arguments with
# a single space, so each line reads "* <link>, <status> % implemented".
for (i in seq_along(info)) {
  with(
    info[[i]],
    cat(paste0("* ", link, ", ", status), "% implemented\n", sep = " ")
  )
}

are currently available. A more detailed overview of what functionality is implemented in this API client is given in the following sections.

# For every entry of `info`: a level-3 heading with the description, then the
# method table rendered by kable(), then a separating newline.
for (i in seq_along(info)) {
  with(info[[i]], {
    cat(paste0("### ", title, "\n", description, "\n"))
    print(kable(table))
    cat("\n")
  })
}


nbenn/infx documentation built on May 20, 2022, 7:44 a.m.