knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(infx) library(rvest) library(knitr)
# Extract the openBIS API references documented in a single Rd object.
#
# Looks for a section whose title is "openbis" (case-insensitive) and collects
# every \href found inside the \itemize lists of that section.
#
# @param x A parsed Rd object, as stored in a package's help database.
# @return A list holding one entry per \href (its arguments converted with
#   as.character()), or NULL if `x` has no openBIS section or that section
#   contains no \href. Signals an error if more than one openBIS section is
#   found.
get_openbis_items <- function(x) {

  is_openbis <- tools:::RdTags(x) == "\\section"

  if (any(is_openbis)) {
    # vapply() guarantees a logical vector; isTRUE() maps titles that are
    # empty, multi-element or NA to FALSE instead of corrupting the mask.
    is_openbis[is_openbis] <- vapply(
      x[is_openbis],
      function(sec) {
        isTRUE(tolower(tools:::.Rd_get_text(sec[[1L]])) == "openbis")
      },
      logical(1L)
    )
  }

  n_openbis <- sum(is_openbis)

  if (n_openbis > 1L) {
    stop("expecting one or zero openbis sections")
  }
  if (n_openbis == 0L) {
    return(NULL)
  }

  # Second element of a \section is its body; the first one is the title.
  obis <- x[[which(is_openbis)]][[2L]]
  is_itemize <- tools:::RdTags(obis) == "\\itemize"

  hrefs <- lapply(obis[is_itemize], function(y) {
    url_hits <- tools:::RdTags(y) == "\\href"
    if (any(url_hits)) {
      lapply(y[url_hits], sapply, as.character)
    } else {
      NULL
    }
  })

  unlist(hrefs, recursive = FALSE)
}

pkg <- "infx"
rd_db <- tools:::fetchRdDB(file.path(find.package(pkg), "help", pkg))

# One entry per documented API reference: c(<url>, <text of the link>).
api_calls <- unlist(lapply(rd_db, get_openbis_items), recursive = FALSE)

# vapply() keeps both results character vectors even when no reference was
# found; sapply() would return list() and make strsplit() fail.
urls <- vapply(api_calls, `[[`, character(1L), 1L)
api_calls <- strsplit(vapply(api_calls, `[[`, character(1L), 2L), ":")
# Scrape the Javadoc page behind every referenced API URL and record which of
# the listed methods are covered by the package documentation.
#
# For each unique entry of `urls`, the result holds a list with
#   title       - text of the page heading,
#   description - first sentence of the interface description plus a link,
#   table       - one row per non-deprecated method: name, status, description,
#   status      - percentage of tabulated methods marked "implemented",
#   link        - markdown link to the page.
info <- lapply(sort(unique(urls)), function(x) {

  docs <- read_html(x)

  # Method names referenced for this URL; computed once instead of per row.
  implemented <- vapply(api_calls[urls == x], `[`, character(1L), 2L)

  api <- html_nodes(
    docs, xpath = "/html/body/div[4]/div[2]/ul/li/ul[2]/li/table"
  )

  # Only the second column (signature followed by description) is used, so
  # iterate over it directly rather than coercing the table with apply().
  signatures <- as.character(html_table(api)[[1]][[2L]])

  tab <- lapply(signatures, function(y) {

    fun_name <- sub("\\($", "", regmatches(y, regexpr("^.+?\\(", y)))

    # Without a parsable method name there is nothing to tabulate.
    if (length(fun_name) == 0L) {
      return(NULL)
    }

    desc <- sub("\\)\n", "", regmatches(y, regexpr("\\)\n.+$", y)))

    if (length(desc) == 0L) {
      # Keep undocumented methods as a row with an empty description.
      desc <- ""
    } else if (grepl("^Deprecated", desc)) {
      return(NULL)
    }

    desc <- sub("^/\\*\\* ", "", desc)
    desc <- gsub("\n", "", desc)
    desc <- sub("ch\\..+\\.dto\\.", "", desc)
    desc <- sub("IDssServiceRpcScreening\\.", "IDssServiceRpcScreening ", desc)

    list(
      `Method name` = fun_name,
      Status = if (fun_name %in% implemented) "implemented" else "skipped",
      Description = desc
    )
  })

  title <- html_text(html_nodes(docs, xpath = "/html/body/div[3]/h2"))

  desc <- html_nodes(docs, xpath = "/html/body/div[4]/div[1]/ul/li/div")
  desc <- strsplit(html_text(desc), "\\.")[[1]]
  desc <- paste0(desc[1], ". More information available [here](", x, ").\n")

  # rbind() drops the NULL entries left by skipped rows.
  tabl <- do.call(rbind, tab)

  if (is.null(tabl)) {
    # Every row was skipped: fall back to an empty table of the same layout.
    tabl <- matrix(
      list(), nrow = 0L, ncol = 3L,
      dimnames = list(NULL, c("Method name", "Status", "Description"))
    )
  }

  status <- if (nrow(tabl) > 0L) {
    round(mean(unlist(tabl[, 2]) == "implemented") * 100)
  } else {
    0
  }

  link <- paste0("[", sub("^Interface ", "", title), "](", x, ")")

  list(title = title, description = desc, table = tabl, status = status,
       link = link)
})
This vignette serves to document which sections of the openBIS API are implemented by the presented client package and which functionality was omitted. Furthermore, the basic mechanisms for making requests to openBIS are explained such that a user who wishes to add some of the omitted API functions has some information on how to extend infx
. Some general documentation on the API, offered by the openBIS developers, is available here.
The basic functionality powering all Representational state transfer (REST) calls of infx
is provided by do_requests_serial()
and do_requests_parallel()
. Both functions take roughly the same input and produce identical results, but differ in how curl is called. The former function makes requests in a sequential fashion, iteratively calling curl::curl_fetch_memory()
and the latter performs asynchronous requests, using curl::curl_fetch_multi()
to assemble all requests and curl::multi_run()
to carry them out in asynchronous parallel fashion.
Much of the behavior of do_requests_serial()
and do_requests_parallel()
can be customized using three functions passed as arguments create_handle
, check
and finally
. Additionally, do_requests_*()
takes a vector of urls
and an optional object bodies
which is expected to be of the same length as urls
. Urls can either be passed as a character vector or a list of unevaluated function calls which will be evaluated using base::eval()
shortly before being used. Both functions support retrying failed requests up to n_try
times.
For constructing the requests, both urls
and bodies
are iterated together. First, the function passed as create_handle
, which receives as input the current entry of the bodies
object is used to construct a curl handle using curl::new_handle()
. Together with the current URL entry, curl::curl_fetch_*()
is called and the resulting object is passed as first argument to the check
function, which receives as second argument the current entry of the bodies
object. The function passed as check
argument should make sure the request completed successfully and in case of failure return a simpleError
object, created by base::simpleError()
. In case of success, it should return the part of the curl response object that is of further interest (most likely the content
entry). Next the finally
function is called on the object returned by check
to do some final processing (e.g. parsing JSON or reading a binary file). If the check
function signaled a failure and the number of available tries as specified by n_try
is not used up, the request is made again. If the allowed number of tries is exceeded, a warning is issued.
Asynchronous requests are implemented by first adding n_con
curl handles to a new multi handle and starting the downloads by calling curl::multi_run()
. For each successful request, a new handle is added to the pool using the done
callback function of curl::multi_add()
. In conjunction with passing urls as unevaluated function calls, this helps with urls that have a limited lifetime in that the URL is only created right before being consumed. Instead of adding all requests at the same time and letting curl handle queuing, only n_con
requests are handled by curl at any given time.
A very basic use of do_requests_serial()
is shown in the following example. The default function of the create_handle
argument creates a new clean curl handle with no options set. The default function of the check
argument makes sure that the status code is equal to 200 and returns the content
entry of the list returned by curl::curl_fetch_memory()
. The default function for finally
is base::identity()
. In order to receive a human-readable result instead of a raw vector, a function pretty_json()
is created, which converts the raw vector to a character vector yielding a JSON string.
# Single GET request against httpbin.
urls <- "https://httpbin.org/ip"

# Convert the raw response body to text and pretty-print it as JSON.
pretty_json <- function(x) {
  json_text <- rawToChar(x)
  jsonlite::prettify(json_text)
}

do_requests_serial(urls, finally = pretty_json)
Note that a list of length 1 is returned by do_requests_serial()
. This is because the do_requests_*()
functions are vectorized over requests and therefore return a list with as many entries as requests. A slightly more involved example for creating a request using do_requests_serial()
is as follows.
# Two POST requests, each carrying its own JSON body.
urls <- rep("https://httpbin.org/post", 2)
json <- list(list(a = "foo"), list(a = "bar"))

# Build a curl handle that posts `x` serialized as JSON.
post_handle <- function(x) {
  payload <- charToRaw(jsonlite::toJSON(x))
  handle <- curl::new_handle(postfields = payload)
  curl::handle_setheaders(handle, "Content-Type" = "application/json")
}

# Parse the raw response and keep only its `json` entry.
process_json <- function(x) {
  response <- jsonlite::fromJSON(rawToChar(x))
  response[["json"]]
}

res <- do_requests_serial(
  urls,
  json,
  create_handle = post_handle,
  finally = process_json
)

identical(res, json)
In order to customize the curl handle, a create_handle
function is supplied, which receives for each request the corresponding entry of the object passed as bodies
argument. The finally
function in this example parses the returned JSON object into a list. Since for POST requests, httpbin
mirrors the POST data, the initial json
list is returned.
Building on functionality offered by the do_requests_*()
functions, make_requests()
helps with constructing JSON-RPC calls. As per the JSON-RPC 2.0 specification, a request object is a JSON string with the following members^[JSON-RPC 2.0 Specification Rev. 2013-01-04. JSON-RPC Working Group]:
- jsonrpc: A string specifying the version of the JSON-RPC protocol.
- method: A string containing the name of the method to be invoked.
- params: A structured value that holds the parameter values to be used during the invocation of the method.
- id: An identifier established by the client.

In order to create a list of such request objects, the arguments methods
, params
, ids
and version
, which map to method
, params
, id
and jsonrpc
, respectively, are combined and converted to JSON. OpenBIS API endpoint urls are constructed with the helper function api_url()
which can receive arguments api_endpoint
and host_url
, forwarded by make_requests()
. The argument n_con
is used to specify the maximal number of simultaneous connections made to the server.
As make_requests()
is designed to construct several requests at a time, the arguments methods
and params
are vectorized. If any of the passed objects is of length 1, it is replicated using base::rep()
to the required length such that all three objects are of the same length. Care has to be taken with the list passed as params
such that its length corresponds to the number of requests. This means that if only a single set of parameters is passed, this list has to be wrapped by another list.
As a last argument, a finally
function can be passed, which defaults to process_json()
. This function first converts all typed JSON objects to json_class
objects using as_json_class()
, resolves object references using resolve_references()
in order to make all json_class
objects self-contained and then creates a json_vec
object using as_json_vec()
, therefore allowing S3 method dispatch on lists of json_class
objects. For more information on the specifics of this, please refer to the vignette "JSON object handling".
For single requests, a wrapper around make_requests()
is available as make_request()
. This function wraps the params
argument in a list such that a list of length 1 is passed to make_requests()
and returns the first entry of the list resulting from calling make_requests()
. The following example shows how make_request()
is used to implement the API method listProjects
, which takes an access token passed as a list as its only argument.
# Call the API method listProjects through make_request(); the login token is
# its only parameter and is passed wrapped in a list.
token <- login_openbis()

gis_url <- api_url(api_endpoint = "gis")
projects <- make_request(
  gis_url,
  method = "listProjects",
  params = list(token)
)

print(projects, length = 10L)

logout_openbis(token)
To illustrate the use of make_requests()
, the API method getDownloadUrlForFileForDataSet
is implemented. This function generates a download URL for a file in a data set and requires a separate API call for each combination of data set and file path. In order to carry out several of these requests asynchronously, a list has to be passed as params
argument such that each entry contains a list holding a login token, a dataset code and a file path.
# Request one download URL per data set through make_requests().
token <- login_openbis()

dataset_codes <- c("20120629093035782-603380", "20121011143734361-1359915")
file_path <- "original/aThresholdedInfectionScoring_bBB01-1I.csv"

# One parameter list per request: login token, data set code and file path.
params <- lapply(dataset_codes, function(x) list(token, x, file_path))
str(params)

# The argument is spelled out as `methods` instead of relying on partial
# matching of `method`.
download_urls <- make_requests(
  api_url(api_endpoint = "dsrg"),
  methods = "getDownloadUrlForFileForDataSet",
  params = params
)
str(download_urls)

logout_openbis(token)
In addition to the JSON-RPC calls enabled by make_request()
/make_requests()
, the do_requests_*()
functions are also used for (asynchronous) file downloads in fetch_files()
. Slightly different functions are used as create_handle
and check
arguments but apart from that the logic remains largely the same.
Any arguments passed as ...
to make_requests()
will be forwarded to api_url()
which creates an API endpoint URL and passes this on to the do_requests_*()
functions. As all functions that issue API calls use make_requests()
, this makes it possible to not only target the InfectX openBIS instance, but arbitrary openBIS servers that support the v1 JSON-RPC API. The following example serves to illustrate how this mechanism can be used to access the openBIS demo.
# Log in to a different openBIS host by passing `host_url`.
token <- login_openbis(
  "test_observer",
  "test_observer",
  host_url = "https://openbis-eln-lims.ethz.ch"
)

# Sub-criteria of type "sample" matching the value "WB_LEXA-ER-B112".
sample_filter <- search_sub_criteria(
  search_criteria(attribute_clause(value = "WB_LEXA-ER-B112")),
  type = "sample"
)

# Main criteria: attribute "type" equal to "ELN_PREVIEW", narrowed by the
# sample sub-criteria.
preview_filter <- search_criteria(
  attribute_clause("type", "ELN_PREVIEW"),
  sub_criteria = sample_filter
)

gel_data <- search_openbis(token, preview_filter)

# Fetch the matching files, reading each with magick::image_read().
gel_img <- fetch_files(token, gel_data, reader = magick::image_read)

logout_openbis(token)

print(gel_img[[1]])
attributes(gel_img[[1]])
api_url()
can accept arguments api_endpoint
, host_url
and full_url
and ignores any further arguments. The first argument offers a selection of hard-coded API endpoints and in combination with the second argument is used to access one of them on the specified openBIS host. In case a URL that is not supported by the api_endpoint
selector is desired, the complete URL can
be passed as full_url
which will simply be returned by api_url()
.
Currently no methods from the API sections
are implemented as they mostly deal with modifying metadata and creating new aggregation reports. The main focus of this package is retrieving data and therefore only methods from the API sections
# Emit one markdown bullet per API section: its link and implementation status.
for (section in info) {
  bullet <- paste0("* ", section[["link"]], ", ", section[["status"]])
  cat(bullet, "% implemented\n")
}
are currently available. A more detailed overview of what functionality is implemented in this API client is given in the following sections.
# Emit, per API section, a level-3 heading with its description followed by
# the method table rendered with kable().
for (section in info) {
  heading <- paste0(
    "### ", section[["title"]], "\n", section[["description"]], "\n"
  )
  cat(heading)
  print(kable(section[["table"]]))
  cat("\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.