#' Parse Tables With References To Files Containing Spectra On eLabFTW
#'
#' This function processes tables generated by the GET.elabftw functions. This function turns a table with references to spectra measured under different conditions into a single data.frame
#' containing the time series. Two columns must be present in the original table: A column with a time variable (some variable describing the changing measurement conditions between the spectra)
#' and a column containing the file names of the measured spectra. All other columns will be ignored. The return value will be a data.frame containing the spectra: The first column
#' contains the wavenumber axis, the other columns contain the measured intensities. The time variable will be used as label for the columns. The function can process spectra with different
#' wavenumber axis.
#'
#' Keep in mind: This functions needs the original HTTP API response. Therefore set the parameter outputHTTP to TRUE when calling a
#' GET.elabftw function.
#'
#' @param parseableTables A list generated by GET.elabftw functions with outputHTTP=TRUE. The list contains a list of data.frames with
#' file references and the HTTP response from the API request.
#' @param col.time The column number of the time variable in the original table
#' @param col.spectra The column number of the file references to the spectra (given in the original table)
#' @param col.wavenumber The column number of the wavenumber axis in the referenced files
#' @param col.signal The column number of the measured intensities in the referenced files
#' @param extension One string or a vector of strings with file extensions that will be recognised by the function. Any other file type
#' will cause the function to stop and report an error.
#' @param api.key The eLabFT API-key. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable READ_ELABFTW_TOKEN. This variable can be set in the ~/.Renviron file.
#' @param url The url to the eLabFTW API. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable ELABFTW_API_URL. This variable can be set in the ~/.Renviron file.
#' @param ... Parameters that will be passed to read.csv().
#' @return A List of data.frames. Every input table will result in one data.frame. The data.frames contain the measured spectra with a shared wavenumber axis and the time variable labeles the spectra.
#'
#' @importFrom magrittr %>%
#' @export
parseTimeSeries.elab <- function(parseableTables,
# The column number of the time variable in the original tables
col.time = 1,
# The column number of the spectra in the original tables
col.spectra = 2,
# The column number of the wavenumber axis in the uploaded files
col.wavenumber = 1,
# The column number of the measured intenisity in the uploaded files
col.signal = 2,
# The file extensions the function expects the spectra to have
extension = ".Data-1.txt",
# Sys.getenv() gets the enviourment variable with the API token. It's defined in ~/.Reviron
api.key = Sys.getenv("READ_ELABFTW_TOKEN"),
# Sys.getenv() gets the enviourment variable with the URL. It's defined in ~/.Reviron
url = Sys.getenv("ELABFTW_API_URL"),
# Passed to read.csv
...
) {
# Format allowed file extension according to regex and collapse all given extensions into a single regex expression
file.extension = gsub("\\.", "\\\\.", extension) %>% paste0(., "$") %>% paste0(., collapse="|")
# Get tables and meta data
tables <- parseableTables$table
metadata <- parseableTables$http
# Organise information about external files into data frame
# The fileid is needed to fetch the file via api
# The spectrum will be used to store the content of the read files
# If there are two files with the same name, the last one will be ignored (elabFTW allows mutiple files with the same
# real_name attached to the same protocol). -> usage of dplyr::distinct()
# Only files matching the given file extensions are added to the look-up table
filetable <- data.frame( filename = sapply(metadata$uploads, function(upload) { upload$real_name } ),
fileid = sapply(metadata$uploads, function(upload) { upload$id } )
) %>% .[grepl(file.extension, .[,1]),] %>% dplyr::distinct(., filename, .keep_all=T)
# Process every table in the list and return a list of spectra
timeSeries.list <- lapply(seq_along(tables), function(index) {
# Get one data frame from the list
table <- tables[[index]]
# Create a progress bar
progress <- progress::progress_bar$new(total = nrow(table),
format = paste0("Download ExpID ", metadata$id, " (Table ", index, "/", length(tables), ") [:bar] :eta remaining.") )
#
# READ DATA FROM ELABFTW
#
# Create a list of spectra by looping over one row of the given table and read the files containing spectra
timeSeries <- lapply(dplyr::pull(table, col.spectra), function(filename) {
# Check if the current element of the table is listed in the look-up table 'filetable'
match.lookUpTable <- paste0("^", filename, "$") %>% grepl(., filetable[,1])
# If the element is no recognised file panic. Stop the execution of the function and report to the user
if ( any(match.lookUpTable) == FALSE ) paste0("The file '" + filename + "' is unexpected. Abort. If this is a mistake, make sure the parameter extension is given the correct values.") %>% stop(.)
# If this filename is listed in the lookup table download the spectrum via API and return it
else {
# api url to needed file
url <- paste0(url, "uploads/", filetable[match.lookUpTable,]$fileid)
# Fetch file as HTML from elabftw api
file <- httr::GET(url, httr::add_headers(Authorization = api.key)) %>% httr::stop_for_status(.)
# Convert HTML into text and interpret the string as a table with read.csv
# read.csv can only read from connections -> usage of textConnection
spectrum <- httr::content(file, as="text") %>% textConnection(.) %>% read.csv(., ...)
# Advance progress bar
progress$tick()
# Return the spectrum
return(spectrum)
}
})
#
# ORGANISE LIST OF TABLES INTO SINGLE DATA.FRAME
#
# Combine the wavenumber-axis of all spectra into a single vector
wavenumbers <- sapply(timeSeries, function(spectrum) spectrum[,col.wavenumber] ) %>% unlist(.) %>% sort(., decreasing=F) %>% unique(.)
# Create a data.frame to put all the spectra in
# Create a column for the combined wavenumber-axis
timeSeries.formatted <- data.frame(wavenumber = wavenumbers)
# Create a progress bar
progress <- progress::progress_bar$new(total = length(timeSeries),
format = paste0("Restructure ExpID ", metadata$id, " (Table ", index, "/", length(tables), ") [:bar] :eta remaining.") )
# Loop over all spectra in the time series and add them to the data.frame
for ( index in seq_along(timeSeries) ) {
# Get the spectrum by from the timeSeries
spectrum <- timeSeries[[index]]
# Get the time variable from the original table
time <- table[index,col.time]
# Get for every wavenumber the corresponding measured signal from the spectrum
for (currentWavenumber in timeSeries.formatted$wavenumber) {
# If the needed value is part of the spectrum add the value
# Do nothing if the needed value is not part of the spectrum, result will contain NA for this wavenumber
if (currentWavenumber %in% spectrum[,col.wavenumber]) {
# Add the measured signal for the current wavenumber from the original spectrum to the new data.frame
# The column of the new data.frame will be labeled with the time variable given in the orginal table downloaded from elabFTW
timeSeries.formatted[which(currentWavenumber == timeSeries.formatted$wavenumber),as.character(time)] <- spectrum[which(currentWavenumber==spectrum[,col.wavenumber]),col.signal]
}
}
# Advance progress bar
progress$tick()
}
# Return result
return(timeSeries.formatted)
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.