RHotStuff: Collection of frequently used functions

Documented in parseTimeSeries.elab

#' Parse Tables With References To Files Containing Spectra On eLabFTW
#' 
#' This function processes tables generated by the GET.elabftw functions. This function turns a table with references to spectra measured under different conditions into a single data.frame 
#' containing the time series. Two columns must be present in the original table: A column with a time variable (some variable describing the changing measurement conditions between the spectra)
#' and a column containing the file names of the measured spectra. All other columns will be ignored. The return value will be a data.frame containing the spectra: The first column 
#' contains the wavenumber axis, the other columns contain the measured intensities. The time variable will be used as label for the columns. The function can process spectra with different
#' wavenumber axis.
#' 
#' Keep in mind: This functions needs the original HTTP API response. Therefore set the parameter outputHTTP to TRUE when calling a 
#' GET.elabftw function.
#' 
#' @param parseableTables A list generated by GET.elabftw functions with outputHTTP=TRUE. The list contains a list of data.frames with 
#' file references and the HTTP response from the API request.
#' @param col.time The column number of the time variable in the original table
#' @param col.spectra The column number of the file references to the spectra (given in the original table)
#' @param col.wavenumber The column number of the wavenumber axis in the referenced files
#' @param col.signal The column number of the measured intensities in the referenced files
#' @param extension One string or a vector of strings with file extensions that will be recognised by the function. Any other file type 
#' will cause the function to stop and report an error.
#' @param api.key The eLabFT API-key. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable READ_ELABFTW_TOKEN. This variable can be set in the ~/.Renviron file.
#' @param url The url to the eLabFTW API. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable ELABFTW_API_URL. This variable can be set in the ~/.Renviron file.
#' @param ... Parameters that will be passed to read.csv().
#' @return A List of data.frames. Every input table will result in one data.frame. The data.frames contain the measured spectra with a shared wavenumber axis and the time variable labeles the spectra.
#' 
#' @importFrom magrittr %>%
#' @export
parseTimeSeries.elab <- function(parseableTables,
                                 # The column number of the time variable in the original tables 
                                 col.time    = 1, 
                                 # The column number of the spectra in the original tables
                                 col.spectra = 2,
                                 # The column number of the wavenumber axis in the uploaded files
                                 col.wavenumber = 1,
                                 # The column number of the measured intenisity in the uploaded files
                                 col.signal = 2,
                                 # The file extensions the function expects the spectra to have
                                 extension   = ".Data-1.txt",
                                 # Sys.getenv() gets the enviourment variable with the API token. It's defined in ~/.Reviron 
                                 api.key     = Sys.getenv("READ_ELABFTW_TOKEN"),
                                 # Sys.getenv() gets the enviourment variable with the URL. It's defined in ~/.Reviron 
                                 url         = Sys.getenv("ELABFTW_API_URL"),
                                 # Passed to read.csv
                                 ...
                                ) {
  # Format allowed file extension according to regex and collapse all given extensions into a single regex expression
  file.extension = gsub("\\.", "\\\\.", extension) %>% paste0(., "$") %>% paste0(., collapse="|")
  
  # Get tables and meta data
  tables    <- parseableTables$table
  metadata  <- parseableTables$http
  
  # Organise information about external files into data frame
  # The fileid is needed to fetch the file via api
  # The spectrum will be used to store the content of the read files
  # If there are two files with the same name, the last one will be ignored (elabFTW allows mutiple files with the same 
  #  real_name attached to the same protocol). -> usage of dplyr::distinct()
  # Only files matching the given file extensions are added to the look-up table
  filetable <- data.frame( filename = sapply(metadata$uploads, function(upload) { upload$real_name } ),
                           fileid   = sapply(metadata$uploads, function(upload) { upload$id        } )
  ) %>% .[grepl(file.extension, .[,1]),] %>% dplyr::distinct(., filename, .keep_all=T)
  
  # Process every table in the list and return a list of spectra 
  timeSeries.list <- lapply(seq_along(tables), function(index) {
    # Get one data frame from the list
    table <- tables[[index]]
    
    # Create a progress bar
    progress <- progress::progress_bar$new(total = nrow(table), 
                                           format = paste0("Download ExpID ", metadata$id, " (Table ", index, "/", length(tables), ") [:bar] :eta remaining.") )
    
    #
    # READ DATA FROM ELABFTW
    #
    # Create a list of spectra by looping over one row of the given table and read the files containing spectra
    timeSeries <- lapply(dplyr::pull(table, col.spectra), function(filename) {
      
      # Check if the current element of the table is listed in the look-up table 'filetable'
      match.lookUpTable <- paste0("^", filename, "$") %>% grepl(., filetable[,1])
      
      # If the element is no recognised file panic. Stop the execution of the function and report to the user
      if ( any(match.lookUpTable) == FALSE ) paste0("The file '" + filename + "' is unexpected. Abort. If this is a mistake, make sure the parameter extension is given the correct values.") %>% stop(.)
      # If this filename is listed in the lookup table download the spectrum via API and return it
      else {
        
        # api url to needed file
        url <- paste0(url, "uploads/", filetable[match.lookUpTable,]$fileid)
        # Fetch file as HTML from elabftw api
        file <- httr::GET(url, httr::add_headers(Authorization = api.key)) %>% httr::stop_for_status(.)
        
        # Convert HTML into text and interpret the string as a table with read.csv
        # read.csv can only read from connections -> usage of textConnection
        spectrum <- httr::content(file, as="text") %>% textConnection(.) %>% read.csv(., ...)
        
        # Advance progress bar
        progress$tick()
        
        # Return the spectrum
        return(spectrum) 
      }
    })
    
    #
    # ORGANISE LIST OF TABLES INTO SINGLE DATA.FRAME
    #
    # Combine the wavenumber-axis of all spectra into a single vector
    wavenumbers <- sapply(timeSeries, function(spectrum) spectrum[,col.wavenumber] ) %>% unlist(.) %>% sort(., decreasing=F) %>% unique(.)
    # Create a data.frame to put all the spectra in
    # Create a column for the combined wavenumber-axis
    timeSeries.formatted <- data.frame(wavenumber = wavenumbers)
    

    
    # Create a progress bar
    progress <- progress::progress_bar$new(total = length(timeSeries), 
                                           format = paste0("Restructure ExpID ", metadata$id, " (Table ", index, "/", length(tables), ") [:bar] :eta remaining.") )
    
    # Loop over all spectra in the time series and add them to the data.frame
    for ( index in seq_along(timeSeries) ) {
      # Get the spectrum by from the timeSeries
      spectrum <- timeSeries[[index]]
      # Get the time variable from the original table
      time <- table[index,col.time]
      
      # Get for every wavenumber the corresponding measured signal from the spectrum
      for (currentWavenumber in timeSeries.formatted$wavenumber) {
        # If the needed value is part of the spectrum add the value
        # Do nothing if the needed value is not part of the spectrum, result will contain NA for this wavenumber
        if (currentWavenumber %in% spectrum[,col.wavenumber]) {
          # Add the measured signal for the current wavenumber from the original spectrum to the new data.frame
          # The column of the new data.frame will be labeled with the time variable given in the orginal table downloaded from elabFTW
          timeSeries.formatted[which(currentWavenumber == timeSeries.formatted$wavenumber),as.character(time)] <- spectrum[which(currentWavenumber==spectrum[,col.wavenumber]),col.signal]
        }
      }
      
      # Advance progress bar
      progress$tick()
    }
    
    # Return result
    return(timeSeries.formatted)
  })
  
}