R/parseTable_elabftw.R

Defines functions parseTable.elabftw

Documented in parseTable.elabftw

#' Parse Tables With References To Files On eLabFTW
#' 
#' This function processes tables generated by the GET.elabftw functions. If the resulting tables contain paths to files stored in the
#' online labbook elabFTW this function will fetch the corresponding files via the eLabFTW API and reduces each file into a single
#' value by applying a fully customisable function (e.g. mean, sum, ...). This function expects the files to be tables. The files will 
#' be converted into tables with read.csv(). The result is the initial list of tables with the file paths replaced by the corresponding 
#' values.
#' 
#' Keep in mind: This functions needs the original HTTP API response. Therefore set the parameter outputHTTP to TRUE when calling a 
#' GET.elabftw function.
#' 
#' @param parseableTables A list generated by GET.elabftw functions with outputHTTP=TRUE. The list contains a list of data.frames with 
#' file references and the HTTP response from the API request.
#' @param func Any function that takes a data.frame as input and returns a single value. Apply mean to a single column for example.
#' @param extension One string or a vector of strings with file extensions that will be recognised by the function. Any other file type 
#' will be ignored.
#' @param api.key The eLabFT API-key. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable READ_ELABFTW_TOKEN. This variable can be set in the ~/.Renviron file.
#' @param url The url to the eLabFTW API. See the eLabFTW documentation for details. The default settings retrieve this value from
#' the enviourmental variable ELABFTW_API_URL. This variable can be set in the ~/.Renviron file.
#' @param outputHTTP Boolean value. If FALSE only the data frames will be returned. If TRUE the data frames and the http response 
#' received from the API in a GET.elabftw function will be returned. The http response contains meta information needed by other 
#' parsing functions, like this function.
#' @param ... Parameters that will be passed to read.csv().
#' @return The original list of data frames with the file paths replaced by the content of files from the online labbook eLabFTW. If outputHTTP is 
#' set TRUE the return value is a list containing the list of data frames and the original http response that was passed in the parseableTables 
#' parameter.
#' 
#' @importFrom magrittr %>%
#' @export
parseTable.elabftw <- function(parseableTables,
                               func,
                               extension  = ".csv",
                               # Sys.getenv() gets the enviourment variable with the API token. It's defined in ~/.Reviron 
                               api.key    = Sys.getenv("READ_ELABFTW_TOKEN"),
                               # Sys.getenv() gets the enviourment variable with the URL. It's defined in ~/.Reviron 
                               url        = Sys.getenv("ELABFTW_API_URL"),
                               outputHTTP = F, 
                               # Passed to read.csv
                               ...) {
  
  
  
  # Format allowed file extension according to regex and collapse all gibe extensions into a single regex expression
  file.extension = gsub("\\.", "\\\\.", extension) %>% paste0(., "$") %>% paste0(., collapse="|")
  
  # Get tables and meta data
  tables    <- parseableTables$table
  metadata  <- parseableTables$http
  
  # Organise information about external files into data frame
  # The fileid is needed to fetch the file via api
  # The value will be used to store the average, mean or whatever is used to reduce the file content into a single number
  # Storing the value in this look-up table should reduce the amount of HTTP requests needed to complete the task
  # If there are two files with the same name, the last one will be ignored (elabFTW allows mutiple files with the same 
  #  real_name attached to the same protocol). -> usage of dplyr::distinct()
  # Only files matching the given file extensions are added to the look-up table
  filetable <- data.frame( filename = sapply(metadata$uploads, function(upload) { upload$real_name } ),
                           fileid   = sapply(metadata$uploads, function(upload) { upload$id        } ),
                           value    = NA
                          ) %>% .[grepl(file.extension, .[,1]),] %>% dplyr::distinct(., filename, .keep_all=T)
  
  # Process every table in the list
  tables <- lapply( seq_along(tables), function(index) {
    # Get one data frame from the list
    table <- tables[[index]]
    
    # Create a progress bar
    progress <- progress::progress_bar$new(total = unlist(table) %>% length, 
                                           format = paste0("ExpID ", metadata$id, " (Table ", index, "/", length(tables), ") [:bar] :eta remaining.") )
    # Loop over every item of the data frame
    table <- apply( table, c(1,2), function(element) {
      # Check if the current element of the table is listed in the look-up table 'filetable'
      match.lookUpTable <- paste0("^", element, "$") %>% grepl(., filetable[,1])
      
      # If the element is no recognised file name return the element asis
      if ( any(match.lookUpTable) == FALSE ) output <- element
      # If the corresponding value in the look-up table is not NA return the value listed in the look-up table
      else if ( is.na(filetable[match.lookUpTable,]$value) == FALSE ) output <- filetable[match.lookUpTable,]$value
      # If the there is no computed value in the look-up table fetch the file via api and compute the value
      else {
        # api url to needed file
        url <- paste0(url, "uploads/", filetable[match.lookUpTable,]$fileid)
        # Fetch file as HTML from elabftw api
        file <- httr::GET(url, httr::add_headers(Authorization = api.key)) %>% httr::stop_for_status(.)

        # Convert HTML into text and interpret the string as a table with read.csv
        # read.csv can only read from connections -> usage of textConnection
        content <- httr::content(file, as="text") %>% textConnection(.) %>% read.csv(., ...)
        # Pass the table read from the file to user specified function
        # It is expected, that the function func will reduce the table into a single number
        output  <- content %>% func
        
        # Add new value to look-up table
        filetable[match.lookUpTable,]$value <- output
      }
      # Try to convert output into numeric
      output <- tryCatch( as.numeric(output), 
                          error = function(cond) { return(output) }, 
                          warning = function(cond) { return(output) } )
      # Advance progress bar
      progress$tick()
      # Return the new value
      return( output )
    } )
    # Return the new table as data frame
    as.data.frame(table) %>% return(.)
    
  } )
  
  # Return tables
  if (outputHTTP == FALSE) return(tables)
  # Return table and the meta data that was attached to the initial input data
  else return( list(table = table, http = metadata) )
}
AlreadyTakenJonas/RHotStuff documentation built on Oct. 28, 2022, 10:15 p.m.