R/gcms.R

#' Read GC-MS data
#'
#' Read GC-MS data from Agilent's Quantitative Analysis software and munge it in
#' to a more useful form
#'
#' @param f The name of the csv file exported from Agilent's Quantitative
#' Analysis software
#'
#' @return A data frame where each observation represents the results for one
#' analyte in one GC-MS run
#' @importFrom stats aggregate
#' @importFrom utils read.csv
#' @export
#'
read_gcms <- function(f) {
  # Extract analytes from header
  analytes <- read.csv(f, nrow = 1L, header = FALSE, stringsAsFactors = FALSE)
  analytes <- analytes[, -c(1:2)]
  analytes <- tidyr::gather(analytes, "var", "analyte")[, "analyte"]

  # Get index of non-NA values
  ind <- which(!is.na(analytes))
  analytes <- analytes[ind]

  # Strip Results from analyte names
  analytes <- gsub("\\sResults", "", analytes)

  # Get GC-MS data
  raw <- read.csv(f, skip = 1L, check.names = FALSE, stringsAsFactors = FALSE)
  raw <- raw[, -c(1:2)]
  meta <- raw[, 1:(ind[1] - 1)]
  gcms <- raw[, ind[1]:ncol(raw)]

  # Convert GC-MS data to long-form using loop
  n_runs <- nrow(raw)
  n_analytes <- length(analytes)
  n_fields <- diff(ind[1:2])
  n_observations <- n_runs * n_analytes
  n_columns <- ind[2] - 1
  n_values <- n_analytes * n_fields
  gathered <- data.frame(matrix(nrow = n_observations, ncol = n_columns))

  i <- 1
  k <- 1
  while (i < n_values) {
    j <- i + n_fields - 1
    m <- k + n_runs - 1
    tmp <- cbind(meta, gcms[, i:j])
    gathered[k:m, ] <- tmp
    i <- j + 1
    k <- m + 1
  }
  rownames(gathered) <- NULL

  # Clean up column names
  clean <- function(x) {
    x <- gsub("[\\.-]", "", x) # Remove periods
    x <- gsub("\\s", "_", x) # Replace spaces with underscores
    x <- tolower(x) # Make names lowercase
    x <- gsub("calc_", "", x) # Remove calc_ prepend
  }
  cols <- names(raw)[1:n_columns]
  colnames(gathered) <- lapply(cols, clean)

  # Combine GC-MS and analyte data
  analytes <- unlist(lapply(analytes, rep, n_runs))

  result <- cbind(analyte = analytes, gathered, stringsAsFactors = FALSE)
  result$type <- tolower(result$type)
  return(result)
}
jdavisagua/lab.routines documentation built on May 4, 2019, 7:37 a.m.