hySpc.read.Witec: File Import Functions for Spectra in Various ASCII or Text File Formats

# Function -------------------------------------------------------------------

#' Read Shimadzu GCxGC-qMS spectra files (ASCII/txt)
#'
#' **EXPERIMENTAL FUNCTION**\cr
#' Read Shimadzu GCxGC-qMS spectra files (`.txt`) as exported by
#' Shimadzu Chrome Solution (v. 2.72).
#' Mass Spectrometer: Shimadzu GCMS-QP 2010 Ultra (www.shimadzu.com)
#'
#' @note This is a first rough import function and the functions may change
#'       without notice.
#'
#' @param file (character): Path to Shimadzu `.txt` file.
#' @param encoding (character): Encoding of the text file (used by
#'        [base::readLines()]).
#' @param quiet (logical): Suppress printing of progress.
#'
#'
#' @return List of spectra tables.
#'
#'
#' @author Bjoern Egert
#'
#' @concept io
#'
#' @importFrom utils read.table
#'
#' @export
#'
read_txt_Shimadzu <- function(file, encoding = "", quiet = TRUE) {
  # A file consists of several sections ([Headers])
  # Each Section consists of:
  # [Header]
  #   [MS Spectrum]
  #   [MC Peak Table]
  #   [MS Similarity Search Results for Spectrum Process Table]

  impLines <- readLines(
    con = file, n = -1L, ok = TRUE, warn = TRUE, encoding = encoding
  )
  # length(impLines)

  # total numbers of pos1 and pos2 and pos3 are equal
  pos1 <- which(impLines == "[Header]") # row positions of Headers
  # row positions of peak info tables
  pos2 <- which(impLines == "[MC Peak Table]")
  # row positions of peak annotations:
  pos3 <- which(
    impLines == "[MS Similarity Search Results for Spectrum Process Table]"
  )
  pos4 <- which(impLines == "[MS Spectrum]") # row positions of peak spectra

  headers <- length(pos1) # number of header sections

  # link spectra to headers
  pos4Li <- list()
  for (i in 1:(length(pos1)))
  {
    header <- pos1[i]
    headerNext <- pos1[i + 1]
    tmp <- (pos4 > header & pos4 < headerNext)
    pos4Li[[i]] <- pos4[tmp]
  }

  # treat last Header section separately ...
  headerLast <- pos1[length(pos1)]
  tmp <- pos4 > headerLast
  pos4Li[[length(pos1)]] <- pos4[tmp]

  for (i in 1:(length(pos4Li) - 1))
  {
    tmp <- length(pos4Li[[i]])
    vec <- pos4Li[[i]]
    pos4Li[[i]] <- c(vec, pos1[i + 1])
  }
  # End position
  pos4Li[[headers]] <- c(pos4Li[[headers]], length(impLines))

  # Check
  stopifnot(impLines[1] == "[Header]")
  stopifnot(length(pos1) == length(pos2))
  stopifnot(length(pos2) == length(pos3))
  stopifnot(length(pos3) == length(pos4Li))


  # ----------------- 1. Import: gather [Header] informations

  # gather in lists
  res2Li <- list() # Peak Info
  res3Li <- list() # Peak Similarity
  res4Li <- list() # Mass Spectra

  for (header in 1:headers)
  {
    if (!quiet) cat("header:", header, "\n")

    # 1a. Import: "[MC Peak Table]" ----

    start <- pos2[header] + 3
    stop <- pos3[header] - 2

    peakMat <- read.table(
      file = file, skip = start - 1, nrows = stop - start,
      header = TRUE, sep = ";", dec = ".", comment.char = "",
      stringsAsFactors = FALSE, quote = "\"'"
    )


    # FIXME: commented code should not be present:

    # maybe faster than above ...
    # Peak#;Ret.Time;Proc.From;Proc.To;Mass;Area;Height;A/H;Conc.;Mark;Name;Ret. Index"
    # colnames <- strsplit(impLines[start], split = ";")[[1]]
    # strsplit(impLines[(start+1):stop], split = ";")[[1]]
    # peakMat <- matrix(
    #   impLines[(start+1):stop],
    #   ncol = length(colnames),
    #   byrow = TRUE
    # )

    res2Li[[header]] <- peakMat

    # 1b. Import: "[MS Similarity Search Results for Spectrum Process Table]" ----

    start <- pos3[header] + 2
    stop <- pos4Li[[header]][1] - 1
    if (stop - start != 0) # no annotation hits
      {
        simMat <- read.table(
          file = file, skip = start - 1, , nrows = stop - start,
          header = TRUE, sep = ";", dec = ".", comment.char = "",
          stringsAsFactors = FALSE, quote = ""
        ) # quote = "\"'"
      } else {
      simMat <- NA
    }
    res3Li[[header]] <- simMat

    # 1c. Import: "[MS Spectrum]" ----

    specLi <- list() # list of all spectra in current header section
    for (i in 1:(length(pos4Li[[header]]) - 1))
    {
      # extract spectra
      start <- pos4Li[[header]][i] + 5 # data starts 5 rows below
      stop <- pos4Li[[header]][i + 1] - 1 # last data row before new Spectrum

      # debug purposes
      if (!quiet) {
        cat("header:", header, "spec:", i, "start:", start, "stop:", stop, "\n")
      }

      # -------------------~
      # Catch exception, when peak is reported in peakMat, but [MS Spectrum]
      # is not available: like example: (keyrow: # of Peaks; 0)
      # [MS Spectrum]
      # # of Peaks;0
      # Raw Spectrum;38.343 (scan : 68686);Base Peak;m/z 0.00 (Inten : 0)
      # Background;38.342 <-> 38.348 (scan : 68684 <-> 68697)
      # m/z;Absolute Intensity;Relative Intensity
      # [MS Spectrum]

      checkRow <- pos4Li[[header]][i] + 1 # check for: "# of Peaks;0"
      isEmptySpec <- impLines[checkRow] == "# of Peaks;0"
      if (isEmptySpec) {
        # special case: create dummy spectrum with zero intensities
        spec <- cbind(
          "m/z" = 1:100,
          "Absolute Intensity" = 0,
          "Relative Intensity" = 0
        )
      }
      # -------------------~

      if (!isEmptySpec) {
        spec <- scan(
          file = file, sep = ";", skip = start - 1, nlines = (stop - start) + 1,
          dec = ".", quiet = TRUE
        )
        spec <- matrix(spec, ncol = 3, byrow = T)
        colnames(spec) <- c("m/z", "Absolute Intensity", "Relative Intensity")
      }

      specLi[[i]] <- spec
    }

    res4Li[[header]] <- specLi
  } # for(headers)


  # ----------------- 2. combine all headers sections

  # res2Li --> m2
  m2 <- as.data.frame(res2Li[[1]])
  m2 <- cbind(header = 1, m2)
  for (header in 2:headers)
  {
    tmp <- as.data.frame(res2Li[[header]])
    tmp <- cbind(header = header, tmp)
    m2 <- rbind(m2, tmp)
  }

  # res3Li --> m3
  # In a header section there may be not annotation tables
  m3 <- do.call("rbind", res3Li)
  # add header Nr.
  tmpMat <- lapply(X = res3Li, FUN = nrow)
  tmpMat <- as.matrix(tmpMat)
  tmp <- vector(length = 0)
  for (i in 1:nrow(tmpMat))
  {
    if (tmpMat[i, 1] == "NULL") tmp <- c(tmp, i)
    if (tmpMat[i, 1] != "NULL") tmp <- c(tmp, rep(x = i, times = tmpMat[i, 1]))
  }
  m3 <- cbind(header = tmp, m3)
  m3 <- m3[, c(
    "header", "Spectrum.", "Hit..", "SI", "CAS..", "Name",
    "Mol.Weight", "Mol.Form", "Retention.Index"
  )] # select most important columns
  tmp <- complete.cases(m3)
  m3 <- m3[tmp, ]

  # res4Li --> m4
  tmp <- colnames(res4Li[[1]][[1]])
  m4 <- matrix(NA, nrow = 1, ncol = length(tmp))
  colnames(m4) <- tmp
  # header number and spectra number as first columns:
  m4 <- cbind(header = NA, spectra = NA, m4)
  for (header in 1:headers)
  {
    for (spectra in 1:length(res4Li[[header]]))
    {
      tmp <- as.matrix(res4Li[[header]][[spectra]])
      tmp <- cbind(header, spectra, tmp)
      m4 <- rbind(m4, tmp)
    } # spectra
  } # header
  mode(m4) <- "numeric"
  m4 <- m4[-1, ]

  return(list(peakInfo = m2, peakAnnotate = m3, peakMasses = m4))
}


# Unit tests -----------------------------------------------------------------

hySpc.testthat::test(read_txt_Shimadzu) <- function() {
  context("read_txt_Shimadzu")

  local_edition(3)

  filename <- system.file(
    "extdata",
    "txt.Shimadzu/GCxGC-qMS.txt",
    package = "hySpc.read.txt"
  )


  test_that("Shimadzu .txt is read correctly", {

    expect_silent(spc <- read_txt_Shimadzu(filename))

    expect_equal(length(spc), 3)

    expect_equal(dim(spc[[1]]), c(2191, 13))
    expect_equal(dim(spc[[2]]), c(509, 9))
    expect_equal(dim(spc[[3]]), c(516939, 5))

    expect_equal(spc[[1]][[107, 6]], "TIC")
    expect_equal(spc[[1]][[259, 8]], 381874)

    expect_equal(spc[[2]][[21, 1]], 1)
    expect_equal(spc[[2]][[397, 8]], "C22_H55_N_O6_Si5")

    expect_equal(spc[[3]][[38796, 4]], 280)
    expect_equal(spc[[3]][[56789, 2]], 72)
  })
}

r-hyperspec/hySpc.read.Witec documentation built on April 23, 2023, 11:39 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

r-hyperspec/hySpc.read.Witec
File Import Functions for Spectra in Various ASCII or Text File Formats

R/read_txt_Shimadzu.R
In r-hyperspec/hySpc.read.Witec: File Import Functions for Spectra in Various ASCII or Text File Formats

Defines functions read_txt_Shimadzu

R Package Documentation

Browse R Packages

We want your feedback!

r-hyperspec/hySpc.read.Witec File Import Functions for Spectra in Various ASCII or Text File Formats

R/read_txt_Shimadzu.R In r-hyperspec/hySpc.read.Witec: File Import Functions for Spectra in Various ASCII or Text File Formats

Defines functions read_txt_Shimadzu

R Package Documentation

Browse R Packages

We want your feedback!

r-hyperspec/hySpc.read.Witec
File Import Functions for Spectra in Various ASCII or Text File Formats

R/read_txt_Shimadzu.R
In r-hyperspec/hySpc.read.Witec: File Import Functions for Spectra in Various ASCII or Text File Formats