R/DEPRECATED-read.jdx.R

Defines functions read.jdx

#' @name DEPRECATED-read.jdx
#' @concept moved to hySpc.read.jdx
#'
#' @title (DEPRECATED)
#'        JCAMP-DX import for Shimadzu library spectra
#'
#' @description
#' This function is **deprecated** and  will be  removed in the next release
#' of \pkg{hyperSpec} package.
#' Please use function `hySpc.read.jdx::read_jdx()` instead.
#' More on functions in package \pkg{hySpc.read.jdx}
#' [here (link)](https://r-hyperspec.github.io/hySpc.read.jdx/reference/index.html).
#'
#'
#' **Old description:**
#'
#' This is a first rough import function for JCAMP-DX spectra.
#'
#' So far, AFFN and PAC formats are supported for simple XYDATA, DATA TABLEs and
#' PEAK TABLEs.
#'
#' NTUPLES / PAGES are not (yet) supported.
#'
#' DIF, DUF, DIFDUP and SQZ data formats are not (yet) supported.
#'
#' @note JCAMP-DX support is incomplete and the functions may change without
#'       notice.
#
#        See `vignette ("fileio")` and the details section.
#
#' @param filename file name and path of the .jdx file
#' @param encoding encoding of the JCAMP-DX file (used by [base::readLines()])
#' @param header list with manually set header values
#' @param keys.hdr2data index vector indicating which header entries should be
#'        transferred into the extra data. Usually a character vector of labels
#'        (lowercase, without and dashes, blanks, underscores). If `TRUE`, all
#'        header entries are read.
#' @param ... further parameters handed to the data import function, e.g.
#'
#' | parameter | meaning                                                                             | default |
#' | --------- | ----------------------------------------------------------------------------------- | ------- |
#' | `xtol`    | tolerance for checking calculated x values against checkpoints at beginning of line | XFACTOR |
#' | `ytol`    | tolerance for checking Y values against MINY and MAXY                               | YFACTOR |
#'
#' @param NA.symbols character vector of text values that should be converted
#'        to `NA`
#' @param collapse.multi should hyperSpec objects from multispectra files be
#'        collapsed into one hyperSpec object (if `FALSE`, a list of hyperSpec
#'        objects is returned).
#' @param wl.tolerance,collapse.equal see [collapse]
#' @return hyperSpec object
#' @author C. Beleites with contributions by Bryan Hanson
#'
#' @export
#'
#' @importFrom utils head modifyList maintainer
read.jdx <- function(filename = NULL, encoding = "",
                     header = list(), keys.hdr2data = FALSE, ...,
                     NA.symbols = c("NA", "N/A", "N.A."),
                     collapse.multi = TRUE,
                     wl.tolerance = hy_get_option("wl.tolerance"),
                     collapse.equal = TRUE) {
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  hySpc_deprecated(new = "read_jdx()", package = "hySpc.read.jdx")

  if (is.null(filename)) {
    return(NA)
  }

  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  ## see readLines help: this way, encoding is translated to standard encoding
  ## on current system.
  file <- file(filename, "r", encoding = encoding, blocking = FALSE)
  jdx <- readLines(file)
  close(file)

  ## start & end of spectra header and data
  hdrstart <- grep("^[[:blank:]]*##TITLE=", jdx)
  if (length(hdrstart) == 0L) stop("No spectra found.")

  datastart <- grep(sprintf("^[[:blank:]]*##(%s)=", paste(.DATA.START, collapse = "|")), jdx) + 1
  # V 4.24 uses ##XYDATA=
  # V 5.00 uses ##DATA TABLE= ..., XYDATA
  # V 5.01 MPI Golm files use ##PEAK TABLE=

  if (length(datastart) == 0L) stop("No data found: unsupported data type.")

  dataend <- grep("^[[:blank:]]*##", jdx)
  dataend <- sapply(datastart, function(s) dataend[which(dataend > s)[1]]) - 1

  spcend <- grep("^[[:blank:]]*##END=[[:blank:]]*$", jdx) - 1

  ## some checks
  stopifnot(length(datastart) >= length(hdrstart))
  stopifnot(length(datastart) == length(dataend))
  stopifnot(all(hdrstart < spcend))
  stopifnot(all(datastart < dataend))

  spc <- vector("list", length(datastart))

  for (s in seq_along(datastart)) {
    ## look for header data
    hdr <- modifyList(header, .jdx.readhdr(jdx[hdrstart[s]:(datastart[s] - 1)]))

    if (!is.null(hdr$page) || !is.null(hdr$ntuples)) {
      stop("NTUPLES / PAGEs are not yet supported.")
    }

    if (s == 1L) { ## file header may contain overall settings
      hdr <- modifyList(list(file = as.character(filename)), hdr)
      header <- hdr[!names(hdr) %in% .key2names(.DATA.START)]
    }

    ## evaluate data block

    if (grepl("[A-DF-Za-df-z%@]", jdx[datastart[s]])) {
      stop("SQZ, DIF, and DIFDUP forms are not yet supported.")
    }

    spc[[s]] <- switch(hdr$.format,
      `(X++(Y..Y))` = .jdx.TABULAR.PAC(hdr, jdx[datastart[s]:spcend[s]], ...),
      `(XY..XY)` = .jdx.TABULAR.AFFN(hdr, jdx[datastart[s]:spcend[s]], ...),
      stop("unknown JCAMP-DX data format: ", hdr$xydata)
    )

    ## process according to header entries
    spc[[s]] <- .jdx.processhdr(spc[[s]], hdr, keys.hdr2data, ..., NA.symbols = NA.symbols)
  }

  if (length(spc) == 1L) {
    spc <- spc[[1]]
  } else if (collapse.multi) {
    spc <- collapse(spc, wl.tolerance = wl.tolerance, collapse.equal = collapse.equal)
  }

  ## consistent file import behaviour across import functions
  .spc_io_postprocess_optional(spc, filename)
}

### HEADER ------------------------------------------------------------------------------------------

.jdx.readhdr <- function(hdr) {
  ## get rid of comments. JCAMP-DX comments start with $$ and go to the end of the line.
  hdr <- hdr[!grepl("^[[:blank:]]*[$][$]", hdr)]
  hdr <- gsub("([[:blank:]][$][$].*)$", "", hdr)

  ## now join lines that are not starting with ##KEY= with the KEYed line before
  nokey <- grep("^[[:blank:]]*##.*=", hdr, invert = TRUE)
  if (length(nokey) > 0) {
    for (l in rev(nokey)) { # these are few, so no optimization needed
      hdr[l - 1] <- paste(hdr[(l - 1):l], collapse = " ")
    }
    hdr <- hdr[-nokey]
  }

  names <- .key2names(sub("^[[:blank:]]*##(.*)=.*$", "\\1", hdr))

  hdr <- sub("^[[:blank:]]*##.*=[[:blank:]]*(.*)[[:blank:]]*$", "\\1", hdr)
  hdr <- gsub("^[\"'[:blank:]]*([^\"'[:blank:]].*[^\"'[:blank:]])[\"'[:blank:]]*$", "\\1", hdr)
  i <- grepl("^[[:blank:]]*[-+]?[.[:digit:]]*[eE]?[-+]?[.[:digit:]]*[[:blank:]]*$", hdr) &
    !names %in% c("title", "datatype", "owner")
  hdr <- as.list(hdr)
  hdr[i] <- as.numeric(hdr[i])
  names(hdr) <- names

  ## e.g. Shimadzu does not always save XFACTOR and YFACTOR
  if (is.null(hdr$yfactor)) hdr$yfactor <- 1
  if (is.null(hdr$xfactor)) hdr$xfactor <- 1

  ## we treat XYDATA and PEAK TABLEs the same way
  format <- hdr[names(hdr) %in% .key2names(.DATA.START)]
  format <- format[!sapply(format, is.null)]
  if (length(format) != 1) {
    stop(
      "contradicting format specification: please contact the maintainer (",
      maintainer("hyperSpec"),
      "supplying the file you just tried to load."
    )
  }

  hdr$.format <- format[[1]]

  hdr
}

.jdx.processhdr <- function(spc, hdr, keys, ..., ytol = abs(hdr$yfactor), NA.symbols) {
  ## hdr$xfactor and $yfactor applied by individual reading functions

  ## check Y values
  miny <- min(spc@data$spc)
  if (!is.null(hdr$miny) && abs(hdr$miny - miny) > ytol) {
    message(sprintf(
      "JDX file inconsistency: Minimum of spectrum != MINY: difference = %0.3g (%0.3g * YFACTOR)",
      miny - hdr$miny,
      (miny - hdr$miny) / hdr$yfactor
    ))
  }

  maxy <- max(spc@data$spc)
  if (!is.null(hdr$maxy) && abs(hdr$maxy - maxy) > ytol) {
    message(sprintf(
      "JDX file inconsistency: Maximum of spectrum != MAXY: difference = %0.3g (%0.3g * YFACTOR)",
      maxy - hdr$maxy,
      (maxy - hdr$maxy) / hdr$yfactor
    ))
  }


  spc@label$.wavelength <- .jdx.xunits(hdr$xunits)
  spc@label$spc <- .jdx.yunits(hdr$yunits)

  ## CONCENTRATIONS
  if ("concentrations" %in% keys) {
    spc <- .jdx.hdr.concentrations(spc, hdr, NA.symbols = NA.symbols)
  }

  # delete header lines already processed
  hdr[c(
    "jcampdx", "xunits", "yunits", "xfactor", "yfactor", "firstx", "lastx", "npoints",
    "firsty", "xydata", "end", "deltax", "maxy", "miny",
    "concentrations"
  )] <- NULL
  if (is.character(keys)) {
    keys <- keys[keys %in% names(hdr)]
  }
  hdr <- hdr[keys]

  if (length(hdr) > 0L) {
    spc@data <- cbind(spc@data, hdr)
  }

  spc
}

### DATA FORMATS --------------------------------------------------------------
.jdx.TABULAR.PAC <- function(hdr, data, ..., xtol = hdr$xfactor) {
  ## regexp for numbers including scientific notation
  .PATTERN.number <- "[-+]?[0-9]*[.]?[0-9]*([eE][-+]?[0-9]+)?"
  if (is.null(hdr$firstx)) stop("##FIRSTX= missing.")
  if (is.null(hdr$lastx)) stop("##LASTX= missing.")
  if (is.null(hdr$npoints)) stop("##NPOINTS= missing.")

  wl <- seq(hdr$firstx, hdr$lastx, length.out = hdr$npoints)

  ## remove starting X
  y <- sub(paste0("^[[:blank:]]*", .PATTERN.number, "[[:blank:]]*(.*)$"), "\\2", data)

  ## add spaces between numbers if necessary
  y <- gsub("([0-9.])([+-])", "\\1 \\2", y)

  y <- strsplit(y, "[[:blank:]]+")
  ny <- sapply(y, length)

  y <- as.numeric(unlist(y))


  if (length(y) != hdr$npoints) {
    stop("mismatch between ##NPOINTS and length of Y data.")
  }

  ## X checkpoints
  x <- sub(paste0("^[[:blank:]]*(", .PATTERN.number, ")[[:blank:]]*.*$"), "\\1", data)
  x <- as.numeric(x) * hdr$xfactor
  diffx <- abs(wl[c(1, head(cumsum(ny) + 1, -1))] - x)
  if (any(diffx > xtol)) {
    message(
      "JDX file inconsistency: X axis differs from checkpoints. ",
      sprintf(
        "Maximum difference = %0.2g (%0.2g * XFACTOR)",
        max(diffx), max(diffx) / hdr$xfactor
      )
    )
  }

  y <- y * hdr$yfactor

  new("hyperSpec", spc = y, wavelength = wl)
}

.jdx.TABULAR.AFFN <- function(hdr, data, ...) {
  data <- strsplit(data, "[,;[:blank:]]+")
  data <- unlist(data)
  data <- matrix(as.numeric(data), nrow = 2)

  new("hyperSpec", wavelength = data[1, ] * hdr$xfactor, spc = data[2, ] * hdr$yfactor)
}

### UNITS -------------------------------------------------------------------------------------------

.jdx.xunits <- function(xunits) {
  if (is.null(xunits)) {
    NULL
  } else {
    switch(tolower(xunits),
      `1/cm` = expression(tilde(nu) / cm^-1),
      micrometers = expression(`/`(lambda, micro * m)),
      nanometers  = expression(lambda / nm),
      seconds     = expression(t / s),
      xunits
    )
  }
}

.jdx.yunits <- function(yunits) {
  if (is.null(yunits)) {
    NULL
  } else {
    switch(tolower(yunits),
      transmittance     = "T",
      reflectance       = "R",
      absorbance        = "A",
      `kubelka-munk`    = expression(`/`(1 - R^2, 2 * R)),
      `arbitrary units` = "I / a.u.",
      yunits
    )
  }
}

## HDR processing functions
.jdx.hdr.concentrations <- function(spc, hdr, NA.symbols) {
  hdr <- strsplit(hdr$concentrations, "[)][[:blank:]]*[(]")[[1]]
  hdr[length(hdr)] <- gsub(")$", "", hdr[length(hdr)])
  if (hdr[1] == "(NCU") {
    hdr <- hdr[-1]
  } else {
    message("Unknown type of concentration specification in JDX file: ", hdr[1], ")")
  }

  hdr <- simplify2array(strsplit(hdr, ","))
  hdr[hdr %in% NA.symbols] <- NA

  ## names
  N <- hdr[1, ]
  N <- sub("^([^[:alpha:]]*)", "", N)
  N <- sub("([^[:alpha:]]*)$", "", N)
  N <- gsub("([^[:alnum:]_-])", ".", N)

  ## concentrations
  C <- t(as.numeric(hdr[2, ]))
  colnames(C) <- N
  C <- as.data.frame(C)
  spc@data <- cbind(spc@data, C)

  ## units
  U <- as.list(hdr[3, ])
  names(U) <- N

  spc@label <- modifyList(spc@label, U)

  spc
}

## helpers
.DATA.START <- c("XYDATA", "DATA TABLE", "PEAK TABLE")

.key2names <- function(key) {
  gsub("[[:blank:]_-]", "", tolower(key))
}


hySpc.testthat::test(read.jdx) <- function() {
  context("test-read.jdx")

  test_that(
    "deprecated",
    expect_warning(read.jdx(), "deprecated.*hySpc.read.jdx")
  )
}
r-hyperspec/hyperSpec documentation built on May 31, 2024, 5:53 p.m.