R/read_spc.R

Defines functions get_by_names split_line split_string .prepare_hdr_df read_spc .spc_ftflags .spc_error .spc_read_x .spc_read_y .spc_log .spc_subfile_dir .spc_subhdr .spc_file_hdr .raw_split_nul .spc_ylab .spc_xlab

############################################################################## ~
# read_spc: Import Thermo Galactic's .spc file format into a hyperSpec object
#
# C. Beleites 2009/11/29
#
############################################################################## ~

## Define constants -----------------------------------------------------------

.nul <- as.raw(0)

## header sizes
.spc_size <- c(hdr = 512, subhdr = 32, subfiledir = 12, loghdr = 64)

.spc_default_keys_hdr2data <- c("fexper", "fres", "fsource")
.spc_default_keys_log2data <- FALSE

## axis labeling --------------------------------------------------------------

## x-axis units ...............................................................
.spc_FXTYPE <- c(
  expression(`/`(x, "a. u.")), # 0
  expression(`/`(tilde(nu), cm^-1)),
  expression(`/`(lambda, (mu * m))),
  expression(`/`(lambda, nm)),
  expression(`/`(t, s)),
  expression(`/`(t, min)),
  expression(`/`(f, Hz)),
  expression(`/`(f, kHz)),
  expression(`/`(f, MHz)),
  expression(`/`(frac(m, z), frac(u, e))),
  expression(`/`(delta, ppm)), # 10
  expression(`/`(t, d)),
  expression(`/`(t, a)),
  expression(`/`(Delta * tilde(nu), cm^-1)),
  expression(`/`(E, eV)),
  NA, # old version file uses label in gcatxt
  "Diode No",
  "Channel",
  expression(`/`(x, degree)),
  expression(`/`(T, degree * F)),
  expression(`/`(T, degree * C)), # 20
  expression(`/`(T, K)),
  "Data Point",
  expression(`/`(t, ms)),
  expression(`/`(t, micro * s)),
  expression(`/`(t, ns)),
  expression(`/`(f, GHz)),
  expression(`/`(lambda, cm)),
  expression(`/`(lambda, m)),
  expression(`/`(lambda, mm)),
  expression(`/`(t, h)) # 30
)

.spc_xlab <- function(x) {
  if (is.character(x)) {
    x
  } else if (x <= length(.spc_FXTYPE) + 1) {
    .spc_FXTYPE[x + 1]
  } else {
    ## x = 255 is for double interferogram and supposed not to have a label.
    ## Thus, returning NA is appropriate
    NA
  }
}

## y-axis units ..............................................................
.spc_FYTYPE <- c(
  expression(`/`(I[Ref], "a. u.")), # -1
  expression(`/`(I, "a. u.")),
  expression(`/`(I[IGRM], "a. u.")),
  "A",
  expression(frac((1 - R)^2, 2 * R)),
  "Counts",
  expression(`/`(U, V)),
  expression(`/`(y, degree)),
  expression(`/`(I, mA)),
  expression(`/`(l, mm)),
  expression(`/`(U, mV)),
  expression(-log(R)), # 10
  expression(`/`(y, "%")),
  expression(`/`(I, "a. u.")),
  expression(I / I[0]),
  expression(`/`(E, J)),
  NA, # old version file uses label in gcatxt
  expression(`/`(G, dB)),
  NA, # old version file uses label in gcatxt
  NA, # old version file uses label in gcatxt
  expression(`/`(T, degree * F)),
  expression(`/`(T, degree * C)), # 20
  expression(`/`(T, K)),
  "n",
  "K", # extinction coeaffictient
  expression(Re(y)),
  expression(Im(y)),
  "y (complex)", # complex
  "T",
  "R",
  expression(`/`(I, "a. u.")),
  expression(`/`(I[Emission], "a. u."))
)

.spc_ylab <- function(x) {
  if (is.character(x)) {
    x
  } else if (x <= 26) {
    .spc_FYTYPE[x + 2]
  } else if (x %in% 128:131) {
    .spc_FYTYPE[x - 99]
  } else {
    NA
  }
}

## helper functions ----------------------------------------------------------
### .raw_split_nul - rawToChar conversion, splitting at \0
#' @importFrom utils tail
.raw_split_nul <- function(raw, trunc = c(TRUE, TRUE), firstonly = FALSE,
                           paste.collapse = NULL) {
  # TODO make better truncation
  trunc <- rep(trunc, length.out = 2)

  if (trunc[1] && raw[1] == .nul) {
    raw <- raw[-1]
  }
  if (trunc[2]) {
    tmp <- which(raw > .nul)
    if (length(tmp) == 0) {
      return("")
    }
    raw <- raw[1:tail(tmp, 1)]
  }
  if (raw[length(raw)] != .nul) {
    raw <- c(raw, .nul)
  }

  tmp <- c(0, which(raw == .nul))

  out <- character(length(tmp) - 1)
  for (i in 1:(length(tmp) - 1)) {
    if (tmp[i] + 1 < tmp[i + 1] - 1) {
      out[i] <- rawToChar(raw[(tmp[i] + 1):(tmp[i + 1] - 1)])
    }
  }

  if (length(out) > 1L) {
    if (firstonly) {
      message("multiple strings encountered in spc file ", paste(out, collapse = ", "), ": using only the first one.")
      out <- out[1]
    } else if (!is.null(paste.collapse)) {
      if (hy_get_option("debuglevel") > 2L) {
        message("multiple strings encountered in spc file ", paste(out, collapse = ", "), " => pasting.")
      }

      out <- paste(out, collapse = paste.collapse)
    }
  }

  out
}

## file part reading functions ----------------------------------------------------------------------

## read file header ..........................................................
##
##

#' @importFrom utils maintainer
.spc_file_hdr <- function(raw_data) {
  ## check file format

  ## Detect Shimadzu SPC (which is effectively a variant of OLE CF format)
  if (isTRUE(all.equal(
    raw_data[1:4],
    as.raw(c("0xD0", "0xCF", "0x11", "0xE0"))
  ))) {
    stop("Support for Shimadzu SPC file format (OLE CF) is not yet implemented")
  }

  ## NEW.LSB = 75 supported,
  ## NEW.MSB = 76 not supported (neither by many Grams software according to spc doc)
  ## OLD     = 77 not supported (replaced by new format in 1996)
  if (raw_data[2] != 75) {
    stop(
      "Wrong spc file format version (or no spc file at all).\n",
      "Only 'new' spc files (1996 file format) with LSB word order are supported."
    )
  }

  hdr <- list(
    ftflgs = readBin(raw_data[1], "integer", 1, 1, signed = FALSE),
    ## byte 2 is already interpreted
    fexper = readBin(raw_data[3], "integer", 1, 1, signed = TRUE),
    fexp = readBin(raw_data[4], "integer", 1, 1, signed = TRUE),
    fnpts = readBin(raw_data[5:8], "integer", 1, 4),
    ffirst = readBin(raw_data[9:16], "double", 1, 8),
    flast = readBin(raw_data[17:24], "double", 1, 8),
    fnsub = readBin(raw_data[25:28], "integer", 1, 4),
    fxtype = readBin(raw_data[29], "integer", 1, 1, signed = FALSE),
    fytype = readBin(raw_data[30], "integer", 1, 1, signed = FALSE),
    fztype = readBin(raw_data[31], "integer", 1, 1, signed = FALSE),
    fpost = readBin(raw_data[32], "integer", 1, 1, signed = TRUE),
    fdate = readBin(raw_data[33:36], "integer", 1, 4),
    fres = .raw_split_nul(raw_data[37:45], paste.collapse = "\r\n"),
    fsource = .raw_split_nul(raw_data[46:54], paste.collapse = "\r\n"),
    fpeakpt = readBin(raw_data[55:56], "integer", 1, 2, signed = FALSE),
    fspare = readBin(raw_data[57:88], "numeric", 8, 4),
    fcmnt = .raw_split_nul(raw_data[89:218], paste.collapse = "\r\n"),
    fcatxt = .raw_split_nul(raw_data[219:248], trunc = c(FALSE, TRUE)),
    flogoff = readBin(raw_data[249:252], "integer", 1, 4), # , signed = FALSE),
    fmods = readBin(raw_data[253:256], "integer", 1, 4), # , signed = FALSE),
    fprocs = readBin(raw_data[257], "integer", 1, 1, signed = TRUE),
    flevel = readBin(raw_data[258], "integer", 1, 1, signed = TRUE),
    fsampin = readBin(raw_data[259:260], "integer", 1, 2, signed = FALSE),
    ffactor = readBin(raw_data[261:264], "numeric", 1, 4),
    fmethod = .raw_split_nul(raw_data[265:312]),
    fzinc = readBin(raw_data[313:316], "numeric", 1, 4), # , signed = FALSE),
    fwplanes = readBin(raw_data[317:320], "integer", 1, 4), # , signed = FALSE),
    fwinc = readBin(raw_data[321:324], "numeric", 1, 4),
    fwtype = readBin(raw_data[325], "integer", 1, 1, signed = TRUE),
    ## 187 bytes reserved
    .last.read = .spc_size["hdr"]
  )

  ## R doesn't have unsigned long int .................................
  if (any(unlist(hdr[c("flogoff", "fmods", "fwplanes")]) < 0)) {
    stop(
      "error reading header: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }



  ## do some post processing ..........................................

  experiments <- c(
    "General", "Gas Chromatogram", "General Chromatogram", "HPLC Chromatogram",
    "NIR Spectrum", "UV-VIS Spectrum", "* reserved *", "X-ray diffraction spectrum",
    "Mass Spectrum", "NMR Spectrum", "Raman Spectrum", "Fluorescence Spectrum",
    "Atomic Spectrum", "Chroatography Diode Array Data"
  )
  hdr$fexper <- factor(hdr$fexper + 1, levels = seq_along(experiments))
  levels(hdr$fexper) <- experiments

  hdr$ftflgs <- .spc_ftflags(hdr$ftflgs)

  hdr$fdate <- ISOdate(
    year = hdr$fdate %/% 1048560,
    month = hdr$fdate %/% 65536 %% 16,
    day = hdr$fdate %/% 2048 %% 32,
    hour = hdr$fdate %/% 64 %% 32,
    min = hdr$fdate %% 64
  )

  ## interferogram ?
  ## if not, hdr$fpeakpt is set to NULL
  if (hdr$fytype == 1) {
    if (hdr$fpeakpt != 0) {
      hdr$fpeakpt <- hdr$fpeakpt + 1
    }
  } else {
    hdr$fpeakpt <- NULL
  }

  ## set the axis labels
  if (hdr$ftflgs["TALABS"]) {
    # TODO: find test data
    tmp <- rep(0, 4)
    tmp[seq_along(hdr$fcatxt)] <- nchar(hdr$fcatxt)

    if (tmp[1] > 0) hdr$fxtype <- hdr$fcatxt[1]
    if (tmp[2] > 0) hdr$fytype <- hdr$fcatxt[2]
    if (tmp[3] > 0) hdr$fztype <- hdr$fcatxt[3]
    if (tmp[4] > 0) hdr$fwtype <- hdr$fcatxt[4]
  }
  hdr$fxtype <- .spc_xlab(hdr$fxtype)
  hdr$fytype <- .spc_ylab(hdr$fytype)
  hdr$fztype <- .spc_xlab(hdr$fztype)
  hdr$fwtype <- .spc_xlab(hdr$fwtype)


  ## File with subfiles with individual x axes?
  ## Then there should be a subfile directory:
  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    ## try to reject impossible values for the subfiledir offset
    if (hdr$fnpts > length(raw_data) ||
      (hdr$fnpts > hdr$flogoff && hdr$flogoff > 0) ||
      hdr$fnpts < 512) {
      .spc_error(
        ".spc_read_hdr", list(hdr = hdr),
        "file header flags specify TXYXYS and TMULTI, ",
        "but fnpts does not give a valid offset for the subfile directory.\n hdr$ftflgs = ",
        paste(names(hdr$ftflgs)[hdr$ftflgs], collapse = " | "),
        " (", sum(2^(0:7)[hdr$ftflgs]), ")\n",
        "You can try to read the file using hdr$ftflgs & ! TXYXYS (",
        sum(2^(0:7)[hdr$ftflgs & c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE)]),
        "). This assumes that all subfiles do have the same x axis.\n\n"
      )
    }

    hdr$subfiledir <- hdr$fnpts
    hdr$fnpts <- 0
  } else {
    hdr$subfiledir <- 0
  }


  ## some checks ......................................................

  if (hdr$ftflgs["TMULTI"]) {
    ## multiple spectra in file
    if (hdr$fnsub <= 1) {
      if (hy_get_option("debuglevel") >= 2L) {
        message("spc file header specifies multiple spectra but only zero or one subfile.")
      }
    }
  } else {
    ## single spectrum file
    if (hdr$fnsub == 0) {
      hdr$fnsub <- 1
    }

    if (hdr$fnsub > 1) {
      warning(
        "spc file header specifies single spectrum file  but ", hdr$fnsub,
        " subfiles (spectra).\nOnly first subfile will be read."
      )
      hdr$fnsub <- 1
    }

    if (hdr$ftflgs["TRANDM"]) {
      message("spc file header: file type flag TRANDM encountered => Enforcing TMULTI.")
    }

    if (hdr$ftflgs["TORDRD"]) {
      message("spc file header: file type flag TORDRD encountered => Enforcing TMULTI.")
    }

    if ((hdr$ftflgs["TRANDM"] || hdr$ftflgs["TORDRD"]) && hdr$fnsub > 1) {
      hdr$ftflgs["TMULTI"] <- TRUE
    }
  }

  if (hdr$ftflgs["TXYXYS"] && !hdr$ftflgs["TXVALS"]) {
    warning("spc file header: file type flag TXYXYS encountered => Enforcing TXVALS.")
    hdr$ftflgs["TXVALS"] <- TRUE
  }

  if (hdr$fwplanes > 0) {
    warning(
      "w planes found! This is not yet tested as the developer didn't have access to such files.\n",
      "Please contact the package maintainer ", maintainer("hyperSpec"),
      " stating whether the file was imported successfully or not."
    )
  }

  hdr
}

## read sub file header ......................................................
##
## needs header for consistency checks
##

.spc_subhdr <- function(raw_data, pos, hdr) {
  subhdr <- list(
    subflgs = raw_data[pos + (1)],
    subexp = readBin(raw_data[pos + (2)], "integer", 1, 1, signed = TRUE),
    subindx = readBin(raw_data[pos + (3:4)], "integer", 1, 2, signed = FALSE),
    subtime = readBin(raw_data[pos + (5:8)], "numeric", 1, 4),
    subnext = readBin(raw_data[pos + (9:12)], "numeric", 1, 4),
    subnois = readBin(raw_data[pos + (13:16)], "numeric", 1, 4),
    subnpts = readBin(raw_data[pos + (17:20)], "integer", 1, 4), # , signed = FALSE),
    subscan = readBin(raw_data[pos + (21:24)], "integer", 1, 4), # , signed = FALSE),
    subwlevel = readBin(raw_data[pos + (25:28)], "numeric", 1, 4)
  )
  ## 4 bytes reserved

  ## R doesn't have unsigned long int .................................
  if (any(unlist(subhdr[c("subnpts", "subscan")]) < 0)) {
    stop(
      "error reading subheader: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  hdr$.last.read <- pos + .spc_size["subhdr"]

  ## checking
  if (subhdr$subexp == -128 && hdr$fexp != -128) {
    message(
      "subfile ", subhdr$subindx, " specifies data type float, but file header doesn't.",
      "\n=> Data will be interpreted as float unless TMULTI is set."
    )
  }

  if (subhdr$subnpts > 0 && subhdr$subnpts != hdr$fnpts && !hdr$ftflgs["TXYXYS"]) {
    message(
      "subfile ", subhdr$subindx, ": number of points in file header and subfile header ",
      "inconsistent. => Going to use subheader."
    )
  }

  if (subhdr$subnpts == 0) {
    if (hdr$ftflgs["TXYXYS"]) {
      message(
        "subfile ", subhdr$subindx, ": number of data points per spectrum not specified. ",
        "=> Using file header information (", hdr$fnpts, ")."
      )
    }
    subhdr$subnpts <- hdr$fnpts
  }

  if (!hdr$ftflgs["TXYXYS"]) {
    if (hdr$fnpts != subhdr$subnpts) {
      .spc_error(
        ".spc_read_subhdr", list(hdr = hdr, subhdr = subhdr),
        "hdr and subhdr differ in number of points per spectrum, ",
        "but TXYXYS is not specified.\n hdr$ftflgs = ",
        paste(names(hdr$ftflgs)[hdr$ftflgs], collapse = " | "),
        " (", sum(2^(0:7)[hdr$ftflgs]), ")\n",
        "You can try to read the file using hdr$ftflgs | TMULTI | TXYXYS (",
        sum(2^(0:7)[hdr$ftflgs |
          c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE)]),
        ").\n\n"
      )
    }
  }

  #  str (subhdr)
  ## according to .spc file documentation:
  if (!hdr$ftflgs["TMULTI"]) {
    subhdr$subexp <- hdr$fexp
  } else if (hdr$fexp == -128 && subhdr$subexp != -128) {
    message(
      "Header file specifies float data format, but subfile uses integer exponent. ",
      "=> Using file header settings."
    )
    subhdr$subexp <- -128
  }

  ## the z values
  if (hdr$fzinc == 0) { # should only happen for the first subfile...
    hdr$fzinc <- subhdr$subnext - subhdr$subtime
  }

  if (subhdr$subindx == 0) {
    hdr$firstz <- subhdr$subtime
  }

  if (subhdr$subtime == 0) {
    subhdr$subtime <- subhdr$subindx * hdr$fzinc + hdr$firstz
  }

  ## the w values
  if (hdr$fwplanes > 0) {
    if (hdr$fwinc == 0) { ## unevenly spaced w planes
    }

    # if (subhdr$subwlevel != 0) {
    # 	subhdr$w <- subhdr$subwlevel
    #
    # } else if (subhdr$subindx %% hdr$fwplanes == 1)
    # 	subhdr$w <- hdr$subhdr$w +  hdr$fwinc
    # else
    # 	subhdr$w <- hdr$subhdr$w
  }


  hdr$subhdr <- subhdr

  hdr
}

## read subfile directory ...........................................................................
##

.spc_subfile_dir <- function(raw_data, pos, nsub) {
  dir <- data.frame(
    ssfposn = rep(NA, nsub),
    ssfsize = rep(NA, nsub),
    ssftime = rep(NA, nsub)
  )

  for (s in seq_len(nsub)) {
    dir[s, ] <- c(
      readBin(raw_data[pos + (1:4)], "integer", 1, 4), # , signed = FALSE),
      readBin(raw_data[pos + (5:8)], "integer", 1, 4), # , signed = FALSE),
      readBin(raw_data[pos + (9:12)], "numeric", 1, 4)
    )
    pos <- pos + .spc_size["subfiledir"]
  }

  ## R doesn't have unsigned long int .................................
  if (any(dir[, 1:2] < 0)) {
    stop(
      "error reading subfiledir: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  # 	dir$ssfposn <- dir$ssfposn
  dir
}

## read log block header ......................................................
##
.spc_log <- function(raw_data, pos, log_bin, log_disk, log_txt, keys_log2data,
                     replace.nul = as.raw(255), iconv.from = "latin1",
                     iconv.to = "utf8") {
  if (pos == 0) { # no log block exists
    return(list(
      data = list(),
      log = list()
    ))
  }

  loghdr <- list(
    logsizd = readBin(raw_data[pos + (1:4)], "integer", 1, 4), #  , signed = FALSE),
    logsizm = readBin(raw_data[pos + (5:8)], "integer", 1, 4), # , signed = FALSE),
    logtxto = readBin(raw_data[pos + (9:12)], "integer", 1, 4), # , signed = FALSE),
    logbins = readBin(raw_data[pos + (13:16)], "integer", 1, 4), # , signed = FALSE),
    logdsks = readBin(raw_data[pos + (17:20)], "integer", 1, 4), # , signed = FALSE),
    ## 44 bytes reserved
    .last.read = pos + .spc_size["loghdr"]
  )

  ## R doesn't have unsigned long int .................................
  if (any(unlist(loghdr) < 0)) {
    stop(
      "error reading log: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  log <- list()
  data <- list()

  ## read binary part of log
  if (log_bin) {
    log$.log_bin <- raw_data[loghdr$.last.read + seq_len(loghdr$logbins)]
  }

  ## read binary on-disk-only part of log
  if (log_disk) {
    log$.log_disk <- raw_data[loghdr$.last.read + loghdr$logbins + seq_len(loghdr$logdsks)]
  }

  ## read text part of log
  if (log_txt & loghdr$logsizd > loghdr$logtxto) {
    log_txt <- raw_data[pos + loghdr$logtxto + seq_len(loghdr$logsizd - loghdr$logtxto)]
    if (tail(log_txt, 1) == .nul) { # throw away nul at the end
      log_txt <- head(log_txt, -1)
    }
    log_txt[log_txt == .nul] <- replace.nul
    log_txt <- readChar(log_txt, length(log_txt), useBytes = T)
    log_txt <- gsub(rawToChar(replace.nul), "\r\n", log_txt, useBytes = TRUE)
    log_txt <- iconv(log_txt, iconv.from, iconv.to)
    log_txt <- split_string(log_txt, "\r\n") ## spc file spec says \r\n regardless of OS
    log_txt <- split_line(log_txt, "=")
    data <- get_by_names(log_txt, keys_log2data)
  }

  list(log_long = log, extra.data = data)
}


## read y data ...............................................................
##

.spc_read_y <- function(raw_data, pos, npts, exponent, word) {
  if (exponent == -128) { # 4 byte float

    list(
      y = readBin(raw_data[pos + seq_len(npts * 4)], "numeric", npts, 4),
      .last.read = pos + npts * 4
    )
  } else if (word) { # 2 byte fixed point integer = word

    list(
      y = readBin(
        raw_data[pos + seq_len(npts * 2)], "integer", npts, 2, signed = TRUE
      ) * 2^(exponent - 16),
      .last.read = pos + npts * 2
    )
  } else { # 4 byte fixed point integer = dword
    list(
      y = readBin(raw_data[pos + seq_len(npts * 4)], "integer", npts, 4) *
        2^(exponent - 32),
      .last.read = pos + npts * 4
    )
  }
}

## read x data ...............................................................
##

.spc_read_x <- function(raw_data, pos, npts) {
  list(
    x = readBin(raw_data[pos + seq_len(npts * 4)], "numeric", npts, 4),
    .last.read = pos + 4 * npts
  )
}

## error .....................................................................
.spc_error <- function(fname, objects, ...) {
  cat("ERROR in read_spc() function ", fname, "\n\n")
  for (i in seq_along(objects)) {
    cat(names(objects)[i], ":\n")
    str(objects[[i]], vec.len = 20)
  }
  stop(...)
}

.spc_ftflags <- function(x) {
  ftflgs <- as.logical(x %/% 2^(0:7) %% 2)
  names(ftflgs) <- c(
    "TSPREC", "TCGRAM", "TMULTI", "TRANDM",
    "TORDRD", "TALABS", "TXYXYS", "TXVALS"
  )
  ftflgs
}

# Main function ###############################################################

#' Import for Thermo Galactic's `spc` file format
#'
#' These functions allow to import Thermo Galactic/Grams `.spc` files.
#'
#' @param filename The complete file name of the `.spc` file.
#' @param keys_hdr2data,keys_log2data character vectors with the names of
#' parameters in the `.spc`
#' file's log block (log2xxx) or header (hdr2xxx) that should go into the
#' extra data (yyy2data) of the returned hyperSpec object.
#'
#' All header fields specified in the `.spc` file format specification (see
#'   below) are imported and can be referred to by their de-capitalized names.
#' @param log_txt Should the text part of the `.spc` file's log block be read?
#' @param log_bin,log_disk Should the normal and on-disk binary parts of the
#'   `.spc` file's log block be read?  If so, they will be put as raw vectors
#'   into the hyperSpec object's log_
#' @param hdr A list with fileheader fields that overwrite the settings of
#'   actual file's header.
#'
#' Use with care, and look into the source code for detailed insight on the
#'   elements of this list.
#' @param no_object If `TRUE`, a list with wavelengths, spectra, labels,
#'   log and data are returned instead of a hyperSpec object.
#'
#' This parameter will likely be subject to change in future - use with care.
#' @return If the file contains multiple spectra with individual wavelength
#'   axes, `read_spc()` returns a list of hyperSpec objects. Otherwise the
#'   result is a hyperSpec object.
#'
#' `read_spc_Kaiser_map()` returns a hyperSpec object with data columns x,
#'   y, and z containing the stage position as recorded in the `.spc` files'
#'   log_
#' @note Only a restricted set of test files was available for development.
#'   Particularly, the w-planes feature could not be tested.
#'
#' If you have `.spc` files that cannot be read with these function, don't
#'   hesitate to contact the package maintainer with your code patch or asking
#'   advice.
#' @author C. Beleites
#' @rdname read-spc
#' @references Source development kit and file format specification of `.spc`
#'   files.
#' @export
#'
#' @keywords IO file
#' @concept io
#'
#' @examples
#'
#' ## get the sample .spc files from ftirsearch.com (see above)
#' \dontrun{
#' # single spectrum
#' spc <- read_spc("BENZENE.SPC")
#' plot(spc)
#'
#' # multi-spectra .spc file with common wavelength axis
#' spc <- read_spc("IG_MULTI.SPC")
#' spc
#'
#' # multi-spectra .spc file with individual wavelength axes
#' spc <- read_spc("BARBITUATES.SPC")
#' plot(spc[[1]], lines.args = list(type = "h"))
#' }
#'
#' @importFrom utils modifyList
read_spc <- function(filename,
                     keys_hdr2data = FALSE, keys_log2data = FALSE,
                     log_txt = TRUE, log_bin = FALSE, log_disk = FALSE,
                     hdr = list(),
                     no_object = FALSE) {
  ## f contains the raw bytes of the file

  ## fpos marks the position of the last read byte
  ## this is the same as the offset from beginning of the file (count 0)
  ## in the .spc definition

  f <- readBin(filename, "raw", file.info(filename)$size, 1)

  hdr <- modifyList(.spc_file_hdr(f), hdr)
  fpos <- hdr$.last.read

  if (!hdr$ftflgs["TXYXYS"]) {
    if (!hdr$ftflgs["TXVALS"]) {
      ## spectra with common evenly spaced wavelength axis
      wavelength <- seq(hdr$ffirst, hdr$flast, length.out = hdr$fnpts)
    } else {
      ## spectra with common unevenly spaced wavelength axis
      # if (! hdr$ftflgs ['TMULTI']) { # also for multifile with common
      # wavelength axis
      tmp <- .spc_read_x(f, fpos, hdr$fnpts)
      wavelength <- tmp$x
      fpos <- tmp$.last.read
    }
    # }
  }

  ## otherwise (TXYXYS set) hdr$fnpts gives offset to subfile directory if
  ## that exists

  ## obtain labels from file hdr or from parameter
  label <- list(
    .wavelength = hdr$fxtype, spc = hdr$fytype,
    z = hdr$fztype, z.end = hdr$fztype
  )

  if (hdr$fwplanes > 0) {
    label$w <- hdr$fwtype
  }

  ## prepare list for hyperSpec log and data.frame for extra data

  data <- list(z = NA, z.end = NA)
  if (hdr$fwplanes > 0) {
    data <- c(data, w = NA)
  }

  ## process the log block
  tmp <- .spc_log(
    f, hdr$flogoff,
    log_bin, log_disk, log_txt,
    keys_log2data
  )
  ## TODO: remove data2log

  data <- c(data, tmp$extra.data, get_by_names(hdr, keys_hdr2data))

  ## preallocate spectra matrix or list for multispectra file with separate
  ## wavelength axes
  ## populate extra data
  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    spc <- list()
    data <- .prepare_hdr_df(data, n_subfiles = 1L)
  } else {
    spc <- matrix(NA, nrow = hdr$fnsub, ncol = hdr$fnpts)
    data <- .prepare_hdr_df(data, n_subfiles = hdr$fnsub)
  }

  ## read subfiles
  if (hdr$subfiledir) { ## TXYXYS
    hdr$subfiledir <- .spc_subfile_dir(f, hdr$subfiledir, hdr$fnsub)

    for (s in seq_len(hdr$fnsub)) {
      hdr <- .spc_subhdr(f, hdr$subfiledir$ssfposn[s], hdr)
      fpos <- hdr$.last.read
      wavelength <- .spc_read_x(f, fpos, hdr$subhdr$subnpts)
      fpos <- wavelength$.last.read

      y <- .spc_read_y(f, fpos,
        npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp,
        word = hdr$ftflgs["TSPREC"]
      )
      fpos <- y$.last.read

      data$z <- hdr$subhdr$subtime
      data$z.end <- hdr$subhdr$subnext

      if (hdr$fwplanes > 0) {
        data$w <- hdr$subhdr$w
      }

      if (!exists("wavelength")) {
        .spc_error(
          "read_spc", list(hdr = hdr),
          "wavelength not read. This may be caused by wrong header information."
        )
      }

      spc[[s]] <- new("hyperSpec",
        spc = y$y,
        wavelength = wavelength$x,
        data = data,
        labels = label
      )
    }
  } else { ## multiple y data blocks behind each other
    for (s in seq_len(hdr$fnsub)) {
      hdr <- .spc_subhdr(f, fpos, hdr)
      fpos <- hdr$.last.read
      tmp <- .spc_read_y(f, fpos,
        npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp,
        word = hdr$ftflgs["TSPREC"]
      )
      fpos <- tmp$.last.read

      spc[s, ] <- tmp$y

      data[s, c("z", "z.end")] <- unlist(hdr$subhdr[c("subtime", "subnext")])

      if (hdr$fwplanes > 0) {
        data[s, "w"] <- hdr$subhdr$w
      }
    }
  }

  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    ## list of hyperSpec objects
    ## consistent file import behaviour across import functions
    lapply(spc, .spc_io_postprocess_optional, filename = filename)
  } else if (no_object) {
    list(spc = spc, wavelength = wavelength, data = data, labels = label)
  } else {
    if (hdr$fnsub > 1L && nrow(data) == 1L) {
      data <- data[rep(1L, hdr$fnsub), ]
    }

    spc <- new("hyperSpec",
      spc = spc, wavelength = wavelength,
      data = data, labels = label
    )

    ## consistent file import behaviour across import functions
    .spc_io_postprocess_optional(spc, filename)
  }
}

# Unit tests -----------------------------------------------------------------

hySpc.testthat::test(read_spc) <- function() {
  context("read_spc()")

  spc_path <- system.file("extdata/spc", package = "hySpc.read.spc")
  shimadzu_path <- system.file("extdata/spc.Shimadzu", package = "hySpc.read.spc")
  kaisermap_path <- system.file("extdata/spc.Kaisermap", package = "hySpc.read.spc")
  witec_path <- system.file("extdata/spc.Witec", package = "hySpc.read.spc")
  labram_path <- system.file("extdata/spc.LabRam", package = "hySpc.read.spc")

  old_spc <- paste0(spc_path, c("/CONTOUR.SPC", "/DEMO_3D.SPC", "/LC_DIODE_ARRAY.SPC"))

  test_that("old file format -> error", {
    for (f in old_spc) {
      expect_error(read_spc(f))
    }
  })

  test_that("SPC SDK example files", {
    checksums <- c(
      `fileio/spc/BARBITUATES.SPC` = "f49bbc854c",
      `fileio/spc/barbsvd.spc` = "8a4d30672c",
      `fileio/spc/BENZENE.SPC` = "6fc7901d15",
      `fileio/spc/DRUG SAMPLE_PEAKS.SPC` = "a600cd05e2",
      `fileio/spc/DRUG SAMPLE.SPC` = "981e42bfb8",
      `fileio/spc/FID.SPC` = "ab65b6bb23",
      `fileio/spc/HCL.SPC` = "c657dd8279",
      `fileio/spc/HOLMIUM.SPC` = "18dc3b1ca3",
      `fileio/spc/IG_BKGND.SPC` = "0b083dab3a",
      `fileio/spc/IG_MULTI.SPC` = "fed652db3b",
      `fileio/spc/IG_SAMP.SPC` = "c72dd5fc70",
      `fileio/spc/KKSAM.SPC` = "8e905a5500",
      `fileio/spc/POLYR.SPC` = "78b5987d93",
      `fileio/spc/POLYS.SPC` = "608c01f69b",
      `fileio/spc/SINGLE POLYMER FILM.SPC` = "0e13423de4",
      `fileio/spc/SPECTRUM WITH BAD BASELINE.SPC` = "a05b77fada",
      `fileio/spc/time.spc` = "98eabdd347",
      `fileio/spc/TOLUENE.SPC` = "eb08948be8",
      `fileio/spc/TriVista-linear.spc` = "31b30dac34",
      `fileio/spc/TriVista-normal.spc` = "15d5d219b0",
      `fileio/spc/TUMIX.SPC` = "7f8db885fb",
      `fileio/spc/TWO POLYMER FILMS.SPC` = "173a0bb6d3",
      `fileio/spc/Witec-timeseries.spc` = "65f84533d8",
      `fileio/spc/XYTRACE.SPC` = "28594b6078"
    )

    # FIXME: unit test is missing

  })

  test_that("LabRam spc files", {
    labram1 <- read_spc(paste0(labram_path, "/LabRam-1.spc"))
    labram2 <- read_spc(paste0(labram_path, "/LabRam-2.spc"))
    expect_equal(labram1[[2]][[1]], 785)
    expect_equal(labram1[[1]][[4]], 678)

    expect_equal(labram2[[1]][[69]], 1849)
    expect_equal(labram2[[1]][[253]], 2563)
  })

  test_that("Shimadzu spc files do not yet work", {
    expect_error(read_spc(paste0(shimadzu_path, "/F80A20-1.SPC")))

    fname <- paste0(shimadzu_path, "/UV-2600_labeled_DNA.spc")

    # TODO #102 - implement support for Shimadzu files
    SHIMADZU_SPC_IMPLEMENTED <- F
    if (SHIMADZU_SPC_IMPLEMENTED) {
      # Compare data from SPC file and from CSV file. They should be equal
      spc <- read_spc(fname)


      expected <- read.txt.long(paste0(fname, ".csv"), sep = ",")
      expect_true(all.equal(spc$spc, expected$spc))
    } else {
      # IF NOT IMPLEMENTED
      # expect_error(
      #   read_spc("fileio/spc.Shimadzu/F80A20-1.SPC"),
      #   regexp = 'Shimadzu SPC'
      # )
      expect_error(read_spc(fname),
        regexp = "Support for Shimadzu SPC file format (OLE CF) is not yet implemented",
        fixed = T
      )
    }
  })



  test_that("Witec: some files supported", {
    expect_error(read_spc(paste0(witec_path, "/P_A32_006_Spec_Data_1.spc")))
    expect_error(read_spc(paste0(witec_path, "/P_A32_007_Spec_Data_1.spc")))

    tmp <- read_spc(paste0(witec_path, "/Witec-Map.spc"))
    expect_equal(tmp[[4]][[179]], 1138)
    expect_equal(tmp[[5]][[347]], 1019)

    ## no spatial information
    expect_null(tmp$x)
    expect_null(tmp$y)

    ## spectra numbered in z
    tmp <- read_spc(paste0(witec_path, "/Witec-timeseries.spc"))
    expect_equal(tmp[[53]][[231]], 1100)
    expect_equal(tmp[[71]][[739]], 981)
  })


  ## Kaiser spc files tested mostly in Kaiser-specific read_spc_Kaiser*
  ## unit tests


  # test_that("wplanes", {
  #
  #   skip("wplanes do not yet work")
  #   # wplanes
  # })

  test_that("option file.keep.name", {
    file.keep.name <- hy_get_option("file.keep.name")

    hy_set_options(file.keep.name = FALSE)
    expect_null(read_spc(paste0(labram_path, "/LabRam-2.spc"))$filename)
    hy_set_options(file.keep.name = TRUE)
    expect_equal(
      read_spc(paste0(labram_path, "/LabRam-2.spc"))$filename,
      paste0(labram_path, "/LabRam-2.spc")
    )


    hy_set_options(file.keep.name = file.keep.name)
  })

  test_that("hdr2data", {
    expect_equal(
      colnames(
        read_spc(paste0(labram_path, "/LabRam-2.spc"), keys_hdr2data = TRUE)
      ),
      c(
        "z", "z.end", "ftflgs", "fexper", "fexp", "fnpts", "ffirst",
        "flast", "fnsub", "fxtype", "fytype", "fztype", "fpost", "fdate",
        "fres", "fsource", "fspare", "fcmnt", "fcatxt", "flogoff", "fmods",
        "fprocs", "flevel", "fsampin", "ffactor", "fmethod", "fzinc",
        "fwplanes", "fwinc", "fwtype", ".last.read", "subfiledir", "spc",
        "filename"
      )
    )
  })
}


.prepare_hdr_df <- function(data, n_subfiles) {
  ## the *type header elements are expressions. They need to be converted
  ## to character.
  data <- lapply(data, function(x) {
    if (mode(x) == "expression") {
      as.character(x)
    } else {
      x
    }
  })

  ## convert vectors to matrix, otherwise the data.frame will contain one
  ## row per element.
  ## matrices need to be protected during as.data.frame

  vector.entries <- which(sapply(data, length) > 1L)
  for (v in vector.entries) {
    data[[v]] <- I(t(as.matrix(data[[v]])))
  }

  data <- as.data.frame(data, stringsAsFactors = FALSE)
  data <- data[rep(1L, n_subfiles), ]

  for (v in vector.entries) {
    data[[v]] <- unclass(data[[v]])
  } # remove AsIs protection

  data
}

# Helper functions -----------------------------------------------------------
### -------------------------------------------------------------------------~
###
### split_string - split string at pattern
###
###

split_string <- function(x, separator, trim.blank = TRUE, remove.empty = TRUE) {
  pos <- gregexpr(separator, x)
  if (length(pos) == 1 && pos[[1]] == -1) {
    return(x)
  }

  pos <- pos[[1]]

  pos <- matrix(c(
    1, pos + attr(pos, "match.length"),
    pos - 1, nchar(x)
  ),
  ncol = 2
  )

  if (pos[nrow(pos), 1] > nchar(x)) {
    pos <- pos[-nrow(pos), ]
  }

  x <- apply(pos, 1, function(p, x) substr(x, p[1], p[2]), x)

  if (trim.blank) {
    blank.pattern <- "^[[:blank:]]*([^[:blank:]]+.*[^[:blank:]]+)[[:blank:]]*$"
    x <- sub(blank.pattern, "\\1", x)
  }

  if (remove.empty) {
    x <- x[sapply(x, nchar) > 0]
  }

  x
}

### split_line

split_line <- function(x, separator, trim.blank = TRUE) {
  tmp <- regexpr(separator, x)

  key <- substr(x, 1, tmp - 1)
  value <- substr(x, tmp + 1, nchar(x))

  if (trim.blank) {
    blank.pattern <- "^[[:blank:]]*([^[:blank:]]+.*[^[:blank:]]+)[[:blank:]]*$"
    key <- sub(blank.pattern, "\\1", key)
    value <- sub(blank.pattern, "\\1", value)
  }

  value <- as.list(value)
  names(value) <- key

  value
}

# Unit tests -----------------------------------------------------------------
hySpc.testthat::test(split_string) <- function() {
  context("split_string()")

  # Perform tests
  test_that("split_string() returnts output silently", {
    expect_error(split_string())
    expect_error(split_string(letters))

    expect_silent(split_string("letters", "r"))
  })

  # FIXME (tests): add tests to check the correctness of the output!!!
}


### -------------------------------------------------------------------------~
###
### get_by_names - get list elements by name and if no such element exists, NA
###
###

get_by_names <- function(x, e) {
  x <- x[e]
  if (length(x) > 0) {
    if (is.character(e)) {
      names(x) <- e
    }
    x[sapply(x, is.null)] <- NA
    x
  } else {
    list()
  }
}



# Unit tests -----------------------------------------------------------------
hySpc.testthat::test(get_by_names) <- function() {
  context("get_by_names()")

  # Perform tests
  test_that("get_by_names() works", {
    lst <- list(a = 1, b = "b", c = 2i)

    expect_equal(get_by_names(lst, "a"), list(a = 1))
    expect_equal(get_by_names(lst, 1), list(a = 1))
    expect_equal(get_by_names(lst, 2), list(b = "b"))
    expect_equal(get_by_names(lst, 6)[[1]], NA)
  })
}
r-hyperspec/hySpc.read.spc documentation built on April 27, 2023, 1:46 p.m.