
Defines functions parseDatHeaderString

Documented in parseDatHeaderString

# @RdocFunction parseDatHeaderString
# @title "Parses a DAT header string"
# \description{
#  @get "title".
# }
# @synopsis
# \arguments{
#  \item{header}{A @character string.}
#  \item{timeFormat}{The format string used to parse the timestamp.
#    For more details, see \code{\link[base:strptime]{strptime}()}.
#    If @NULL, no parsing is done.}
#  \item{...}{Additional arguments passed to
#    \code{\link[base:strptime]{strptime}()} when the timestamp is parsed.}
# }
# \value{
#  Returns a named @list structure.
# }
# \seealso{
#  @see "readCelHeader".
# }
# @author "HB"
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
parseDatHeaderString <- function(header, timeFormat="%m/%d/%y %H:%M:%S", ...) {
  # Parses a DAT header string into a named list with the elements
  # nbrOfPixelColumns, nbrOfPixelRows, pixelWidth, pixelHeight, scanSpeed,
  # temperature, laserPower, timestamp and chipType.
  #
  # Arguments:
  #  header     - A character string holding the DAT header.
  #  timeFormat - Format string passed to strptime() for parsing the
  #               timestamp.  If NULL, the timestamp is returned as the
  #               extracted character string.
  #  ...        - Passed on to strptime().

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Strips leading and trailing whitespace (tab, newline, form feed,
  # carriage return and space) and returns the trimmed string.
  trim <- function(s, ...) {
    s <- as.character(s);
    s <- sub("^[\t\n\f\r ]*", "", s);
    s <- sub("[\t\n\f\r ]*$", "", s);
    s;
  } # trim();

  # <quote>
  #   There are several sub-fields in this field. The first sub field is the
  # scanner ID, sometimes followed by a number, followed by three spaces.
  # If the scanner ID is absent, the field consists of four spaces.
  #   Next are 10 structured comment fields. Each field is preceded by the
  # delimiter 0x14 and a space. The field is followed by a space and 0x14.
  # Only field two is valid, the other 9 fields are obsolete. Field 2
  # contains the probe array type, followed by .1sq. The 1sq extension is
  # also obsolete.
  #   Next (after the last structured field) there is the chip orientation
  # preceded by a space.
  #   The rest of the field is filled with nulls (zeros)
  # </quote>

  # Split by ASCII 0x14 delimiter (surrounded by single spaces).  The first
  # part holds the scan parameters; the remaining parts are comment fields.
  parts <- strsplit(header, split=" \024 ", fixed=TRUE)[[1]];
  field <- parts[1];
  commentFields <- parts[-1];

  res <- list();

  # One pattern with seven capture groups; each value below is extracted by
  # replacing the whole match with a single group.  If the pattern does not
  # match, gsub() returns the field unchanged and the numeric conversion
  # will typically yield NA.
  # NOTE(review): because the ".*" before the last two groups is greedy,
  # groups 6 and 7 always capture the final 7 + 4 characters of the field.
  pattern <- ".*CLS=(.{5})RWS=(.{5})XIN=(.{3})YIN=(.{3})VE=(.{3}).*(.{7})(.{4})";

  # Number of pixels per row (padded with spaces), preceded with "CLS=".
  res$nbrOfPixelColumns <- as.integer(gsub(pattern, "\\1", field));

  # Number of rows in the image (padded with spaces), preceded with "RWS=".
  res$nbrOfPixelRows <- as.integer(gsub(pattern, "\\2", field));

  # Pixel width in micrometers (padded with spaces), preceded with "XIN="
  res$pixelWidth <- as.double(gsub(pattern, "\\3", field));

  # Pixel height in micrometers (padded with spaces), preceded with "YIN="
  res$pixelHeight <- as.double(gsub(pattern, "\\4", field));

  # Scan speed in millimeters per second (padded with spaces),
  # preceded with "VE=".
  res$scanSpeed <- as.double(gsub(pattern, "\\5", field));

  # Temperature in degrees Celsius (padded with spaces). If no temperature was
  # set then the entire field is empty.
  res$temperature <- as.double(gsub(pattern, "\\6", field));

  # Laser power in milliwatts or microwatts (padded with spaces).
  # This is the seventh (last) capture group, not the sixth.
  res$laserPower <- as.double(gsub(pattern, "\\7", field));

  # Extract the date timestamp ("mm/dd/yy HH:MM:SS") from the full header.
  # The leading ".*" is greedy, so the last such timestamp is the one kept.
  pattern <- ".*([01][0-9]/[0-3][0-9]/[0-9][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]).*";
  timestamp <- gsub(pattern, "\\1", header);
  timestamp <- trim(timestamp); # Unnecessary?

  # Parse the identified timestamp (strptime() returns a POSIXlt object)?
  if (!is.null(timeFormat)) {
    timestamp <- strptime(timestamp, format=timeFormat, ...);
    # If no valid timestamp was found, return NA.
    if (length(as.character(timestamp)) == 0) {
      timestamp <- as.POSIXct(NA);
    }
  }
  res$timestamp <- timestamp;

  # The chip type is the second comment field with the obsolete ".1sq"
  # extension removed.
  res$chipType <- trim(gsub("[.]1sq", "", commentFields[2]));

  # Return the parsed header
  res;
} # parseDatHeaderString()

# 2009-09-21
# o Created from internal code in aroma.affymetrix.

Try the affxparser package in your browser

Any scripts or data that you put into this service are public.

affxparser documentation built on Nov. 8, 2020, 7:26 p.m.