R/MicroarrayData.IO.R

############################################################################
# I/O methods
############################################################################


setMethodS3("readToList", "MicroarrayData", function(this, filename, path=NULL, reqFields, verbose=FALSE, ...) {
  filename <- Arguments$getReadablePathname(filename, path);  

  if (verbose) cat("Reading file ", filename, "...", sep="");

  # Support gzip'ed files too.
  if (regexpr("[.]gz$", filename) != -1) {
    tmpname <- tempfile();
    n <- gunzip(filename, tmpname);
    filename <- tmpname;
    on.exit(file.remove(tmpname));
  } 

  # Read the data from file
  df <- read.table(filename, header=TRUE);

  MicroarrayData$dataFrameToList(df, reqFields=reqFields);
}, protected=TRUE, static=TRUE, trial=TRUE);




#########################################################################/**
# @set "class=MicroarrayData"
# @RdocMethod read
#
# @title "Reads microarray data generated by another software package"
#
# @synopsis
#
# \arguments{
#   \item{...}{Any arguments accepted by the read() methods of the 
#    subclasses. Such arguments are commonly: filename, pattern and path.}
#   \item{verbose}{If @TRUE, information will printed out during
#                  the reading of the file.}
# }
#
# \value{
#   Returns a @see "MicroarrayData" object.
# }
#
# \description{
#   Static method that reads microarray data generated by another software
#   package into a MicroarrayData object. If the a filename pattern is
#   used, the files are guaranteed to be read in lexical (alphabetic) 
#   order.
# }
#
# \examples{
#   \dontrun{
#   # Due to a bug in R CMD check (R v1.7.1) the MicroarrayData$read() call
#   # below will call getKnownSubclasses(), which will generate
#   #   "Error in exists(objectName, mode = "function") : 
#   #	   [2003-07-07 23:32:41] Exception: F used instead of FALSE"
#   # Note that the example still work, just not in R CMD check
#
#   sa <- MicroarrayData$read(pattern="group.*.dat", path=system.file("data-ex", package="aroma"))
#   }
# }
#
# @author
#*/#########################################################################
setMethodS3("read", "MicroarrayData", function(static, ..., verbose=FALSE) {
  # 1. Get all subclasses of MicroarrayData.
  subclasses <- getKnownSubclasses(MicroarrayData);

  # 2. The possible full names of read() methods to be tried.
  methods <- paste("read.", subclasses, sep="");

  # 3. Of which only a few exists
  methods <- intersect(methods, methods("read"));

  # 4. Turn of all warnings when reading and reset them afterwards.
  owarn <- options(warn=-1);
  on.exit(options(owarn));

  # 5. For each of these read() methods in the subclasses, try to read
  #    the data using that static read() method.
  object <- NULL;
  for (method in methods) {
    tryCatch({
      mthd <- get(method, mode="function");
      if (verbose)
        cat("Trying ", method, "...", sep="");
      object <- mthd(static, ..., verbose=verbose);
      if (verbose)
        cat("ok\n");
      # Can not do return(object) inside trycatch() calls.
    }, error = function(ex) {
      if (verbose)
        cat("failed\n");
    })
    if (!is.null(object))
     return(object);
  }

  triedClasses <- gsub("^read[.]", "", methods);
  throw("Could not recognize the file format or unexisting files. Tried to read the data using the following classes: ", paste(triedClasses, collapse=", "));
}, static=TRUE);




#########################################################################/**
# @RdocMethod write
#
# @title "Write a MicroarrayData object to file"
#
# \description{
#   @get "title". By default, if not overridden
#   by a method in a subclass, it writes the data to a tab-delimited file.
#   Note that subclasses like GenePixData, ScanAlyzeData and SpotData do
#   write files in their special file formats. To force such object of such
#   classes to be written as tab-delimited file, do
#   \code{write.MicroarrayData(object, ...)} instead.
# }
#
# @synopsis
#
# \arguments{
#   \item{filename}{The filename of the GPR file to be written.}
#   \item{path}{The path to the GPR file.}
#   \item{slides}{The slides to be written. If @NULL, all slides are considered.}
#   \item{overwrite}{If @TRUE, an existing file is overwritten.
#     Otherwise an exception is thrown.}
#   \item{row.names}{If @TRUE, row names are written, otherwise not.}
#   \item{sep}{The separator between the cells.}
#   \item{...}{Other arguments accepted by subclasses or which are passed
#     to \code{write.table}.}
# }
#
# \value{Returns nothing.}
#
# @author
#
# \examples{
#   # Loads the file 'gpr123.gpr' located in the data directory:
#   gpr <- GenePixData$read("gpr123.gpr", path=system.file("data-ex", package="aroma"))
#
#   # Writes the GenePix Results Data to a file named "temp.gpr". Note
#   # that this file won't be exactly the same since a few lines,
#   # specifying for instance the creator of the file, will be added.
#   write(gpr, "temp.gpr", overwrite=TRUE)
#
#   # Extracts the raw data from the gpr object and saves it to file.
#   raw <- getRawData(gpr)
#   write(raw, "temp.raw", overwrite=TRUE)
#   file.show("temp.raw")
# }
#
# \seealso{
#   To read one or more MicroarrayData files at once see @seemethod "read".
#   @seeclass
# }
#*/#########################################################################
setMethodS3("write", "MicroarrayData", function(this, filename, path=NULL, slides=NULL, overwrite=FALSE, row.names=FALSE, sep="\t", ..., verbose=FALSE) {
  filename <- Arguments$getWritablePathname(filename, path, mustNotExist=!overwrite);  

  slides <- validateArgumentSlides(this, slides=slides);

  # Extracts the fields to be written
  fields <- c("slide", "spot", "gene", getFieldNames(this));
  df <- extract(this, fields=fields, slides=slides);

  write.table(df, file=filename, row.names=row.names, sep=sep, ...);
})



setMethodS3("readHeader", "MicroarrayData", function(static, filename, path=NULL, verbose=FALSE) {
  filename <- Arguments$getReadablePathname(filename, path);  

  # Read the file
  lines <- readLines(filename);
  lines <- strsplit(lines, split="\t");
  keys <- lapply(lines, FUN=function(x) x[1]);
  values <- lapply(lines, FUN=function(x) x[-1]);

  header <- values;
  names(header) <- keys;

  header;
}, static=TRUE)



setMethodS3("updateHeader", "MicroarrayData", function(this, header) {
  if (!inherits(this, header[["Class"]])) {
    throw("Can not update object of class ", class(this)[1],
          " with information from another class: ", header[["Class"]]);
  }

  for (key in names(header)) {
    value <- header[[key]];
    if (key == "Array names") {
      setSlideName(this, value);    
    }
  }
})



setMethodS3("writeHeader", "MicroarrayData", function(this, filename, path=NULL, overwrite=FALSE, verbose=FALSE) {
  filename <- Arguments$getWritablePathname(filename, path, mustNotExist=!overwrite);  

  header <- list(
    "Class"            = class(this)[1],
    "Number of spots"  = nbrOfSpots(this),
    "Number of arrays" = nbrOfSlides(this)
  );
  
  arraynames <- getSlideName(this);
  if (!is.null(arraynames))
    header[["Array names"]] <- paste(arraynames, collapse="\t");

  keys <- names(header);
  
  lines <- NULL;
  for (kk in seq(header)) {
    row <- paste(keys[kk], header[[kk]], sep="\t");
    lines <- c(lines, row);
  }

  writeLines(lines, con=filename, sep="\n");
})





############################################################################
# HISTORY:
# 2008-01-15
# o Replaced an obsolete trycatch() with tryCatch().
# 2005-10-21
# o Replace 'overwrite' arguments with 'mustNotExist' in calls to Arguments. 
# 2005-07-19
# o Replaced all path="" arguments to path=NULL.
# 2005-06-11
# o Making use of Arguments in R.utils.
# 2005-03-08
# o Added automatic detection and reading of gunzipped files (*.gz).
# 2004-05-07
# o Removed deprecated MicroarrayData$readAll().
# o Added trial readHeader(), writeHeader(), and updateHeader().
# 2004-03-09
# o BUG FIX: Argument 'slides' of write() in MicroarrayData had no effect;
#   all slides were written anyway. Thanks Johan Lindberg at the Royal
#   Institute of Technology, Stockholm for reporting this.
#   Updated the Rdoc comments for write() too.
# 2003-09-24
# o read() in MicroarrayData now gives a correctly spelled(!) error message
#   if it did not recognize any of the data files.
# 2002-10-28
# o Now the static method read() will try to call read() of all the 
#   subclasses of MicroarrayData until some read the data. If all fail,
#   an exception is thrown. This means that the user does not have to 
#   think of what file format the data is in, but she/he can always call
#   obj <- MicroarrayData$read(...).
# 2002-10-22
# * Added static method get AbsolutePath() to help out creating an absolute
#   pathname from 'filename' and 'path', where filename can be either a
#   string of a File object.
# 2002-04-21
# * Extracted I/O functions to MicroarrayData.IO.R.
# * Made getColors() generate grayscale colors by default. Before the method
#   was declared abstract.
# * Added trial version of normalizeGenewise().
# * Replaced some of the throw()'s with throw()'s.
# * Added getGeneReplicateIndex() and getGeneSlideReplicateIndex()
#   to MicroarrayData for fast access to the (gene, slide, replicate)
#   indices.
# 2002-04-20
# * Added trial versions of the static functions dataFrameToList() and
#   readToList(). These can support the read()  functions in the subclasses.
# * hist() now also excludes Inf's in addition to NA's. It turned out that
#   Lei Jiang's data, who reported the bug,  contained *one* M value that
#   was Inf. The result was that pretty(), which is called by hist(), gave
#   an error like "NA/NaN/Inf in foreign function call (arg 2)".
# * Added reference to 'plot' also in addition to 'par' in plot-functions.
#   This was done on a question how to set the limits on the axis.
# 2002-04-12
# * Updated the Rdoc example for plot() so it gives example on how to plot
#   a certain slide.
# 2002-04-06
# * Added support for multiple fields in normalizePrintorder() and
#   normalizeSpatial().
# * Renamed argument 'what' to 'field' in plotPrintorder().
# * Added argument 'breakpoints' to plotPrintorder().
# * Added normalizePrintorder().
# 2002-04-05
# * Added normalizeSpatial(). It is "smart", because it tries to get the
#   physical positions of the spots, but such information is not available
#   the positions according to the Layout object is used.
# 2002-04-03
# * write() now supports File objects.
# 2002-03-29
# * Updated the Rdoc's so any references to old get() are now to extract().
# 2002-03-25
# * Added static method createColors().
# 2002-03-24
# * Updated the Rdoc example for text().
# * BUG FIX: text() did not work correctly if argument 'labels' where not
#   explicitly set. The reason for this was that getInclude() was changed
#   to return a vector of TRUE and FALSE. Had to use which().
# * Changed this$getInclude(...) to getInclude(this, ...).
# 2002-03-13
# * BUG FIX: Forgot the sep="\t" in write().
# * Added some more Rdoc comments to write().
# 2002-03-10
# * BUG FIX: Argument 'slides' in write() was mistakenly named 'slide'.
# * write() in MicorarrayData will now by default save the object as the
#   data frame returned by as.data.frame(), i.e. if write() is not
#   overridden by a subclass e.g. GenePixData, which then saves in a
#   different format.
# * BUG FIX: extract() in MicroarrayData would neglect the special fields
#   "slide", "spot" and "gene". This automatically also fixed the fact that
#   as.data.frame() would not create these fields.
# 2002-02-26
# * Added the virtual fields "slide", "spot", "gene" to extract.
# 2002-02-25
# * Added read(), readAll(), write(), append().
# * Modified this class to support GenePixData etc directly without making
#   use of a ResultsData class.
# 2002-01-24
# * Renamed method get() to extract(). get() is not safe!
# * Rewritten to make use of setClassS3 and setMethodS3.
# 2002-01-19
# * Added getSlideName() and setSlideName(). Used in automatic labelling of
#   plots. See the putSlide() method.
# 2002-01-17
# * Added argument 'new=TRUE' to printReplicates. With new==FALSE it is
#   possible to plot another sequence of replicates in the same plot. This
#   is useful for instance when you want to look at the effect before and
#   after normalization.
# * BUG FIX: putGene() crashed if 'id' or 'name' was "auto"; instead of 
#   doing "if(id && name) ..." it is safer/better to do 
#   "if (id == TRUE && name == TRUE) ...".
# * BUG FIX: plotReplicates() didn't work if there where no within-slide
#   replicates. This is now corrected.
# 2002-01-15
# * Added putSlide().
# * Added seq() to simplify multiplots.
# * Added argument 'adjustMargins=TRUE' to subplots().
# * Added putTimestamp, putDataset, and putAuthor to all plot 
#   methods.
# 2001-11-18
# * Added getField() and getFields().
# 2001-08-11
# * Added plotPrintorder.
# 2001-08-08
# * Added the first core functionality of has-, get- setExcludedSpots.
#   By using which and unwhich I hope it could also be memory efficient.
#   I do not want to just set the values to NA to exclude, because then
#   one can not unexclude. Instead I want to flag some spots to be excluded.
# 2001-08-06
# * BUG FIX: plotSpatial set the plot history to "plotXY" instead of
#   "plotSpatial". This bug was probably a cut-and-paste misstake.
# * BUG FIX: plotXY and plotSpatial generated the wrong colors for all
#   cases where slide > 1. It turned out do be bug in getColors.MAData.
# 2001-08-03
# * Added getFieldNames(), setFieldNames(), renameField().
# 2001-07-31
# * Added hasLayout() and made setLayout() assert the argument layout.
# * Updated nbrOfSpots() to either ask the Layout of use 
#   get(fields=1, slides=1) to figure out the number of spots. This
#   method is a little bit inefficient so it should be overloaded by
#   subclasses.
# 2001-07-24
# * lowessCurve() now returns the lowess line.
# 2001-07-18
# * Bug fix: Forgot the 'labels' argument in call to text() in plotXY.
# 2001-07-17
# * TODO: Have to decide if spot indices should also run across slides, i.e.
#   the unique indices should continue counting on the next slide etc. This
#   is a complicated issue and somehow the though has to be digested before
#   making a decision. Now, I think some function are a little bit ambigous
#   in the use of slide, include and exclude arguments. It is pretty obvious
#   though that the include and exclude should be done before applying the
#   slide argument.
# * Added some Rdoc comments.
# * Updated the plot() method internally; now less if and then statements.
# * Removed the .lastPlot field. Making use of Device$setPlotParameters
#   instead.
# 2001-07-15
# * getIncludes() now also accepts lists in include/exclude arguments.
# * From now on the cex, col, pch etc arguments to the plot functions are
#   not subject to include and exclude. I made this decision since most
#   often you want for instance highlight four spots with four different
#   colors and nothing else. This was much harder to do before.
# 2001-07-12
# * Updated the pin.lty in plotXY to be the same as the one in Layout$put().
# * Bug fix: Trying to load a gpr data set with layout 8x4x15x16, the
#   pin.lty <- rep(...) function gave an error. used ngrid.c instead of
#   ngrid.r. The data I've tried this far have had ngrid.c == ngrid.r!
# 2001-07-11
# * Made .layout public, i.e. renamed it to layout.
# * Updated some of the Rdoc comments.
# 2001-07-09
# * Totally removed the use of image() in the plotSpatial() method. image()
#   had a "uncontrolable" color method and thanks to a highly improved
#   getPosition.Layout speed.
# 2001-07-06
# * Added addFlag(). Updated clearFlag() to work with regexpr's too.
# * Renamed plotYvsX to plotXY.
# 2001-07-05
# * Made include in the plot functions be operating on flags which have been
#   set by flag(). Removed all exlude from the plot functions.
# * Made plotYvsX() and plotSpatial() more generic and moved both of them to
#   this class. Also moved highlight(), text(), plot() to this class.
# 2001-07-04
# * Added the .extra field w/ the setExtra(), getExtra() methods.
# 2001-07-01
# * Removed plotSpatial(); now MicroarrayData is totally plot free.
# * Added getLabel() and setLabel(). Really useful!
# * Removed the rename() method since the internal field names should never
#   be modified.
# * Generic get() and set() seems to works fine. Added abstract setField().
# 2001-06-29
# * Created from old ResultsData.
############################################################################
HenrikBengtsson/aroma documentation built on May 7, 2019, 12:56 a.m.