#########################################################################/**
# @RdocFunction convertCdf
#
# @title "Converts a CDF into the same CDF but with another format"
#
# @synopsis
#
# \description{
# @get "title".
# Currently only CDF files in version 4 (binary/XDA) can be written.
# However, any input format is recognized.
# }
#
# \arguments{
# \item{filename}{The pathname of the original CDF file.}
# \item{outFilename}{The pathname of the destination CDF file.
# If the same as the source file, an exception is thrown.}
# \item{version}{The version of the output file format.}
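# \item{force}{If @FALSE, and the version of the original CDF is the
# same as the output version, the new CDF will not be generated,
# otherwise it will. Note that this check is currently disabled in
# the implementation, so the new CDF is always generated.}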
# \item{...}{Not used.}
# \item{.validate}{If @TRUE, a consistency test between the generated
# and the original CDF is performed. Note that the memory overhead
# for this can be quite large, because two complete CDF structures
# are kept in memory at the same time.}
# \item{verbose}{If @TRUE, extra details are written while processing.}
# }
#
# \value{
# Returns (invisibly) @TRUE if a new CDF was generated, otherwise @FALSE.
# }
#
# \section{Benchmarking of ASCII and binary CDFs}{
# Binary CDFs are much faster to read than ASCII CDFs. Here are some
# examples for reading complete CDFs (the difference is even larger when
# reading CDFs in subsets):
# \itemize{
# \item HG-U133A (22283 units): ASCII 11.7s (9.3x), binary 1.20s (1x).
# \item Hu6800 (7129 units): ASCII 3.5s (6.1x), binary 0.57s (1x).
# }
# }
#
# \section{Confirmed conversions to binary (XDA) CDFs}{
# The following chip types have been converted using \code{convertCdf()}
# and then verified for correctness using \code{compareCdfs()}:
# ASCII-to-binary: HG-U133A, Hu6800.
# Binary-to-binary: Test3.
# }
#
# @examples "../incl/convertCdf.Rex"
#
# @author "HB"
#
# \seealso{
# See @see "compareCdfs" to compare two CDF files.
# @see "writeCdf".
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
convertCdf <- function(filename, outFilename, version="4", force=FALSE, ..., .validate=TRUE, verbose=FALSE) {
  # Reads the complete source CDF (header, QC units and units), writes it
  # to 'outFilename' via writeCdf(), and optionally checks the result
  # against the source via compareCdfs().
  #
  # Returns invisible(TRUE) once the new CDF has been written (and, if
  # requested, validated).  Throws an error if the source file does not
  # exist, if the destination refers to the source file, if 'version' is
  # not "4", or if the validation reports an inconsistency.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Expand any '~' in the pathname.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("Cannot open CDF file. File does not exist: ", filename)
  }

  # Argument 'outFilename':
  # Expand any '~' in the pathname.
  outFilename <- file.path(dirname(outFilename), basename(outFilename))

  # Protect the source file from being overwritten.  A plain string
  # comparison misses the same file spelled differently (for instance
  # with a redundant "." component or relative versus absolute), so the
  # normalized pathnames are compared as well.  The destination need not
  # exist yet, hence mustWork=FALSE.
  sameFile <- identical(outFilename, filename) ||
    identical(normalizePath(outFilename, mustWork=FALSE),
              normalizePath(filename, mustWork=FALSE))
  if (sameFile) {
    stop("Cannot convert CDF file. Destination is identical to the source pathname: ", filename)
  }

  # Argument 'version':
  # Only "4" is accepted; anything else (including NA or a vector of
  # several values) is rejected with an explicit error.
  version <- as.character(version)
  if (!identical(version, "4")) {
    stop("Cannot convert CDF. Currently only version 4 (binary/XDA) can be written: ",
         paste(version, collapse=", "))
  }

  # Argument 'force':
  # NOTE: 'force' is coerced but not consulted below, because the check
  # that would skip the conversion is commented out.
  force <- as.logical(force)

  # Argument '.validate':
  .validate <- as.logical(.validate)

  # Argument 'verbose':
  # Kept as an integer level; the level passed on to writeCdf() is one
  # less (see below).
  verbose <- as.integer(verbose)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read source CDF
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose) {
    cat("Reading CDF header...\n")
  }
  cdfHeader <- readCdfHeader(filename)
  # Disabled: skip the conversion when the source already has the
  # requested file-format version.
  # if (version == cdfHeader$version) {
  #   if (!force) {
  #     warning("The original CDF file is already in file-format version ",
  #             version, ": ", filename);
  #     if (verbose)
  #       cat("Reading CDF header...done\n");
  #     return(invisible(FALSE));
  #   }
  # }
  if (verbose) {
    cat("Reading CDF header...done\n")
  }

  # Read QC units
  if (verbose) {
    cat("Reading CDF QC units...\n")
  }
  cdfQcUnits <- readCdfQc(filename)
  if (verbose) {
    cat("Reading CDF QC units...done\n")
  }

  # Read units
  if (verbose) {
    cat("Reading CDF units...\n")
  }
  cdfUnits <- readCdf(filename)
  if (verbose) {
    cat("Reading CDF units...done\n")
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write new CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose) {
    cat("Writing CDF structure...\n")
  }
  # writeCdf() gets a verbosity level one lower than ours, but never
  # below zero.
  verbose2 <- max(verbose - 1, 0)
  timing <- system.time({
    writeCdf(outFilename, cdfheader=cdfHeader, cdf=cdfUnits,
             cdfqc=cdfQcUnits, overwrite=TRUE, verbose=verbose2)
  })
  if (verbose) {
    cat("Timing for writeCdf():\n")
    print(timing)
    cat("Writing CDF structure...done\n")
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (.validate) {
    if (verbose) {
      cat("Comparing CDFs...\n")
    }
    res <- compareCdfs(filename, outFilename, verbose=verbose)
    if (!res) {
      # The explanation for the mismatch is read from the "reason"
      # attribute of the value returned by compareCdfs().
      stop("An inconsistency between source and destination CDF was detected. Reason: ", attr(res, "reason"))
    }
    if (verbose) {
      cat("Comparing CDFs...done\n")
    }
  } # if (.validate)

  invisible(TRUE)
} # convertCdf()
############################################################################
# HISTORY:
# 2007-07-26
# o Removed debug assignment 'res2 <<- res' before validation error message.
# 2006-09-09
# o More benchmarking: A binary-to-binary conversion of Mapping50K_Hind240
# (56.0Mb; 57299 units) takes in total 25 mins. It is the writing of
# units that is slow.
# o Simple benchmarking: On Thinkpad A31 1Gb RAM; HG-U133A.CDF (22283 units)
# ASCII (40.3Mb) -> XDA (10.8Mb): ~15s to read ASCII, ~270s to write XDA,
# and ~420s to validate.
# o Now validate is making use of compareCdfs().
# 2006-09-08
# o Added protection against overwriting the source file.
# 2006-09-07 /HB
# o Ken Simpson at WEHI just tried to convert an ASCII Exon CDF to a binary
# CDF using convertCdf(). The ASCII CDF is ???MB, and the binary one
# became 321MB. He did this on a 4-core with 16GB RAM Linux system, and
# it took 45 minutes, which probably includes the validation tests,
# which eventually made R run out of memory. I've updated the code a bit
# to make the validation a little bit more memory efficient. It is
# probably better to compare chunks of units and not all of them at the
# same time. The writeCdf() function can be made faster if writing to
# a raw vector internally which is then dumped to file. This should be
# done in chunks to also optimize memory usage.
# o Created.
############################################################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.