Nothing
##########################################################################/**
# @RdocClass CopyNumberChromosomalModel
#
# @title "The CopyNumberChromosomalModel class"
#
# \description{
# @classhierarchy
#
# This \emph{abstract} class represents a copy-number model.
# }
#
# @synopsis
#
# \arguments{
# \item{cesTuple}{A @see "CopyNumberDataSetTuple".}
# \item{refTuple}{An optional @see "CopyNumberDataFile",
# or @see "CopyNumberDataSet" or @see "CopyNumberDataSetTuple"
# for pairwise comparisons.}
# \item{calculateRatios}{A @logical specifying whether ratios should
# be calculated relative to the reference.
# If @FALSE, argument \code{refTuple} is ignored.
# }
# \item{tags}{A @character @vector of tags.}
# \item{genome}{A @character string specifying what genome is process.}
# \item{chromosomes}{(optional) A @vector specifying which chromosomes
# to process.}
# \item{maxNAFraction}{A @double in [0,1] indicating how many non-finite
# signals are allowed in the sanity checks of the data.}
# \item{...}{Optional arguments that may be used by some of the
# subclass models.}
# }
#
# \section{Fields and Methods}{
# @allmethods "public"
# }
#
# \section{Requirements}{
# This class requires genome information annotation files for
# every chip type.
# }
#
# @author
#*/###########################################################################
setConstructorS3("CopyNumberChromosomalModel", function(cesTuple=NULL, refTuple=NULL, calculateRatios=TRUE, tags="*", genome="Human", chromosomes=NULL, maxNAFraction=1/5, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'cesTuple':
if (!is.null(cesTuple)) {
# Coerce to CopyNumberDataSetTuple, if needed
cesTuple <- as.CopyNumberDataSetTuple(cesTuple)
# Currently only total copy-number estimates are accepted
if (hasAlleleBFractions(cesTuple)) {
throw("Unsupported copy-number data. Currently only total copy-number estimates are supported: ", getFullName(cesTuple))
}
}
# Argument 'refTuple':
# Validated in setReference() below.
# Argument 'calculateRatios':
if (is.character(calculateRatios) && calculateRatios == "auto") {
} else {
calculateRatios <- Arguments$getLogical(calculateRatios)
}
# Argument 'tags':
tags <- Arguments$getTags(tags, collapse=NULL)
# Argument 'maxNAFraction':
maxNAFraction <- Arguments$getNumeric(maxNAFraction, range=c(0,1))
# Optional arguments
optionalArgs <- list(...)
this <- extend(ChromosomalModel(), "CopyNumberChromosomalModel",
.cesTuple = cesTuple,
.reference = NULL,
.refTuple = NULL,
.calculateRatios = calculateRatios,
.chromosomes = NULL,
.tags = tags,
.genome = genome,
.maxNAFraction = maxNAFraction,
.optionalArgs = optionalArgs,
"cached:.extractRawCopyNumbersCache" = NULL
)
this <- setReference(this, refTuple)
# Validate?
if (!is.null(this$.cesTuple)) {
# Validate genome
gf <- getGenomeFile(this)
this <- setChromosomes(this, chromosomes)
}
this
})
setMethodS3("as.character", "CopyNumberChromosomalModel", function(x, ...) {
# To please R CMD check
this <- x
s <- sprintf("%s:", class(this)[1])
s <- c(s, paste("Name:", getName(this)))
s <- c(s, paste("Tags:", paste(getTags(this), collapse=",")))
s <- c(s, paste("Chip type (virtual):", getChipType(this)))
s <- c(s, sprintf("Path: %s", getPath(this)))
chipTypes <- getChipTypes(this)
nbrOfChipTypes <- length(chipTypes)
s <- c(s, sprintf("Number of chip types: %d", nbrOfChipTypes))
cesList <- getSets(getSetTuple(this))
reference <- getReference(this)
if (reference == "median") {
refTag <- sprintf("<%s of samples>", reference)
} else {
refTag <- sprintf("<%s>", reference)
}
refList <- getRefSetTuple(this)
if (!is.null(refList)) {
refList <- getSets(refList)
}
s <- c(s, "Sample & reference file pairs:")
for (kk in seq_along(cesList)) {
s <- c(s, sprintf("Chip type #%d ('%s') of %d:", kk, chipTypes[kk], nbrOfChipTypes))
s <- c(s, "Sample data set:")
ces <- cesList[[kk]]
ref <- refList[[kk]]
s <- c(s, as.character(ces))
if (is.null(ref)) {
s <- c(s, sprintf("Reference: %s", refTag))
} else {
s <- c(s, "Reference data set/file:")
s <- c(s, as.character(ref))
}
}
GenericSummary(s)
}, protected=TRUE)
setMethodS3("getOptionalArguments", "CopyNumberChromosomalModel", function(this, ...) {
this$.optionalArgs
}, protected=TRUE)
setMethodS3("getMaxNAFraction", "CopyNumberChromosomalModel", function(this, default=1/5, ...) {
maxNAFraction <- this$.maxNAFraction
if (is.null(maxNAFraction)) {
maxNAFraction <- default
}
maxNAFraction
}, protected=TRUE)
setMethodS3("getNames", "CopyNumberChromosomalModel", function(this, ...) {
# In a future version, we will provide correct paired names.
# Until getFullNames() support this (currently the tags for the
# references are dropped), we will only make it available
# as a hidden feature for getNames(). /HB 2010-01-02
usePairedNames <- this$.usePairedNames
if (is.null(usePairedNames))
usePairedNames <- FALSE
if (usePairedNames) {
names <- getPairedNames(this, ...)
} else {
names <- NextMethod("getNames")
}
names
})
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Methods related to the reference
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
setMethodS3("setReference", "CopyNumberChromosomalModel", function(this, reference, ...) {
# Argument 'reference':
if (is.null(reference)) {
reference <- "median"
}
calculateRatios <- this$.calculateRatios
if (is.character(reference)) {
# "constant(1)" == "none"
if (reference == "constant(1)") {
reference <- "none"
}
if (reference == "none") {
calculateRatios <- TRUE
} else if (reference == "median") {
calculateRatios <- TRUE
} else if (reference == "constant(2)") {
calculateRatios <- TRUE
} else {
throw("Unknown string value of argument 'reference': ", reference)
}
refTuple <- NULL
} else {
refTuple <- reference
reference <- "explicit"
# Ignore reference, if 'calculateRatios' is FALSE
if (is.logical(calculateRatios) && !calculateRatios) {
return(invisible(this))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Was an explicit reference data set/tuple was provided?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (reference == "explicit") {
# Is the reference a single file?
# AD HOC: Need more specialized class than GenericDataFile.
# /HB 2009-11-19
if (inherits(refTuple, "GenericDataFile")) {
refTuple <- list(refTuple)
}
cesTuple <- getSetTuple(this)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Coerce reference into a reference tuple?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.list(refTuple)) {
refList <- refTuple
# Validate against the test set tuple
cesTuple <- getSetTuple(this)
cesList <- getSets(cesTuple)
nbrOfChipTypes <- length(cesList)
# Assert the number of chip types
if (length(refList) != nbrOfChipTypes) {
throw("The number of chip types in the references (argument 'refTuple') does not match the number of chip types in the sample (argument 'cesTuple'): ", length(refList), " != ", nbrOfChipTypes)
}
# Coerce single reference files into reference sets
for (kk in seq_along(refList)) {
ref <- refList[[kk]]
# If a data file...
# AD HOC: Need more specialized class than GenericDataFile.
# /HB 2009-11-19
if (inherits(ref, "GenericDataFile")) {
chipType <- getChipType(ref)
chipType <- gsub(",monocell", "", chipType)
ces <- cesList[[chipType]]
# Sanity check
if (is.null(ces)) {
throw("The reference (argument 'refTuple') uses a chip type not used in the sample (argument 'cesTuple'): ", chipType, " not in (", paste(names(cesList), collapse=", "), ")")
}
# Create a data set holding a sequence of one replicated reference file
refFiles <- rep(list(ref), times=length(ces))
refSet <- newInstance(ces, refFiles)
# Not needed anymore
refFiles <- NULL
refList[[kk]] <- refSet
# Not needed anymore
refSet <- NULL
}
# Not needed anymore
ref <- NULL
} # for (kk ...)
refTuple <- refList
# Not needed anymore
refList <- cesList <- NULL
} # if (is.list(refTuple))
if (!is.null(refTuple)) {
# Coerce to CopyNumberDataSetTuple, if needed
refTuple <- as.CopyNumberDataSetTuple(refTuple)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Assert the same number of chip types in test and reference set
if (!identical(getChipTypes(refTuple), getChipTypes(cesTuple))) {
throw("The reference tuple has a different set of chip types compared with the test tuple")
}
# Assert that the reference data is of the same format as the sample data
if (hasAlleleBFractions(refTuple) != hasAlleleBFractions(cesTuple)) {
throw("The reference data (argument 'refTuple') is not compatible with the sample data (argument 'cesTuple'). One provides total copy numbers and the other does not.")
}
if (hasStrandiness(refTuple) != hasStrandiness(cesTuple)) {
throw("The reference data (argument 'refTuple') is not compatible with the sample data (argument 'cesTuple'). One provides strand-specific data and the other does not.")
}
# Validate consistency between the data sets and the reference files
cesList <- getSets(cesTuple)
refList <- getSets(refTuple)
for (kk in seq_along(cesList)) {
ces <- cesList[[kk]]
ref <- refList[[kk]]
# Assert that the reference and the sample sets are of the same size
if (length(ref) != length(ces)) {
throw("The number of reference files does not match the number of sample files: ", length(ref), " != ", length(ces))
}
# Assert that the reference files are compatible with the test files
for (jj in seq_along(ces)) {
cf <- ces[[jj]]
rf <- ref[[jj]]
if (!inherits(rf, class(cf)[1])) {
throw(class(ref)[1], " #", kk, " of argument 'refTuple' contains file #", jj, ", that is not of the same class as the paired test file: ", class(rf)[1], " !inherits from ", class(cf)[1])
}
} # for (jj ...)
} # for (kk in ...)
# Not needed anymore
cesTuple <- NULL
} # if (reference == "explicit")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
this$.reference <- reference
this$.refTuple <- refTuple
this$.calculateRatios <- calculateRatios
invisible(this)
}, protected=TRUE)
setMethodS3("getReference", "CopyNumberChromosomalModel", function(this, ...) {
res <- this$.reference
# BACKWARD COMPATIBILITY: In case an old object was retrieved from file.
# /HB 2012-10-21
if (is.null(res)) {
refTuple <- this$.refTuple
this <- setReference(this, refTuple)
res <- getReference(this)
}
# Sanity check
.stop_if_not(is.character(res))
res
})
setMethodS3("getRefSetTuple", "CopyNumberChromosomalModel", function(this, ...) {
this$.refTuple
}, protected=TRUE)
setMethodS3("getReferenceSetTuple", "CopyNumberChromosomalModel", function(this, ..., force=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
# Type of reference?
ref <- getReference(this)
# Nothing todo?
if (is.element(ref, c("none", "constant(1)", "constant(2)"))) {
return(NULL)
}
cesTuple <- getSetTuple(this)
cesList <- getSets(cesTuple)
refTuple <- getRefSetTuple(this)
if (!force && inherits(refTuple, class(cesTuple)[1])) {
return(refTuple)
}
verbose && enter(verbose, "Building tuple of reference sets")
# Build a reference set tuple, if missing
refList <- vector("list", length(cesList))
for (kk in seq_along(cesList)) {
ces <- cesList[[kk]]
refSet <- refList[[kk]]
if (force || !inherits(refSet, "CopyNumberDataSet")) {
verbose && cat(verbose, "Type of reference: ", ref)
if (force) {
verbose && cat(verbose, "Forced recalculation requested.")
} else {
verbose && cat(verbose, "No reference available.")
}
if (ref == "median") {
verbose && enter(verbose, "Calculating average copy-number signals")
refFile <- getAverageFile(ces, force=force, verbose=less(verbose))
refSet <- clone(ces)
clearCache(refSet)
refFiles <- rep(list(refFile), times=length(cesTuple))
refSet$files <- refFiles
# Not needed anymore
refFiles <- NULL
verbose && exit(verbose)
} else {
throw("Non-supported reference: ", ref)
}
refList[[kk]] <- refSet
}
} # for (kk ...)
# Coerce to CopyNumberDataSetTuple, if needed
refTuple <- as.CopyNumberDataSetTuple(refList)
this$.referenceTuple <- refTuple
verbose && exit(verbose)
refTuple
}, protected=TRUE) # getReferenceSetTuple()
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Methods related to paired models, e.g. paired tumor-normal segmentation
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
setMethodS3("isPaired", "CopyNumberChromosomalModel", function(this, ...) {
# BACKWARD COMPATIBILITY: Assert that the user does not rely on
# private field '.paired'. /HB 2012-10-21
if (!is.null(this$.paired)) {
throw("USER ERROR: Private field '.paired' of ", class(this)[1], " is no longer used/supported. If you do not understand this error message, please contact the mailing list of the package with full details of your script.")
}
ref <- getReference(this, ...)
if (is.element(ref, c("none", "constant(1)", "constant(2)"))) {
return(FALSE)
}
refTuple <- getReferenceSetTuple(this, ...)
(length(refTuple) > 0)
})
setMethodS3("getPairedNames", "CopyNumberChromosomalModel", function(this, ..., translate=TRUE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
pairedFnt <- function(names, ...) {
names <- gsub("^[.]average-.*", "", names)
names
} # pairedFnt()
# Argument 'translate':
translate <- Arguments$getLogical(translate)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Constructing names of pairs")
# Sanity check
if (!isPaired(this)) {
throw(sprintf("Cannot create paired names on a non-paired %s object.", class(this)[1]))
}
tuple <- getSetTuple(this)
arrays <- seq_len(length(tuple))
# Translate function?
if (translate) {
fnt <- this$.pairedFnt
# Default
if (is.null(fnt)) {
fnt <- "pair"
}
verbose && cat(verbose, "Name-pair translator: ")
if (is.character(fnt)) {
verbose && str(verbose, fnt)
if (identical(fnt, "pair")) {
fnt <- pairedFnt
}
}
verbose && str(verbose, fnt)
# Still translate?
translate <- is.function(fnt)
}
tupleList <- list(getSetTuple(this), getReferenceSetTuple(this))
namesList <- list()
for (kk in seq_along(tupleList)) {
tuple <- tupleList[[kk]]
names <- getNames(tuple)
# Translate name tuple?
if (translate) {
verbose && enter(verbose, "Translating name pair")
verbose && cat(verbose, "Names before: ", paste(names, collapse=", "))
names <- fnt(names)
verbose && cat(verbose, "Names after: ", paste(names, collapse=", "))
verbose && exit(verbose)
}
namesList[[kk]] <- names
} # for (kk ...)
# Sanity check
ns <- sapply(namesList, FUN=length)
ns <- unique(ns)
.stop_if_not(length(ns) == 1)
sep <- this$.pairedNameSep
if (is.null(sep)) {
sep <- "vs"
}
names <- paste(namesList[[1]], namesList[[2]], sep=sep)
pattern <- sprintf("%s$", sep)
names <- gsub(pattern, "", names)
verbose && cat(verbose, "Name of pairs:")
verbose && str(verbose, names)
verbose && exit(verbose)
names
}, protected=TRUE)
setMethodS3("calculateRatios", "CopyNumberChromosomalModel", function(this, ...) {
calculateRatios <- this$.calculateRatios
if (is.null(calculateRatios)) {
calculateRatios <- TRUE
this$.calculateRatios <- calculateRatios
}
calculateRatios
}, protected=TRUE)
setMethodS3("getDataFileMatrix", "CopyNumberChromosomalModel", function(this, array, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'array':
array <- Arguments$getIndex(array)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Extract DataFileMatrix")
verbose && cat(verbose, "Array: ", array)
cesTuple <- getSetTuple(this)
verbose && cat(verbose, "Test data sets:")
verbose && print(verbose, cesTuple)
ceList <- getFileList(cesTuple, array, ..., verbose=less(verbose,1))
verbose && cat(verbose, "Test data files:")
verbose && print(verbose, ceList)
ref <- getReference(this)
verbose && cat(verbose, "Type of reference: ", ref)
refTuple <- getReferenceSetTuple(this)
if (!is.null(refTuple)) {
verbose && cat(verbose, "Reference data sets:")
verbose && print(verbose, refTuple)
rfList <- getFileList(refTuple, array, ..., verbose=less(verbose,1))
# Sanity check
if (!identical(names(ceList), names(rfList))) {
verbose && enter(verbose, "Sanity check failed")
verbose && cat(verbose, "Test data files:")
verbose && print(verbose, ceList)
verbose && cat(verbose, "Reference data files:")
verbose && print(verbose, rfList)
throw("SANITY CHECK ERROR: Target and reference files have non-matching chip types: ", hpaste(names(ceList)), " != ", hpaste(names(rfList)))
verbose && exit(verbose)
}
} else {
# ...otherwise return a list of 'ref strings.
rfList <- rep(ref, times=length(ceList))
rfList <- as.list(rfList)
names(rfList) <- names(ceList)
} # if (!is.null(refTuple))
files <- c(ceList, rfList)
dim(files) <- c(length(ceList), 2)
dimnames(files) <- list(names(ceList), c("test", "reference"))
verbose && exit(verbose)
files
}, protected=TRUE)
setMethodS3("getRawCnData", "CopyNumberChromosomalModel", function(this, ceList, refList, chromosome, units=NULL, reorder=TRUE, ..., estimateSd=TRUE, force=FALSE, verbose=FALSE) {
if ("maxNAFraction" %in% names(list(...))) {
.Defunct(msg = sprintf("Argument 'maxNAFraction' to getRawCnData() of CopyNumberChromosomalModel is defunct. Instead, specify when setting up the %s object.", class(this)[1L]))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'ceList':
ceList <- Arguments$getInstanceOf(ceList, "list")
for (ce in ceList) {
if (!is.null(ce)) {
ce <- Arguments$getInstanceOf(ce, "CopyNumberDataFile")
}
}
# Argument 'refList':
refList <- Arguments$getInstanceOf(refList, "list")
if (length(refList) != length(ceList)) {
throw("Argument 'refList' is of a different length than 'cesList': ", length(refList), " != ", length(ceList))
}
for (ref in refList) {
if (is.character(ref)) {
ref <- Arguments$getCharacter(ref, length=c(1L,1L))
} else if (!is.null(ref)) {
ref <- Arguments$getInstanceOf(ref, "CopyNumberDataFile")
}
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Retrieving raw CN data")
# Data set attributes
chipTypes <- getChipTypes(this)
arrayNames <- getNames(this)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get (x, M, stddev*, chiptype, unit) from all chip types
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Retrieving relative copy-number estimates")
# Get the chip types as a factor
chipTypes <- as.factor(chipTypes)
df <- NULL
for (kk in seq_along(chipTypes)) {
chipType <- chipTypes[kk]
verbose && enter(verbose, "Chip type: ", chipType)
ce <- ceList[[kk]]
if (!is.null(ce)) {
# Do we need to calculate ratios relative to a reference?
calculateRatios <- calculateRatios(this)
verbose && cat(verbose, "Calculate ratios? ", calculateRatios)
# Infer from filename tags?
if (identical(calculateRatios, "auto")) {
tags <- getTags(ce)
pattern <- "^(log|log2|log10|)ratio$"
hasRatio <- any(regexpr(pattern, tags) != -1)
calculateRatios <- !hasRatio
verbose && cat(verbose, "Calculate ratios (inferred from tags)? ", calculateRatios)
}
if (calculateRatios) {
ref <- refList[[kk]]
if (is.character(ref)) {
} else {
# AD HOC. /HB 2007-09-29
# Hmmm... what is this? /HB 2009-11-18
# At least, replaced AffymetrixCelFile
# with CopyNumberDataFile. /HB 2009-11-18
if (!inherits(ref, "CopyNumberDataFile")) {
ref <- "none" # WAS: ref <- NULL /HB 2012-10-21
}
}
} else {
ref <- "none" # WAS: ref <- NULL /HB 2012-10-21
}
# Sanity check
.stop_if_not(!is.null(ref))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) Extract relative chromosomal signals
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# AD HOC. getXAM() is currently only implemented in the
# AffymetrixCelFile class and not defined in any Interface class.
# It should be implemented by others too, alternatively be
# replaced by a "better" method, e.g. extractRawCopyNumbers().
# /HB 2009-11-18.
df0 <- getXAM(ce, other=ref, chromosome=chromosome, units=units,
verbose=less(verbose))
df0 <- df0[,c("x", "M"), drop=FALSE]
# Argument 'units' may be NULL
units0 <- as.integer(rownames(df0))
verbose && cat(verbose, "Number of units: ", length(units0))
verbose && enter(verbose, "Scanning for non-finite values")
nbrOfLoci <- nrow(df0)
n <- as.integer(sum(!is.finite(df0[,"M"])))
fraction <- n / nbrOfLoci
verbose && printf(verbose, "Number of non-finite values: %d (%.1f%%) out of %d\n", n, 100*fraction, nbrOfLoci)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (b) Sanity check of not too many missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Sanity check
maxNAFraction <- getMaxNAFraction(this)
if (fraction > maxNAFraction) {
sampleTag <- getFullName(ce)
if (is.element(ref, c("none", "constant(1)", "constant(2)"))) {
refTag <- sprintf("<%s>", ref)
} else {
refTag <- getFullName(ref)
}
throw(sprintf("Something is wrong with the copy-number ratios of sample '%s' relative to reference '%s' on chromosome %s. Too many non-finite values: %d (%.1f%% > %.1f%%) out of %d. If this is expected, you may adjust argument 'maxNAFraction' when setting up %s().", sampleTag, refTag, chromosome, n, 100*fraction, 100*maxNAFraction, nbrOfLoci, class(this)[1L]))
}
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (c) Estimate standard errors
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (estimateSd) {
# By default, the estimates are missing
naValue <- NA_real_
sdTheta <- sdM <- rep(naValue, times=length(units0))
if (!is.character(ref)) {
sdTheta <- extractMatrix(ref, units=units0, field="sdTheta",
drop=TRUE, verbose=verbose)
# Estimate the std dev of the raw log2(CN).
# [only if ref is averaged across arrays]
if (isAverageFile(ref)) {
# Get (theta, sigma) of theta (estimated across all arrays).
theta <- extractMatrix(ref, units=units0, field="theta",
drop=TRUE, verbose=verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# BEGIN: NEED SPECIAL ATTENTION IF ALLELE-SPECIFIC ESTIMATES
# (which are not supported yet /HB 2009-11-18)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Number of arrays used when averaging (per unit)
str(ref)
print(ref)
ns <- getNumberOfFilesAveraged(ref, units=units0, verbose=verbose)
throw("xxxxxxxxxxxxx")
# Sanity check
.stop_if_not(length(ns) == length(units0))
# Use Gauss' approximation (since mu and sigma are on the
# intensity scale)
sdM <- log2(exp(1)) * sqrt(1+1/ns) * sdTheta / theta
# Not needed anymore
ns <- theta <- NULL
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# END: NEED SPECIAL ATTENTION IF ALLELE-SPECIFIC ESTIMATES
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
} # if (isAverageFile(ref))
} # if (!is.character(ref))
# Append stddev estimates
df0 <- cbind(df0, sdTheta=sdTheta, sdM=sdM)
# Not needed anymore
sdTheta <- sdM <- NULL
} # if (estimateSd)
# Not needed anymore
ref <- NULL
# Append chip type, and CDF units.
df0 <- cbind(df0, chipType=rep(chipType, times=length(units0)),
unit=units0)
# Not needed anymore
units0 <- NULL
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (d) Append data to data for previous chip types
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
df <- rbind(df, df0)
colnames(df) <- colnames(df0)
# Not needed anymore
df0 <- NULL
} else {
verbose && cat(verbose, "No copy-number estimates available: ", arrayNames[kk])
}
# Garbage collect
gc <- gc()
verbose && exit(verbose)
} # for (kk in ...)
verbose && exit(verbose)
if (reorder) {
verbose && enter(verbose, "Re-order by physical position")
o <- order(df[,"x"])
df <- df[o,, drop=FALSE]
rownames(df) <- NULL
# Not needed anymore
o <- NULL
verbose && exit(verbose)
}
# Garbage collect
gc <- gc()
verbose && print(verbose, gc)
nbrOfUnits <- nrow(df)
verbose && cat(verbose, sprintf("Extracted data for %d SNPs", nbrOfUnits))
verbose && exit(verbose)
df
}, private=TRUE)
setMethodS3("calculateChromosomeStatistics", "CopyNumberChromosomalModel", function(this, arrays=NULL, chromosomes=getChromosomes(this), ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
allChromosomes <- getChromosomes(this)
# Argument 'arrays':
arrays <- indexOf(this, arrays)
# Argument 'chromosomes':
if (is.null(chromosomes)) {
chromosomes <- allChromosomes
} else {
unknown <- chromosomes[!(chromosomes %in% allChromosomes)]
if (length(unknown) > 0) {
throw("Argument 'chromosomes' contains unknown values: ", paste(unknown, collapse=", "))
}
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Estimating number of copies per chromosome")
nbrOfChromosomes <- length(chromosomes)
verbose && printf(verbose, "Chromosomes (%d): %s", nbrOfChromosomes, paste(chromosomes, collapse=", "))
res <- list()
arrayNames <- getNames(this)[arrays]
nbrOfArrays <- length(arrayNames)
for (aa in seq_len(nbrOfArrays)) {
array <- arrays[aa]
arrayName <- arrayNames[aa]
files <- getDataFileMatrix(this, array=array, verbose=less(verbose,5))
ceList <- files[,"test"]
rfList <- files[,"reference"]
res[[arrayName]] <- list()
for (chromosome in chromosomes) {
verbose && enter(verbose,
sprintf("Array #%d ('%s') of %d on chromosome %s",
aa, arrayName, nbrOfArrays, chromosome))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get (x, M, stddev, chiptype, unit) data from all chip types
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data <- getRawCnData(this, ceList=ceList, refList=rfList,
chromosome=chromosome, ..., estimateSd=FALSE,
verbose=less(verbose))
M <- data[,"M"]
# Not needed anymore
data <- NULL
fit <- list(
mean = mean(M, na.rm=TRUE),
sd = sd(M, na.rm=TRUE),
median = median(M, na.rm=TRUE),
mad = mad(M, na.rm=TRUE),
quantiles = quantile(M, probs=seq(from=0, to=1, by=0.01), na.rm=TRUE),
sdDiff = sd(diff(M), na.rm=TRUE)/sqrt(2),
madDiff = mad(diff(M), na.rm=TRUE)/sqrt(2),
nbrOfLoci = length(M),
nbrOfNAs = sum(is.na(M))
)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Estimate
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Garbage collection
gc <- gc()
verbose && print(verbose, gc)
res[[arrayName]][[chromosome]] <- fit
verbose && exit(verbose)
} # for (chromosome in ...)
} # for (aa in ...)
verbose && exit(verbose)
res
}, protected=TRUE) # calculateChromosomeStatistics()
###########################################################################/**
# @RdocMethod fit
#
# @title "Fits the model"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{...}{See subclasses.}
# }
#
# \value{
# See subclasses.
# }
#
# @author
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("fit", "CopyNumberChromosomalModel", abstract=TRUE)
###########################################################################/**
# @RdocMethod extractRawCopyNumbers
#
# @title "Extracts relative copy numbers"
#
# \description{
# @get "title" for a particular array and chromosome.
# }
#
# @synopsis
#
# \arguments{
# \item{array}{The index of the array to be extracted.}
# \item{chromosome}{The index of the chromosome to be extracted.}
# \item{...}{See subclasses.}
# \item{logBase}{(optional) The base of the logarithm used for the ratios.
# If @NULL, the ratios are not logged.}
# \item{cache}{If @TRUE, results are cached, otherwise not.}
# \item{force}{If @TRUE, cached results are ignored.}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# See subclasses.
# }
#
# \section{Parallel processing}{
# Except for an in-object caching (\code{cache=TRUE}), this method
# access data solely in an read-only fashion.
# This method is safe to call with different arrays and/or
# chromosomes in parallel.
# }
#
# @author
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("extractRawCopyNumbers", "CopyNumberChromosomalModel", function(this, array, chromosome, ..., logBase=2, cache=FALSE, force=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'array':
array <- Arguments$getIndex(array, max=nbrOfArrays(this))
# Argument 'chromosome':
allChromosomes <- getChromosomes(this)
chromosome <- Arguments$getIndex(chromosome, range=range(allChromosomes))
if (!chromosome %in% allChromosomes)
throw("Argument 'chromosome' has an unknown value: ", chromosome)
# Argument 'logBase':
if (!is.null(logBase)) {
logBase <- Arguments$getDouble(logBase, range=c(1, 10))
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
key <- list(method="extractRawCopyNumbers", class=class(this), array=array, chromosome=chromosome, ...)
id <- getChecksum(key)
cacheList <- this$.extractRawCopyNumbersCache
if (!is.list(cacheList))
cacheList <- list()
if (force) {
cn <- NULL
} else {
cn <- cacheList[[id]]
}
if (is.null(cn)) {
# Extract the test and reference arrays
files <- getDataFileMatrix(this, array=array, verbose=less(verbose,5))
ceList <- files[,"test"]
rfList <- files[,"reference"]
data <- getRawCnData(this, ceList=ceList, refList=rfList,
chromosome=chromosome, ..., estimateSd=FALSE,
verbose=less(verbose))
cn <- RawCopyNumbers(cn=data[,"M"], x=data[,"x"], chromosome=chromosome)
cn <- setBasicField(cn, ".yLogBase", 2)
# Not needed anymore
data <- NULL
}
# Save to cache?
if (cache) {
cacheList[[id]] <- cn
this$.extractRawCopyNumbersCache <- cacheList
# Not needed anymore
cacheList <- NULL
}
# Convert to the correct logarithmic base
cn <- extractRawCopyNumbers(cn, logBase=logBase)
cn
})
###########################################################################/**
# @RdocMethod estimateSds
#
# @title "Estimates the standard deviation of the raw copy numbers (log2-ratios) robustly"
#
# \description{
# @get "title" using a first-order difference variance estimator, which is
# an estimator that is fairly robust for change points.
# }
#
# @synopsis
#
# \arguments{
# \item{arrays}{The arrays to be queried.}
# \item{chromosomes}{The chromosomes to be queried.}
# \item{...}{Additional arguments passed to
# @seemethod "extractRawCopyNumbers".}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns a CxK @double @matrix where C is the number of chromosomes,
# and K is the number of arrays (arrays are always the last dimension).
# }
#
# @author
#
# \seealso{
# @seeclass
# }
#*/###########################################################################
setMethodS3("estimateSds", "CopyNumberChromosomalModel", function(this, arrays=seq_len(nbrOfArrays(this)), chromosomes=getChromosomes(this), ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'arrays':
arrays <- indexOf(this, arrays)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
nbrOfChromosomes <- length(chromosomes)
naValue <- NA_real_
res <- matrix(naValue, nrow=nbrOfChromosomes, ncol=length(arrays))
colnames(res) <- getNames(this)[arrays]
rownames(res) <- chromosomes
for (rr in seq_len(nbrOfChromosomes)) {
chromosome <- chromosomes[rr]
verbose && enter(verbose, sprintf("Chromosome #%d ('Chr%02d') of %d",
rr, chromosome, nbrOfChromosomes))
for (cc in seq_along(arrays)) {
array <- arrays[cc]
verbose && enter(verbose, sprintf("Array #%d of %d", cc, length(arrays)))
rawCns <- extractRawCopyNumbers(this, array=array, chromosome=chromosome, ..., verbose=less(verbose,5))
# verbose && enter(verbose, "First-order robust variance estimator")
res[rr,cc] <- estimateStandardDeviation(rawCns)
# Not needed anymore
rawCns <- NULL
# verbose && exit(verbose)
verbose && exit(verbose)
}
verbose && exit(verbose)
}
res
}, protected=TRUE)
setMethodS3("getChromosomeLength", "CopyNumberChromosomalModel", function(this, chromosome, ...) {
data <- getGenomeData(this)
nbrOfChromosomes <- nrow(data)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'chromosome'
if (is.numeric(chromosome)) {
chromosome <- Arguments$getIndex(chromosome, max=nbrOfChromosomes)
} else {
chromosome <- Arguments$getCharacter(chromosome)
if (!is.element(chromosome, row.names(data))) {
throw("Cannot infer number of bases in chromosome. No such chromosome: ", chromosome)
}
}
# Extract the length
nbrOfBases <- data[chromosome,"nbrOfBases"]
# Sanity check
disallow <- c("Inf", "NA", "NaN")
nbrOfBases <- Arguments$getNumeric(nbrOfBases, range=c(0, Inf), disallow=disallow)
nbrOfBases
}) # getChromosomeLength()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.