###########################################################################/**
# @set "class=CBS"
# @RdocMethod callGainsAndLosses
#
# @title "Calls gains and losses"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{adjust}{A positive scale factor adjusting the sensitivity of the
# caller, where a value less (greater) than 1.0 makes the caller
# less (more) sensitive.}
# \item{method}{A @character string specifying the calling algorithm to use.}
# \item{...}{Additional/optional arguments used to override the default
# parameters used by the caller.}
# }
#
# \value{
# Returns a @see "PSCBS::CBS" object where @logical columns
# 'lossCall' and 'gainCall' have been appended to the segmentation table.
# }
#
# \section{The UCSF caller}{
# If \code{method == "ucsf-mad"}, then segments are called using [1], i.e.
# a segment is called gained or lost if its segment level is
# at least two standard deviations away from the median segment level
# on Chr1-22, where standard deviation is estimated using MAD.
# Then same is done for \code{method == "ucsf-dmad"} with the difference
# that the standard deviation is estimated using a robust first order
# variance estimator.
# }
#
# \examples{
# @include "../incl/segmentByCBS.Rex"
# @include "../incl/segmentByCBS,calls.Rex"
# }
#
# @author "HB"
#
# \references{
# [1] Fridlyand et al. \emph{Breast tumor copy number aberration
# phenotypes and genomic instability}, BMC Cancer, 2006. \cr
# }
#
# \seealso{
# @seemethod "callAmplifications".
# @seemethod "callOutliers".
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("callGainsAndLosses", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad", "ucsf-dmad"), ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'adjust':
adjust <- Arguments$getDouble(adjust, range=c(0, Inf))
# Argument 'method':
method <- match.arg(method)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Calling segments that are gained or lost")
userArgs <- list(...)
params <- list()
# Allocate calls
naValue <- NA
nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE)
segs <- getSegments(fit, splitters=TRUE)
nbrOfRows <- nrow(segs)
gainCalls <- lossCalls <- rep(naValue, times=nbrOfRows)
verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments)
verbose && cat(verbose, "Call method: ", method)
if (is.element(method, c("ucsf-mad", "ucsf-dmad"))) {
# Default arguments
args <- list(
chromosomes = intersect(getChromosomes(fit), c(0L, 1:22)),
scale = 2.0
)
# Override by (optional) user-specified arguments
for (key in names(userArgs)) {
args[[key]] <- userArgs[[key]]
}
# Extract arguments
chromosomes <- args$chromosomes
scale <- args$scale
# Argument check
chromosomes <- Arguments$getVector(chromosomes, lengths=c(1,Inf))
scale <- Arguments$getDouble(scale, range=c(0,Inf))
# Estimate the whole-genome standard deviation of the TCNs
if (method == "ucsf-mad") {
sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes,
method="res", estimator="mad")
sigmaKey <- "sigmaMAD"
} else if (method == "ucsf-dmad") {
sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes,
method="diff", estimator="mad")
sigmaKey <- "sigmaDelta"
} else {
stop("INTERNAL ERROR: Unknown method: ", method)
}
# Sanity check
sigma <- Arguments$getDouble(sigma, range=c(0,Inf))
# Calculate the threshold
tau <- scale * sigma
# Make more or less sensitive
tau <- adjust * tau
verbose && cat(verbose, "Call parameters:")
verbose && str(verbose, list(sigma=sigma, scale=scale, adjust=adjust))
# Calculate segment levels using the median estimator
fitT <- updateMeans(fit, avg="median")
segsT <- getSegments(fitT, splitters=TRUE)
mu <- segsT$mean
fitT <- segsT <- NULL # Not needed anymore
# The median segmented level
muR <- median(mu, na.rm=TRUE)
# The threshold for losses
tauLoss <- muR - tau
# The threshold for gains
tauGain <- muR + tau
# Call
lossCalls <- (mu <= tauLoss) # Losses
gainCalls <- (mu >= tauGain) # Gains
# Call parameters used
params$method <- method
params$adjust <- adjust
params[[sigmaKey]] <- sigma
params$scale <- scale
params$muR <- muR
params$tau <- tau
params$tauLoss <- tauLoss
params$tauGain <- tauGain
}
verbose && cat(verbose, "Number of called segments: ", length(lossCalls))
# Sanity check
.stop_if_not(length(lossCalls) == nbrOfRows)
.stop_if_not(length(gainCalls) == nbrOfRows)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'DNAcopy' object
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) segmentation table
segs <- getSegments(fit, splitters=TRUE)
segs$lossCall <- lossCalls
segs$gainCall <- gainCalls
fit$output <- segs
# (b) parameters
allParams <- fit$params
if (is.null(allParams)) {
allParams <- list()
}
allParams$callGainsAndLosses <- params
fit$params <- allParams
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Return the updated 'CBS' object.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fit
}, private=TRUE) # callGainsAndLosses()
###########################################################################/**
# @RdocMethod callAmplifications
#
# @title "Calls (focal) amplifications"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{adjust}{A positive scale factor adjusting the sensitivity of the
# caller, where a value less (greater) than 1.0 makes the caller
# less (more) sensitive.}
# \item{maxLength}{A @double scalar specifying the maximum length of a segment
# in order for it to be considered a focal amplification.}
# \item{method}{A @character string specifying the calling algorithm to use.}
# \item{...}{Additional/optional arguments used to override the default
# parameters used by the caller.}
# \item{verbose}{@see "R.utils::Verbose".}
# }
#
# \value{
# Returns a @see "PSCBS::CBS" object where @logical column
# 'amplificationCall' has been appended to the segmentation table.
# }
#
# \section{The UCSF caller}{
# If \code{method == "ucsf-exp"}, then segments are called using [1], i.e.
# a segment is called an amplification if ...
# }
#
# @author
#
# \references{
# [1] Fridlyand et al. \emph{Breast tumor copy number aberration
# phenotypes and genomic instability}, BMC Cancer, 2006. \cr
# }
#
# \seealso{
# @seemethod "callGainsAndLosses".
# @seemethod "callOutliers".
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("callAmplifications", "CBS", function(fit, adjust=1.0, maxLength=20e6, method=c("ucsf-exp"), ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'adjust':
adjust <- Arguments$getDouble(adjust, range=c(0, Inf))
# Argument 'maxLength':
maxLength <- Arguments$getDouble(maxLength, range=c(0, Inf))
# Argument 'method':
method <- match.arg(method)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Calling segments that are amplified")
userArgs <- list(...)
params <- list()
# Allocate calls
naValue <- NA
nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE)
calls <- rep(naValue, times=nbrOfSegments)
verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments)
verbose && cat(verbose, "Call method: ", method)
if (method == "ucsf-exp") {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Call arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Default arguments
args <- list(
minLevel = 0.0,
lambda = 1.0,
degree = 3
)
# Override by (optional) user-specified arguments
for (key in names(userArgs)) {
args[[key]] <- userArgs[[key]]
}
# Extract arguments
minLevel <- args$minLevel
lambda <- args$lambda
degree <- args$degree
# Validate arguments
minLevel <- Arguments$getDouble(minLevel, range=c(-Inf, Inf))
lambda <- Arguments$getDouble(lambda, range=c(0, Inf))
degree <- Arguments$getDouble(degree, range=c(1, Inf))
verbose && cat(verbose, "Call parameters:")
verbose && str(verbose, list(minLevel=minLevel, lambda=lambda,
degree=degree))
segs <- getSegments(fit, splitters=TRUE)
verbose && cat(verbose, "Segments:")
verbose && str(verbose, segs)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Rule #1: Only consider segments that are short enough
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The lengths (in bp) of the segments
start <- segs$start
end <- segs$end
length <- end - start ## + 1L
keep1 <- (length <= maxLength)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Rule #2: Only consider segments that have a mean level
# that is large enough.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The mean levels of the segments
mu <- segs$mean
keep2 <- (mu >= minLevel)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Rule #3: Only consider segments that have a mean level
# that is much larger than either of the
# flanking segments.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The mean levels of the flanking segments
muL <- c(NA_real_, mu[-nbrOfSegments])
muR <- c(mu[-1], NA_real_)
# The difference in mean levels to the flanking segments
deltaL <- mu - muL
deltaR <- mu - muR
# The maximum difference to either of the flanking segments
delta <- pmax(deltaL, deltaR, na.rm=TRUE)
# The threshold for calling segments amplified
tau <- exp(-lambda * mu^degree)
# Make more or less sensitive
tau <- adjust * tau
keep3 <- (delta >= tau)
# Amplification calls
calls <- (keep1 & keep2 & keep3)
# Call parameters used
params$method <- method
params$adjust <- adjust
params$maxLength <- maxLength
params$minLevel <- minLevel
params$lambda <- lambda
params$degree <- degree
params$tau <- tau
}
verbose && cat(verbose, "Number of called segments: ", length(calls))
# Sanity check
.stop_if_not(length(calls) == nbrOfSegments)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'DNAcopy' object
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) segmentation table
segs <- getSegments(fit, splitters=TRUE)
segs$amplificationCall <- calls
fit$output <- segs
# (b) parameters
allParams <- fit$params
if (is.null(allParams)) {
allParams <- list()
}
allParams$callAmplifications <- params
fit$params <- allParams
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Return the updated 'CBS' object.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fit
}, private=TRUE) # callAmplifications()
###########################################################################/**
# @RdocMethod callOutliers
#
# @title "Calls outliers"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{adjust}{A positive scale factor adjusting the sensitivity of the
# caller, where a value less (greater) than 1.0 makes the caller
# less (more) sensitive.}
# \item{method}{A @character string specifying the calling algorithm to use.}
# \item{...}{Additional/optional arguments used to override the default
# parameters used by the caller.}
# }
#
# \value{
# Returns a @see "PSCBS::CBS" object where @logical columns
# 'negOutlierCall' and 'posOutlierCall' have been appended
# to the segmentation table.
# }
#
# \section{The UCSF caller}{
# If \code{method == "ucsf-mad"}, then loci are called using [1]
# "Finally, to identify single technical or biological outliers such
# as high level amplifications, the presence of the outliers within
# a segment was allowed by assigning the original observed log2ratio
# to the clones for which the observed values were more than four
# tumor-specific MAD away from the smoothed values." [1; Suppl. Mat.]
# }
#
# @author "HB"
#
# \references{
# [1] Fridlyand et al. \emph{Breast tumor copy number aberration
# phenotypes and genomic instability}, BMC Cancer, 2006. \cr
# }
#
# \seealso{
# @seemethod "callGainsAndLosses".
# @seemethod "callAmplifications".
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("callOutliers", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad"), ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'adjust':
adjust <- Arguments$getDouble(adjust, range=c(0, Inf))
# Argument 'method':
method <- match.arg(method)
userArgs <- list(...)
params <- list()
# Allocate calls
nbrOfLoci <- nbrOfLoci(fit)
naValue <- NA
negOutlierCall <- posOutlierCall <- rep(naValue, times=nbrOfLoci)
if (method == "ucsf-mad") {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Call arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Default arguments
args <- list(
scale = 4.0
)
# Override by (optional) user-specified arguments
for (key in names(userArgs)) {
args[[key]] <- userArgs[[key]]
}
# Extract arguments
scale <- args$scale
# Validate arguments
scale <- Arguments$getDouble(scale, range=c(0, Inf))
# Genomic annotations
data <- getLocusData(fit)
chromosome <- data$chromosome
x <- data$x
# CN signals
y <- data[,3]
# Segmented CN signals
yS <- extractSegmentMeansByLocus(fit)
# CN residuals (relative to segment means)
dy <- y - yS
segs <- getSegments(fit, splitters=TRUE)
# Allocate per-segment SD estimates
nbrOfSegments <- nbrOfSegments(fit)
naValue <- NA_real_
sds <- rep(naValue, times=nbrOfSegments)
naValue <- NA_real_
for (ss in seq_len(nbrOfSegments)) {
seg <- segs[ss,]
# Identify loci in current segment
idxs <- which(seg$chromosome == chromosome &
seg$start <= x & x <= seg$end)
# Sanity check
idxs <- Arguments$getIndices(idxs, max=nbrOfLoci)
# Extract CN residuals
dySS <- dy[idxs]
# Calculate MAD for segment
sdSS <- mad(dySS, na.rm=TRUE)
# Threshold for outliers
tau <- scale * sdSS
# Make more or less sensitive
tau <- adjust * tau
# Call outliers
naValue <- NA
callsSS <- rep(naValue, times=length(dySS))
callsSS[-tau <= dySS & dySS <= +tau] <- 0L
callsSS[dySS > +tau] <- +1L
callsSS[dySS < -tau] <- -1L
# Record
negOutlierCall[idxs] <- (callsSS < 0L)
posOutlierCall[idxs] <- (callsSS > 0L)
sds[ss] <- sdSS
} # for (ss ...)
params$method <- method
params$adjust <- adjust
params$scale <- scale
params$sds <- sds
}
# Sanity check
.stop_if_not(length(negOutlierCall) == nbrOfLoci)
.stop_if_not(length(posOutlierCall) == nbrOfLoci)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'DNAcopy' object
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) segmentation table
data <- getLocusData(fit)
data$negOutlierCall <- negOutlierCall
data$posOutlierCall <- posOutlierCall
fit$data <- data
# (b) parameters
allParams <- fit$params
if (is.null(allParams)) {
allParams <- list()
}
allParams$callOutliers <- params
fit$params <- allParams
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Return the updated 'CBS' object.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fit
}, private=TRUE) # callOutliers()
setMethodS3("extractCallsByLocus", "CBS", function(fit, ...) {
# Extract locus data
data <- getLocusData(fit, ...)
nbrOfLoci <- nrow(data)
# Extract segment data
segs <- getSegments(fit, splitters=TRUE)
# Identify segment calls
callCols <- grep("Call$", colnames(segs))
nbrOfCalls <- length(callCols)
chromosome <- data$chromosome
x <- data$x
y <- data[,3]
# Allocate locus calls
naValue <- NA
callsL <- matrix(naValue, nrow=nbrOfLoci, ncol=nbrOfCalls)
colnames(callsL) <- colnames(segs)[callCols]
callsL <- as.data.frame(callsL)
# For each segment...
for (ss in seq_len(nrow(segs))) {
seg <- segs[ss,]
idxs <- which(chromosome == seg$chromosome &
seg$start <= x & x <= seg$end)
idxs <- Arguments$getIndices(idxs, max=nbrOfLoci)
# Sanity check
## .stop_if_not(length(idxs) == seg$nbrOfLoci)
callsSS <- seg[callCols]
for (cc in seq_len(nbrOfCalls)) {
callsL[idxs,cc] <- callsSS[,cc]
}
} # for (ss ...)
# The calls for loci that have missing annotations or observations,
# should also be missing, i.e. NA.
nok <- (is.na(chromosome) | is.na(x) | is.na(y))
callsL[nok,] <- NA
# Sanity check
.stop_if_not(nrow(callsL) == nbrOfLoci)
.stop_if_not(ncol(callsL) == nbrOfCalls)
callsL
}, private=TRUE) # extractCallsByLocus()
###########################################################################/**
# @RdocMethod getCallStatistics
#
# @title "Calculates various call statistics per chromosome"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{regions}{An optional @data.frame with columns "chromosome",
# "start", and "end" specifying the regions of interest to calculate
# statistics for. If @NULL, all of the genome is used.}
# \item{shrinkRegions}{If @TRUE, regions are shrunk to the support of
# the data.}
# \item{...}{Not used.}
# \item{verbose}{@see "R.utils::Verbose".}
# }
#
# \value{
# Returns a CxK @data.frame, where C is the number of regions that
# meet the criteria setup by argument \code{regions}
# and (K-4)/2 is the number of call types.
# The first column is the chromosome index, the second and the third
# are the first and last position, and the fourth the length
# (=last-first+1) of the chromosome.
# The following columns contains call summaries per chromosome.
# For each chromosome and call type, the total length of such calls
# on that chromosome is reported together how large of a fraction
# of the chromosome such calls occupy.
# }
#
# \details{
# The estimators implemented here are based solely on the
# segmentation results, which is very fast.
# In the original proposal by Fridlyand et al. [1], the authors
# estimates the parameters by converting segment-level calls back
# to locus-level calls and there do the calculations.
# The difference between the two approaches should be minor,
# particularly for large density arrays.
# }
#
# @author "HB"
#
# \references{
# [1] Fridlyand et al. \emph{Breast tumor copy number aberration
# phenotypes and genomic instability}, BMC Cancer, 2006. \cr
# }
#
# \seealso{
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("getCallStatistics", "CBS", function(fit, regions=NULL, shrinkRegions=TRUE, ..., verbose=FALSE) {
# To please R CMD check, cf. subset()
chromosome <- NULL; rm(list="chromosome")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'regions':
if (is.null(regions)) {
# Get chromosome lengths
regions <- getChromosomeRanges(fit)[,c("chromosome", "start", "end")]
}
regions <- as.data.frame(regions)
.stop_if_not(all(is.element(c("chromosome", "start", "end"), colnames(regions))))
.stop_if_not(!any(duplicated(regions$chromosome)))
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Calculating call statistics")
segs <- getSegments(fit, splitters=FALSE)
callTypes <- grep("Call$", colnames(segs), value=TRUE)
verbose && cat(verbose, "Call types: ", hpaste(callTypes))
if (length(callTypes) == 0) {
stop("Cannot calculate call statistics. No calls have been made.")
}
verbose && cat(verbose, "Regions of interest:")
verbose && str(verbose, regions)
verbose && enter(verbose, "Filtering out segments within the requested regions")
# Filter out segments within the requested regions
segsT <- NULL
verbose && cat(verbose, "Number of segments (before): ", nrow(segs))
for (rr in seq_len(nrow(regions))) {
regionRR <- regions[rr,]
chrRR <- regionRR[,"chromosome"]
startRR <- regionRR[,"start"]
endRR <- regionRR[,"end"]
if (is.na(chrRR) || is.na(startRR) || is.na(endRR)) {
next
}
verbose && enter(verbose, sprintf("Region #%d of %d", rr, nrow(regions)))
# Select regions that (at least) overlapping with the region
segsRR <- subset(segs, chromosome == chrRR & start <= endRR & end >= startRR)
verbose && cat(verbose, "Number of segments within region: ", nrow(segsRR))
# Special case
if (nrow(segsRR) == 0) {
segsRR <- segs[1,][NA,]
segsRR$chromosome <- chrRR
segsRR$start <- startRR
segsRR$end <- endRR
segsRR$nbrOfLoci <- 0L
}
if (shrinkRegions) {
range <- range(c(segsRR$start, segsRR$end), na.rm=TRUE)
startRR <- max(startRR, range[1], na.rm=TRUE)
endRR <- min(endRR, range[2], na.rm=TRUE)
regions[rr,"end"] <- endRR
regions[rr,"start"] <- startRR
}
# Adjust ranges
segsRR$start[segsRR$start < startRR] <- startRR
segsRR$end[segsRR$end > endRR] <- endRR
segsRR$fullLength <- endRR - startRR ## + 1L
segsT <- rbind(segsT, segsRR)
verbose && exit(verbose)
} # for (rr ...)
segs <- segsT
# Order by chromosome
o <- order(segs$chromosome)
segs <- segs[o,]
verbose && cat(verbose, "Number of segments (after): ", nrow(segs))
verbose && str(verbose, segs)
verbose && exit(verbose)
verbose && enter(verbose, "Calculating total length per call and chromosome")
# Sum length of calls per type and chromosome
segs$length <- segs[,"end"] - segs[,"start"] ## + 1L
res <- lapply(callTypes, FUN=function(type) {
coeffs <- as.integer(segs[,type])
lens <- coeffs * segs$length
lens <- by(lens, INDICES=segs$chromosome, FUN=sum, na.rm=TRUE)
as.vector(lens)
})
names(res) <- gsub("Call$", "Length", callTypes)
res1 <- as.data.frame(res)
verbose && str(verbose, res)
verbose && exit(verbose)
# Extract selected regions
idxs <- match(unique(segs$chromosome), regions$chromosome)
regionsT <- regions[idxs,]
# Sanity check
.stop_if_not(nrow(regionsT) == nrow(res1))
verbose && enter(verbose, "Calculating fractions per region")
# Calculate lengths
regionsT$length <- regionsT[,"end"] - regionsT[,"start"] ## + 1L
.stop_if_not(all(regionsT$length >= 0))
res2 <- res1 / regionsT[,"length"]
names(res2) <- gsub("Call$", "Fraction", callTypes)
verbose && exit(verbose)
res3 <- cbind(res1, res2)
res <- regionsT
if (nrow(res3) > 0) {
res <- cbind(res, res3)
}
rownames(res) <- NULL
res <- cbind(label=I(sprintf("chr%d", res[,"chromosome"])), res)
# Sanity checks
resT <- res[,grep("Fraction", colnames(res))]
for (key in colnames(resT)) {
rho <- resT[,key]
.stop_if_not(all(rho >= 0, na.rm=TRUE))
.stop_if_not(all(rho <= 1, na.rm=TRUE))
}
.stop_if_not(nrow(res) == nrow(regions))
verbose && str(verbose, res)
verbose && exit(verbose)
res
}, protected=TRUE) # getCallStatistics()
###########################################################################/**
# @RdocMethod getFractionOfGenomeLost
# @aliasmethod getFractionOfGenomeGained
# @aliasmethod getFractionOfGenomeAltered
# @aliasmethod getFGL
# @aliasmethod getFGG
# @aliasmethod getFGA
#
# @title "Calculates the fraction of the genome lost, gained, or aberrant either way"
#
# \description{
# @get "title" (in sense of total copy numbers),
# using definitions closely related to those presented in [1].
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @double in [0,1].
# }
#
# @author "HB"
#
# \references{
# [1] Fridlyand et al. \emph{Breast tumor copy number aberration
# phenotypes and genomic instability}, BMC Cancer, 2006. \cr
# }
#
# \seealso{
# Internally, @seemethod "getCallStatistics" is used.
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("getFractionOfGenomeLost", "CBS", function(fit, ...) {
stats <- getCallStatistics(fit, ...)
mean(stats$lossFraction, na.rm=TRUE)
}, protected=TRUE)
setMethodS3("getFractionOfGenomeGained", "CBS", function(fit, ...) {
stats <- getCallStatistics(fit, ...)
mean(stats$gainFraction, na.rm=TRUE)
}, protected=TRUE)
setMethodS3("getFractionOfGenomeAltered", "CBS", function(fit, ...) {
getFractionOfGenomeLost(fit, ...) + getFractionOfGenomeGained(fit, ...)
}, protected=TRUE)
# Shortcuts
setMethodS3("getFGL", "CBS", function(fit, ...) {
getFractionOfGenomeLost(fit, ...)
}, protected=TRUE)
setMethodS3("getFGG", "CBS", function(fit, ...) {
getFractionOfGenomeGained(fit, ...)
}, protected=TRUE)
setMethodS3("getFGA", "CBS", function(fit, ...) {
getFractionOfGenomeAltered(fit, ...)
}, protected=TRUE)
setMethodS3("isWholeChromosomeGained", "CBS", function(fit, minFraction=0.99, ...) {
# Argument 'minFraction':
minFraction <- Arguments$getDouble(minFraction, range=c(0,1))
stats <- getCallStatistics(fit, ...)
calls <- stats$gainFraction
if (is.null(calls)) {
return(rep(NA, times=nbrOfChromosomes(fit)))
}
res <- (calls >= minFraction)
names(res) <- stats$chromosome
attr(res, "minFraction") <- minFraction
res
}, protected=TRUE) # isWholeChromosomeGained()
setMethodS3("isWholeChromosomeLost", "CBS", function(fit, minFraction=0.99, ...) {
# Argument 'minFraction':
minFraction <- Arguments$getDouble(minFraction, range=c(0,1))
stats <- getCallStatistics(fit, ...)
calls <- stats$lossFraction
if (is.null(calls)) {
return(rep(NA, times=nbrOfChromosomes(fit)))
}
res <- (calls >= minFraction)
names(res) <- stats$chromosome
attr(res, "minFraction") <- minFraction
res
}, protected=TRUE) # isWholeChromosomeLost()
setMethodS3("nbrOfLosses", "CBS", function(fit, ...) {
stats <- getSegments(fit, ...)
calls <- stats$lossCall
if (is.null(calls)) {
return(NA_integer_)
}
sum(calls, na.rm=TRUE)
}, protected=TRUE)
setMethodS3("nbrOfGains", "CBS", function(fit, ...) {
stats <- getSegments(fit, ...)
calls <- stats$gainCall
if (is.null(calls)) {
return(NA_integer_)
}
sum(calls, na.rm=TRUE)
}, protected=TRUE)
setMethodS3("nbrOfAmplifications", "CBS", function(fit, ...) {
stats <- getSegments(fit, ...)
calls <- stats$amplificationCall
if (is.null(calls)) {
return(NA_integer_)
}
sum(calls, na.rm=TRUE)
}, protected=TRUE)
setMethodS3("getCallStatisticsByArms", "CBS", function(fit, genomeData, ...) {
# To please/trick R CMD check
chromosome <- x <- NULL; rm(list=c("chromosome", "x"))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'genomeData':
genomeData <- as.data.frame(genomeData)
# Subset 'regions' by chromosomes segmented
keep <- is.element(genomeData$chromosome, getChromosomes(fit))
genomeData <- genomeData[keep,]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# p-arm
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
regions <- getChromosomeRanges(fit)
regions$end <- genomeData$centroStart
regions$start <- pmin(regions$start, regions$end)
# Shrink regions
for (rr in seq_len(nrow(regions))) {
chr <- regions[rr,"chromosome"]
x0 <- regions[rr,"start"]
x1 <- regions[rr,"end"]
xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x
if (length(xs) > 0) {
range <- range(xs, na.rm=TRUE)
x0 <- max(c(x0, range[1]), na.rm=TRUE)
x1 <- min(c(x1, range[2]), na.rm=TRUE)
regions[rr,"start"] <- x0
regions[rr,"end"] <- x1
}
} # for (rr ...)
regions[,"length"] <- regions[,"end"] - regions[,"start"] ## + 1L
callStats <- getCallStatistics(fit, regions=regions)
callStats$label <- sprintf("%sp", callStats$label)
callStatsP <- callStats
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# q-arm
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
regions <- getChromosomeRanges(fit)
regions$start <- genomeData$centroEnd
regions$end <- pmax(regions$end, regions$start)
# Shrink regions
for (rr in seq_len(nrow(regions))) {
chr <- regions[rr,"chromosome"]
x0 <- regions[rr,"start"]
x1 <- regions[rr,"end"]
xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x
if (length(xs) > 0) {
range <- range(xs, na.rm=TRUE)
x0 <- max(c(x0, range[1]), na.rm=TRUE)
x1 <- min(c(x1, range[2]), na.rm=TRUE)
regions[rr,"start"] <- x0
regions[rr,"end"] <- x1
}
} # for (rr ...)
regions[,"length"] <- regions[,"end"] - regions[,"start"] ## + 1L
callStats <- getCallStatistics(fit, regions=regions)
callStats$label <- sprintf("%sq", callStats$label)
callStatsQ <- callStats
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Merge
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
callStats <- rbind(callStatsP, callStatsQ)
# Not needed anymore
regions <- callStatsP <- callStatsQ <- NULL
# Reorder
o <- order(callStats$chromosome, callStats$start)
callStats <- callStats[o,]
callStats
}, protected=TRUE) # getCallStatisticsByArms()
setMethodS3("callArms", "CBS", function(fit, genomeData, minFraction=0.95, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'minFraction':
minFraction <- Arguments$getDouble(minFraction, range=c(0,1))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# p-arm
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
callStats <- getCallStatisticsByArms(fit, genomeData=genomeData)
callTypes <- grep("Fraction", colnames(callStats), value=TRUE)
callTypes <- gsub("Fraction", "", callTypes)
keys <- sprintf("%sFraction", callTypes)
rhos <- callStats[,keys]
calls <- (rhos >= minFraction)
colnames(calls) <- sprintf("%sCall", callTypes)
callStats <- cbind(callStats, calls)
callStats
}, protected=TRUE) # callArms()
###########################################################################/**
# @RdocMethod mergeNonCalledSegments
#
# @title "Merge neighboring segments that are not called"
#
# \description{
# @get "title"
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# \item{verbose}{@see "R.utils::Verbose".}
# }
#
# \value{
# Returns an object of the same class
# with the same of fewer number of segments.
# }
#
# @author "HB"
#
# \seealso{
# @seeclass
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("mergeNonCalledSegments", "CBS", function(fit, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Merging neighboring segments that are not called")
# Identify call columns
segs <- getSegments(fit, splitters=TRUE)
keep <- grep("Call$", colnames(segs))
nbrOfCalls <- length(keep)
# Sanity check
.stop_if_not(nbrOfCalls > 0)
chromosomes <- getChromosomes(fit)
fitList <- list()
for (cc in seq_along(chromosomes)) {
chromosome <- chromosomes[cc]
verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", cc, chromosome, length(chromosomes)))
fitCC <- extractChromosome(fit, chromosome=chromosome)
n0 <- nbrOfSegments(fitCC)
# Until no more neighboring non-called segments exists
while (TRUE) {
segs <- getSegments(fitCC, splitters=TRUE)
calls <- as.matrix(segs[,keep])
# Find two neighboring segments that are not called
isCalled <- rowAnys(calls, na.rm=TRUE, useNames=FALSE)
verbose && printf(verbose, "Number of segments not called: %d of %d\n", sum(!isCalled, na.rm=TRUE), length(isCalled))
notCalled <- which(!isCalled)
delta <- diff(notCalled)
left <- notCalled[which(delta == 1)[1]]
# No more segments to merge?
if (is.na(left)) {
break
}
fitCC <- mergeTwoSegments(fitCC, left=left)
} # while (...)
n1 <- nbrOfSegments(fitCC)
verbose && printf(verbose, "Number of segments merged: %d of %d\n", n0-n1, n0)
fitList[[cc]] <- fitCC
verbose && exit(verbose)
} # for (cc ...)
verbose && enter(verbose, "Building result")
res <- Reduce(append, fitList)
verbose && exit(verbose)
verbose && exit(verbose)
res
}, protected=TRUE) # mergeNonCalledSegments()
setMethodS3("estimateDeltaCN", "CBS", function(fit, flavor=c("density(TCN)", "density(dTCN)", "dTCN"), adjust=0.3, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'flavor':
flavor <- match.arg(flavor)
# Argument 'adjust':
adjust <- Arguments$getDouble(adjust, range=c(0,10))
if (flavor == "density(TCN)") {
# Get segment mean levels
segs <- getSegments(fit, splitters=FALSE)
x <- segs$mean
w <- segs$nbrOfLoci
# Drop missing values
keep <- is.finite(x) & is.finite(w)
x <- x[keep]
w <- w[keep]
keep <- NULL # Not needed anymore
# Normalize weights
w <- w / sum(w, na.rm=TRUE)
# Estimate density
d <- density(x, weights=w, adjust=adjust)
w <- NULL # Not needed anymore
# Find peaks
pv <- findPeaksAndValleys(d, ...)
type <- NULL; rm(list="type") # To please R CMD check
p <- subset(pv, type == "peak")
px <- p$x
pw <- p$density
# Distance between peaks
dx <- diff(px)
# Weights "between" peaks (AD HOC: sum up peak weights)
dw <- pw[-length(pw)] + pw[-1L]
deltaCN <- weighted.mean(dx, w=dw)
} else if (flavor == "density(dTCN)") {
# Get change-point magnitudes
x <- getChangePoints(fit)[[1L]]
x <- abs(x)
# Drop missing values
keep <- is.finite(x)
x <- x[keep]
keep <- NULL # Not needed anymore
# Estimate density
d <- density(x, adjust=adjust)
# Find peaks
pv <- findPeaksAndValleys(d, ...)
type <- NULL; rm(list="type") # To please R CMD check
p <- subset(pv, type == "peak")
px <- p$x
pw <- p$density
# Distance between peaks
dx <- diff(px)
# Weights "between" peaks (AD HOC: sum up peak weights)
dw <- pw[-length(pw)] + pw[-1L]
stop("Still not implemented.")
} else if (flavor == "dTCN") {
# Get change-point magnitudes
x <- getChangePoints(fit)[[1L]]
x <- abs(x)
# Drop missing values
keep <- is.finite(x)
x <- x[keep]
keep <- NULL # Not needed anymore
deltaCN <- median(x)
}
# Sanity check
deltaCN <- Arguments$getDouble(deltaCN, range=c(0, Inf))
deltaCN
}, protected=TRUE)
setMethodS3("encodeCalls", "data.frame", function(calls, flavor="UCSF", ...) {
# Argument 'calls':
.stop_if_not(all(is.element(c("chromosome", "x"), colnames(calls))))
.stop_if_not(all(is.element(c("lossCall", "gainCall"), colnames(calls))))
# Argument 'flavor':
flavor <- match.arg(flavor)
calls0 <- calls
# Allocate
calls <- rep(NA_real_, times=nrow(calls0))
# Encode loss, neutral and gain (required)
calls[!calls0$gainCall & !calls0$lossCall] <- 0
calls[calls0$gainCall] <- +1
calls[calls0$lossCall] <- -1
# Encode amplifications, if any/called.
idxs <- which(calls0$amplificationCall)
calls[idxs] <- +9
# Encode negative and positive outliers, if any/called.
idxs <- which(calls0$negOutlierCall)
calls[idxs] <- calls[idxs] - 0.1
idxs <- which(calls0$posOutlierCall)
calls[idxs] <- calls[idxs] + 0.1
calls
}, protected=TRUE) # encodeCalls()
setMethodS3("callGLAO", "CBS", function(fit, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Call gains, losses, amplifications and (negative and positive) outliers")
verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fit))
# Call segments
fitC <- callGainsAndLosses(fit, ..., verbose=verbose)
fitC <- callAmplifications(fitC, ..., verbose=verbose)
# Call loci, i.e. locus-level negative and positive outliers
fitC <- callOutliers(fitC, ..., verbose=verbose)
verbose && print(verbose, fitC)
verbose && exit(verbose)
fitC
}, protected=TRUE) # callGLAO()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.