setMethodS3("seqOfSegmentsByDP", "AbstractCBS", function(fit, by, shift=+100, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'by':
by <- Arguments$getCharacters(by)
data <- getLocusData(fit)
fields <- colnames(data)
missing <- fields[!is.element(by, fields)]
if (length(missing) > 0) {
stop("Argument 'by' specifies one or more non-existing locus data fields: ", paste(missing, collapse=", "))
}
# Argument 'shift':
shift <- Arguments$getNumeric(shift)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Identifying optimal sets of segments via dynamic programming")
# Assert that known segments was used
knownSegments <- fit$params$knownSegments
if (nrow(knownSegments) == 0L) {
chromosome <- getChromosomes(fit)
knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf)
}
chrOffsets <- getChromosomeOffsets(fit)
# Sanity check
.stop_if_not(all(is.finite(chrOffsets)))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Shift every other non-empty region
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Shifting TCN levels for every second segment")
segPrefix <- getSegmentTrackPrefixes(fit)[1L]
segKeys <- toCamelCase(paste(segPrefix, c("start", "end")))
segRowsKey <- toCamelCase(paste(segPrefix, "seg rows"))
verbose && enter(verbose, "Split up into non-empty independent regions")
chromosomes <- getChromosomes(fit)
fitList <- list()
for (jj in seq_along(chromosomes)) {
chr <- chromosomes[jj]
verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", jj, chr, length(chromosomes)))
# Subset segmentation results on this chromosome
fitJJ <- extractChromosome(fit, chr)
verbose && cat(verbose, "Number of loci on chromosome: ", nbrOfLoci(fitJJ))
# Nothing to do and nothing to add?
if (nbrOfLoci(fitJJ) == 0L) {
verbose && exit(verbose)
next
}
# Find known segments on this chromosome
knownSegmentsJJ <- subset(knownSegments, chromosome == chr)
verbose && cat(verbose, "Known segments on chromosome:")
verbose && print(verbose, knownSegmentsJJ)
# Nothing to do?
if (nrow(knownSegmentsJJ) == 0L) {
fitList <- append(fitList, list(fitJJ))
verbose && exit(verbose)
next
}
# Get the segments on this chromosome
segsJJ <- getSegments(fitJJ)
# Extract the individual known segments on this chromosome
fitListJJ <- list()
for (kk in seq_len(nrow(knownSegmentsJJ))) {
verbose && enter(verbose, sprintf("Known segment #%d of %d", kk, nrow(knownSegmentsJJ)))
seg <- knownSegmentsJJ[kk,]
verbose && print(verbose, seg)
start <- seg$start
end <- seg$end
idxStart <- min(which(segsJJ[[segKeys[1]]] >= start))
idxEnd <- max(which(segsJJ[[segKeys[2]]] <= end))
idxs <- idxStart:idxEnd
# Extract the particular known segment
fitKK <- extractSegments(fitJJ, idxs)
# Only add if it has loci
if (nbrOfLoci(fitKK) > 0L) {
fitListJJ <- append(fitListJJ, list(fitKK))
}
fitKK <- NULL # Not needed anymore
verbose && exit(verbose)
} # for (kk ...)
# Append
fitList <- append(fitList, fitListJJ)
fitListJJ <- NULL # Not needed anymore
verbose && exit(verbose)
} # for (jj ...)
nbrOfRegions <- length(fitList)
verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions)
verbose && exit(verbose)
verbose && enter(verbose, "Shift every other region")
for (jj in seq(from=1L, to=nbrOfRegions, by=2L)) {
fitJJ <- fitList[[jj]]
fitJJ <- shiftTCN(fitJJ, shift=shift)
fitList[[jj]] <- fitJJ
} # for (jj ...)
verbose && exit(verbose)
verbose && enter(verbose, "Merge")
## former Reduce() w/ append(a, b, addSplit = FALSE)
fitT <- do.call(c, args = c(fitList, addSplit = FALSE))
# Sanity check
## .stop_if_not(nbrOfSegments(fitT) == nbrOfSegments(fit)) # Not true anymore
verbose && exit(verbose)
fitList <- NULL # Not needed anymore
segsT <- getSegments(fitT)
verbose && print(verbose, tail(segsT))
verbose && exit(verbose)
fit <- fitT
# Not needed anymore
fitT <- knownSegments <- NULL
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Extract signals for DP
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Extracting signals for dynamic programming")
fit <- tileChromosomes(fit)
# Locus-level signals
data <- getLocusData(fit)
Y <- as.matrix(data[,by,drop=FALSE])
verbose && print(verbose, summary(Y))
# "DP" change-point indices (excluding the two outer/boundary ones)
segRows <- fit[[segRowsKey]]
segIdxs <- seq_len(nrow(segRows)-1L)
cpIdxs <- segRows$endRow[segIdxs]
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Dynamic-programming segmention pruning
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Dynamic programming")
verbose && cat(verbose, "Number of \"DP\" change points: ", length(cpIdxs))
verbose && str(verbose, cpIdxs)
res <- seqOfSegmentsByDP(Y, candidatechangepoints=cpIdxs, ...)
verbose && str(verbose, res)
# Sanity checks
jumpList <- res$jump
lastJump <- jumpList[[length(jumpList)]]
.stop_if_not(identical(cpIdxs, as.integer(lastJump)))
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Adjustments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Excluding cases where known segments no longer correct")
verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions)
# Drop the K first
nbrOfCPs <- nbrOfRegions - 1L
if (nbrOfCPs > 0L) {
K <- nbrOfCPs - 1L # Don't count the flat segmentation
jumpList <- jumpList[-(1:K)]
}
verbose && str(verbose, jumpList)
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Extract the possible sets of segments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose <- less(verbose, 20)
verbose && enter(verbose, "Converting to physical segments")
segs <- getSegments(fit, splitters=TRUE, addGaps=FALSE)
segs <- segs[,c("chromosome", segKeys)]
jumpIdxList <- lapply(jumpList, FUN=function(idxs) {
match(idxs, table=cpIdxs)
})
# Sanity check
.stop_if_not(identical(seq_along(cpIdxs), jumpIdxList[[length(jumpIdxList)]]))
chrs <- segs$chromosome
starts <- segs[[segKeys[1]]]
ends <- segs[[segKeys[2]]]
nsegs <- nrow(segs)
segList <- vector("list", length=length(jumpIdxList))
for (kk in seq_along(segList)) {
verbose && enter(verbose, sprintf("Sequence #%d of %d", kk, length(segList)))
idxs <- jumpIdxList[[kk]]
verbose && cat(verbose, "Change point indices:")
verbose && str(verbose, idxs)
chrsKK <- chrs[idxs]
chr <- chrsKK[1]
# .stop_if_not(all(chrsKK == chr))
chrsKK <- c(chrsKK, chrsKK[length(chrsKK)])
startsKK <- starts[c(1L, idxs+1L)]
endsKK <- ends[c(idxs, nsegs)]
verbose && cat(verbose, "Chromosomes:")
verbose && str(verbose, chrsKK)
verbose && cat(verbose, "Starts:")
verbose && str(verbose, startsKK)
verbose && cat(verbose, "Ends:")
verbose && str(verbose, endsKK)
offsetsKK <- chrOffsets[chrsKK]
startsKK <- startsKK - offsetsKK
endsKK <- endsKK - offsetsKK
segsKK <- data.frame(chromosome=chrsKK, start=startsKK, end=endsKK)
verbose && print(verbose, tail(segsKK))
segList[[kk]] <- segsKK
verbose && exit(verbose)
} # for (kk ...)
verbose && exit(verbose)
verbose <- more(verbose, 20)
# Sanity check
lastSegs <- segList[[length(segList)]]
# .stop_if_not(identical(lastSegs, segs))
verbose && str(verbose, segList)
nbrOfCPsSeq <- sapply(jumpList, FUN=length)
verbose && cat(verbose, "Sequence of number of \"DP\" change points:")
verbose && print(verbose, nbrOfCPsSeq)
nbrOfSegsSeq <- sapply(segList, FUN=nrow)
verbose && cat(verbose, "Sequence of number of segments:")
verbose && print(verbose, nbrOfSegsSeq)
nbrOfChangePointsSeq <- nbrOfSegsSeq - nbrOfRegions
verbose && cat(verbose, "Sequence of number of \"discovered\" change points:")
verbose && print(verbose, nbrOfChangePointsSeq)
.stop_if_not(all(nbrOfSegsSeq == nbrOfCPsSeq + 1L))
.stop_if_not(nbrOfSegsSeq[1L] == nbrOfRegions)
segList <- lapply(segList, FUN=function(seg) {
attr(seg, "nbrOfChangePoints") <- nrow(seg) - nbrOfRegions
seg
})
K <- (nbrOfRegions-1L)
modelFit <- list(
nbrOfSegments=nbrOfSegsSeq,
nbrOfChangePoints=nbrOfSegsSeq - nbrOfRegions,
nbrOfRegions=nbrOfRegions,
idxBest=res$kbest - K + 1L,
rse=res$rse[-(1:K)],
V=res$V[-(1:K),-(1:K),drop=FALSE],
seqOfSegmentsByDP=res
)
attr(segList, "modelFit") <- modelFit
verbose && exit(verbose)
segList
}, protected=TRUE) # seqOfSegmentsByDP()
setMethodS3("seqOfSegmentsByDP", "matrix", function(
### Segmentation of a multi-dimensional signal with dynamic programming.
Y,
### A n*p signal to be segmented
candidatechangepoints = c(1:(n-1)),
### A vector of candidate positions for change-points (default=[1:n-1])
kmax = n-1,
### Maximum number of change-points to test (default : n-1)
threshold = 0.5,
### Stopping criteria. Typically chosen to be in the interval
### (0 0.5]. The smaller the threshold, the higher the tendency to keep more
### changepoints. The criteria is based on the method found in 'Picard et al
### (2005)', "A statistical approach for array CGH data analysis" (BMC
### Bioinformatics). Default=0.5
...
){
n <- dim(Y)[1]
p <- dim(Y)[2]
kmaxmin <- floor(n/4)
if (kmax == 0 || kmax > length(candidatechangepoints)) {
kmax <- min(length(candidatechangepoints), kmaxmin)
}
if (kmax > kmaxmin) {
sprintf('warning : not enough points to optimize the number of the change-points up to %s\n', kmax)
kmax <- kmaxmin
sprintf('Set the maximum number of change-points to %s\n', kmax)
}
# Compute boundaries of the smallest intervals considered
b <- sort(union(0, union(n, candidatechangepoints)))
k <- length(b) - 1 # k is the number of such intervals
# Compute the k*k matrix J such that J[i,j] for i<=j is the RSE when intervales i to j are merged
J <- matrix(numeric(k*k), ncol=k)
# How should NAs be handled?!?
Yz <- Y
Yz[is.na(Yz)] <- 0
s <- rbind(rep(0, times=p), colCumsums(Yz, useNames=FALSE))
v <- c(0, cumsum(rowSums(Y^2, na.rm=TRUE)))
for (i in 1:k) {
for (j in i:k) {
Istart <- b[i] + 1
Iend <- b[j+1]
J[i,j] <- v[Iend+1] - v[Istart] - sum((s[Iend+1,]-s[Istart,])^2, na.rm=TRUE)/(Iend-Istart+1)
} # for (j ...)
} # for (i ...)
# Dynamic programming
V <- matrix(numeric((kmax+1)*k), ncol=k) # V[i,j] is the best RSE for segmenting intervals 1 to j with at most i-1 change points
jump <- matrix(numeric(kmax*k), ncol=k)
# With no change points, V[i,j] is juste the precomputed RSE for intervals 1 to j
V[1,] <- J[1,]
# Then we apply the recursive formula
for (ki in 1:kmax) {
for (j in (ki+1):k) {
val <- min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j]))
ind <- which.min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j]))
V[ki+1,j] <- val
jump[ki,j] <- ind + ki-1
} # for (j ...)
} # for (ki ...)
# Optimal segmentation
res.jump <- list()
for (ki in 1:kmax) {
res.jump[[ki]] <- numeric(ki)
res.jump[[ki]][ki] <- jump[ki,k]
if (ki != 1) {
for (i in seq(from=(ki-1), to=1, by=-1)) {
res.jump[[ki]][i] <- jump[i, res.jump[[ki]][i+1]]
}
}
} # for (ki ...)
# Convert back the index of the interval to the last position before the jump
rightlimit <- b[2:length(b)]
for (ki in 1:kmax) {
res.jump[[ki]] <- rightlimit[res.jump[[ki]]]
} # for (ki ...)
# RSE as a function of number of change-points
res.rse <- V[,k]
# Optimal number of change points
options(warn=-1)
J <- log(res.rse)
Km <- length(J)
Jtild <- (J[Km]-J)/(J[Km]-J[1])*(Km-1)+1 # Normalize
res.kbest <- max(which(diff(diff(Jtild)) > threshold)) + 1
#if((res.kbest) == -Inf) { res.kbest <- 1 }
return(list(jump=res.jump, rse=res.rse, kbest=res.kbest, V=V))
### \item{res.jump{i}}{a i*1 vector of change-point positions for the i-th lambda value (i depends on lambda). i varies between 1 and kmax}
### \item{res.rse}{a (kmax+1)-dimensional vector of residual squared error}
### \item{res.kbest}{the number of selected change-points}
}) # seqOfSegmentsByDP()
###########################################################################/**
# @set class=AbstractCBS
# @RdocMethod pruneByDP
#
# @title "Prunes the CN profile using dynamical programming"
#
# \description{
# @get "title" by specifying the target number of segments or alternative
# how of many change points to drop.
# }
#
# @synopsis
#
# \arguments{
# \item{nbrOfSegments}{An @integer specifying the number of segments after
# pruning. If negative, the it specifies the number of change points
# to drop.}
# \item{...}{Optional arguments passed to @seemethod "seqOfSegmentsByDP".}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns a pruned object of the same class.
# }
#
# \examples{\dontrun{
# # Drop two segments
# fitP <- pruneByDP(fit, nbrOfSegments=-2)
# }}
#
# @author "HB, PN"
#
# \references{
# [1] ... \cr
# }
#
# @keyword internal
#*/###########################################################################
setMethodS3("pruneByDP", "AbstractCBS", function(fit, nbrOfSegments, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Some pre-extraction
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
knownSegments <- fit$params$knownSegments
if (nrow(knownSegments) == 0L) {
chromosome <- getChromosomes(fit)
knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'nbrOfSegments':
nbrOfSegments <- Arguments$getInteger(nbrOfSegments)
# Specifying number of change points *to drop*?
if (nbrOfSegments < 0L) {
nbrOfCPsToDrop <- -nbrOfSegments
nbrOfSegments <- nbrOfSegments(fit, splitters=FALSE) - nbrOfCPsToDrop
}
if (nbrOfSegments < nrow(knownSegments)) {
stop("Argument 'nbrOfSegments' is less than number of \"known\" segments: ", nbrOfSegments, " < ", nrow(knownSegments))
}
if (nbrOfSegments > nbrOfSegments(fit, splitters=FALSE)) {
stop("Argument 'nbrOfSegments' is greater than the number of \"found\" segments: ", nbrOfSegments, " > ", nbrOfSegments(fit, splitters=FALSE))
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Prune segments by dynamical programming")
# Nothing to do?
if (nbrOfSegments == nbrOfSegments(fit, splitters=FALSE)) {
verbose && cat(verbose, "No need for pruning, because the number of \"target\" segments is the same as the number of \"found\" segments: ", nbrOfSegments, " == ", nbrOfSegments(fit, splitters=FALSE))
return(fit)
verbose && exit(verbose)
}
verbose && cat(verbose, "Target number of segments: ", nbrOfSegments)
verbose && enter(verbose, "Dynamic programming")
segList <- seqOfSegmentsByDP(fit, ...)
# Select the one with expected number of segments
nbrOfCPs <- sapply(segList, FUN=nrow)
verbose && printf(verbose, "Range of number of CPs among solutions: [%d,%d]\n", min(nbrOfCPs), max(nbrOfCPs))
keep <- which(nbrOfCPs == nbrOfSegments)
.stop_if_not(length(keep) == 1L)
knownSegments <- segList[[keep]]
verbose && printf(verbose, "Solution with %d segments:\n", nbrOfSegments)
verbose && print(verbose, knownSegments)
verbose && exit(verbose)
verbose && enter(verbose, "Rebuilding segmentation results")
fitDP <- resegment(fit, knownSegments=knownSegments, undoTCN=+Inf, undoDH=+Inf, verbose=less(verbose, 10))
verbose && print(verbose, fitDP)
verbose && exit(verbose)
verbose && exit(verbose)
fitDP
}, protected=TRUE) # pruneByDP()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.