#' Joint segmentation of multivariate signals
#'
#' Joint segmentation of multivariate signals in two steps: \enumerate{
#' \item{first-pass segmentation. By default, a fast, greedy approach is used
#' (see \code{method}).} \item{pruning of the candidate change points obtained
#' by dynamic programming} }
#'
#' If \code{modelSelectionOnDP} is set to \code{FALSE}, then model selection is
#' run on the sets of the form \code{bkp[1:k]} for \eqn{1 \leq k \leq
#' length(bkp)}, where \code{bkp} is the set of breakpoints identified by the
#' initial segmentation. In particular, this implies that the candidate
#' breakpoints in \code{bkp} are sorted by order of appearance and not by
#' position.
#'
#' If \code{jitter} is not \code{NULL}, it should be a vector of integer
#' indices. The set of candidate breakpoints passed on to dynamic programming is
#' augmented by all indices distant from an element of \code{jitter} from one of
#' the candidates. For example, if \code{jitter==c(-1, 0, 1)} and the initial
#' set of breakpoints is \code{c(1,5)} then dynamic programming is run on
#' \code{c(1,2,4,5,6)}.
#'
#' If the return value of the initial segmentation has an element named
#' \code{dpseg}, then initial segmentation results are not pruned by dynamic
#' programming.
#' @param Y The signal to be segmented (a matrix or a numeric vector)
#' @param method A \code{character} value, the type of segmentation method used.
#' May be one of: \describe{ \item{"RBS"}{Recursive Binary Segmentation (the
#' default), see \code{\link{segmentByRBS}} as described in Gey and Lebarbier
#' (2005)} \item{"GFLars"}{Group fused LARS as described in Bleakley and Vert
#' (2011).} \item{"DP"}{Dynamic Programming (Bellman, 1956). For univariate
#' signals the pruned DP of Rigaill et al (2010) is used.} \item{"other"}{The
#' segmentation method is passed as a function using argument \code{segFUN}
#' (see examples in directory \code{otherMethods} of the \code{jointseg}
#' package).}}
#' @param stat A vector containing the names or indices of the columns of
#' \code{Y} to be segmented
#' @param dpStat A vector containing the names or indices of the columns of
#' \code{Y} to be segmented at the second round of segmentation. Defaults to
#' the value of argument \code{stat}.
#' @param segFUN The segmentation function to be used when \code{method} is set
#' to \code{other}. Not used otherwise.
#' @param jitter Uncertainty on breakpoint position after initial segmentation.
#' Defaults to \code{NULL}. See Details.
#' @param modelSelectionMethod A character value, the name of the method used to
#' perform model selection.
#' @param modelSelectionOnDP A logical value. If \code{TRUE} (the default),
#' model selection is performed on segmentation after dynamic programming;
#' else model selection is performed on initial segmentation. Only applies to
#' methods "DP", "RBS" and "GFLars".
#' @param ... Further arguments to be passed to the lower-level segmentation
#' method determined by argument \code{method}.
#' @param profile A \code{logical} value: trace time and memory usage ?
#' @param verbose A \code{logical} value: should extra information be output ?
#' Defaults to \code{FALSE}.
#' @references Bleakley, K., & Vert, J. P. (2011). The group fused lasso for
#' multiple change-point detection. arXiv preprint arXiv:1106.4199.
#' @references Vert, J. P., & Bleakley, K. (2010). Fast detection of multiple
#' change-points shared by many signals using group LARS. Advances in Neural
#' Information Processing Systems, 23, 2343-2351.
#' @references Gey, S., & Lebarbier, E. (2008). Using CART to Detect Multiple
#' Change Points in the Mean for Large Sample.
#' http://hal.archives-ouvertes.fr/hal-00327146/
#' @references Rigaill, G. (2010). Pruned dynamic programming for optimal
#' multiple change-point detection. arXiv preprint arXiv:1004.0887.
#' @seealso \code{\link{pruneByDP}}
#' @export jointSeg
#' @examples
#' ## A two-dimensional signal
#' p <- 2
#' trueK <- 10
#' len <- 1e4
#' sim <- randomProfile(len, trueK, 1, p)
#' Y <- sim$profile
#' K <- 2*trueK
#' res <- jointSeg(Y, method="RBS", K=K)
#' bkp <- res$bestBkp
#' getTpFp(bkp, sim$bkp, tol=5, relax = -1) ## true and false positives
#' plotSeg(Y, list(sim$bkp, res$bestBkp), col=1)
#'
#' ## Now we add some NA:s in one dimension
#' jj <- sim$bkp[1]
#' Y[jj-seq(-10,10), p] <- NA
#' res2 <- jointSeg(Y, method="RBS", K=K, verbose=TRUE)
#' bkp <- res2$bestBkp
#' getTpFp(res2$bestBkp, sim$bkp, tol=5, relax = -1) ## true and false positives
jointSeg <- function(Y, method="RBS", stat=NULL, dpStat=stat, segFUN=NULL, jitter=NULL,
modelSelectionMethod=ifelse(match(method, c("DynamicProgramming", "RBS", "GFLars"), nomatch=0) > 0, "Lebarbier", "none"),
modelSelectionOnDP=(match(method, c("DynamicProgramming", "RBS", "GFLars"), nomatch=0) > 0), ..., profile=FALSE, verbose=FALSE){
if (is.null(dim(Y))) {
## coerce to a matrix
Y <- as.matrix(Y)
}
## drop row names
rownames(Y) <- NULL
## Arguments 'stat' and 'dpStat'
if (is.null(stat)) {
stat <- 1:ncol(Y)
if (is.null(dpStat)) {
dpStat <- stat
}
}
if (mode(stat) != mode(dpStat)) {
stop("Arguments 'stat' and 'dpStat' should be of the same mode ('numeric' or 'character')")
}
arg <- union(stat, dpStat)
if (!is.null(arg)) {
if (is.numeric(arg)) {
mm <- match(arg, 1:ncol(Y))
} else if (is.character(arg)) {
mm <- match(arg, colnames(Y))
}
if (sum(is.na(mm))) {
guilty <- paste("'", arg[which(is.na(mm))], "'", sep="", collapse=",")
stop("Undefined column(s) selected in 'Y':", guilty, ". Please check arguments 'stat' and 'dpStat'")
}
}
## Case of rows with all entries missing (typically occurs when 'stat' is 'd')
n <- nrow(Y)
allNA <- apply(Y[, mm, drop=FALSE], 1, FUN=function(x) all(is.na(x)))
ww <- which(!allNA)
outPos <- 1:n ## in order to be able to map back to 'position' indices from the input data
if (length(ww)<n) {
outPos <- mapPositionsBack(outPos[ww])
}
Yseg <- Y[ww, stat] ## for the initial segmentation
nSeg <- ifelse(!is.null(dim(Yseg)), nrow(Yseg), length(Yseg))
Ydp <- Y[ww, dpStat] ## for pruning by DP
nDp <- ifelse(!is.null(dim(Ydp)), nrow(Ydp), length(Ydp))
Y <- NULL; rm(Y); ## not needed anymore
## Segmentation function
if (method=="other") {
if (is.null(segFUN) || mode(segFUN)!="function") {
stop("A segmentation function should be provided using argument 'segFUN' for method 'other'")
}
nms <- names(formals(segFUN))
inter <- intersect(nms, c("jitter", "methModelSelection", "DP"))
if (length(inter)) {
warning("Argument(s) ", paste("'", inter, "'", collapse=",", sep=""), " will not be passed to argument 'segFUN' as they match existing arguments of the 'jointSeg' function")
}
} else {
if (!is.null(segFUN)) {
warning("Argument 'segFUN' is only used when 'method' is set to 'other'")
}
segName <- paste("do", method, sep="")
## retrieve segmentation function and assert that it does exist
if (!exists(segName, mode="function")) {
stop("Function '", segName, "' should be provided for method '", method, "'")
} else {
segFUN <- eval(as.name(segName))
}
}
if (verbose) {
cat("Start initial segmentation by", method, "\n")
cat("Statistic:", stat, "\n")
utils::str(Yseg)
}
prof <- NULL
resSeg <- prof(segFUN(Yseg, ...), doit=profile)
initSeg <- resSeg$res
prof <- rbind(prof, segmentation=resSeg$prof)
if (verbose) {
cat("End initial segmentation by", method, "\n")
if (profile) {
print(resSeg$prof)
}
}
dpseg <- initSeg$dpseg
if (!is.null(dpseg)) {
## do nothing
} else {
## Prune candidate breakpoints
if (verbose) {
cat("Start pruning by dynamic programming\n")
cat("Statistic:", dpStat, "\n")
utils::str(Ydp)
}
bkp <- initSeg$bkp
if (!is.null(jitter)) {
jitter <- as.integer(jitter)
bkpJ <- sapply(bkp, FUN=function(x) {
x+jitter
})
bkpJ <- unique(bkpJ) ## remove duplicates
bkpJ <- bkpJ[bkpJ>=1]
bkpJ <- bkpJ[bkpJ<n]
bkp <- bkpJ
}
resDP <- prof(pruneByDP(Ydp, candCP=bkp, verbose=verbose), doit=profile)
dpseg <- resDP$res
prof <- rbind(prof, dpseg=resDP$prof)
if (verbose) {
cat("End pruning by dynamic programming\n")
if (profile) {
print(resDP$prof)
}
}
}
## Find the best segmentation
if (modelSelectionMethod == "none") {
bestSeg <- initSeg$bkp
} else {
if (modelSelectionOnDP) { ## run model selection on results of dynamic programming
mS <- modelSelection(dpseg$rse, n=nDp, method=modelSelectionMethod)
if (verbose) {
utils::str(mS)
}
bestSeg <- integer(0L)
if (mS$kbest!=0) {
bestSeg <- dpseg$bkp[[mS$kbest]]
} else {
## run model selection on initial segmentation
mS <- modelSelection(initSeg$rse, n=nSeg, method=modelSelectionMethod)
bestSeg <- integer(0L)
if (mS$kbest!=0) {
bestSeg <- sort(initSeg$bkp[1:mS$kbest])
}
}
}
}
## map breakpoint positions back to original space (if required)
bestBkp <- outPos[bestSeg]
initBkp <- outPos[initSeg$bkp]
dpBkpList <- lapply(dpseg$bkp,function(bkp) outPos[bkp])
##value<< A list with elements:
list(
bestBkp=bestBkp,
initBkp=initBkp,
dpBkpList=dpBkpList,
prof=prof)
}
############################################################################
## HISTORY:
## 2014-07-16
## o It is now possible to run 'modelSelection' on initial
## segmentation. Default behavior of the function is unchanged.
## 2014-05-14
## o Argument 'flavor' renamed to 'method'.
## o Added argument 'segFUN' and method "other" to enable a
## user-defined segmentation method to be used.
## o Removed default argument values for 'method', in order to make
## the choice of a segmentation method explicit for the user.
## o Moved all copy-number-specific code to downstream methods (e.g. 'doCBS').
## 2013-12-09
## o Replaced 'cghseg' by 'DP'
## o Renamed all 'segmentByNnn' to 'doNnn'
## o Force Ydp to be a matrix
## 2013-12-05
## o Now dropping row names of 'Yseg' and 'Ydp'.
## 2013-11-29
## Added flavor : 'DP'.
## 2013-03-28
## Added flavors : 'CnaStruct' and 'Pelt'
## 2013-03-07
## Added option 'DP' for flavor "RBS" to do selection on initial segmentation
## 2013-02-26
## o Added option 'jitter' to allow more precise breakpoint identification by DP.
## 2013-02-18
## o Add flavor 'PSCBS'
## 2013-01-25
## o Cleanups in doc and return values.
## o Now returning 'bestBkp'.
## o Removed 'position' from required fields in input data.
## 2013-01-09
## o Replaced all jumps by bkp.
## 2013-01-03
## o Added flavors 'PSCN' and 'cghseg'.
## 2012-12-27
## o Default flavor is now recursive binary segmentation.
## o Some code and doc cleanups.
## 2012-12-15
## o Added argument 'profile' for optional reporting of CPU and memory
## usage.
## 2012-12-06
## o Added argument flavor.
## 2012-12-04
## o Created.
############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.