#' Group fused Lars segmentation (low-level)
#'
#' Low-level function for multivariate fused Lars segmentation (GFLars)
#'
#' This function recrusively looks for the best candidate change point
#' according to group-fused LARS. This is a low-level function. It is generally
#' advised to use the wrapper \code{\link{doGFLars}} which also works on data
#' frames, has a convenient argument \code{stat}, and includes a basic
#' workaround for handling missing values.
#'
#' See also \code{\link{jointSeg}} for combining group fused LARS segmentation
#' with pruning by dynamic programming (\code{\link{pruneByDP}}).
#'
#' See \code{\link{PSSeg}} for segmenting genomic signals from SNP arrays.
#'
#' The default weights \eqn{\sqrt{n/(i*(n-i))}} are calibrated as suggested by
#' Bleakley and Vert (2011). Using this calibration, the first breakpoint
#' maximizes the likelihood ratio test (LRT) statistic.
#'
#' @param Y A \code{n*p} matrix of signals to be segmented
#' @param K The number of change points to find
#' @param weights A \code{(n-1)*1} vector of weights for the weigthed group
#' fused Lasso penalty. See Details.
#' @param epsilon Values smaller than epsilon are considered null. Defaults to
#' \code{1e-9}.
#' @param verbose A \code{logical} value: should extra information be output ?
#' Defaults to \code{FALSE}.
#' @return A list with elements: \describe{\item{bkp}{A vector of \code{k} candidate
#' change-point positions} \item{lambda}{The estimated lambda values for each
#' change-point} \item{mean}{A vector of length \code{p}, the mean signal per
#' column} \item{value}{A \code{i x p} matrix of change-point values for the
#' first i change-points} \item{c}{\eqn{\hat{c}}, a \code{n-1 x K} matrix }}
#' @note This implementation is derived from the MATLAB code by Vert and
#' Bleakley: \url{http://cbio.ensmp.fr/GFLseg}.
#' @author Morgane Pierre-Jean and Pierre Neuvial
#' @seealso \code{\link{PSSeg}}, \code{\link{jointSeg}},
#' \code{\link{doGFLars}}, \code{\link{pruneByDP}}
#' @references Bleakley, K., & Vert, J. P. (2011). The group fused lasso for
#' multiple change-point detection. arXiv preprint arXiv:1106.4199.
#'
#' Vert, J. P., & Bleakley, K. (2010). Fast detection of multiple change-points
#' shared by many signals using group LARS. Advances in Neural Information
#' Processing Systems, 23, 2343-2351.
#' @examples
#'
#' p <- 2
#' trueK <- 10
#' sim <- randomProfile(1e4, trueK, 1, p)
#' Y <- sim$profile
#' K <- 2*trueK
#' res <- segmentByGFLars(Y, K)
#' print(res$bkp)
#' print(sim$bkp)
#' plotSeg(Y, res$bkp)
#'
#' @export segmentByGFLars
segmentByGFLars <- function(Y, K, weights=defaultWeights(nrow(Y)), epsilon=1e-9, verbose=FALSE){
if (!is.matrix(Y) || !is.numeric(Y)){
stop("Argument 'Y' should be a numeric matrix.\nPlease see 'doGFLars' for using GFLars directly on a data.frame or a numeric vector")
}
if (any(is.na(Y))) {
stop("Missing values are not handled by the current implementation of group fused LARS")
}
rownames(Y) <- NULL
n <- as.numeric(nrow(Y))
p <- dim(Y)[2]
## Initialization
if (missing(K)) {
stop("Please provide argument 'K'")
}
if (K>=n) {
stop("Too many breakpoints are required")
}
if (is.null(weights)) {
weights <- defaultWeights(n)
}
res.meansignal <- colMeans(Y);
res.lambda <- numeric(K);
res.bkp <- numeric(K)
res.value <- list()
res.c <- matrix(NA, n-1, K);
Y <- sweep(Y, 2, colMeans(Y)) ## [PN:2013-01-02] Not needed ?? (implicitly done within 'leftMultiplyByXt')
AS <- numeric(0)
c <- leftMultiplyByXt(Y=Y, w=weights, verbose=verbose)
for (ii in 1:K){
cNorm <- rowSums(c^2)
res.c[, ii] <- cNorm
bigcHat <- max(cNorm)
if (verbose) {
print(paste('optimize LARS : ', ii))
}
## First breakpoint
if (ii==1) {
AS <- which.max(cNorm)
res.bkp[ii] <- AS
}
I <- order(AS)
AS <- AS[I]
w <- leftMultiplyByInvXAtXA(n, AS, matrix(c[AS,], ncol=p), weights, verbose=verbose)
a <- multiplyXtXBySparse(n=n, ind=AS, val=w, w=weights, verbose=verbose)
a1 <- bigcHat - rowSums(a^2)
u <- a*c
a2 <- bigcHat - rowSums(u)
a3 <- bigcHat - cNorm
## We solve it
gammaTemp = matrix(NA, n-1, 2); # to get the indices right
## First those where we really have a second-order polynomial
subset <- which(a1 > epsilon)
delta <- a2[subset]^2 - a1[subset]*a3[subset]
delta.neg <- subset[which(delta<0)]
delta.pos <- subset[which(delta>=0)]
gammaTemp[delta.neg, 1] <- NA;
gammaTemp[delta.pos, 1] <- (a2[delta.pos]+sqrt(delta[which(delta>=0)]))/a1[delta.pos];
gammaTemp[delta.neg, 2] <- NA;
gammaTemp[delta.pos, 2] <- (a2[delta.pos]-sqrt(delta[which(delta>=0)]))/a1[delta.pos];
## Then those where the quadratic term vanishes and we have a
## first-order polynomial
subset <- which((a1 <= epsilon) & (a2 > epsilon))
gammaTemp[subset, ] = a3[subset] / (2*a2[subset])
## Finally the active set should not be taken into account, as well as
## those for which the computation gives dummy solutions
maxg <- max(gammaTemp, na.rm=TRUE)+1;
subset <- which((a1 <= epsilon) & (a2 <= epsilon));
gammaTemp[subset, ] <- maxg;
gammaTemp[AS, ] <- maxg;
gamma <- min(gammaTemp, na.rm=TRUE)
idx <- which.min(gammaTemp)
nexttoadd <- 1 + (idx-1) %% (n-1);
## Update beta
res.lambda[ii] <- sqrt(bigcHat);
res.value[[ii]] <- matrix(numeric(ii*p), ncol = p)
res.value[[ii]][I,] <- gamma*w;
if (ii>1){
res.value[[ii]][1:(ii-1),] <- res.value[[ii]][1:(ii-1),] + res.value[[ii-1]];
}
## Update active indexes
if (ii<K){
AS <- c(AS,nexttoadd)
res.bkp[ii+1] <- nexttoadd;
c <- c-gamma*a;
}
## print(AS)
}
list(
bkp=res.bkp,
lambda=res.lambda,
mean=res.meansignal,
value=res.value,
c=res.c)
}
############################################################################
## HISTORY:
## 2014-05-15
## o Renamed back to 'segmentByGFLars', so that 'doGFLars' is a *wrapper*
## around the core segmentation function.
## 2013-12-09
## o Renamed to 'doGFLars'
## 2013-01-09
## o Replace all jumps by bkp
## 2012-12-27
## o Renamed to segmentByGFLars.
## o Some code and doc cleanups.
## 2012-12-13
## o Updated example.
## 2012-12-06
## o Replaced the function defaultWeigths by a (n-1)*1 vector of
## weigths and updated example
## 2012-09-13
## o Some code cleanups.
## o Tentative bug fix: indices in gammaTemp.
## o SPEEDUP: removed unnecessary calls to 'complex'.
## 2012-08-13
## o Created.
############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.