R/doGFLars.R

#' Group fused Lars segmentation
#' 
#' High-level function for multivariate fused Lars (GFLars) segmentation
#' 
#' This function is a wrapper around the lower-level segmentation function
#' \code{\link{segmentByGFLars}}. It can be run on p-dimensional,
#' piecewise-constant data in order to defined a set of candidate change
#' points. It is recommended to prune this list of candidates using dynamic
#' programming (\code{\link{pruneByDP}}), combined with a selection of the best
#' number of change points. The \code{\link{jointSeg}} function provides a
#' convenient wrapper for performing segmentation, pruning and model selection.
#' 
#' For the specific case of DNA copy number data segmentation, see the
#' dedicated wrapper \code{\link{PSSeg}}.
#' 
#' The default weights \eqn{\sqrt{n/(i*(n-i))}} are calibrated as suggested by
#' Bleakley and Vert (2011).  Using this calibration, the first breakpoint
#' maximizes the likelihood ratio test (LRT) statistic.
#' 
#' @param Y A \code{n*p} signal to be segmented
#' @param K The number of change points to find
#' @param stat A vector containing the names or indices of the columns of
#' \code{Y} to be segmented
#' @param \dots Further arguments to be passed to 'segmentByGFLars'
#' @param verbose A \code{logical} value: should extra information be output ?
#' Defaults to \code{FALSE}.
#' @return An object of the same structure as the output of
#' \code{\link{segmentByGFLars}}
#' @note This implementation is derived from the MATLAB code by Vert and
#' Bleakley: \url{http://cbio.ensmp.fr/GFLseg}.
#' @author Morgane Pierre-Jean and Pierre Neuvial
#' @references Bleakley, K., & Vert, J. P. (2011). The group fused lasso for
#' multiple change-point detection. arXiv preprint arXiv:1106.4199.
#' 
#' Vert, J. P., & Bleakley, K. (2010). Fast detection of multiple change-points
#' shared by many signals using group LARS. Advances in Neural Information
#' Processing Systems, 23, 2343-2351.
#' @examples
#' 
#' p <- 2
#' trueK <- 10
#' sim <- randomProfile(1e4, trueK, 1, p)
#' Y <- sim$profile
#' K <- 2*trueK
#' res <- doGFLars(Y, K)
#' print(res$bkp)
#' print(sim$bkp)
#' plotSeg(Y, res$bkp)
#' 
#' ## a toy example with missing values
#' sim <- randomProfile(1e2, 1, 0.1, 2)
#' Y <- sim$profile
#' Y[3:50, 2] <- NA
#' 
#' res <- doGFLars(Y, 10, 2, verbose=TRUE)
#' print(res$bkp)
#' print(sim$bkp)
#' plotSeg(Y, res$bkp)
#' 
#' @export doGFLars
doGFLars <- function(Y, K, stat=NULL, ..., verbose=FALSE) {
    ## Argument 'Y'
    if (is.null(dim(Y)) || is.data.frame(Y)) {
        if (verbose) {
            print("Coercing 'Y' to a matrix")
        }
        Y <- as.matrix(Y)
    } else if (!is.matrix(Y)){
        stop("Argument 'Y' should be a matrix, vector or data.frame")
    }

    ## Argument 'stat'
    if (!is.null(stat)) {
        if (is.numeric(stat)) {
            mm <- match(stat, 1:ncol(Y)) 
        } else if (is.character(stat)) {
            mm <- match(stat, colnames(Y))
        }
        if (sum(is.na(mm))) {
            guilty <- paste("'", stat[which(is.na(mm))], "'", sep="", collapse=",")
            stop("Undefined column(s) selected in 'Y':", guilty, ". Please check argument 'stat'")
        } else {
            Y <- Y[, mm, drop=FALSE]
        }
    }
    

    ## Handle missing values by smoothing
    outPos <- 1:nrow(Y)
    if (any(is.na(Y))) {
        warning("Missing values detected. Smoothing will be performed.")
        Yb <- binMissingValues(Y)
        pos <- attr(Yb, "idxs")
        outPos <- mapPositionsBack(pos)
        Y <- Yb
    }

    n <- as.numeric(nrow(Y))
    p <- dim(Y)[2]

    ## Pass on to 'segmentByGFLars'
    res <- segmentByGFLars(Y, K, ..., verbose=verbose)

    ## map breakpoint positions back to original space (if required)
    res$bkp <- outPos[res$bkp]
    
    res
}

############################################################################
## HISTORY:
## 2014-05-15
## o Added argument 'stat'.
## o Now a wrapper calling the low-level 'segmentByGFLars'.
## 2013-12-09
## o Renamed to 'doGFLars'
## 2013-01-09
## o Replace all jumps by bkp
## 2012-12-27
## o Renamed to segmentByGFLars.
## o Some code and doc cleanups.
## 2012-12-13
## o Updated example.
## 2012-12-06
## o Replaced the function defaultWeigths by a (n-1)*1 vector of
## weigths and updated example
## 2012-09-13
## o Some code cleanups.
## o Tentative bug fix: indices in gammaTemp.
## o SPEEDUP: removed unnecessary calls to 'complex'.
## 2012-08-13
## o Created.
############################################################################

Try the jointseg package in your browser

Any scripts or data that you put into this service are public.

jointseg documentation built on May 2, 2019, 6:10 a.m.