R/features.R

Defines functions features.default features

Documented in features features.default

#' @title Extract locations of features from a 'not' object
#' @description The function applies user-specified stopping criteria to extract change-points from \code{object}
#' generated by \code{\link{not}}. 
#' @details
#' Denote by \eqn{T_{1}, \ldots, T_{N}}{T_1, ..., T_N} the elements on the solution path \code{object$solution.path}, 
#' each representing a set of change-points.
#' When (\code{method="ic"}), the returned set of change-points is the one that minimises
#' \deqn{-2\mbox{log-likelihood}(\code{object}, \code{cpt}=T_k)+\mbox{penalty}(\code{object\$n}, \code{n.param}, ...),}{-2 log-likelihood(object, cpt=T_k) + penalty(object\$n, n.param, ...),}
#' over all \eqn{k}{k} such that the number of change-points in \eqn{T_k}{T_k} is smaller than or equal \code{q.max}. The log-likelihood is computed using the \code{\link{logLik}} routine,
#' while the penalty function is computed with \code{\link{sic.penalty}} (\code{penalty="sic"}), \code{\link{aic.penalty}} (\code{penalty="aic"}) or a user-defined penalty function (\code{penalty="user"}).
#' @rdname features
#' @export
#' @references R. Baranowski, Y. Chen, and P. Fryzlewicz (2019). Narrowest-Over-Threshold Change-Point Detection.  (\url{http://stats.lse.ac.uk/fryzlewicz/not/not.pdf})

features  <- function(object, ...)
  UseMethod("features")

#' @param object An object of 'not' class returned by \code{\link{not}}.
#' @param method A method of choosing the best solution in \code{object$solution.path}. 
#' If \code{method="ic"}, model minimising a chosen information criterion is selected. 
#' If \code{method="threshold"}, model is selected based on thresholding (see references for more details).
#' @param penalty Name of the penalty function to be used if \code{method="ic"}. If \code{penalty="user"},
#' a user-defined penalty function has to be passed via \code{penalty.fun}.
#' @param q.max Maximum number of change-points allowed to be detected. Used only for \code{method="ic"}.
#' @param penalty.fun Used only if \code{penalty="user"}. A function includes at least  the following arguments: sample size \code{n}, number of parameters used in a model \code{n.param}, and \code{...}.
#' For examples of such functions, see \code{\link{aic.penalty}} and \code{\link{sic.penalty}}.
#' @param th Used only if \code{method="threshold"}. A positive real number.
#' @param ... Further arguments that can be passed to the penalty function.
#' @return
#' \item{th}{Value of the threshold used (if \code{method="threshold"}) or selected on the solution path (if \code{method="ic"}).}
#' \item{cpt}{Estimated locations of the change-points.}
#' \item{ic}{Values of the information criterion minimised in order to find an optimal solution on the path (only if \code{method="ic"} was used).}
#' @method features default
#' @export
#' @rdname features
#' @examples
#' # **** Piecewisce-constant mean with Gaussian noise.
#' x <- c(rep(0, 100), rep(1,100)) + rnorm(100)
#' # *** identify potential locations of the change-points
#' w <- not(x, contrast = "pcwsConstMean")
#' # *** choose change-points using default settings
#' fo <- features(w)
#' # *** get the change-points
#' fo$cpt
#' # *** plot the SIC curve
#' plot(fo$ic)

features.default <- function(object,
                             method = c("ic", "threshold"),
                             penalty = c("sic", "aic", "user"),
                             q.max = 25,
                             penalty.fun,
                             th,
                             ...) {
  res <- list()
  
  ## verify input parameters
  res$method <- match.arg(method,  c("ic", "threshold"))
  
  ## determining the optimal features with information criteria
  if (res$method == "ic") {
    q.max <- as.integer(q.max)
    if (length(q.max) != 1)
      stop("q.max must be a single number")
    
    #verify penalty
    res$penalty <- match.arg(penalty, c("sic", "aic", "user"))
    
    if (res$penalty == "sic")
      penalty.fun <- sic.penalty
    else if (res$penalty == "aic")
      penalty.fun <- aic.penalty
    
    ##verify the penalty function
    penalty.fun <- as.function(penalty.fun)
    
    id <- which(object$solution.path$n.cpt <= q.max)
    
    res$ic <- rep(0, length(id) + 1)
    
    lh <- logLik(object, cpt = c())
    n.param <- attr(lh, "df")
    
    res$ic[1] <-
      -2 * lh + penalty.fun(n = length(object$x),
                            n.param = n.param,
                            cpt = c(),
                            ...)
    
    #calculate ic = log.lik + pen
    for (j in 2:length(res$ic)) {
      cpt.cand <- sort(object$solution.path$cpt[[id[j - 1]]])
      
      lh <- logLik(object, cpt = cpt.cand)
      n.param <- attr(lh, "df")
      res$ic[j] <-
        -2 * lh + penalty.fun(n = length(object$x),
                              n.param = n.param,
                              cpt = cpt.cand,
                              ...)
      
    }
    
    K.min.ic <- which.min(res$ic) - 1
    
    res$cpt <- list()
    
    if (K.min.ic == 0) {
      res$cpt <- NA
      res$th <-
        object$solution.path$th[1] + 10 * .Machine$double.eps
    }
    else {
      res$cpt <- sort(object$solution.path$cpt[[id[K.min.ic]]])
      res$th <- object$solution.path$th[id[K.min.ic]]
    }
    
    
  } else{
    ## determining the optimal number features with threshold
    if(missing(th)) stop("when method==\"threshold\", 'th' has to be specified")
    th <- as.numeric(th)[1]
    if (any(is.na(th)))
      stop("th must not contain na's")
    if (length(th) == 0)
      stop("th must contain at least one element")
    
    res$th <- th
    
    k <- which(object$solution.path$th  >= res$th)
    
    if (length(k) == 0)
      res$cpt <- NA
    else
      res$cpt <- object$solution.path$cpt[[max(k)]]
    
  }
  
  class(res) <- "features"
  
  return(res)
  
}
rbaranowski/not documentation built on March 22, 2022, 1:18 p.m.