R/changepoints.R

#' @title Change-points detected by WBS or BS
#' @description The function applies user-specified stopping criteria to extract change-points from \code{object}
#' generated by \code{\link{wbs}} or \code{\link{sbs}}. For \code{object} of class 'sbs', the function returns 
#' change-points whose corresponding test statistic exceeds threshold given in \code{th}. For \code{object} of class 'wbs',
#' the change-points can be also detected using information criteria with penalties specified in \code{penalty}.  
#' @details
#' For the change-point detection based on thresholding (\code{object} of class 'sbs' or 'wbs'), the user can either specify the thresholds in \code{th} directly, 
#' determine the maximum number \code{Kmax} of change-points to be detected, or let \code{th} depend on \code{th.const}.
#' 
#' When \code{Kmax} is given, the function automatically sets \code{th} to the lowest threshold such that the number of detected change-points is lower or equal than \code{Kmax}. 
#' Note that for the BS algorithm it might be not possible to find the threshold such that exactly \code{Kmax} change-points are found.
#' 
#' When  \code{th} and \code{Kmax} are omitted, the threshold value is set to
#' \deqn{th = sigma \times th.const \sqrt{2\log(n)},}{th=sigma * th.const* sqrt(2 log(n)),}
#' where sigma is the Median Absolute Deviation estimate of the noise level and \eqn{n}{n} is the number of elements in \code{x}. 
#' 
#' For the change-point detection based on information criteria (\code{object} of class 'wbs' only), 
#' the user can specify both the maximum number of change-points (\code{Kmax}) and a type of the penalty used. 
#' Parameter \code{penalty} should contain a list of characters with names of the functions of at least two arguments (\code{n} and \code{cpt}).
#' For each penalty given, the following information criterion is minimized over candidate sets of change-points \code{cpt}:
#' \deqn{\frac{n}{2}\log\hat{\sigma}_{k}^{2}+penalty(n,cpt),}{n/2 log(sigma_k)+ penalty(n,cpt),}
#' where \eqn{k}{k} denotes the number of elements in \eqn{cpt}{cpt}, \eqn{\hat{\sigma}_{k}}{sigma_k} is the corresponding maximum 
#' likelihood estimator of the residual variance.        
#' @param object an object of 'wbs' or 'sbs' class returned by, respectively, \code{\link{wbs}} and \code{\link{sbs}} functions
#' @param th a vector of positive scalars
#' @param th.const a vector of positive scalars
#' @param penalty a character vector with names of penalty functions used
#' @param Kmax a maximum number of change-points to be detected 
#' @param ... further arguments that may be passed to the penalty functions
#' @return  
#' \item{sigma}{Median Absolute Deviation estimate of the noise level}
#' \item{th}{a vector of thresholds}
#' \item{no.cpt.th}{the number of change-points detected for each value of \code{th}}
#' \item{cpt.th}{a list with the change-points detected for each value of \code{th}}
#' \item{Kmax}{a maximum number of change-points detected}
#' \item{ic.curve}{a list with values of the chosen information criteria}
#' \item{no.cpt.ic}{the number of change-points detected for each information criterion considered}
#' \item{cpt.ic}{a list with the change-points detected for each information criterion considered}
#' @rdname changepoints
#' @export
#' @examples 
#' #we generates  gaussian noise + Poisson process signal with 10 jumps on average
#' set.seed(10)
#' N <- rpois(1,10)
#' true.cpt <- sample(1000,N)
#' m1 <- matrix(rep(1:1000,N),1000,N,byrow=FALSE)
#' m2 <- matrix(rep(true.cpt,1000),1000,N,byrow=TRUE)
#' x <- rnorm(1000) + apply(m1>=m2,1,sum)
#' 
#' # we apply  the BS and WBS algorithms with default values for their parameters
#' 
#' s <- sbs(x)
#' w <- wbs(x)
#' 
#' s.cpt <- changepoints(s)
#' s.cpt
#' 
#' w.cpt <- changepoints(w)
#' w.cpt
#' 
#' #we can use different stopping criteria, invoking sbs/wbs functions is not necessary  
#' 
#' s.cpt <- changepoints(s,th.const=c(1,1.3))
#' s.cpt
#' w.cpt <- changepoints(w,th.const=c(1,1.3))
#' w.cpt


changepoints  <- function(object,...) UseMethod("changepoints")


#' @method changepoints sbs
#' @export 
#' @rdname changepoints
#' @importFrom stats mad var quantile

changepoints.sbs <- function(object,th=NULL,th.const=1.3,Kmax=NULL,...){
	
	th <- as.numeric(th)
	th.const <- as.numeric(th.const)
	Kmax <- as.integer(Kmax)
	
	w.cpt <- list()
	
	w.cpt$sigma <- mad(diff(object$x)/sqrt(2))
	
	if(length(th)) w.cpt$th <- as.numeric(th)
	else if(length(Kmax)){
		if(Kmax[1] < 1) stop("Kmax should be at least 1")
		Kmax <- as.integer(min(nrow(object$res),Kmax))
		if(Kmax < nrow(object$res)){
			tmp <- sort(object$res[,5],decreasing=TRUE)
			w.cpt$th <- tmp[Kmax]-min(diff(sort(unique(tmp),decreasing=FALSE)))/2
		}else w.cpt$th <- 0;
		
	}else{
		w.cpt$th <-  th.const * w.cpt$sigma *sqrt(2*log(object$n))
	}
	

	
	th.l <- length(w.cpt$th)
	
	if(th.l) w.cpt$th <- sort(w.cpt$th)
	else stop("th vector should contain at least one value")
	
	if(NA%in%w.cpt$th) stop("th cannot contain NA's")
	if(NA%in%w.cpt$Kmax) stop("Kmax cannot be NA")
	
	if(length(Kmax)>1){
		Kmax <- Kmax[1]
		warning("Kmax could contain only one number")
	}
	

	
	res.tmp <- matrix(object$res[order(abs(object$res[,4]),decreasing=TRUE),c(3,5)],ncol=2)
	
	w.cpt$no.cpt.th <- as.integer(rep(0,th.l))
	
	w.cpt$cpt.th <- list()
	
	for(i in 1:th.l)
		if(nrow(res.tmp)){
			
			res.tmp <- matrix(res.tmp[res.tmp[,2]>w.cpt$th[i],],ncol=2)
			if(nrow(res.tmp)){
				w.cpt$cpt.th[[i]] <- res.tmp[,1]
				w.cpt$no.cpt.th[i] <- length(w.cpt$cpt.th[[i]])
			}else w.cpt$cpt.th[[i]] <- NA
		}else w.cpt$cpt.th[[i]] <- NA
		
		
	w.cpt$Kmax <- as.integer(max(w.cpt$no.cpt.th))	
	
	
		
	return(w.cpt)
	
}

#' @method changepoints wbs
#' @export 
#' @rdname changepoints


changepoints.wbs <- function(object,th=NULL,th.const=1.3, Kmax=50,penalty=c("ssic.penalty","bic.penalty","mbic.penalty"),...){
	

	w.cpt <- changepoints.sbs(object,th,th.const=th.const,Kmax=NULL)
	
	penalty <- as.character(penalty)

	
	#ic part
	if(length(penalty))
	{
		if(Kmax==0 || object$n<=3) {
			w.cpt$cpt.ic$ssic <- w.cpt$cpt.ic$bic <- 	w.cpt$cpt.ic$mbic <- NA
			w.cpt$no.cpt.ic[c("ssic","mbic","bic")] <- as.integer(c(0,0,0))
			w.cpt$ic.curve <- list()
			w.cpt$ic.curve$bic<-w.cpt$ic.curve$ssic<-w.cpt$ic.curve$mbic <- NA
			
			if(Kmax==0){
				stop("no change-poinst found, choose larger Kmax")	
			}else{
				stop("sample size is too small")
			}
		
		}else{
		
			w.cpt$Kmax <- as.integer(Kmax)
			cpt.cand <- changepoints.sbs(object,th,th.const=1.3,Kmax=w.cpt$Kmax)$cpt.th[[1]]		
			if(NA%in%cpt.cand) stop("no change-poinst found, choose larger Kmax")
			
			if(length(cpt.cand)>min(Kmax,object$n-2)) cpt.cand <- cpt.cand[1:min(Kmax,object$n-2)]
			len.cpt<- length(cpt.cand)
			
			w.cpt$ic.curve <- list()
			
			
			

			
			for(j in 1:length(penalty)){
				w.cpt$ic.curve[[penalty[j]]] <- rep(0,len.cpt+1)
			}
			

			
				
			if(len.cpt) for(i in len.cpt:1){

				min.log.lik <- object$n/2 * log(sum((object$x -  means.between.cpt(object$x,cpt.cand[1:i]))^2)/object$n)
				
				for(j in 1:length(penalty)){
					
					w.cpt$ic.curve[[penalty[j]]][i+1] <- min.log.lik + do.call(penalty[j],list(n=object$n,cpt=cpt.cand[1:i],...))
				}
			}
			
			for(j in 1:length(penalty)){
				w.cpt$ic.curve[[penalty[j]]][1] <- object$n/2 * log(var(object$x))
			}
			
			
			w.cpt$cpt.ic <- list()
			
			for(j in 1:length(penalty)){
				tmp <- quantile(which.min(w.cpt$ic.curve[[penalty[j]]]), .5, type=3)
				
				if(tmp ==1){
					w.cpt$cpt.ic[[penalty[j]]] <- NA
					w.cpt$no.cpt.ic[penalty[j]] <- as.integer(0)
				}else{
					w.cpt$cpt.ic[[penalty[j]]] <- cpt.cand[1:(tmp-1)]
					w.cpt$no.cpt.ic[penalty[j]] <- as.integer(tmp-1);
				}
			}	
		
		}			
	}
	return(w.cpt)
	
}

Try the wbs package in your browser

Any scripts or data that you put into this service are public.

wbs documentation built on May 15, 2019, 1:04 a.m.