Nothing
#' MFT.mean
#'
#' The multiple filter test for mean change detection in time series or sequences of random variables.
#'
#' @param X numeric vector, input sequence of random variables
#' @param S numeric, start of time interval, default: NULL, if NULL then 1 is chosen
#' @param E numeric, end of time interval, default: NULL, if NULL then length(X) is chosen, needs E > S.
#' @param autoset.H logical, automatic choice of window size H
#' @param H vector, window set H, all elements must be increasing, the largest element must be =< (T/2). H is automatically set if autoset.H = TRUE
#' @param alpha numeric, in (0,1), significance level
#' @param method either "asymptotic" or "fixed", defines how threshold Q is derived, default: "asymptotic", If "asymptotic": Q is derived by simulation of limit process L (Brownian motion); possible set number of simulations (sim), If "fixed": Q may be set manually (Q)
#' @param sim integer, > 0, No of simulations of limit process (for approximation of Q), default = 10000
#' @param rescale logical, if TRUE statistic G is rescaled to statistic R, default = FALSE
#' @param Q numeric, rejection threshold, default: Q is simulated according to sim and alpha.
#' @param perform.CPD logical, if TRUE change point detection algorithm is performed
#' @param print.output logical, if TRUE results are printed to the console
#' @return invisible
#' \item{M}{test statistic}
#' \item{Q}{rejection threshold}
#' \item{method}{how threshold Q was derived, see 'Arguments' for detailed description}
#' \item{sim}{number of simulations of the limit process (approximation of Q)}
#' \item{rescale}{states whether statistic G is rescaled to R}
#' \item{CP}{set of change points estmated by the multiple filter algorithm, increasingly ordered in time}
#' \item{means}{estimated mean values between adjacent change points}
#' \item{S}{start of time interval}
#' \item{E}{end of time interval}
#' \item{Tt}{length of time interval}
#' \item{H}{window set}
#' \item{alpha}{significance level}
#' \item{perform.CPD}{logical, if TRUE change point detection algorithm was performed}
#' \item{tech.var}{list of technical variables with processes X and G_ht or R_ht}
#' \item{type}{type of MFT which was performed: "mean"}
#'
#' @seealso \code{\link{plot.MFT}, \link{summary.MFT}, \link{MFT.rate}, \link{MFT.variance}, \link{MFT.peaks}}
#' @author Michael Messer, Stefan Albert, Solveig Plomer and Gaby Schneider
#'
#' @references
#' Michael Messer, Stefan Albert and Gaby Schneider (2018). The multiple filter test for change point detection in time
#' series. Metrika <doi:10.1007/s00184-018-0672-1>
#'
#' @examples
#' # Normal distributed sequence with 3 change points of the mean (at n=100, 155, 350)
#' set.seed(50)
#' X1 <- rnorm(400,0,1); X2 <- rnorm(400,3,1); X3 <- rnorm(400,5,1); X4 <- rnorm(600,4.6,1)
#' X <- c(X1[1:100],X2[101:155],X3[156:350],X4[351:600])
#' mft <- MFT.mean(X)
#' plot(mft)
#' # Set additional parameters (window set)
#' mft2 <- MFT.mean(X,autoset.H=FALSE,H=c(80,160,240))
#' plot(mft2)
#'
#'
#' @rdname MFT.mean
#' @import stats
#' @import grDevices
#' @import graphics
#' @export
###### end
MFT.mean <-
function(X,autoset.H=TRUE,S=NULL,E=NULL,H=NULL,alpha=0.05,method="asymptotic",sim=10000,rescale=FALSE,Q=NA,perform.CPD=TRUE,print.output=TRUE){
###
### Set Parameters
###
if(is.null(S) & is.null(E)) {S <- 1; E <- length(X)}
if(is.null(S) & !is.null(E)) {S <- 1}
if(!is.null(S) & is.null(E)) {E <- length(X)}
if(E-S <= 0){stop("Invalid choice of S and E: Need S < E.")}
if(! method %in% c("aymptotic","fixed")){"Invalid choice of method: method must be 'asymptotic' or 'fixed'"}
if(method == "fixed" & !is.numeric(Q)){stop("Invalid choice of Q: Q must be positive real number")}
if(method == "fixed" & !(is.numeric(Q) & Q>0)){cat("Warning: non-positive threshold might be inappropriate. Possibly choose Q > 0",sep="\n") }
if(method != "fixed" & !is.na(Q)){cat("Warning: Q is derived by simulation. In order to set Q manually, choose: method = 'fixed'",sep="\n")}
# Begin-if-autoset.H
if(autoset.H){
Tt <- length(X[S:E]) # Length of interval.
if (Tt<=100) {stop("Automatic choice of windows failed. Time series is too short (need at least 101 random variables). Set H manually.")}
H <- seq(50,min((Tt-1)/2,200),25)
}
# End-if-set.H
# Begin-if-!autoset.H
if(!autoset.H){
if(is.null(H)){stop("If autoset.H is FALSE, the vector of window sizes H must be set")}
Tt <- length(X[S:E])
if(!all(c(diff(c(1,H,(Tt/2))))>0) | !all(H%%1 == 0)){stop("Invalid choice of window set: H needs to be an increasing ordered vector of integers, with min(H) > 1 and max(H) <= Tt/2")}
}
# End-if-!autoset.H
X <- X[S:E] # Shorten sequence to region of analysis.
if( !sim%%1==0 | sim <= 0) {stop("Invalid choice of number of simulations: sim must be a positive integer")}
if(( method=="asymptotic" ) & ( sim < 10000 )){cat("Warning: Number of simulations might be too low",sep="\n")}
if( alpha*(1-alpha) <= 0){stop("Invalid choice of significance level: alpha must be in (0,1)")}
###
### sim.parameter
###
sim.parameter <- function(sim=sim,Tt=Tt,H=H,alpha=alpha,rescale=rescale){
# Begin-maxSternh: Given Brownian motion W, max(L_ht) is calculated for fixed h in H.
maxSternh <- function(h=h,W=W,Tt=Tt){
Wt_plus <- W[(2*h+1):(Tt)] # W_(t+h)
Wt <- W[(1+h):(Tt-h)] # W_(t)
Wt_minus<- W[1:(Tt-2*h)] # W_(t-h)
return(max((1/sqrt(2*h))*abs(Wt_plus - 2*Wt + Wt_minus))) # max_(t)|L_(h,t)|
}
# End-maxSternh
# Begin-sim.maxH: Simulates Brownian motion and calculates max(L_ht) for all h in H.
sim.maxH <- function(Tt=Tt,H=H){
W <- c(0,cumsum(rnorm((Tt-1),mean = 0, sd=1))) # Standard Brownian Motion of length Tt.
return(vapply(as.matrix(H),FUN=maxSternh,W=W,Tt=Tt,numeric(length(1)))) # Apply the calculation of the maximum of Lht.
}
# End-sim.maxH
# Begin-sim.maxmax: Simulates Brownian motions and calculates mean(max|L_ht|),
# sd(max|L_ht|) and Q.
sim.maxmax <- function(sim=sim,Tt=Tt,H=H,alpha=alpha,rescale=rescale){
re <- matrix(replicate(sim,sim.maxH(Tt=Tt,H=H)),nrow=sim,byrow=TRUE) # Simulates the Browian motion sim times and calculates maxLht for every h in H.
meanH <- apply(re,MARGIN=2,mean) # Calculates the empirical mean of the set of max Lhts for all h in H.
sdH <- apply(re,MARGIN=2,sd) # Calculates the empirical standard deviation of the Lhts for all h in H.
if (rescale) {maxmax <- apply((t(re)-meanH)/sdH, MARGIN=2,max)} # Calculates M* sim times.
if (!rescale){maxmax <- apply((t(re)), MARGIN=2,max)}
Q <- quantile(maxmax,1-alpha) # Calculates the empirical alpha-Quantile M*.
list(Q=Q,meanH=meanH,sdH=sdH) # Writes the threshold Q, and the vectors of means and sd in a list.
}
# End-sim.maxmax
sim.maxmax(sim=sim,Tt=Tt,H=H,alpha=alpha,rescale=rescale) # Calls function sim.maxmax.
}
# End-sim.parameter
parameter <- list()
if(! method %in% c("aymptotic","fixed")){"Invalid choice of method: method must be 'asymptotic' or 'fixed'"}
if(rescale | method == "asymptotic") parameter <- sim.parameter(sim=sim,Tt=Tt,H=H,alpha=alpha,rescale=rescale) # Simulates parameter values.
if(method == "fixed" & !is.numeric(Q)){stop("Invalid choice of Q: Q must be positive real number")}
if(method == "fixed") {parameter$Q <- Q}
if(method == "fixed" & !rescale){sim <- NA}
if( alpha*(1-alpha) <= 0){stop("Invalid choice of significance level: alpha must be in (0,1)")}
###
### Calculate R_ht
###
# Calculates G_ht for fixed t and h.
ght <- function(t,Xp,h){
X <- Xp
windowRight <- X[(t+1):(t+h)] # Values in right window.
windowLeft <- X[(t-h+1):t]
meanRight <- mean(windowRight) # Mean in right window.
meanLeft <- mean(windowLeft)
varianceRight <- var(windowRight) # Variance in right window.
varianceLeft <- var(windowLeft)
if (is.na(varianceLeft) | is.na(varianceRight) | varianceLeft <= 0 | varianceRight <= 0){g <- 0}
else { g <- (meanRight - meanLeft) / sqrt((varianceLeft + varianceRight)/h)} # Calculates G_ht
return(g)
}
# End-Ght
# Calculate R_ht for all h and t.
Rh <- function(h,Xp,H,rescale,meanH=NULL,sdH=NULL){
seq2 <- seq(1+h,length(Xp)-h,by=1) # All possible t values for window size h.
G <- vapply(seq2,FUN = ght,Xp=Xp,h=h,numeric(1)) # Calculates all G_ht for window size h.
if(rescale){return( (abs(G) - meanH[which(H==h)]) / sdH[which(H==h)] )}
if(!rescale){return(abs(G))}
}
# End-Rht
if (rescale) {R_ht <- lapply(as.matrix(H),FUN=Rh,Xp=X,H=H,rescale=rescale,meanH=parameter$meanH,sdH=parameter$sdH)} # R_ht values for every window size.
if (!rescale){R_ht <- lapply(as.matrix(H),FUN=Rh,Xp=X,H=H,rescale=rescale)} # R_ht values for every window size.
Mh <- vapply(R_ht,max,numeric(1)) # Maximum for every window size.
M <- max(Mh); names(M) <- "Test statistic M" # Global Maximum M, Test statistic.
###
### Perform Change Point Detection (CPD)
###
if(perform.CPD){
# Perform Single window detection (SWD) for a fixed window size h.
SWD <- function(R,Tt,Q){
h <- (Tt - (length(R))) / 2 # Calculates window size h from R_ht.
CP <- rep(NA,Tt); j <- 1 # Vector to safe change poinst. Set index to 1.
while ( any(R > Q) ) { # As long as the process R_ht > Q, search for change points.
c <- which( R == max(R) )[1] # Search for the index of the maximum value.
CP[j] <- (c-1) + h # Calculate inx in actual timeline.
left <- max (1, c-h+1) # Boundary left h-neighbourhood.
right <- min ((Tt-2*h)+1, c+h-1) # Boundary right h-neighbourhood.
R[left:right] <- rep(Q-1,length(left:right)) # Set value in h-neighbourhood to insignificant value.
j <- j + 1 # Increase counter.
}
# End-while
if( all(is.na(CP)) ){return(numeric(0))} # If all CP do not have a value, return 0.
else{return(as.vector(na.omit(CP)))} # Else return calculated CPs.
}
# End-SWD
SWD <- lapply(R_ht,FUN=SWD,Tt=Tt,Q=parameter$Q) # SWD for all h in H.
# Combine SWD results.
delete <- function(element,h2,c1){ # If c1 falls within the h2-neighbourhood of element, then element is deleted.
hit <- any( ifelse( element - h2 < c1 & c1 < element + h2, TRUE, FALSE) )
return(hit)
}
# End-delete
outside <- function(c1,c2,h2){ # Outside is logical vector of length c2. TRUE -> Component deleted. FALSE -> Component remains.
tot_hit <- apply(as.matrix(c2),MARGIN=1,FUN=delete,h2=h2,c1=c1) # For a fixed CP c1 and a fixed window size h2, test all c2s for overlaps.
return(tot_hit)
}
# End-function
# With "outside" run through all "h-areas" and delete "overlap-CPs".
c <- SWD[[1]] # First, accept CPs detected with smallest h.
h_val <- rep(H[1],length(SWD[[1]])) # Corresponding h value.
if(length(H)>1){ # If there are more than 1 window sizes in H, work through all.
for (k in 2:(length(H))){ # For windowsize with index 2 and up ...
if(!is.na(SWD[[k]][1]) ){ # Test if there are CP detected with window size hk.
put_out <- outside(c,SWD[[k]],H[k]) # Tests if CP in c are in a hk-neighbourhood of the CP in SWD[[k]]. Delete if TRUE.
c <- c(c,SWD[[k]][put_out == FALSE]) # All CP which are not deleted, are added to the Set c.
h_val <- c(h_val,rep(H[k],length(c)-length(h_val))) # New h-value to test by.
} # End-if
} # End-for
} # End-if-length(H)
CP <- cbind(c,h_val) # Safes CPs together with window size of detection.
if(dim(CP)[1]>1){CP <- CP[order(CP[,1]),]} # Sorts CPs by their detection h value.
colnames(CP) <- c("changepoint","h_value") # Assigns cloumn names.
if(dim(CP)[1] > 0) {rownames(CP) <- as.character(1:dim(CP)[1])} # Assigns row names as characters.
# Calculats mean between change points.
CPs <- c(1,as.vector(CP[,1]),length(X)) # CP plus start and end
means <- rep(NA,(length(CPs)-1)) # Vector for means.
for(i in 1:(length(CPs)-1)){
means[i] <- mean(X[CPs[i]:CPs[i+1]]) # Mean for every section.
}
# End-for
names(means) <- apply(as.matrix(1:length(means)),1,function(index){paste("mu_",index,sep="")})
}
# End-perform.CPD
###
#### Output
###
if (perform.CPD) CP[,1]<-CP[,1]+S
if(print.output){
cat("","\n")
cat("MFT table",sep="\n"); cat("","\n")
cat(paste("Tt = ",Tt," and H = {",paste(H,collapse=", "),"}",sep="")); cat("","\n")
if(M>parameter$Q)
{cat(paste("Stationarity was rejected: M = ",round(M,2)," > Q = ", Q = round(parameter$Q,2),sep=""),sep="\n")}
else{cat(paste("Stationarity was not rejected: M = ",round(M,2)," < Q = ", Q = round(parameter$Q,2),sep=""),sep="\n")}; cat("","\n")
if(method=="asymptotic"){cat("(Threshold Q derived by simulation) \n\n")}
if(method=="fixed"){cat("(Threshold Q set by user) \n\n")}
if(perform.CPD){
cat("CPD was performed: ")
if(dim(CP)[1]==0){cat("No change points detected")}
if(dim(CP)[1]==1){cat(paste(dim(CP)[1],"change point detected at "))}
if(dim(CP)[1]>1){cat(paste(dim(CP)[1],"change points detected at "))}
if(dim(CP)[1]>0){cat(paste(sort(CP[,1])),sep=", ")}; cat("","\n"); cat(" ")
if(length(means)==1){cat(paste("The estimated mean is",signif(means,2)) )} else{cat("The estimated means are ")
cat(paste(signif(means,2)),sep=", ")}
}
# End-if-perform.CPD
if(!perform.CPD){cat("CPD was not performed")}
}
# End-if-print.output
#if(perform.CPD) SWD<-lapply(SWD,function(x) x<-x+S)
if(perform.CPD==FALSE){CP<-NA; means<-NA}
if (rescale) tech.var<-list(X=X,R_ht=R_ht,G_ht=NA)
else tech.var<-list(X=X,R_ht=NA,G_ht=R_ht)
mft<-list(M=M,Q=parameter$Q,method=method,rescale=rescale,CP=CP,expectation=means,S=S,E=E,Tt=Tt,H=H,sim=sim,alpha=alpha,perform.CPD=perform.CPD,tech.var=tech.var,type="mean")
class(mft) <- "MFT"
invisible(mft)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.