R/catCUSUM.R

Defines functions categoricalCUSUM catcusum.LLRcompute

Documented in catcusum.LLRcompute categoricalCUSUM

#########################################################################
# Categorical CUSUM for y_t \sim M_k(n_t, \pi_t) for t=1,...,tmax
# Workhorse function doing the actual computations - no semantic checks
# are performed here, we expect "proper" input.
#
# Params:
#  y - (k) \times tmax observation matrix for all categories
#  pi0 - (k) \times tmax in-control prob vector for all categories
#  pi1 - (k) \times tmax out-of-control prob vector for all categories
#  dfun - PMF function of the categorical response, i.e. multinomial, binomial,
#         beta-binom, etc.
#  n   - vector of dim tmax containing the varying sizes
#  h   - decision threshold of the Categorical CUSUM
#  calc.at -
#########################################################################


catcusum.LLRcompute <- function(y, pi0, pi1, h, dfun, n, calc.at=TRUE,...) {
  #Initialize variables
  t <- 0
  stopped <- FALSE
  S <- numeric(ncol(y)+1)
  U <- numeric(ncol(y)+1)

  ##Check if dfun is the binomial
  isBinomialPMF <- isTRUE(attr(dfun,which="isBinomialPMF"))

  #Run the Categorical LR CUSUM
  while (!stopped) {
    #Increase time
    t <- t+1

    #Compute log likelihood ratio
    llr <-  dfun(y=y[,t,drop=FALSE], size=n[t], mu=pi1[,t,drop=FALSE], log=TRUE,...) - dfun(y=y[,t,drop=FALSE], size=n[t], mu=pi0[,t,drop=FALSE], log=TRUE, ...)
    #Add to CUSUM
    S[t+1] <- max(0,S[t] + llr)

    #For binomial data it is also possible to compute how many cases it would take
    #to sound an alarm given the past.
    if ((nrow(y) == 2) & calc.at) {
      ##For the binomial PMF it is possible to compute the number needed for an
      ##alarm exactly
      if (isBinomialPMF) {
        ##Calculations in ../maple/numberneededbeforealarm.mw.
        at <- (h - S[t] - n[t] * ( log(1 - pi1[1,t]) - log(1-pi0[1,t]))) / (log(pi1[1,t]) - log(pi0[1,t]) - log(1-pi1[1,t]) + log(1-pi0[1,t]))
        U[t+1] = ceiling(max(0,at))
        ##Note: U[t+1] Can be higher than corresponding n_t.
        if (U[t+1]>n[t]) U[t+1] <- NA
      } else {
        #Compute the value at by trying all values betweeen 0 and n_t. If
        #no alarm, then we know the value for an alarm must be larger than y_t
        if (S[t+1]>h) {
          ay <- rbind(seq(0,y[1,t],by=1),n[t]-seq(0,y[1,t],by=1))
        } else {
          ay <- rbind(seq(y[1,t],n[t],by=1),n[t]-seq(y[1,t],n[t],by=1))
        }

        llr <-  dfun(ay, size=n[t], mu=pi1[,t,drop=FALSE], log=TRUE,...) - dfun(ay, size=n[t], mu=pi0[,t,drop=FALSE], log=TRUE, ...)
        alarm <- llr > h-S[t]

        ##Is any a_t==TRUE?, i.e. does a y_t exist or is the set over which to
        ##take the minimum empty?
        if (any(alarm)) {
          U[t+1] <- ay[1,which.max(alarm)]
        } else {
          U[t+1] <- NA
        }
      }
    }

    ##Only run to the first alarm. Then reset.
    if ((S[t+1] > h) | (t==ncol(y))) { stopped <- TRUE}
  }

  ##If no alarm at the end put rl to end (its censored! hoehle: Actually it should be length+1!
  ##but the chopping is written such that copying occurs until the final index (hence we can't
  ##just do ncol(pi0)+1
  ##Hence, N is more like the last index investigated.
  if (any(S[-1]>h)) {
    t <- which.max(S[-1] > h)
  } else {
    t <- ncol(pi0) ##Last one
  }
  ##Missing: cases needs to be returned!
  return(list(N=t,val=S[-1],cases=U[-1]))
}

######################################################################
## Wrap function to process sts object by categoricalCUSUM (new S4
## style). Time varying number of counts is found in slot populationFrac.
##
## Params:
##  control - list with the following components
##    * range - vector of indices in disProgObj to monitor
##    * h     - threshold, once CUSUM > h we have an alarm
##    * pi0 - (k-1) \times tmax in-control prob vector for all but ref cat
##    * pi1 - (k-1) \times tmax out-of-control prob vector for all but ref cat
##    * dfun - PMF to use for the computations, dmultinom, dbinom, dBB, etc.
## ... - further parameters to be sent to dfun
######################################################################

categoricalCUSUM <- function(stsObj,
                               control = list(range=NULL,h=5,
                                 pi0=NULL, pi1=NULL, dfun=NULL, ret=c("cases","value")),...) {

  ##Set the default values if not yet set
  if(is.null(control[["pi0"]])) {
    stop("no specification of in-control proportion vector pi0")
  }
  if(is.null(control[["pi1"]])) {
    stop("no specification of out-of-control proportion vector pi1")
  }
  if(is.null(control[["dfun"]])) {
    stop("no specification of the distribution to use, e.g. dbinom, dmultinom or similar")
  }

  if(is.null(control[["h"]]))
    control$h <- 5
  if(is.null(control[["ret"]]))
  	control$ret <- "value"

  ##Extract the important parts from the arguments
  if (is.numeric(control[["range"]])) {
    range <- control$range
  } else {
    stop("the range needs to be an index vector")
  }
  stsObj <- stsObj[range,]
  
  y <- t(stsObj@observed)
  pi0 <- control[["pi0"]]
  pi1 <- control[["pi1"]]
  dfun <- control[["dfun"]]
  control$ret <- match.arg(control$ret, c("value","cases"))
  ##Total number of objects that are investigated. Note this
  ##can't be deduced from the observed y, because only (c-1) columns
  ##are reported so using: n <- apply(y, 2, sum) is wrong!
  ##Assumption: all populationFrac's contain n_t and we can take just one
  n <- stsObj@populationFrac[,1]

  ##Semantic checks
  if ( ((ncol(y) != ncol(pi0)) | (ncol(pi0) != ncol(pi1))) |
      ((nrow(y) != nrow(pi0)) | (nrow(pi0) != nrow(pi1)))) {
    stop("dimensions of y, pi0 and pi1 have to match")
  }
  if ((control$ret == "cases") & nrow(pi0) != 2) {
    stop("cases can only be returned in case k=2")
  }
  if (length(n) != ncol(y)) {
    stop("length of n has to be equal to number of columns in y")
  }
  ##Check if all n entries are the same
  if (!all(apply(stsObj@populationFrac,1,function(x) all.equal(as.numeric(x),rev(as.numeric(x)))))) {
    stop("all entries for n have to be the same in populationFrac")
  }

  ##Reserve space for the results
  ##start with cusum[timePoint -1] = 0, i.e. set cusum[1] = 0
  alarm <- matrix(data = FALSE, nrow = length(range), ncol = nrow(y))
  upperbound <- matrix(data = 0, nrow = length(range), ncol = nrow(y))

  ##Small helper function to be used along the way --> move to other file!
  either <- function(cond, whenTrue, whenFalse) {
    if (cond) return(whenTrue) else return(whenFalse)
  }

  ##Setup counters for the progress
  doneidx <- 0
  N <- 1
  noofalarms <- 0
  noOfTimePoints <- length(range)

  #######################################################
  ##Loop as long as we are not through the entire sequence
  #######################################################
  while (doneidx < noOfTimePoints) {
    ##Run Categorical CUSUM until the next alarm
    res <- catcusum.LLRcompute(y=y, pi0=pi0, pi1=pi1, n=n, h=control$h, dfun=dfun,calc.at=(control$ret=="cases"),...)

    ##Note: res$N is the last index investigated in the updated y vector.
    ##If res$N == ncol(y) no alarm was found in the last segment.
    ##In case an alarm found put in into the log and reset the chart at res$N+1.
    if (res$N < ncol(y)) {
      ##Put appropriate value in upperbound
      upperbound[1:res$N + doneidx,]  <- matrix(rep(either(control$ret == "value", res$val[1:res$N] ,res$cases[1:res$N]),each=ncol(upperbound)),ncol=ncol(upperbound),byrow=TRUE)
      alarm[res$N + doneidx,] <- TRUE

      ##Chop & get ready for next round
      y <- y[,-(1:res$N),drop=FALSE]
      pi0 <- pi0[,-(1:res$N),drop=FALSE]
      pi1 <- pi1[,-(1:res$N),drop=FALSE]
      n <- n[-(1:res$N)]

      ##Add to the number of alarms
      noofalarms <- noofalarms + 1
    }
    ##cat("doneidx = ",doneidx, "\t res$N =", res$N,"\n")
    ##Update index of how far we are in the time series
    doneidx <- doneidx + res$N
  }

  ##Add upperbound-statistic of last segment (note: an alarm might or might be reached here)
  upperbound[(doneidx-res$N+1):nrow(upperbound),]  <- matrix( rep(either(control$ret == "value", res$val, res$cases),each=ncol(upperbound)),ncol=ncol(upperbound),byrow=TRUE)
  ##Inherit alarms as well (last time point might contain an alarm!)
  alarm[(doneidx-res$N+1):nrow(upperbound),] <- matrix( rep(res$val > control$h,each=ncol(alarm)), ncol=ncol(alarm),byrow=TRUE)

  # Add name and data name to control object
  control$name <- "categoricalCUSUM"
  control$data <- NULL #not supported anymore

  #store results in the sts object
  stsObj@alarm <- alarm
  stsObj@upperbound <- upperbound
  stsObj@control <- control

  #Ensure dimnames in the new object
  stsObj <- fix.dimnames(stsObj)

  #Done
  return(stsObj)
}

Try the surveillance package in your browser

Any scripts or data that you put into this service are public.

surveillance documentation built on July 20, 2022, 1:06 a.m.