R/algo_farrington.R

Defines functions algo.farrington refvalIdxByDate algo.farrington.threshold algo.farrington.fitGLM.populationOffset algo.farrington.fitGLM.fast algo.farrington.fitGLM algo.farrington.assign.weights anscombe.residuals

Documented in algo.farrington algo.farrington.assign.weights algo.farrington.fitGLM algo.farrington.fitGLM.fast algo.farrington.fitGLM.populationOffset algo.farrington.threshold anscombe.residuals refvalIdxByDate

### R code from vignette source 'Rnw/algo_farrington.Rnw'
### Encoding: ISO8859-1

###################################################
### code chunk number 1: algo_farrington.Rnw:25-35
###################################################
anscombe.residuals <- function(m,phi) {
  y <- m$y
  mu <- fitted.values(m)

  #Compute raw Anscombe residuals
  a <- 3/2*(y^(2/3) * mu^(-1/6) - mu^(1/2))
  #Compute standardized residuals
  a <- a/sqrt(phi * (1-hatvalues(m)))
  return(a)
}


################################################################################
# WEIGHTS FUNCTION
################################################################################

algo.farrington.assign.weights <- function(s,weightsThreshold=1) {
    #s_i^(-2) for s_i<weightsThreshold and 1 otherwise
    gamma <- length(s)/(sum(    (s^(-2))^(s>weightsThreshold) ))
    omega <- numeric(length(s))
    omega[s>weightsThreshold] <- gamma*(s[s>weightsThreshold]^(-2))
    omega[s<=weightsThreshold] <- gamma
    return(omega)
}

###################################################
### code chunk number 3: algo_farrington.Rnw:136-305
###################################################
algo.farrington.fitGLM <- function(response,wtime,timeTrend=TRUE,reweight=TRUE,...) {
  #Model formula depends on whether to include a time trend or not.
  theModel <- as.formula(ifelse(timeTrend, "response~1+wtime","response~1"))

  #Fit it -- this is slow. An improvement would be to use glm.fit here.
  model <- glm(theModel, family = quasipoisson(link="log"))

 #Check convergence - if no convergence we return empty handed.
  if (!model$converged) {
    #Try without time dependence
    if (timeTrend) {
      cat("Warning: No convergence with timeTrend -- trying without.\n")
      #Set model to one without time trend
      theModel <- as.formula("response~1")
      model <- glm(response ~ 1, family = quasipoisson(link="log"))
    }

    if (!model$converged) {
      cat("Warning: No convergence in this case.\n")
      print(cbind(response,wtime))
      return(NULL)
    }
  }

  #Overdispersion parameter phi
  phi <- max(summary(model)$dispersion,1)

  #In case reweighting using Anscome residuals is requested
  if (reweight) {
    s <- anscombe.residuals(model,phi)
    omega <- algo.farrington.assign.weights(s)
    model <- glm(theModel,family=quasipoisson(link="log"),weights=omega)
    #Here, the overdispersion often becomes small, so we use the max
    #to ensure we don't operate with quantities less than 1.
    phi <- max(summary(model)$dispersion,1)
  } # end of refit.


  #Add wtime, response and phi to the model
  model$phi <- phi
  model$wtime <- wtime
  model$response <- response
  #Done
  return(model)
}

######################################################################
# The algo.farrington.fitGLM function in a version using glm.fit
# which is faster than the call using "glm.
# This saves lots of overhead and increases speed.
#
# Author: Mikko Virtanen (@thl.fi) with minor modifications by Michael Hoehle
# Date:   9 June 2010
#
# Note: Not all glm results may work on the output. But for the
# necessary ones for the algo.farrington procedure work.
######################################################################

algo.farrington.fitGLM.fast <- function(response,wtime,timeTrend=TRUE,reweight=TRUE, ...) {
  #Create design matrix and formula needed for the terms object
  #Results depends on whether to include a time trend or not.
  if (timeTrend) {
    design<-cbind(intercept=1,wtime=wtime)
    Formula<-response~wtime
  } else {
    design<-matrix(1,nrow=length(wtime),dimnames=list(NULL,c("intercept")))
    Formula<-response~1
  }

  #Fit it using glm.fit which is faster than calling "glm"
  model <- glm.fit(design,response, family = quasipoisson(link = "log"))

   #Check convergence - if no convergence we return empty handed.
   if (!model$converged) {
      #Try without time dependence
     if (timeTrend) {
       cat("Warning: No convergence with timeTrend -- trying without.\n")
       #Drop time from design matrix
       design <- design[,1,drop=FALSE]
       #Refit
       model <- glm.fit(design,response, family = quasipoisson(link = "log"))
       Formula<-response~1
     }
     #No convergence and no time trend. That's not good.
   }

   #Fix class of output to glm/lm object in order for anscombe.residuals to work
   #Note though: not all glm methods may work for the result
   class(model) <- c("glm","lm")

   #Overdispersion parameter phi
   phi <- max(summary.glm(model)$dispersion,1)

   #In case reweighting using Anscome residuals is requested
   if (reweight) {
     s <- anscombe.residuals(model,phi)
     omega <- algo.farrington.assign.weights(s)
     model <- glm.fit(design,response, family = quasipoisson(link = "log"), weights = omega)
     #Here, the overdispersion often becomes small, so we use the max
     #to ensure we don't operate with quantities less than 1.
     phi <- max(summary.glm(model)$dispersion,1)
   } # end of refit.

   model$phi <- phi
   model$wtime <- wtime
   model$response <- response
   model$terms <- terms(Formula)
   # cheating a bit, all methods for glm may not work
   class(model)<-c("algo.farrington.glm","glm","lm") # 23/10/2012 (SM):
                                        # added "lm" class to avoid warnings
                                        # from predict.lm about fake object
   #Done
  return(model)
}

######################################################################
# Experimental function to include a population offset in the
# farrington procedure based on algo.farrington.fitGLM
# Alternative: include populationOffset argument in the two other
# fit functions, but I suspect use of this is not so common
#
# Parameters:
#  takes an additional "population" parameter
######################################################################

algo.farrington.fitGLM.populationOffset <- function(response,wtime,population,timeTrend=TRUE,reweight=TRUE,...) {
  #Model formula depends on whether to include a time trend or not.
  theModel <- as.formula(ifelse(timeTrend, "response~offset(log(population)) + 1 + wtime","response~offset(log(population)) + 1"))

  #Fit it -- this is slow. An improvement would be to use glm.fit here.
  model <- glm(theModel, family = quasipoisson(link="log"))

 #Check convergence - if no convergence we return empty handed.
  if (!model$converged) {
    #Try without time dependence
    if (timeTrend) {
     model <- glm(response ~ 1, family = quasipoisson(link="log"))
     cat("Warning: No convergence with timeTrend -- trying without.\n")
    }

    if (!model$converged) {
      cat("Warning: No convergence in this case.\n")
      print(cbind(response,wtime))
      return(NULL)
    }
  }

  #Overdispersion parameter phi
  phi <- max(summary(model)$dispersion,1)

  #In case reweighting using Anscome residuals is requested
  if (reweight) {
    s <- anscombe.residuals(model,phi)
    omega <- algo.farrington.assign.weights(s)
    model <- glm(theModel,family=quasipoisson(link="log"),weights=omega)
    #Here, the overdispersion often becomes small, so we use the max
    #to ensure we don't operate with quantities less than 1.
    phi <- max(summary(model)$dispersion,1)
  } # end of refit.


  #Add wtime, response and phi to the model
  model$phi <- phi
  model$wtime <- wtime
  model$response <- response
  model$population <- population
  #Done
  return(model)
}




###################################################
### code chunk number 4: algo_farrington.Rnw:344-370
###################################################

algo.farrington.threshold <- function(pred,phi,alpha=0.01,skewness.transform="none",y) {
  #Fetch mu0 and var(mu0) from the prediction object
  mu0 <- pred$fit
  tau <- phi + (pred$se.fit^2)/mu0
  #Standard deviation of prediction, i.e. sqrt(var(h(Y_0)-h(\mu_0)))
  switch(skewness.transform,
         "none" = { se <- sqrt(mu0*tau); exponent <- 1},
         "1/2" = { se <- sqrt(1/4*tau); exponent <- 1/2},
         "2/3"  = { se <- sqrt(4/9*mu0^(1/3)*tau); exponent <- 2/3},
         { stop("No proper exponent in algo.farrington.threshold.")})

  #Note that lu can contain NA's if e.g. (-1.47)^(3/2)
  lu <- sort((mu0^exponent + c(-1,1)*qnorm(1-alpha/2)*se)^(1/exponent),na.last=FALSE)

  #Ensure that lower bound is non-negative
  lu[1] <- max(0,lu[1],na.rm=TRUE)

  #Compute quantiles of the predictive distribution based on the
  #normal approximation on the transformed scale
  q <- pnorm( y^(exponent) , mean=mu0^exponent, sd=se)
  m <- qnorm(0.5, mean=mu0^exponent, sd=se)^(1/exponent)

  #Return lower and upper bounds
  return(c(lu,q=q,m=m))
}


###################################################
### code chunk number 5: algo_farrington.Rnw:412-451
###################################################
######################################################################
# Compute indices of reference value using Date class
#
# Params:
#  t0 - Date object describing the time point
#  b  - Number of years to go back in time
#  w  - Half width of window to include reference values for
#  epochStr - "1 month", "1 week" or "1 day"
#  epochs - Vector containing the epoch value of the sts/disProg object
#
# Details:
#  Using the Date class the reference values are formed as follows:
#   Starting from d0 go i, i in 1,...,b years back in time.
#
# Returns:
#  a vector of indices in epochs which match
######################################################################
refvalIdxByDate <- function(t0, b, w, epochStr, epochs) {
  refDays <- NULL
  refPoints <- seq( t0, length.out=b+1, by="-1 year")[-1]

  #Loop over all b-lagged points and append appropriate w-lagged points
  for (j in 1:length(refPoints)) {
      refPointWindow <- c(rev(seq(refPoints[j], length.out=w+1, by=paste("-",epochStr,sep=""))),
                          seq(refPoints[j], length.out=w+1, by=epochStr)[-1])
      refDays <- append(refDays,refPointWindow)
  }
  if (epochStr == "1 week") {
      #What weekday is t0 (0=Sunday, 1=Monday, ...)
      epochWeekDay <- as.numeric(format(t0,"%w"))
      #How many days to go forward to obtain the next "epochWeekDay", i.e. (d0 - d) mod 7
      dx.forward <- (epochWeekDay - as.numeric(format(refDays,"%w"))) %% 7
      #How many days to go backward to obtain the next "epochWeekDay", i.e. (d - d0) mod 7
      dx.backward <- (as.numeric(format(refDays,"%w")) - epochWeekDay) %% 7
      #What is shorter - go forward or go backward?
      #By convention: always go to the closest weekday as t0
      refDays <- refDays + ifelse(dx.forward < dx.backward, dx.forward, -dx.backward)
  }
  if (epochStr == "1 month") {
      #What day of the month is t0 (it is assumed that all epochs have the same value here)
      epochDay <- as.numeric(format(t0,"%d"))
      #By convention: go back in time to closest 1st of month
      refDays <- refDays -  (as.numeric(format(refDays, "%d")) - epochDay)
  }
  #Find the index of these reference values
  wtime <- match(as.numeric(refDays), epochs)
  return(wtime)
}



###################################################
### code chunk number 6: algo_farrington.Rnw:571-769
###################################################

algo.farrington <- function(disProgObj, control=list(
  range=NULL, b=5, w=3, reweight=TRUE, verbose=FALSE, plot=FALSE,
  alpha=0.05, trend=TRUE, limit54=c(5,4), powertrans="2/3",
  fitFun="algo.farrington.fitGLM.fast")
  ) {
  #Fetch observed
  observed <- disProgObj$observed
  freq <- disProgObj$freq
  epochStr <- switch( as.character(freq), "12" = "1 month","52" =  "1 week","365" = "1 day")
  #Fetch population (if it exists)
  if (!is.null(disProgObj$populationFrac)) {
    population <- disProgObj$populationFrac }
  else {
    population <- rep(1,length(observed))
  }

  ######################################################################
  # Initialize and check control options
  ######################################################################
  defaultControl <- eval(formals()$control)
  control <- modifyList(defaultControl, control, keep.null = TRUE)
  if (is.null(control$range)) {
    control$range <- (freq*control$b - control$w):length(observed)
  }
  control$fitFun <- match.arg(control$fitFun,
    c("algo.farrington.fitGLM.fast", "algo.farrington.fitGLM",
      "algo.farrington.fitGLM.populationOffset"))

  #Use special Date class mechanism to find reference months/weeks/days
  if (is.null(disProgObj[["epochAsDate",exact=TRUE]])) {
    epochAsDate <- FALSE
  } else {
    epochAsDate <- disProgObj[["epochAsDate",exact=TRUE]]
  }

  #check options
  if (!((control$limit54[1] >= 0) &  (control$limit54[2] > 0))) {
    stop("The limit54 arguments are out of bounds: cases >= 0 and period > 0.")
  }
  #Check control$range is within bounds.
  if (any((control$range < 1) | (control$range > length(disProgObj$observed)))) {
      stop("Range values are out of bounds (has to be within 1..",length(disProgObj$observed)," for the present data).")
  }

  # initialize the necessary vectors
  alarm <- matrix(data = 0, nrow = length(control$range), ncol = 1)
  trend <- matrix(data = 0, nrow = length(control$range), ncol = 1)
  upperbound <- matrix(data = 0, nrow = length(control$range), ncol = 1)
  # predictive distribution
  pd <- matrix(data = 0, nrow = length(control$range), ncol = 2)

  # Define objects
  n <- control$b*(2*control$w+1)

  # 2: Fit of the initial model and first estimation of mean and dispersion
  #    parameter
  for (k in control$range) {
    # transform the observed vector in the way
    # that the timepoint to be evaluated is at last position
    #shortObserved <- observed[1:(maxRange - k + 1)]

    if (control$verbose) { cat("k=",k,"\n")}

    #Find index of all epochs, which are to be used as reference values
    #i.e. with index k-w,..,k+w
    #in the years (current year)-1,...,(current year)-b
    if (!epochAsDate) {
      wtimeAll <- NULL
      for (i in control$b:1){
        wtimeAll <- append(wtimeAll,seq(k-freq*i-control$w,k-freq*i+control$w,by=1))
      }
      #Select them as reference values - but only those who exist
      wtime <- wtimeAll[wtimeAll>0]
      if (length(wtimeAll) != length(wtime)) {
          warning("@ range= ",k,": With current b and w then ",length(wtimeAll) - length(wtime),"/",length(wtimeAll), " reference values did not exist (index<1).")
      }
    } else { #Alternative approach using Dates
      t0 <- as.Date(disProgObj$week[k], origin="1970-01-01")
      wtimeAll <- refvalIdxByDate( t0=t0, b=control$b, w=control$w, epochStr=epochStr, epochs=disProgObj$week)
      #Select them as reference values (but only those not being NA!)
      wtime <- wtimeAll[!is.na(wtimeAll)]
      #Throw warning if necessary
      if (length(wtimeAll) != length(wtime)) {
          warning("@ range= ",k,": With current b and w then ",length(wtimeAll) - length(wtime),"/",length(wtimeAll), " reference values did not exist (index<1).")
      }
    }

    #Extract values from indices
    response <- observed[wtime]
    pop <- population[wtime]

    if (control$verbose) { print(response)}

    ######################################################################
    #Fit the model with overdispersion -- the initial fit
    ######################################################################
    #New feature: fitFun can now be the fast function for fitting the GLM
    model <- do.call(control$fitFun, args=list(response=response,wtime=wtime,population=pop,timeTrend=control$trend,reweight=control$reweight))

    #Stupid check to pass on NULL values from the algo.farrington.fitGLM proc.
    if (is.null(model)) return(model)

    ######################################################################
    #Time trend
    #
    #Check whether to include time trend, to do this we need to check whether
    #1) wtime is signifcant at the 95lvl
    #2) the predicted value is not larger than any observed value
    #3) the historical data span at least 3 years.
    doTrend <- control$trend
#Bug discovered by Julia Kammerer and Sabrina Heckl: Only investigate trend if it actually was part of the GLM
    #if (control$trend) {
    if ("wtime" %in% names(coef(model))){
      #is the p-value for the trend significant (0.05) level
      p <- summary.glm(model)$coefficients["wtime",4]
      significant <- (p < 0.05)
      #prediction for time k
      mu0Hat <- predict.glm(model,data.frame(wtime=c(k),population=population[k]),type="response")
      #have to use at least three years of data to allow for a trend
      atLeastThreeYears <- (control$b>=3)
      #no horrible predictions
      noExtrapolation <- mu0Hat <= max(response)

      #All 3 criteria have to be met in order to include the trend. Otherwise
      #it is removed. Only necessary to check this if a trend is requested.
      if (!(atLeastThreeYears && significant && noExtrapolation)) {
        doTrend <- FALSE
        model <- do.call(control$fitFun, args=list(response=response,wtime=wtime,population=pop,timeTrend=FALSE,reweight=control$reweight))
      }
    } else {
      doTrend <- FALSE
    }
    #done with time trend
    ######################################################################

    ######################################################################
    # Calculate prediction & confidence interval                         #
    ######################################################################
    #Predict value - note that the se is the mean CI
    #and not the prediction error of a single observation
    pred <- predict.glm(model,data.frame(wtime=c(k),population=population[k]),dispersion=model$phi,
                        type="response",se.fit=TRUE)
    #Calculate lower and upper threshold
    lu <- algo.farrington.threshold(pred,model$phi,skewness.transform=control$powertrans,alpha=control$alpha, observed[k])

    ######################################################################
    # If requested show a plot of the fit.
    ######################################################################
    if (control$plot) {
      #Compute all predictions
      data <- data.frame(wtime=seq(min(wtime),k,length.out=1000))
      preds <- predict(model,data,type="response",dispersion=model$phi)

      #Show a plot of the model fit.
      plot(c(wtime, k), c(response,observed[k]),ylim=range(c(observed[data$wtime],lu)),,xlab="time",ylab="No. infected",main=paste("Prediction at time t=",k," with b=",control$b,",w=",control$w,sep=""),pch=c(rep(1,length(wtime)),16))
      #Add the prediction
      lines(data$wtime,preds,col=1,pch=2)

      #Add the thresholds to the plot
      lines(rep(k,2),lu[1:2],col=3,lty=2)
    }


    ######################################################################
    #Postprocessing steps
    ######################################################################

    #Compute exceedance score unless less than 5 reports during last 4 weeks.
    #Changed in version 0.9-7 - current week is included now
    enoughCases <- (sum(observed[(k-control$limit54[2]+1):k])>=control$limit54[1])

    #18 May 2006: Bug/unexpected feature found by Y. Le Strat.
    #the okHistory variable meant to protect against zero count problems,
    #but instead it resulted in exceedance score == 0 for low counts.
    #Now removed to be concordant with the Farrington 1996 paper.
    X <- ifelse(enoughCases,(observed[k] - pred$fit) / (lu[2] - pred$fit),0)

    #Do we have an alarm -- i.e. is observation beyond CI??
    #upperbound only relevant if we can have an alarm (enoughCases)
    trend[k-min(control$range)+1] <- doTrend
    alarm[k-min(control$range)+1] <- (X>1)
    upperbound[k-min(control$range)+1] <- ifelse(enoughCases,lu[2],0)
    #Compute bounds of the predictive
    pd[k-min(control$range)+1,] <- lu[c(3,4)]

  }#done looping over all time points

  #Add name and data name to control object.
  control$name <- paste("farrington(",control$w,",",0,",",control$b,")",sep="")
  control$data <- paste(deparse(substitute(disProgObj)))
  #Add information about predictive distribution
  control$pd   <- pd

  # return alarm and upperbound vectors
  result <- list(alarm = alarm, upperbound = upperbound, trend=trend,
                 disProgObj=disProgObj, control=control)
  class(result) <- "survRes"

  #Done
  return(result)
}

Try the surveillance package in your browser

Any scripts or data that you put into this service are public.

surveillance documentation built on July 20, 2022, 1:06 a.m.