# ____________________________
# |\_________________________/|\
# || || \
# || algo.farrington || \
# || new version || |
# || || |
# || || |
# || || |
# || || |
# || || /
# ||_________________________|| /
# |/_________________________\|/
# __\_________________/__/|_
# |_______________________|/ )
# ________________________ (__
# /oooo oooo oooo oooo /| _ )_
# /ooooooooooooooooooooooo/ / (_)_(_)
# /ooooooooooooooooooooooo/ / (o o)
#/C=_____________________/_/ ==\o/==
# Version of the 26.06.2013
# M.Salmon, M.Hoehle
################################################################################
# CONTENTS
################################################################################
# # MAIN FUNCTION
# Function that manages input and output.
# # RESIDUALS FUNCTION
# Function that calculates Anscombe residuals.
# # WEIGHTS FUNCTION
# Function that calculates weights based on these residuals.
# # FORMULA FUNCTION
# Function that writes a formula for the glm using Booleans from control.
# # FIT GLM FUNCTION
# Function that fits a GLM. If it does not converge this function tries to fit it without time trend.
# # THRESHOLD FUNCTION
# Function that calculates the lower and upper threshold, the probability of observing a count that is >= observed, and the score.
# There are two versions of this function depending on the method chosen.
# # BLOCKS FUNCTION
# Function that creates the factor variable for the glm.
# # DATA GLM FUNCTION
# Function that prepares data for the glm
# # GLM FUNCTION
# Function that calls fit glm, checkst he time trend and calculate the prediction fort he current timepoint.
################################################################################
# END OF CONTENTS
################################################################################
################################################################################
# MAIN FUNCTION
################################################################################
farringtonFlexible <- function(sts, control = list(range = NULL, b = 3, w = 3,
reweight = TRUE,
weightsThreshold = 2.58,
verbose = FALSE,glmWarnings = TRUE,
alpha = 0.01, trend = TRUE,
pThresholdTrend = 0.05, limit54=c(5,4),
powertrans="2/3",
fitFun="algo.farrington.fitGLM.flexible",
populationOffset = FALSE,
noPeriods = 1, pastWeeksNotIncluded = 26,
thresholdMethod = "delta")) {
######################################################################
# Use special Date class mechanism to find reference months/weeks/days
######################################################################
if (is.null( sts@epochAsDate)) {
epochAsDate <- FALSE
} else {
epochAsDate <- sts@epochAsDate
}
######################################################################
# Fetch observed and population
######################################################################
# Fetch observed
observed <- observed(sts)
freq <- sts@freq
if (epochAsDate) {
epochStr <- switch( as.character(freq), "12" = "month","52" = "week",
"365" = "day")
} else {
epochStr <- "none"
}
# Fetch population (if it exists)
if (!is.null(population(sts))) {
population <- population(sts)
} else {
population <- rep(1,length(observed))
}
######################################################################
# Fix missing control options
######################################################################
# How many years to go back in time?
if (is.null(control[["b",exact=TRUE]])) { control$b = 5 }
# Half-window length
if (is.null(control[["w", exact = TRUE]])) { control$w = 3 }
# Range of time points to be evaluated
if (is.null(control[["range", exact=TRUE]])) {
control$range <- (freq*(control$b)+control$w +1):dim(observed)[1]
}
# Reweighting past outbreaks?
if (is.null(control[["reweight",exact=TRUE]])) {control$reweight=TRUE}
# With which threshold?
if (is.null(control[["weightsThreshold",exact=TRUE]])) {
control$weightsThreshold=2.58
}
# Printing information?
if (is.null(control[["verbose",exact=TRUE]])) {control$verbose=FALSE}
# Printing warning from glm.fit?
if (is.null(control[["glmWarnings",exact=TRUE]])) {control$glmWarnings=TRUE}
# An approximate (two-sided) (1 - alpha)% prediction interval is calculated
if (is.null(control[["alpha",exact=TRUE]])) {control$alpha=0.05}
# Include a time trend when possible?
if (is.null(control[["trend",exact=TRUE]])) {control$trend=TRUE}
# Which pvalue for the time trend to be significant?
if (is.null(control[["pThresholdTrend",exact=TRUE]])){
control$pThresholdTrend=0.05}
# No alarm is sounded
# if fewer than cases = 5 reports were received in the past period = 4
# weeks. limit54=c(cases,period) is a vector allowing the user to change
# these numbers
if (is.null(control[["limit54",exact=TRUE]])) {control$limit54=c(5,4)}
# Power transformation to apply to the data.
if (is.null(control[["powertrans",exact=TRUE]])){control$powertrans="2/3"}
# How many noPeriods between windows around reference weeks?
if (is.null(control[["noPeriods",exact=TRUE]])){control$noPeriods=1}
# Use factors in the model? Depends on noPeriods, no input from the user.
if (control$noPeriods!=1) {
control$factorsBool=TRUE
} else {
control$factorsBool=FALSE
}
# Use a population offset in the model?
if (is.null(control[["populationOffset",exact=TRUE]])) {
control$populationOffset=FALSE
}
# How many past weeks not to take into account?
if (is.null(control[["pastWeeksNotIncluded",exact=TRUE]])){
control$pastWeeksNotIncluded=control$w
}
# Which function to use?
# Only one possibility at the moment
if (is.null(control[["fitFun",exact=TRUE]])) {
control$fitFun="algo.farrington.fitGLM.flexible"
} else {
control$fitFun <- match.arg(control$fitFun, c(
"algo.farrington.fitGLM.flexible"))
}
# Which method for calculating the threshold?
# Extracting the method
if (is.null(control[["thresholdMethod",exact=TRUE]]))
{ control$thresholdMethod="delta"}
thresholdMethod<- match.arg(control$thresholdMethod, c("delta","nbPlugin","muan"),several.ok=FALSE)
# Adapt the argument for the glm function
control$typePred <- switch(thresholdMethod, "delta"="response","nbPlugin"="link","muan"="link")
# Which threshold function?
control$thresholdFunction <- switch(thresholdMethod,
"delta"="algo.farrington.threshold.farrington",
"nbPlugin"="algo.farrington.threshold.noufaily",
"muan"="algo.farrington.threshold.noufaily")
######################################################################
# Check options
######################################################################
if (!((control$limit54[1] >= 0) && (control$limit54[2] > 0))) {
stop("The limit54 arguments are out of bounds: cases >= 0 and period > 0.")
}
######################################################################
# Initialize the necessary vectors
######################################################################
# Vector for score
score <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$score <- score
# Vector for time trend
trend <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
# Vector for predictive distribution
pvalue <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$pvalue <- pvalue
# Vector for expected count
expected <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$expected <- expected
# Vector for mu0 (prediction from glm)
mu0Vector <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$mu0Vector <- mu0Vector
# Vector for overdispersion phi (from glm)
phiVector <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$phiVector <- phiVector
# Vector for time trend (from glm)
trendVector <- matrix(data = 0, nrow = length(control$range), ncol = ncol(sts))
sts@control$trendVector <- trendVector
# Define objects
n <- control$b*(2*control$w+1)
# loop over columns of sts
for (j in 1:ncol(sts)) {
#Vector of dates
if (epochAsDate){
vectorOfDates <- as.Date(sts@epoch, origin="1970-01-01")
} else {
vectorOfDates <- seq_len(length(observed[,j]))
}
# Loop over control$range
for (k in control$range) {
######################################################################
# Prepare data for the glm
######################################################################
dayToConsider <- vectorOfDates[k]
diffDates <- diff(vectorOfDates)
dataGLM <- algo.farrington.data.glm(dayToConsider=dayToConsider,
b=control$b, freq=freq,
epochAsDate=epochAsDate,
epochStr=epochStr,
vectorOfDates=vectorOfDates,w=control$w,
noPeriods=control$noPeriods,
observed=observed[,j],population=population,
verbose=control$verbose,
pastWeeksNotIncluded=control$pastWeeksNotIncluded,k)
######################################################################
# Fit the model
######################################################################
finalModel <- algo.farrington.glm(dataGLM,timeTrend=control$trend,populationOffset=control$populationOffset,
factorsBool=control$factorsBool,reweight=control$reweight,
weightsThreshold=control$weightsThreshold,
pThresholdTrend=control$pThresholdTrend,b=control$b,
noPeriods=control$noPeriods,typePred=control$typePred,
fitFun=control$fitFun,glmWarnings=control$glmWarnings,
epochAsDate=epochAsDate,dayToConsider=dayToConsider,
diffDates=diffDates,populationNow=population[k,j],k,
verbose=control$verbose)
if (is.null(finalModel)){
#Do we have an alarm -- i.e. is observation beyond CI??
#upperbound only relevant if we can have an alarm (enoughCases)
sts@alarm[k,j] <- NA
sts@upperbound[k,j] <- NA
mu0Vector[(k-min(control$range)+1),j] <- NA
# Get overdispersion
phiVector[(k-min(control$range)+1),j] <- NA
# Get score
score[(k-min(control$range)+1),j] <- NA
#Compute bounds of the predictive
pvalue[(k-min(control$range)+1),j] <- NA
# Time trend
trendVector[(k-min(control$range)+1),j] <- NA
trend[(k-min(control$range)+1),j] <- NA
warning(paste("The model could not converge with nor without time trend at timepoint ", k," so no result can be given for timepoint ", k,".\n"))
} else {
pred <- finalModel$pred
doTrend <- finalModel$doTrend
coeffTime <- finalModel$coeffTime
######################################################################
# Calculate lower and upper threshold
######################################################################
argumentsThreshold <- list(predFit=pred$fit,predSeFit=pred$se.fit,
phi=finalModel$phi,
skewness.transform=control$powertrans,
alpha=control$alpha, y=observed[k,j],
method=control$thresholdMethod
)
lu <- do.call(control$thresholdFunction, args=argumentsThreshold)
######################################################################
# Postprocessing steps & output
######################################################################
#Compute exceedance score unless less than 5 reports during last 4 weeks.
#Changed in version 0.9-7 - current week is included now
enoughCases <- (sum(observed[(k-control$limit54[2]+1):k,j])
>=control$limit54[1])
#18 May 2006: Bug/unexpected feature found by Y. Le Strat.
#the okHistory variable meant to protect against zero count problems,
#but instead it resulted in exceedance score == 0 for low counts.
#Now removed to be concordant with the Farrington 1996 paper.
X <- ifelse(enoughCases,lu$score,NA)
#Do we have an alarm -- i.e. is observation beyond CI??
#upperbound only relevant if we can have an alarm (enoughCases)
sts@alarm[k,j] <- !is.na(X) && (X>1) && observed[k,j]!=0
sts@upperbound[k,j] <- ifelse(enoughCases,lu$upper,NA)
# Possible bug alarm although upperbound <- 0?
# Calculate expected value from glm
if (is.na(lu$upper)==FALSE){
if ( control$typePred=="response"){
expected[(k-min(control$range)+1),j] <- ifelse(enoughCases,pred$fit,NA)
} else{
expected[(k-min(control$range)+1),j] <- ifelse(enoughCases,exp(pred$fit),NA)
}
}else{
expected[(k-min(control$range)+1),j] <- NA
}
# Calculate mean of the negbin distribution of the observation
# Use linear predictor mean and sd
eta0 <- pred$fit
seEta0 <- pred$se.fit
# deduce the quantile for mu0 from eta0 which is normally distributed
if (control$thresholdMethod=='nbPlugin'){
mu0Vector[(k-min(control$range)+1),j] <- exp(eta0)
} else {
mu0Vector[(k-min(control$range)+1),j] <- exp(qnorm(1-control$alpha, mean=eta0, sd=seEta0))
}
# Get overdispersion
phiVector[(k-min(control$range)+1),j] <- finalModel$phi
# Get score
score[(k-min(control$range)+1),j] <- lu$score
#Compute bounds of the predictive
pvalue[(k-min(control$range)+1),j] <- lu$prob
# Time trend
if(doTrend){
trendVector[(k-min(control$range)+1),j] <- coeffTime
trend[(k-min(control$range)+1),j] <- 1
} else {
trendVector[(k-min(control$range)+1),j] <- NA
}
}
}#done looping over all time points
} #end of loop over cols in sts.
# Add information about score
sts@control$score[,j] <- score[,j]
# Add information about trend
sts@control$trend[,j] <- trend[,j]
#Add information about predictive distribution
sts@control$pvalue[,j] <- pvalue[,j]
# Add information about expected value
sts@control$expected[,j] <- expected[,j]
# Add information about mean of the negbin distribution of the observation
sts@control$mu0Vector[,j] <- mu0Vector[,j]
# Add information about overdispersion
sts@control$phiVector[,j] <- phiVector[,j]
# Add information about time trend
sts@control$trendVector[,j] <- trendVector[,j]
#Done
return(sts[control$range,])
}
################################################################################
# END OF MAIN FUNCTION
################################################################################
################################################################################
# REFERENCE TIME POINTS FUNCTION
################################################################################
algo.farrington.referencetimepoints <- function(dayToConsider,b=control$b,freq=freq,epochAsDate,epochStr){
if (epochAsDate) {
referenceTimePoints <- as.Date(seq(as.Date(dayToConsider,
origin="1970-01-01"),
length=(b+1), by="-1 year"))
} else {
referenceTimePoints <- seq(dayToConsider, length=(b+1),by=-freq)
if (referenceTimePoints[b+1]<=0){
warning("Some reference values did not exist (index<1).")
}
}
if (epochStr == "week") {
# get the date of the Mondays/Tuesdays/etc so that it compares to
# the reference data
# (Mondays for Mondays for instance)
# Vectors of same days near the date (usually the same week)
# dayToGet
dayToGet <- as.numeric(format(dayToConsider, "%w"))
actualDay <- as.numeric(format(referenceTimePoints, "%w"))
referenceTimePointsA <- referenceTimePoints -
(actualDay
- dayToGet)
# Find the other "same day", which is either before or after referenceTimePoints
referenceTimePointsB <- referenceTimePointsA + ifelse(referenceTimePointsA>referenceTimePoints,-7,7)
# For each year choose the closest Monday/Tuesday/etc
# The order of referenceTimePoints is NOT important
AB <- cbind(referenceTimePointsA,referenceTimePointsB)
ABnumeric <- cbind(as.numeric(referenceTimePointsA),as.numeric(referenceTimePointsB))
distMatrix <- abs(ABnumeric-as.numeric(referenceTimePoints))
idx <- (distMatrix[,1]>distMatrix[,2])+1
referenceTimePoints <- as.Date(AB[cbind(1:dim(AB)[1],idx)],origin="1970-01-01")
}
return(referenceTimePoints)
}
################################################################################
# END OF REFERENCE TIME POINTS FUNCTION
################################################################################
################################################################################
# RESIDUALS FUNCTION
################################################################################
anscombe.residuals <- function(m,phi) {
y <- m$y
mu <- fitted.values(m)
#Compute raw Anscombe residuals
a <- 3/2*(y^(2/3) * mu^(-1/6) - mu^(1/2))
#Compute standardized residuals
a <- a/sqrt(phi * (1-hatvalues(m)))
return(a)
}
################################################################################
# END OF RESIDUALS FUNCTION
################################################################################
## ################################################################################
## # WEIGHTS FUNCTION -- this function has been changed in the original algo_farrington
## # implementation.
## ################################################################################
##
## algo.farrington.assign.weights <- function(s,weightsThreshold) {
## #s_i^(-2) for s_i<weightsThreshold and 1 otherwise
## gamma <- length(s)/(sum( (s^(-2))^(s>weightsThreshold) ))
## omega <- numeric(length(s))
## omega[s>weightsThreshold] <- gamma*(s[s>weightsThreshold]^(-2))
## omega[s<=weightsThreshold] <- gamma
## return(omega)
## }
## ################################################################################
## # END OF WEIGHTS FUNCTION
## ################################################################################
################################################################################
# FORMULA FUNCTION
################################################################################
# Function for writing the good formula depending on timeTrend,
# populationOffset and factorsBool
formulaGLM <- function(populationOffset=FALSE,timeBool=TRUE,factorsBool=FALSE){
# Description
# Args:
# populationOffset: ---
# Returns:
# Vector of X
# Smallest formula
formulaString <- "response ~ 1"
# With time trend?
if (timeBool){
formulaString <- paste(formulaString,"+wtime",sep ="")}
# With population offset?
if(populationOffset){
formulaString <- paste(formulaString,"+offset(log(population))",sep ="")}
# With factors?
if(factorsBool){
formulaString <- paste(formulaString,"+seasgroups",sep ="")}
# Return formula as a string
return(formulaString)
}
################################################################################
# END OF FORMULA FUNCTION
################################################################################
################################################################################
# FIT GLM FUNCTION
################################################################################
algo.farrington.fitGLM.flexible <- function(dataGLM,
timeTrend,populationOffset,factorsBool,reweight,weightsThreshold,glmWarnings,verbose,control,...) {
# Model formula depends on whether to include a time trend or not.
theModel <- formulaGLM(populationOffset,timeBool=timeTrend,factorsBool)
# Fit it -- this is slow. An improvement would be to use glm.fit here.
# This would change the syntax, however.
if (glmWarnings) {
model <- glm(formula(theModel),data=dataGLM,family = quasipoisson(link="log"))
} else {
model <- suppressWarnings(glm(formula(theModel),data=dataGLM,family = quasipoisson(link="log")))
}
#Check convergence - if no convergence we return empty handed.
if (!model$converged) {
#Try without time dependence
if (timeTrend) {
theModel <- formulaGLM(populationOffset,timeBool=F,factorsBool)
if (glmWarnings) {
model <- glm(as.formula(theModel), data=dataGLM,
family = quasipoisson(link="log"))
} else {
model <- suppressWarnings(glm(as.formula(theModel), data=dataGLM,
family = quasipoisson(link="log")))
}
if (verbose) {cat("Warning: No convergence with timeTrend -- trying without.\n")}
}
if (!model$converged) {
if (verbose) {cat("Warning: No convergence in this case.\n")}
if (verbose) {print(dataGLM[,c("response","wtime"),exact=TRUE])}
return(NULL)
}
}
#Overdispersion parameter phi
phi <- max(summary(model)$dispersion,1)
#In case reweighting using Anscome residuals is requested
if (reweight) {
s <- anscombe.residuals(model,phi)
omega <- algo.farrington.assign.weights(s,weightsThreshold)
if (glmWarnings) {
model <- glm(as.formula(theModel),data=dataGLM,
family=quasipoisson(link="log"),
weights=omega)
} else {
model <- suppressWarnings(glm(as.formula(theModel),data=dataGLM,
family=quasipoisson(link="log"),
weights=omega))
}
#Here, the overdispersion often becomes small, so we use the max
#to ensure we don't operate with quantities less than 1.
phi <- max(summary(model)$dispersion,1)
} # end of refit.
#Add wtime, response and phi to the model
model$phi <- phi
model$wtime <- dataGLM$wtime
model$response <- dataGLM$response
model$population <- dataGLM$population
if (reweight) {
model$weights <- omega
} else{
model$weights <- model$weights
}
#Done
return(model)
}
################################################################################
# END OF FIT GLM FUNCTION
################################################################################
################################################################################
# THRESHOLD FUNCTION FARRINGTON
################################################################################
algo.farrington.threshold.farrington <- function(predFit,predSeFit,phi,
skewness.transform,
alpha,y,method){
#Fetch mu0 and var(mu0) from the prediction object
mu0 <- predFit
tau <- phi + (predSeFit^2)/mu0
#Standard deviation of prediction, i.e. sqrt(var(h(Y_0)-h(\mu_0)))
switch(skewness.transform,
"none" = { se <- sqrt(mu0*tau); exponent <- 1},
"1/2" = { se <- sqrt(1/4*tau); exponent <- 1/2},
"2/3" = { se <- sqrt(4/9*mu0^(1/3)*tau); exponent <- 2/3},
{ stop("No proper exponent in algo.farrington.threshold.")})
#Note that lu can contain NA's if e.g. (-1.47)^(3/2)
lu <- sort((mu0^exponent + c(-1,1)*qnorm(1-alpha)*se)^(1/exponent),
na.last=FALSE)
#Ensure that lower bound is non-negative
lu[1] <- max(0,lu[1],na.rm=TRUE)
# probability associated to the observed value as quantile
q <- pnorm( y^(1/exponent) , mean=mu0^exponent, sd=se,lower.tail=FALSE)
# calculate score
x <- ifelse(is.na(lu[2])==FALSE,(y - predFit) / (lu[2] - predFit),NA)
return(list(lower=lu[1],upper=lu[2],prob=q,score=x))
}
################################################################################
# END OF THRESHOLD FUNCTION FARRINGTON
################################################################################
################################################################################
# THRESHOLD FUNCTION NOUFAILY
################################################################################
algo.farrington.threshold.noufaily <- function(predFit,predSeFit,phi,
skewness.transform,
alpha,y,method){
# method of Angela Noufaily with modifications
# Use linear predictor mean and sd
eta0 <- predFit
seEta0 <- predSeFit
# deduce the quantile for mu0 from eta0 which is normally distributed
if (method=='nbPlugin'){
mu0Quantile <- exp(eta0)
} else {
mu0Quantile <- exp(qnorm(1-alpha, mean=eta0, sd=seEta0))
}
if (mu0Quantile==Inf){
lu <- c(NA,NA)
q <- NA
# else is when the method is "muan"
} else{
# Two cases depending on phi value
if (phi>1){
lu<-c(qnbinom(alpha/2,mu0Quantile/(phi-1),1/phi),
qnbinom(1-alpha/2,mu0Quantile/(phi-1),1/phi))
} else {
lu<-c(qpois(alpha/2,mu0Quantile),qpois(1-alpha/2,mu0Quantile))
}
# cannot be negative
lu[1]=max(0,lu[1])
# probability associated to the observed value as quantile
if (phi!=1){
q <- pnbinom(q= y-1 ,size=mu0Quantile/(phi-1),prob=1/phi,lower.tail=FALSE)
} else{
q <- ppois(y-1,mu0Quantile,lower.tail=FALSE)
}
}
# calculate score
x <- ifelse(is.na(lu[2])==FALSE,(y - predFit) / (lu[2] - predFit),NA)
return(list(lower=lu[1],upper=lu[2],prob=q,score=x))
}
################################################################################
# END OF THRESHOLD FUNCTION NOUFAILY
################################################################################
################################################################################
# BLOCKS FUNCTION
################################################################################
blocks <- function(referenceTimePoints,vectorOfDates,freq,dayToConsider,b,w,p,
epochAsDate) {
## INPUT
# freq: are we dealing with daily/weekly/monthly data?
# b: how many years to go back in time
# w: half window length around the reference timepoints
# p: number of noPeriods one wants the year to be split into
## VECTOR OF ABSOLUTE NUMBERS
# Very useful to write the code!
vectorOfAbsoluteNumbers <- seq_len(length(vectorOfDates))
# logical vector indicating where the referenceTimePoints
# are in the vectorOfDates
referenceTimePointsOrNot <- vectorOfDates %in% referenceTimePoints
## VECTOR OF FACTORS
vectorOfFactors <- rep(NA,length(vectorOfDates))
## SETTING THE FACTORS
# Current week
if (epochAsDate==FALSE){
now <- which(vectorOfDates==dayToConsider)
} else {
now <- which(vectorOfDates==as.Date(dayToConsider))
}
vectorOfFactors[(now-w):now] <- p
# Reference weeks
referenceWeeks <- rev(as.numeric(
vectorOfAbsoluteNumbers[referenceTimePointsOrNot=='TRUE']))
for (i in 1:b) {
# reference week
refWeek <- referenceWeeks[i+1]
vectorOfFactors[(refWeek-w):(refWeek+w)] <- p
# The rest is only useful if ones want factors, otherwise only have
# reference timepoints like in the old algo.farrington
if (p!=1){
# Number of time points to be shared between vectors
period <- referenceWeeks[i] - 2 * w - 1 - refWeek
# Check that p is not too big
if (period < (p-(2*w+1))){stop('Number of factors too big!')}
# Look for the length of blocks
lengthOfBlocks <- period %/% (p-1)
rest <- period %% (p-1)
vectorLengthOfBlocks <- rep(lengthOfBlocks,p-1)
# share the rest of the Euclidian division among the first blocks
add <- seq_len(rest)
vectorLengthOfBlocks[add] <- vectorLengthOfBlocks[add]+1
# slight transformation necessary for the upcoming code with cumsum
vectorLengthOfBlocks <- c(0,vectorLengthOfBlocks)
# fill the vector
for (j in 1:(p-1)) {
vectorOfFactors[(refWeek+w+1+cumsum(vectorLengthOfBlocks)[j]):
(refWeek+w+1+cumsum(vectorLengthOfBlocks)[j+1]-1)]<-j
}
}
}
## DONE!
return(vectorOfFactors) #indent
}
################################################################################
# END OF BLOCKS FUNCTION
################################################################################
################################################################################
# DATA GLM FUNCTION
################################################################################
algo.farrington.data.glm <- function(dayToConsider, b, freq,
epochAsDate,epochStr,
vectorOfDates,w,noPeriods,
observed,population,
verbose,pastWeeksNotIncluded,k){
# Identify reference time points
# Same date but with one year, two year, etc, lag
# b+1 because we need to have the current week in the vector
referenceTimePoints <- algo.farrington.referencetimepoints(dayToConsider,b=b,
freq=freq,
epochAsDate=epochAsDate,
epochStr=epochStr
)
if (sum((vectorOfDates %in% min(referenceTimePoints)) == rep(FALSE,length(vectorOfDates))) == length(vectorOfDates)){
stop("Some reference values did not exist (index<1).")
}
if (verbose) { cat("k=", k,"\n")}
# Create the blocks for the noPeriods between windows (including windows)
# If noPeriods=1 this is a way of identifying windows, actually.
blocks <- blocks(referenceTimePoints,vectorOfDates,epochStr,dayToConsider,
b,w,noPeriods,epochAsDate)
# Here add option for not taking the X past weeks into account
# to avoid adaptation of the model to emerging outbreaks
blocksID <- blocks
blocksID[(k-pastWeeksNotIncluded):k] <- NA
# Extract values for the timepoints of interest only
blockIndexes <- which(is.na(blocksID)==FALSE)
# Time
# if epochAsDate make sure wtime has a 1 increment
if (epochAsDate){
wtime <- (as.numeric(vectorOfDates[blockIndexes])-
as.numeric(vectorOfDates[blockIndexes][1]))/as.numeric(diff(vectorOfDates))[1]
} else {
wtime <- as.numeric(vectorOfDates[blockIndexes])
}
# Factors
seasgroups <- as.factor(blocks[blockIndexes])
# Observed
response <- observed[blockIndexes]
# Population
pop <- population[blockIndexes]
if (verbose) { print(response)}
dataGLM <- data.frame(response=response,wtime=wtime,population=pop,
seasgroups=seasgroups,vectorOfDates=vectorOfDates[blockIndexes])
dataGLM <- dataGLM[is.na(dataGLM$response)==FALSE,]
return(dataGLM)
}
################################################################################
# END OF DATA GLM FUNCTION
################################################################################
################################################################################
# GLM FUNCTION
################################################################################
algo.farrington.glm <- function(dataGLM,timeTrend,populationOffset,factorsBool,
reweight,weightsThreshold,pThresholdTrend,b,
noPeriods,typePred,fitFun,glmWarnings,epochAsDate,
dayToConsider,diffDates,populationNow,k,verbose) {
arguments <- list(dataGLM=dataGLM,
timeTrend=timeTrend,
populationOffset=populationOffset,
factorsBool=factorsBool,reweight=reweight,
weightsThreshold=weightsThreshold,glmWarnings=glmWarnings,
verbose=verbose,control=control)
model <- do.call(fitFun, args=arguments)
#Stupid check to pass on NULL values from the algo.farrington.fitGLM proc.
if (is.null(model)) return(model)
######################################################################
#Time trend
######################################################################
#Check whether to include time trend, to do this we need to check whether
#1) wtime is signifcant at the 95lvl
#2) the predicted value is not larger than any observed value
#3) the historical data span at least 3 years.
doTrend <- NULL
# if model converged with time trend
if ("wtime" %in% names(coef(model))){
# get the prediction for k
if(epochAsDate){
wtime=(as.numeric(dayToConsider)-as.numeric(dataGLM$vectorOfDates[1]))/as.numeric(diffDates)[1]
} else {
wtime <- c(k)
}
pred <- predict.glm(model,newdata=data.frame(wtime=wtime,
population=populationNow,
seasgroups=factor(noPeriods),
dispersion=model$phi),se.fit=TRUE,type="response")
# check if three criterion ok
#is the p-value for the trend significant (0.05) level
significant <- (summary.glm(model)$coefficients["wtime",4] < pThresholdTrend)
#have to use at least three years of data to allow for a trend
atLeastThreeYears <- (b>=3)
#no horrible predictions
noExtrapolation <- (pred$fit <= max(dataGLM$response,na.rm=T))
#All 3 criteria have to be met in order to include the trend. Otherwise
#it is removed. Only necessary to check this if a trend is requested.
doTrend <- (atLeastThreeYears && significant && noExtrapolation)
# if not then refit
if (doTrend==FALSE) {
arguments$timeTrend=FALSE
model <- do.call(fitFun, args=arguments)
}
} else {
doTrend <- FALSE
}
#done with time trend
######################################################################
######################################################################
# Calculate prediction #
######################################################################
#Predict value
if(epochAsDate){
wtime=(as.numeric(dayToConsider)-as.numeric(dataGLM$vectorOfDates[1]))/as.numeric(diffDates)[1]
} else {
wtime <- c(k)
}
pred <- predict.glm(model,newdata=data.frame(wtime=wtime,
population=populationNow,
seasgroups=factor(noPeriods),
dispersion=model$phi),se.fit=TRUE,type=typePred)
coeffTime=ifelse(doTrend,summary.glm(model)$coefficients["wtime",1],NA)
finalModel <- list (pred,doTrend,coeffTime,model$phi)
names(finalModel) <- c("pred","doTrend","coeffTime","phi")
return(finalModel)
}
################################################################################
# END OF GLM FUNCTION
################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.