Nothing
#' @title runtsir
#'
#' @description This function runs the TSIR model.
#' @param data The data frame containing cases and interpolated births and populations.
#' @param nsim The number of simulations to do. Defaults to 100.
#' @param xreg The x-axis for the regression. Options are 'cumcases' and 'cumbirths'. Defaults to 'cumcases'.
#' @param IP The infectious period in weeks. Defaults to 2 weeks.
#' @param regtype The type of regression used in susceptible reconstruction.
#' Options are 'gaussian', 'lm' (linear model), 'spline' (smooth.spline with 2.5 degrees freedom),
#' 'lowess' (with f = 2/3, iter = 1), 'loess' (degree 1), and 'user' which is just a user inputed vector.
#' Defaults to 'gaussian' and if that fails then defaults to loess.
#' @param sigmamax The inverse kernal width for the gaussian regression. Default is 3.
#' Smaller, stochastic outbreaks tend to need a lower sigma.
#' @param userYhat The inputed regression vector if regtype='user'. Defaults to NULL.
#' @param family The family in the GLM regression. One can use any of the GLM ones, but the options are essentially
#' 'poisson' (with link='log'), 'gaussian' (with link='log' or 'identity'), or 'quasipoisson' (with link='log'). Default is 'gaussian'.
#' @param link The link function used with the glm family. Options are link='log' or 'identity'. Default is 'identity'.
#' @param sbar The mean number of susceptibles. Defaults to NULL, i.e. the function estimates sbar.
#' @param alpha The mixing parameter. Defaults to NULL, i.e. the function estimates alpha.
#' @param method The type of next step prediction used. Options are 'negbin' for negative binomial,
#' 'pois' for poisson distribution, and 'deterministic'. Defaults to 'deterministic'.
#' @param epidemics The type of data splitting. Options are 'cont' which doesn't split the data up at all,
#' and 'break' which breaks the epidemics up if there are a lot of zeros. Defaults to 'cont'.
#' @param pred The type of prediction used. Options are 'forward' and 'step-ahead'. Defaults to 'forward'.
#' @param seasonality The type of contact to use. Options are standard for 52/IP point contact or schoolterm for just a two point on off contact, or none for a single contact parameter. Defaults to standard.
#' @param threshold The cut off for a new epidemic if epidemics = 'break'. Defaults to 1.
#' @param add.noise.sd The sd for additive noise, defaults to zero.
#' @param mul.noise.sd The sd for multiplicative noise, defaults to zero.
#' @param printon Whether to show diagnostic prints or not, defaults to FALSE.
#' @param fit Now removed but gives a warning.
#' @param fittype Now removed but gives a warning.
#' @param inits.fit Whether or not to fit initial conditions using simple least squares as well. Defaults to FALSE. This parameter is more necessary in more chaotic locations.
#' @examples
#' require(kernlab)
#' London <- twentymeas[["London"]]
#' \dontrun{
#' plotdata(London)
#' res <- runtsir(data=London,method='pois',nsim=10, IP=2,inits.fit=FALSE)
#'plotres(res)
#'}
runtsir <- function(data, xreg = 'cumcases',
IP = 2,nsim = 10,
regtype = 'gaussian',sigmamax = 3,
userYhat = numeric(),alpha=NULL,sbar=NULL,
family='gaussian',link='identity',
method='deterministic',
inits.fit=FALSE,
epidemics='cont', pred ='forward',
threshold=1,seasonality='standard',
add.noise.sd = 0, mul.noise.sd = 0,
printon=F,fit = NULL,fittype = NULL){
## check various function arguments
if( (nsim %% 1 ==0) == F){
nsim <- round(nsim)
}
datacheck <- c('time','cases','pop','births')
if(sum(datacheck %in% names(data)) < length(datacheck)){
stop('data frame must contain "time", "cases", "pop", and "births" columns')
}
na.casescheck <- sum(is.na(data$cases))
if(na.casescheck > 0){
stop('there cannot be any NAs in the cases vector -- please correct')
}
na.birthscheck <- sum(is.na(data$births))
if(na.casescheck > 0){
stop('there cannot be any NAs in the births vector -- please correct')
}
xregcheck <- c('cumcases','cumbirths')
if(xreg %in% xregcheck == F){
stop('xreg must be either "cumcases" or "cumbirths"')
}
regtypecheck <- c('gaussian','lm','spline','lowess','loess','user')
if(regtype %in% regtypecheck == F){
stop("regtype must be one of 'gaussian','lm','spline','lowess','loess','user'")
}
if(length(sbar) == 1){
if(sbar > 1 || sbar < 0){
stop("sbar must be a percentage of the population, i.e. between zero and one.")
}
}
linkcheck <- c('log','identity')
if(link %in% linkcheck == F){
stop("link must be either 'log' or 'identity'")
}
seasonalitycheck <- c('standard','schoolterm','none')
if(seasonality %in% seasonalitycheck == F){
stop("seasonality must be either 'standard' or 'schoolterm' or 'none'")
}
methodcheck <- c('deterministic','negbin','pois')
if(method %in% methodcheck == F){
stop("method must be one of 'deterministic','negbin','pois'")
}
epidemicscheck <- c('cont','break')
if(epidemics %in% epidemicscheck == F){
stop("epidemics must be either 'cont' or 'break'")
}
predcheck <- c('forward','step-ahead')
if(pred %in% predcheck == F){
stop("pred must be either 'forward' or 'step-ahead'")
}
if(length(fittype) == 1){
warning('Argument fittype is deprecated;
if fixing alpha or sbar (or both!) set alpha =, and/or sbar = in the function.
For now defaulting to alpha = 0.97, sbar = 0.05. Will be removed soon.')
if(fittype == 'less'){
sbar = 0.05; alpha=0.97
}
if(fittype == 'fixalpha')
alpha = 0.97
}
if(length(fit) == 1){
stop('Argument fit is deprecated; the only fit option here is using a glm.
Please use mcmctsir for an mcmc version of the tsir model.')
}
## copy input alpha and sbar so you can pass the estimation (or fixed parm) to the end
input.alpha <- alpha
input.sbar <- sbar
## print warning if too many zeros and the continuous prediction is likely to fail
nzeros <- length(which(data$cases==0))
ltot <- length(data$cases)
if(nzeros > 0.3 * ltot && epidemics == 'cont'){
print(sprintf('time series is %.0f%% zeros, consider using break method',100*nzeros/ltot))
}
## assign cumulative births and cases as X or Y
cumbirths <- cumsum(data$births)
cumcases <- cumsum(data$cases)
if(xreg == 'cumcases'){
X <- cumcases
Y <- cumbirths
}
if(xreg == 'cumbirths'){
X <- cumbirths
Y <- cumcases
}
## interpolate to smooth a bit
x <- seq(X[1], X[length(X)], length=length(X))
y <- approxfun(X, Y)(x)
y[1] <- y[2] - (y[3]-y[2])
## do the regression type specified and generated Yhat
if(regtype == 'lm'){
Yhat <- predict(lm(Y~X))
}
if(regtype == 'lowess'){
Yhat <- lowess(X,Y,f = 2/3, iter = 1)$y
}
if(regtype == 'loess'){
Yhat <- predict(loess(y~x,se=T,family='gaussian',degree=1,model=T),X)
}
if(regtype == 'spline'){
Yhat <- predict(smooth.spline(x,y,df=2.5),X)$y
}
## gaussian regression type is profiled over until a plausible reporting rate is found
## reporting rate is not guaranteed to be 'optimal'
if(regtype == 'gaussian'){
sigvec <- seq(sigmamax,0,-0.1)
for(it in 1:length(sigvec)){
if(printon == T){
print(sprintf('gaussian regression attempt number %d',it))
}
Yhat <- predict(gausspr(x,y,variance.model=T,fit=T,tol=1e-7,
var=9.999999999999999999e-3,
kernel="rbfdot",
kpar=list(sigma=sigvec[it])),X)
if(sigvec[it] <= min(sigvec)){
## use the loess then
print('gaussian regressian failed -- switching to loess regression')
Yhat <- predict(loess(y~x,se=T,family='gaussian',degree=1,model=T),X)
}
## generate residuals for gaussian regression
if(xreg == 'cumcases'){
Z <- residual.cases(Yhat,Y)
rho <- derivative(X,Yhat)
if(length(which(rho<=1))==0){
break()
}
}
if(xreg == 'cumbirths'){
rho <- derivative(X,Yhat)
Z <- residual.births(rho,Yhat,Y)
if(length(which(rho>=1))==0 && length(which(rho<0)) == 0){
break()
}
}
}
}
if(regtype == 'user'){
Yhat <- userYhat
if(length(Yhat)==0){
stop('Yhat returns numeric(0) -- make sure to input a userYhat under regtype=user')
}
}
## generate reporting rate and residuals for non gaussian regression types
rho <- derivative(X,Yhat)
if(xreg == 'cumcases'){
Z <- residual.cases(Yhat,Y)
}
if(xreg == 'cumbirths'){
Z <- residual.births(rho,Yhat,Y)
}
## 'standardize reporting rate at this point to be consistent throughout the rest of the function
if(xreg == 'cumcases'){
adj.rho <- rho
}
if(xreg == 'cumbirths'){
adj.rho <- 1/rho
}
if(regtype == 'lm'){
adj.rho <- signif(adj.rho,3)
}
## warning meassure if reporting is very small
if(any(1/adj.rho < 0.01)){
warning('Reporting rate has fallen below 1% -- try lowering the value of sigmamax (default is 3) if you are using
regtype="gaussian"')
}
## warning message if reporting > 100%
if(length(which(adj.rho < 1 )) > 1){
warning('Reporting exceeds 100% -- use different regression')
}
## adjust cases to be the 'true' incidence, ie I
Iadjusted <- data$cases * adj.rho
## copy the data for later
datacopy <- data
## declare seasonality type and for each make the period to be used in the GLM to estimate N seas parameters
if(seasonality == 'standard'){
## this is the classical TSIR formulation
period <- rep(1:(52/IP), round(nrow(data)+1))[1:(nrow(data)-1)]
## if IP == 1 we only estimate 26 pts instead of 52
if(IP == 1){
period <- rep(1:(52/2),each=2, round(nrow(data)+1))[1:(nrow(data)-1)]
}
}
if(seasonality == 'schoolterm'){
## do school time in base two weeks and then interpolate
term <- rep(1,26)
term[c(1,8,15,16,17,18,19,23,26)] <- 2
iterm <- round(approx(term,n=52/IP)$y)
period <- rep(iterm, round(nrow(data)+1))[1:(nrow(data)-1)]
}
## this is the null model of no seasonality
if(seasonality == 'none'){
period <- rep(1,nrow(data)-1)
period[nrow(data)-1] <- 2
}
## make step ahead I, step back I, Z
Inew <- tail(Iadjusted,-1)+1
lIminus <- log(head(Iadjusted,-1)+1)
Zminus <- head(Z,-1)
## copy population
pop <- data$pop
## declare the limits to profile S on
minSmean <- max(0.01*pop,-(min(Z) - 1))
Smean <- seq(minSmean, 0.4*mean(pop), length=250)
loglik <- rep(NA, length(Smean))
if(link == 'identity'){
Inew <- log(Inew)
}
if(family %in% c("poisson", "quasipoisson")) {
Inew <- round(Inew)
}
## run through various regression with the permutations of fixed sbar or fixed alpha
## this is estimating both sbar and alpha
if(length(input.alpha) == 0 && length(input.sbar) == 0){
for(i in 1:length(Smean)){
lSminus <- log(Smean[i] + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period) + (lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
loglik[i] <- glmfit$deviance
}
sbar <- Smean[which.min(loglik)]
lSminus <- log(sbar + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period)+ (lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
beta <- exp(head(coef(glmfit),-1))
alpha <- tail(coef(glmfit),1)
}
if(length(input.alpha) == 1 && length(input.sbar) == 0){
for(i in 1:length(Smean)){
lSminus <- log(Smean[i] + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period) + offset(alpha*lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
loglik[i] <- glmfit$deviance
}
sbar <- Smean[which.min(loglik)]
lSminus <- log(sbar + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period)+ offset(alpha*lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
beta <- exp(coef(glmfit))
}
if(length(input.alpha) == 0 && length(input.sbar) == 1){
sbar <- sbar * mean(pop)
lSminus <- log(sbar + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period) + (lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
beta <- exp(head(coef(glmfit),-1))
alpha <- tail(coef(glmfit),1)
}
if(length(input.alpha) == 1 && length(input.sbar) == 1){
sbar <- sbar * mean(pop)
lSminus <- log(sbar + Zminus)
glmfit <- glm(Inew ~ -1 +as.factor(period)+ offset(alpha*lIminus) + offset(lSminus),
family=eval(parse(text=family))(link=link))
beta <- exp(coef(glmfit))
}
if(seasonality == 'none'){
beta[2] <- beta[1]
beta <- mean(beta)
period <- rep(1,nrow(data)-1)
}
## generate confidence intervals, calculate AIC, and combine into a DF
confinterval <- suppressMessages(confint(glmfit))
continterval <- confinterval[1:length(unique(period)),]
betalow <- exp(confinterval[,1])
betahigh <- exp(confinterval[,2])
glmAIC <- AIC(glmfit)
contact <- as.data.frame(cbind('time'=seq(1,length(beta[period]),1),
betalow[period],beta[period],betahigh[period]),row.names=F)
names(contact) <- c('time','betalow','beta','betahigh')
contact <- head(contact,52/IP)
## est initial conditions using determinsitic skeleton
S <- rep(0,length(data$cases))
I <- rep(0,length(data$cases))
nsample <- 30
inits.grid <- expand.grid(
S0 = seq(0.01*mean(pop), 0.1*mean(pop), length=nsample),
I0 = seq(0.01*1e-3*mean(pop), 1*1e-3*mean(pop), length=nsample)
)
## profile over IC combos and evaluate using least squares
if(inits.fit == TRUE){
inits.res <- rep(NA,nsample*nsample)
for(it in 1:nrow(inits.grid)){
S0 <- inits.grid[it,1]
I0 <- inits.grid[it,2]
S[1] <- S0
I[1] <- I0
for (t in 2:(nrow(data))){
lambda <- min(S[t-1],unname(beta[period[t-1]] * S[t-1] * (I[t-1])^alpha))
#if(lambda < 1 || is.nan(lambda) == T){lambda <- 0}
if(is.nan(lambda) == T){lambda <- 0}
I[t] <- lambda
if(epidemics == 'cont'){
I[t] <- I[t]
}
if(epidemics == 'break'){
t0s <- epitimes(data,threshold)$start
if(t %in% t0s){
I[t] <- adj.rho[t]*data$cases[t]
}
}
S[t] <- max(S[t-1] + data$births[t-1] - I[t],0)
}
inits.res[it] <- sum((I - data$cases*adj.rho)^2)
}
inits <- inits.grid[which.min(inits.res),]
inits.grid$S0 <- inits.grid$S0/mean(pop)
inits.grid$I0 <- inits.grid$I0/mean(pop)
inits.grid$log10LS <- log10(inits.res)
S_start <- inits[[1]]
I_start <- inits[[2]]
}else{
S_start <- sbar + Z[1]
I_start <- adj.rho[1]*datacopy$cases[1]
}
## generate ICS either by profile or by standard formulation
IC <- c(S_start,I_start)
if(any(IC < 0 )){
warning('One (or both) initial condition is zero, try fixing or increasing sbar')
}
## print estimates so far
## now we forward simulate the model
print(c('alpha'=unname(signif(alpha,2)),
'mean beta'=unname(signif(mean(beta),3)),
'mean rho' =unname(signif(mean(1/adj.rho),3)),
'mean sus' =unname(signif(sbar,3)),
'prop. init. sus.' =unname(signif(S_start/mean(pop),3)),
'prop. init. inf.' =unname(signif(I_start/mean(pop),3))
))
## create empty matrixes for simulations
nsim <- nsim
res <- matrix(0,length(data$cases),nsim)
Sres <- matrix(0,length(data$cases),nsim)
## simulate the model nsim times
for(ct in 1:nsim){
S <- rep(0,length(data$cases))
I <- rep(0,length(data$cases))
S[1] <- S_start
I[1] <- I_start
for (t in 2:(nrow(data))){
if(pred == 'step-ahead'){
lambda <- min(S[t-1],unname(beta[period[t-1]] * S[t-1] * (adj.rho[t-1]*data$cases[t-1])^alpha))
}
if(pred == 'forward'){
I <- I
lambda <- min(S[t-1],unname(beta[period[t-1]] * S[t-1] * (I[t-1])^alpha))
}
#if(lambda < 1 || is.nan(lambda) == T){lambda <- 0}
if(is.nan(lambda) == T){lambda <- 0}
if(method == 'deterministic'){
I[t] <- lambda * rnorm( n = 1, mean = 1, sd=mul.noise.sd)
if(I[t] < 0 && lambda >= 0 ){
warning('infected overflow -- reduce multiplicative noise sd')
}
}
if(method == 'negbin'){
I[t] <- rnbinom(n=1,mu=lambda,size=I[t-1]+1e-10)
}
if(method == 'pois'){
I[t] <- rpois(n=1,lambda=lambda)
}
if(epidemics == 'cont'){
I[t] <- I[t]
}
if(epidemics == 'break'){
t0s <- epitimes(data,threshold)$start
if(t %in% t0s){
I[t] <- adj.rho[t]*data$cases[t]
}
}
S[t] <- max(S[t-1] + data$births[t-1] - I[t] + rnorm(n=1,mean=0,sd=add.noise.sd),0)
if(S[t] < 0 && (S[t-1] + data$births[t-1] - I[t]) >0 ){
warning('susceptible overflow -- reduce additive noise sd')
}
}
res[,ct] <- I / adj.rho
Sres[,ct] <- S
}
res[is.nan(res)] <- 0
res[res < 1] <- 0
## calculate SD, mean
res <- as.data.frame(res)
Sres <- as.data.frame(Sres)
#res$mean <- apply(res, 1, function(row) mean(row[-1],na.rm=T))
res$mean <- rowMeans(res,na.rm=T)
res$sd <- apply(res, 1, function(row) sd(row[-1],na.rm=T))
res$time <- data$time
Sres$mean <- rowMeans(Sres,na.rm=T)
Sres$sd <- apply(Sres, 1, function(row) sd(row[-1],na.rm=T))
Sres$time <- data$time
res$cases <- data$cases
obs <- res$cases
pred <- res$mean
fit <- lm(pred ~ obs)
rsquared <- signif(summary(fit)$adj.r.squared, 2)
## store everything into a list
return(list('X'=X,'Y'=Y,'Yhat' =Yhat, 'contact'=contact,'period'=period,
'glmfit'=glmfit, 'AIC'=glmAIC,
'beta'=head(beta[period],52/IP),'rho'=adj.rho,'pop'=pop,
'Z'=Z,'sbar'=sbar,'alpha'=alpha,
'res'=res,'simS'=Sres,'loglik'=loglik,'Smean'=Smean,
'nsim'=nsim,'rsquared'=rsquared,
'inits.fit'=inits.fit,'time'=data$time,
'inits.grid'=inits.grid,'inits'=IC))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.