In bbolker/fitsir: SIR fitting tools

library(plyr)
library(dplyr)
library(tidyr)
library(ggplot2); theme_set(theme_bw())
library(devtools)
load_all("..")

fn <- "fitLHS.rda"
fn2 <- "fitLHS2.rda"
fn3 <- "fitLHS3.rda"
fn4 <- "fitLHS4.rda"
## L2 uses L1 as starting values to fit the curve
## L3 uses fitsir.optim based on nll sensitivity
## L4 is like L2 but it uses L3 as starting values
L1 <- load(fn)  ## contains p (starting values), [fs]{pars,.nll,.SSQ}
L2 <- load(fn2)
L3 <- load(fn3) ## npars, n.nll, n.SSQ
L4 <- load(fn4) ## npars2, n.nll2, n.SSQ2

I performed 500 simulations based on these parameters:

colnames(p) <- c("log.beta", "log.gamma", "log.N", "logit.i")
pairs(~log.beta+log.gamma+log.N+logit.i, data = as.data.frame(p),
      gap=0,cex=0.5)

Negative log-likelihood values:

dd <- rbind(data.frame(method="fitsir",NLL1=f.nll, NLL2 = f.nll2),
      data.frame(method="fitsir.optim",NLL1=s.nll, NLL2 = s.nll2),
      data.frame(method="fitsir.optim.nll", NLL1 = n.nll, NLL2 = n.nll2)) %>%
    gather(run,NLL,-method) %>%
    group_by(method,run) %>%
    mutate(r=rank(NLL))
brks <- c(150,200,225,250)
ggplot(dd,aes(r,NLL,colour=method,linetype=run))+geom_line()+
    labs(x="rank",y="negative log-likelihood")+
    scale_colour_brewer(palette="Set1")+
  geom_hline(data=data.frame(brks),aes(yintercept=brks),
             linetype=2,col="gray")

How many NA values?

with(dd,table(is.na(NLL),run,method))

par(mfrow = c(1,3))
ylim <- c(-30, 30)
matplot(apply(fpars, 2, sort), type = "l", ylab = "param value", main = "fitsir", ylim = ylim)
text(8,tail(fpars,1),colnames(fpars),col=1:4,adj=-1)
matplot(apply(spars, 2, sort), type = "l", ylab = "param value", main = "fitsir.optim", ylim = ylim)
text(8,tail(fpars,1),colnames(fpars),col=1:4,adj=-1)
matplot(apply(npars, 2, sort), type = "l", ylab = "param value", main = "fitsir.optim.nll", ylim = ylim)
text(8,tail(fpars,1),colnames(fpars),col=1:4,adj=-1)

Let's try the Raue et al graph:

(experimental)

which is which?

tmpfun1 <- function(resList,pstart=p,
                    xlims=c(-20,20),
                    ylims=c(-15,25),
                    cex=0.4,
                    plot.start=TRUE,
                    plot.end=TRUE,
                    scol=adjustcolor("gray",alpha=0.2)) {
  ##xlims <- range(c(p[,1],npars[,1]),na.rm=TRUE)
  ## ylims <- range(c(p[,2],npars[,2]),na.rm=TRUE)
  ## pref <- "f"; pstart <- get(paste0(pref,"pars"))
  pend <- resList$pars
  cc <- cut(resList$nll,brks)
  cols <- adjustcolor(c("black","red","blue"),alpha=0.8)
  if (plot.start) plot(pstart[,1],pstart[,2],
       cex=cex,pch=1,xlim=xlims,ylim=ylims,
       col=cols[cc]) else
  plot(pend[,1],pend[,2],pch=2,cex=0.4,col=cols[cc],
       xlim=xlims,ylim=ylims)
  if (plot.start && plot.end) {
     segments(pstart[,1],pstart[,2],pend[,1],pend[,2],
           col=scol)
     points(pend[,1],pend[,2],pch=2,cex=0.4,col=cols[cc])
  }
}
par(mfrow = c(1,1))
fList <- list(pars=spars,nll=s.nll)
tmpfun1(fList,xlim=c(-5,8),ylim=c(-10,10),cex=0.7)
tmpfun1(fList,xlim=c(-3,6),ylim=c(-5,8),cex=0.7,
        plot.end=FALSE)
tmpfun1(fList,xlims=c(-6,6),ylims=c(-8,8),cex=0.7,
        plot.start=FALSE)

#pairs(fList$pars,col=cols[cc],gap=0,cex=0.5)
#pairs(p,col=cols[cc],gap=0,cex=0.5)

#library(rgl)
#plot3d(fpars[,1], fpars[,3], fpars[,4], col = cols[cc], radius = 0.1, type = "s")

tmpfun <- function(n.sample, i1, i2){
    set.seed(101)
    na.vec <- which(is.na(fpars[,1]))
    sim <- c(1:500)
    s.n <- sample(sim[-na.vec], n.sample)
    p.sample <- p[s.n,]
    fpars.sample <- fpars[s.n,]
    plot(p.sample[,c(i1,i2)], xlim = c(-2, 8), ylim = c(-2, 6))
    points(fpars.sample[,c(i1,i2)], col = 2)
    arrows(p.sample[,i1], p.sample[,i2], x1=fpars.sample[,i1], y1 = fpars.sample[,i2], col = "Gray")
}

par(mfrow = c(1,1))
tmpfun(50, 1, 2)

Let's look at fitsir.optim only. I'm going to sort out the good fits:

spars.good <- spars[(s.nll < 169),]
s.nll.good <- s.nll[(s.nll < 169)]

This is quite interesting...

pairs(~log.beta+log.gamma+log.N+logit.i, data = as.data.frame(spars.good),
      gap=0,cex=0.5)

Let's try plotting all of them:

par(mfrow = c(1,1))
hist(s.nll.good)

bombay2 <- setNames(bombay, c("tvec", "count"))
tmpfun2 <- function(i, pars){
    I <- SIR.detsim(bombay2$tvec, trans.pars(pars[i,]))
}
library(reshape2)
tmpfun3 <- function(n, pars){
    good.sims <- list()
    good.sims <- lapply(c(1:n),function(i){data.frame(tvec = bombay2$tvec, count = tmpfun2(i, pars))})
    mL <- melt(good.sims, id.vars = "tvec")
ggplot(bombay2, aes(tvec, count)) + geom_point() +
    geom_line(data = mL, aes(tvec, value, group = L1), col = "Gray")
}

n <- nrow(spars.good)
tmpfun3(n, spars.good)

Really good fits:

min(s.nll, na.rm = TRUE)
spars.good2 <- spars[(s.nll < 167.7),]
s.nll.good2 <- s.nll[(s.nll < 167.7)]
hist(s.nll.good2)
pairs(~log.beta+log.gamma+log.N+logit.i, data = as.data.frame(spars.good2),
      gap=0,cex=0.5)

Reparameterization:

spars.good2 <- transform(spars.good2,
          log.R0 = log(exp(log.beta)/exp(log.gamma)),
          log.r = log(exp(log.beta)-exp(log.gamma)),
          log.S0 = log(exp(log.N) * (1 - plogis(logit.i))),
          log.I0 = log(exp(log.N) * plogis(logit.i)))

library(GGally)
ggpairs(spars.good2[,-(1:4)])

Looks really different from the fits using fitsir

fpars.good <- fpars[(f.nll < 167.7),]
f.nll.good <- f.nll[(f.nll < 167.7)]
fpars.good <- transform(fpars.good,
          log.R0 = log(exp(log.beta)/exp(log.gamma)),
          log.r = log(exp(log.beta)-exp(log.gamma)),
          log.S0 = log(exp(log.N) * (1 - plogis(logit.i))),
          log.I0 = log(exp(log.N) * plogis(logit.i)))

ggpairs(fpars.good[,-(1:4)])

Look at the hessians:

tmphess <- function(pars){
    eigen <- try(eigen(findHess(bombay2, pars)))
    if(!is(eigen,"try-error")){
        return(all(eigen$values>0))
    }else{
        return(NA)
    }
}

(hesst <- apply(spars.good2, 1, function(x) tmphess(x[1:4])))
cols <- adjustcolor(c("black","red","blue"),alpha=0.8)
cc <- sapply(hesst, FUN = function(x){
    if(is.na(x)){
        return(3)
    }else if(x){
        return(1)
    }else{
        return(2)
    }
})

pairs(spars.good2[,1:4], col = cols[cc],gap=0,cex=0.9)

I don't think this plot helps... Trying some nwe things...

s.nll.good2 <- s.nll.good2[!is.na(s.nll.good2)]
spars.good2 <- spars.good2[!is.na(s.nll.good2),]

min.ind <- which.min(s.nll.good2)
min.pars <- as.numeric(spars.good2[min.ind,][1:4])
names(min.pars) <- c("log.beta", "log.gamma", "log.N", "logit.i")

g <- SIR.logLik(incidence = FALSE)
tmpf <- function(log.beta,log.gamma,basepars=fpars,data=bombay2,
                 useSSQ=FALSE) {
    pars <- basepars
    pars[c("log.beta","log.gamma")] <- c(log.beta,log.gamma)
    if (useSSQ) return(findSSQ(data,pars)$SSQ)
    return(g(pars,data$count))
}

tmpf2 <- function(log.beta,log.gamma,basepars=fpars,data=bombay2,
                 useSSQ=FALSE) {
    pars <- basepars
    pars[c("log.beta","log.gamma")] <- c(log.beta,log.gamma)
    fit <- fitsir.optim(bombay2, start = pars)
    return(coef(fit))
}

library(emdbook)
fn <- "fitLHS_explore.rda"
if (file.exists(fn)) {
    load(fn)
} else {
    ## 10% slice 
    cc1a <- curve3d(tmpf(x,y,basepars=min.pars),
                  xlim=min.pars["log.beta"]*c(0.8,1.2),
                  ylim=min.pars["log.gamma"]*c(0.8,1.2),
                  n=c(241,241))
    ## 5% slice
    cc2a <- curve3d(tmpf(x,y,basepars=min.pars),
                  xlim=min.pars["log.beta"]*c(0.95,1.05),
                  ylim=min.pars["log.gamma"]*c(0.95,1.05),
                  n=c(121,121))

    ## 1% slice
    cc3a <- curve3d(tmpf(x,y,basepars=min.pars),
                  xlim=min.pars["log.beta"]*c(0.99,1.01),
                  ylim=min.pars["log.gamma"]*c(0.99,1.01),
                  n=c(121,121))


    save("cc1a", "cc2a", "cc3a", file=fn)
}

image(cc1a$z)
plot(cc1a$z[34,])
image(cc2a$z)

library(rgl)
with(cc2a, {
    image(x, y, z, xlab = "log.beta")
    #persp3d(x, y, z, col = "blue")
})

tmpf3 <- function(log.beta,log.gamma,basepars=fpars,data=bombay2,
                 useSSQ=FALSE) {
    pars <- basepars
    pars[c("log.beta","log.gamma")] <- c(log.beta,log.gamma)
    tpars <- trans.pars(pars)
    I <- SIR.detsim(bombay2$tvec, tpars)
    plot(bombay2)
    lines(I)
}

tmpf3(4.099, 3.95, basepars = min.pars)
abline(h = 3.95)
abline(v = 4.099)