knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
library(CoV19)
library(ggplot2)
library(gridExtra)
# estimate mu, sigma and scaling
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  a*exp(-0.5*((x-m)/s)^2)
}
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  x <- x-m
  a*exp(-0.5*(x/s)^2)
}
# fix m
f3<-function(x, theta, m) {
  s<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
# fix s
f4<-function(x, theta, s) {
  m<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
f1<-function(x, theta, a) {
  m<-theta[1]; s<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
fitf1<- function(subx, m=5, a=1, s=0.08, xshift=0){
  # m is actual m not log(day)
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    ifelse(s<0,-1,1)*a*exp(-0.5*((x-m)/s)^2)
  }
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    x <- x-m
    ifelse(s<0,-1,1)*a*exp(-0.5*(x/s)^2)
  }
  for(m in c(m, seq(.5,2,.1)*m)){
    fit1<-try(nls(y~f1(x,c(m,s),a), data=subx, start=list(m=m, s=s), control = list(maxiter = 500), algorithm="port", lower=c(0,0.03)), silent=TRUE)
    if(!inherits(fit1, "try-error")) break
  }
  return(fit1)
}

signx <- function(x) return(ifelse(x<0,-1,1))

getcis <- function(fit.best, subx, m=20, s=0.2, sig=0.05, level=0.05, xshift=0){
  m <- coef(fit.best)[1]
  s <- coef(fit.best)[2]
  sig <- max(summary(fit.best)$sigma, sig)
  err.best <- residuals(fit.best)
  n <- length(err.best)
  val=c()
  aval <- seq(.5, 10*coef(fit.best)[3],0.1)
  for(a in aval){
    fit <- fitf1(subx, a=a, m=m, s=s, xshift=xshift)
    err <- residuals(fit)
    rss=sum(err^2, na.rm=TRUE)
    Fval <- (rss-sum(err.best^2, na.rm=TRUE))/sig^2
    tauval <- signx(a-coef(fit.best)[3])*sqrt(Fval)
    if(!is.na(tauval) && tauval>3 && tauval<5) break #catch bad fits with < 5
    if(is.na(tauval)) next
    val <- rbind(val, c(rss=rss, tau=tauval,
                        coef(fit),a=a))
  }
  val <- val[val[,"tau.a"]> -5 & val[,"tau.a"]< 5,]
  mina <- max(which(val[,"tau.a"]<qt(level/2,n-3)),1)
  maxa <- min(which(val[,"tau.a"]>qt(1-level/2,n-3)),nrow(val))
  ret <- cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")])
  if(ret[1,2]<m) ret[,2] <- NA
  if(ret[1,1]>m) ret[,2] <- NA
  return(cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")]))
}
# Get the fit
getfit <- function(x, posmin=100, maxh=200, xshift=0, fun="f2", s.fix=0.08, sig=0.05){
  fil <- 7 #must be odd
  x$new.cases.smooth <- stats::filter(x$new.cases, rep(1/fil,fil))
  x$new.cases.smooth <- c(rep(NA,floor(fil/2)),zoo::rollapply(x$new.cases, fil, mean, na.rm=TRUE), rep(NA,floor(fil/2)))
  day1 <- min(which(x$positive>posmin))
  x$day <- 1:nrow(x) - day1 + 1

  subx <- subset(x, positive>posmin)
  subx$day <- subx$day+xshift
  subx$x <- log(subx$day)
  subx$y <- subx$new.cases.smooth*subx$day
  maxy <- max(subx$y, na.rm=TRUE)
  subx$y <- subx$y/maxy
  for(s in c(0.05,.1,1)){
    for(m in seq(10,200,25)+xshift){
      if(fun=="f2") fit<-try(nls(y~f2(x,c(m,s,a)), data=subx, start=list(m=log(m), s=s, a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(fun=="f4") fit<-try(nls(y~f4(x,c(m,a), s.fix), data=subx, start=list(m=log(m), a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(!inherits(fit, "try-error")) break()
    }
    if(!inherits(fit, "try-error")) break()
  }
  if(inherits(fit, "try-error")) return(list(x=x, pred=NULL, fit=fit))
  x$fitted <-c(rep(NA, day1-1), predict(fit, newdata=subx)*maxy/subx$day)

  pred <- data.frame(day=((nrow(x)+1):(nrow(x)+maxh))-day1+1+xshift, date=x$date[1]+nrow(x):(nrow(x)+maxh-1))
  pred$x <- log(pred$day)
  pred$pred <- predict(fit, newdata=pred)*maxy/pred$day

  cm1=NA; cm2=NA
  confval1 <- try(confint(fit, level=0.95), silent=TRUE)
  if(inherits(confval1, "try-error") || summary(fit)$sigma<0.03 || any(is.na(confval1)) ){ 
    confval <- try(getcis(fit.best=fit, subx=subx, level=0.05, xshift=xshift, sig=sig))
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,1])))) confval[,1] <- confval1[,1]
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,2])))) confval[,2] <- confval1[,2]
  }else{ confval <- confval1 }

  if(!inherits(confval, "try-error")){
    if(fun=="f2"){
      cm1 <- sum(f2(log(pred$day), confval[,1])*maxy/pred$day)
      cm2 <- sum(f2(log(pred$day), confval[,2])*maxy/pred$day)
      pred$pred.low <- f2(log(pred$day), confval[,1])*maxy/pred$day
      pred$pred.high <- f2(log(pred$day), confval[,2])*maxy/pred$day
      x$fitted.low <-c(rep(NA, day1-1), f2(subx$x, confval[,1])*maxy/subx$day)
      x$fitted.high <-c(rep(NA, day1-1), f2(subx$x, confval[,2])*maxy/subx$day)
    }else{
      cm1 <- sum(f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day)
      cm2 <- sum(f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day)
      pred$pred.low <- f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day
      pred$pred.high <- f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day
      x$fitted.low <-c(rep(NA, day1-1), f4(subx$x, confval[,1], s.fix)*maxy/subx$day)
      x$fitted.high <-c(rep(NA, day1-1), f4(subx$x, confval[,2], s.fix)*maxy/subx$day)
    }

  }

  return(list(x=x, subx=subx, pred=pred, fit=fit, confval=confval, cm=sort(c(cm1, cm2))))
}
myfun <- function(data, reg, regname="", posmin=100, maxh=200, mindate="2020-02-15", maxdate="2020-09-01", xshift=0, bad.reg=NULL, fun="f2", s.fix=0.08, sig=0.05, fitit=TRUE){
  mindate <- as.Date(mindate)
  maxdate <- as.Date(maxdate)
  library(dplyr)
  b <- data %>% 
      subset(region %in% reg) %>%
      dplyr::group_by(date) %>%
      dplyr::summarize_if(is.numeric, function(x){ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))})
    # This will not have the region column, so we add that back on
  if(missing(regname)) regname <- paste(reg, collapse="+")
  b$region <- regname

  #b<-subset(data, region==reg)
  if(max(b$positive)<500) return()
  day1 <- min(which(b$positive>posmin))

  b$new.cases <- c(NA, diff(b$positive))
  b$new.cases[b$new.cases<0] <- NA
  if(identical(reg, "Hubei China")) b$new.cases[b$new.cases>10000]=NA #hack 
  if(identical(reg, "France")) b$new.cases[b$new.cases>10000]=NA #hack
  if(identical(reg, "Hungary")) b$new.cases[b$new.cases>200]=NA #hack
  cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE)

  fit <- getfit(b, posmin=posmin, maxh=maxh, xshift=xshift,
                fun=fun, s.fix=s.fix, sig=sig)
  if(!fitit | inherits(fit$fit, "try-error")){
    b <- fit$x
    bm <- 1.05*max(b$new.cases, na.rm=TRUE)
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() +
      ggtitle(paste0(regname, "\nTo date deaths ", round(max(b$death, na.rm=TRUE)/(popmill), digits=2), "/million")) +
      scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                   date_labels="%b")
    return(list(p=p))
  }

  # updated with fitted
  b <- fit$x
  pred <- fit$pred
  bm <- 1.05*max(b$new.cases.smooth, pred$pred, pred$pred.high, na.rm=TRUE)

  p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() + 
    geom_point(data=subset(b, positive>posmin), col="blue") +
    geom_line(aes(x=date, y=fitted), data=b) +
    geom_line(aes(x=date, y=pred), data=pred, color="red") +
    ggtitle(regname) +
    scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                 date_labels="%b")

  npos <- max(b$positive, na.rm=TRUE)+sum(pred$pred, na.rm=TRUE)
  ndeaths.p <- max(b$death, na.rm=TRUE)+
    sum(b$new.cases[(nrow(b)-7):nrow(b)], na.rm=TRUE)*cfr
  ndeaths <- ndeaths.p +
    sum(pred$pred, na.rm=TRUE)*cfr

  if(!inherits(fit$confval, "try-error") && !reg%in%bad.reg){
    #mode of log normal is 
    mode.lims <- sort(round(exp(fit$confval[1,]-fit$confval[2,]^2)))
    df <- data.frame(x=b$date[1]+day1+mode.lims-xshift-1, y=c(bm,bm))
    #p <- p + geom_line(data=df, aes(x=x, y=y), size=5, color="grey")
    if(fun=="f2") cfs <- coef(fit$fit)
    if(fun=="f4") cfs <- c(coef(fit$fit)[1], s.fix, coef(fit$fit)[2])
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    p <- p +
      ggtitle(paste0(regname, 
                    "\nEst Deaths = ", 
                    round(ndeaths/popmill, digits=2), "/million"))
    fit$pred$new.cases.smooth <- fit$pred$pred
    p <- p +
      geom_ribbon(data=fit$pred, aes(x=date, ymin = pred.low, ymax = pred.high), fill = "pink") +
      geom_line(data=fit$pred, aes(x=date, y=pred.low), linetype="dashed", color="red") +
      geom_line(data=fit$pred, aes(x=date, y=pred.high), linetype="dashed", color="red") + 
      geom_line(data=fit$pred, aes(x=date, y=pred), color="red")
   # p <- p + annotate("text", x=mindate+1, y=Inf, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)," ", round(max(b$death, na.rm=TRUE)/(popmill*1000),digits=2), "per thousand"), hjust=0, vjust=1)


  }else{
    cfs <- coef(fit$fit)

    p <- p +
      ggtitle(paste(regname, "\nPoint estimates: Positives =", round(npos),
                    "Deaths =", round(ndeaths), "CFR =",
                    round(100*cfr,digits=2), "\n",
                    "s = ", round(cfs[2],3), "sig = ", round(summary(fit$fit)$sigma,3)))
    p <- p + annotate("text", x=mindate+1, y=bm, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)), hjust=0)

  }

  return(list(p=p, fit=fit$fit, confint=fit$confval, data=b))
}
stateplot <- function(reg, posmin=100, maxh=150, fitit=TRUE, data=states, sec.axis.name="new cases/million"){
#regfull <- state.name[match(reg, state.abb)]
popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
p <- myfun(data, reg, posmin=posmin, maxh=maxh, fitit=fitit)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name=sec.axis.name))
 return(p$p)
}
}


eeholmes/CoV19 documentation built on Oct. 19, 2021, 10:59 a.m.