knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
library(CoV19) library(ggplot2) library(gridExtra)
# estimate mu, sigma and scaling f2<-function(x, theta) { m<-theta[1]; s<-theta[2]; a<-theta[3]; a*exp(-0.5*((x-m)/s)^2) } f2<-function(x, theta) { m<-theta[1]; s<-theta[2]; a<-theta[3]; x <- x-m a*exp(-0.5*(x/s)^2) } # fix m f3<-function(x, theta, m) { s<-theta[1]; a<-theta[2]; a*exp(-0.5*((x-m)/s)^2) } # fix s f4<-function(x, theta, s) { m<-theta[1]; a<-theta[2]; a*exp(-0.5*((x-m)/s)^2) } f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; a*exp(-0.5*((x-m)/s)^2) }
fitf1<- function(subx, m=5, a=1, s=0.08, xshift=0){ # m is actual m not log(day) f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; ifelse(s<0,-1,1)*a*exp(-0.5*((x-m)/s)^2) } f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; x <- x-m ifelse(s<0,-1,1)*a*exp(-0.5*(x/s)^2) } for(m in c(m, seq(.5,2,.1)*m)){ fit1<-try(nls(y~f1(x,c(m,s),a), data=subx, start=list(m=m, s=s), control = list(maxiter = 500), algorithm="port", lower=c(0,0.03)), silent=TRUE) if(!inherits(fit1, "try-error")) break } return(fit1) } signx <- function(x) return(ifelse(x<0,-1,1)) getcis <- function(fit.best, subx, m=20, s=0.2, sig=0.05, level=0.05, xshift=0){ m <- coef(fit.best)[1] s <- coef(fit.best)[2] sig <- max(summary(fit.best)$sigma, sig) err.best <- residuals(fit.best) n <- length(err.best) val=c() aval <- seq(.5, 10*coef(fit.best)[3],0.1) for(a in aval){ fit <- fitf1(subx, a=a, m=m, s=s, xshift=xshift) err <- residuals(fit) rss=sum(err^2, na.rm=TRUE) Fval <- (rss-sum(err.best^2, na.rm=TRUE))/sig^2 tauval <- signx(a-coef(fit.best)[3])*sqrt(Fval) if(!is.na(tauval) && tauval>3 && tauval<5) break #catch bad fits with < 5 if(is.na(tauval)) next val <- rbind(val, c(rss=rss, tau=tauval, coef(fit),a=a)) } val <- val[val[,"tau.a"]> -5 & val[,"tau.a"]< 5,] mina <- max(which(val[,"tau.a"]<qt(level/2,n-3)),1) maxa <- min(which(val[,"tau.a"]>qt(1-level/2,n-3)),nrow(val)) ret <- cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")]) if(ret[1,2]<m) ret[,2] <- NA if(ret[1,1]>m) ret[,2] <- NA return(cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")])) }
# Get the fit getfit <- function(x, posmin=100, maxh=200, xshift=0, fun="f2", s.fix=0.08, sig=0.05){ fil <- 7 #must be odd x$new.cases.smooth <- stats::filter(x$new.cases, rep(1/fil,fil)) x$new.cases.smooth <- c(rep(NA,floor(fil/2)),zoo::rollapply(x$new.cases, fil, mean, na.rm=TRUE), rep(NA,floor(fil/2))) day1 <- min(which(x$positive>posmin)) x$day <- 1:nrow(x) - day1 + 1 subx <- subset(x, positive>posmin) subx$day <- subx$day+xshift subx$x <- log(subx$day) subx$y <- subx$new.cases.smooth*subx$day maxy <- max(subx$y, na.rm=TRUE) subx$y <- subx$y/maxy for(s in c(0.05,.1,1)){ for(m in seq(10,200,25)+xshift){ if(fun=="f2") fit<-try(nls(y~f2(x,c(m,s,a)), data=subx, start=list(m=log(m), s=s, a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE) if(fun=="f4") fit<-try(nls(y~f4(x,c(m,a), s.fix), data=subx, start=list(m=log(m), a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE) if(!inherits(fit, "try-error")) break() } if(!inherits(fit, "try-error")) break() } if(inherits(fit, "try-error")) return(list(x=x, pred=NULL, fit=fit)) x$fitted <-c(rep(NA, day1-1), predict(fit, newdata=subx)*maxy/subx$day) pred <- data.frame(day=((nrow(x)+1):(nrow(x)+maxh))-day1+1+xshift, date=x$date[1]+nrow(x):(nrow(x)+maxh-1)) pred$x <- log(pred$day) pred$pred <- predict(fit, newdata=pred)*maxy/pred$day cm1=NA; cm2=NA confval1 <- try(confint(fit, level=0.95), silent=TRUE) if(inherits(confval1, "try-error") || summary(fit)$sigma<0.03 || any(is.na(confval1)) ){ confval <- try(getcis(fit.best=fit, subx=subx, level=0.05, xshift=xshift, sig=sig)) if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,1])))) confval[,1] <- confval1[,1] if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,2])))) confval[,2] <- confval1[,2] }else{ confval <- confval1 } if(!inherits(confval, "try-error")){ if(fun=="f2"){ cm1 <- sum(f2(log(pred$day), confval[,1])*maxy/pred$day) cm2 <- sum(f2(log(pred$day), confval[,2])*maxy/pred$day) pred$pred.low <- f2(log(pred$day), confval[,1])*maxy/pred$day pred$pred.high <- f2(log(pred$day), confval[,2])*maxy/pred$day x$fitted.low <-c(rep(NA, day1-1), f2(subx$x, confval[,1])*maxy/subx$day) x$fitted.high <-c(rep(NA, day1-1), f2(subx$x, confval[,2])*maxy/subx$day) }else{ cm1 <- sum(f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day) cm2 <- sum(f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day) pred$pred.low <- f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day pred$pred.high <- f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day x$fitted.low <-c(rep(NA, day1-1), f4(subx$x, confval[,1], s.fix)*maxy/subx$day) x$fitted.high <-c(rep(NA, day1-1), f4(subx$x, confval[,2], s.fix)*maxy/subx$day) } } return(list(x=x, subx=subx, pred=pred, fit=fit, confval=confval, cm=sort(c(cm1, cm2)))) }
myfun <- function(data, reg, regname="", posmin=100, maxh=200, mindate="2020-02-15", maxdate="2020-09-01", xshift=0, bad.reg=NULL, fun="f2", s.fix=0.08, sig=0.05, fitit=TRUE){ mindate <- as.Date(mindate) maxdate <- as.Date(maxdate) library(dplyr) b <- data %>% subset(region %in% reg) %>% dplyr::group_by(date) %>% dplyr::summarize_if(is.numeric, function(x){ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))}) # This will not have the region column, so we add that back on if(missing(regname)) regname <- paste(reg, collapse="+") b$region <- regname #b<-subset(data, region==reg) if(max(b$positive)<500) return() day1 <- min(which(b$positive>posmin)) b$new.cases <- c(NA, diff(b$positive)) b$new.cases[b$new.cases<0] <- NA if(identical(reg, "Hubei China")) b$new.cases[b$new.cases>10000]=NA #hack if(identical(reg, "France")) b$new.cases[b$new.cases>10000]=NA #hack if(identical(reg, "Hungary")) b$new.cases[b$new.cases>200]=NA #hack cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE) fit <- getfit(b, posmin=posmin, maxh=maxh, xshift=xshift, fun=fun, s.fix=s.fix, sig=sig) if(!fitit | inherits(fit$fit, "try-error")){ b <- fit$x bm <- 1.05*max(b$new.cases, na.rm=TRUE) popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6) p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() + ggtitle(paste0(regname, "\nTo date deaths ", round(max(b$death, na.rm=TRUE)/(popmill), digits=2), "/million")) + scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", date_labels="%b") return(list(p=p)) } # updated with fitted b <- fit$x pred <- fit$pred bm <- 1.05*max(b$new.cases.smooth, pred$pred, pred$pred.high, na.rm=TRUE) p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() + geom_point(data=subset(b, positive>posmin), col="blue") + geom_line(aes(x=date, y=fitted), data=b) + geom_line(aes(x=date, y=pred), data=pred, color="red") + ggtitle(regname) + scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", date_labels="%b") npos <- max(b$positive, na.rm=TRUE)+sum(pred$pred, na.rm=TRUE) ndeaths.p <- max(b$death, na.rm=TRUE)+ sum(b$new.cases[(nrow(b)-7):nrow(b)], na.rm=TRUE)*cfr ndeaths <- ndeaths.p + sum(pred$pred, na.rm=TRUE)*cfr if(!inherits(fit$confval, "try-error") && !reg%in%bad.reg){ #mode of log normal is mode.lims <- sort(round(exp(fit$confval[1,]-fit$confval[2,]^2))) df <- data.frame(x=b$date[1]+day1+mode.lims-xshift-1, y=c(bm,bm)) #p <- p + geom_line(data=df, aes(x=x, y=y), size=5, color="grey") if(fun=="f2") cfs <- coef(fit$fit) if(fun=="f4") cfs <- c(coef(fit$fit)[1], s.fix, coef(fit$fit)[2]) popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6) p <- p + ggtitle(paste0(regname, "\nEst Deaths = ", round(ndeaths/popmill, digits=2), "/million")) fit$pred$new.cases.smooth <- fit$pred$pred p <- p + geom_ribbon(data=fit$pred, aes(x=date, ymin = pred.low, ymax = pred.high), fill = "pink") + geom_line(data=fit$pred, aes(x=date, y=pred.low), linetype="dashed", color="red") + geom_line(data=fit$pred, aes(x=date, y=pred.high), linetype="dashed", color="red") + geom_line(data=fit$pred, aes(x=date, y=pred), color="red") # p <- p + annotate("text", x=mindate+1, y=Inf, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)," ", round(max(b$death, na.rm=TRUE)/(popmill*1000),digits=2), "per thousand"), hjust=0, vjust=1) }else{ cfs <- coef(fit$fit) p <- p + ggtitle(paste(regname, "\nPoint estimates: Positives =", round(npos), "Deaths =", round(ndeaths), "CFR =", round(100*cfr,digits=2), "\n", "s = ", round(cfs[2],3), "sig = ", round(summary(fit$fit)$sigma,3))) p <- p + annotate("text", x=mindate+1, y=bm, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)), hjust=0) } return(list(p=p, fit=fit$fit, confint=fit$confval, data=b)) }
stateplot <- function(reg, posmin=100, maxh=150, fitit=TRUE, data=states, sec.axis.name="new cases/million"){ #regfull <- state.name[match(reg, state.abb)] popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2) p <- myfun(data, reg, posmin=posmin, maxh=maxh, fitit=fitit) if(inherits(p, "list")){ p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2) p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name=sec.axis.name)) return(p$p) } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.