knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
library(CoV19) library(ggplot2) library(gridExtra)
abb.names1 <- c("newengland.abb", "midatlantic.abb", "encentralmidwest.abb", "wncentralmidwest.abb", "nsatlantic.abb", "ssatlantic.abb", "south.abb", "mtnnorth.abb", "mtnsw.abb", "westcoast.abb") abb.names2 <- c("northeast.abb", "atlantic.abb", "midwest.abb", "south.abb", "west.abb") newengland.abb <- c("ME","VT","NH","MA","RI","CT") midatlantic.abb <- c("NY","NJ","PA") encentralmidwest.abb <- c("OH", "IN", "IL", "WI", "MI") wncentralmidwest.abb <- c("MN", "MO", "ND", "SD", "IA", "NE", "KS") nsatlantic.abb <- c("DC", "DE", "VA", "WV", "MD") ssatlantic.abb <- c("GA", "FL", "SC", "NC") south.abb <- c("AL", "MS", "KY", "TN", "TX", "OK", "AR","LA") mtnnorth.abb <- c("ID", "MT", "WY") mtnsw.abb <- c("CO", "UT", "NV", "NM", "AZ") westcoast.abb <- c("WA", "OR", "CA") west.abb <- c(mtnnorth.abb, mtnsw.abb, westcoast.abb) midwest.abb <- c(encentralmidwest.abb, wncentralmidwest.abb) atlantic.abb <- c(nsatlantic.abb, ssatlantic.abb) northeast.abb <- c(newengland.abb, midatlantic.abb)
# estimate mu, sigma and scaling f2<-function(x, theta) { m<-theta[1]; s<-theta[2]; a<-theta[3]; a*exp(-0.5*((x-m)/s)^2) } f2<-function(x, theta) { m<-theta[1]; s<-theta[2]; a<-theta[3]; x <- x-m a*exp(-0.5*(x/s)^2) } # fix m f3<-function(x, theta, m) { s<-theta[1]; a<-theta[2]; a*exp(-0.5*((x-m)/s)^2) } # fix s f4<-function(x, theta, s) { m<-theta[1]; a<-theta[2]; a*exp(-0.5*((x-m)/s)^2) } f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; a*exp(-0.5*((x-m)/s)^2) }
fitf1<- function(subx, m=5, a=1, s=0.08){ # m is actual m not log(day) f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; ifelse(s<0,-1,1)*a*exp(-0.5*((x-m)/s)^2) } f1<-function(x, theta, a) { m<-theta[1]; s<-theta[2]; x <- x-m ifelse(s<0,-1,1)*a*exp(-0.5*(x/s)^2) } for(m in c(m, seq(.5,2,.1)*m)){ fit1<-try(nls(y~f1(x,c(m,s),a), data=subx, start=list(m=m, s=s), control = list(maxiter = 500), algorithm="port", lower=c(0,0.03)), silent=TRUE) if(!inherits(fit1, "try-error")) break } return(fit1) } signx <- function(x) return(ifelse(x<0,-1,1)) getcis <- function(fit.best, subx, m=20, s=0.2, sig=0.05, level=0.05){ if(coef(fit.best)[3] > 20) return(matrix(NA,3,2)) m <- coef(fit.best)[1] s <- coef(fit.best)[2] sig <- max(summary(fit.best)$sigma, sig) err.best <- residuals(fit.best) n <- length(err.best) val=c() aval <- seq(.5, 10*coef(fit.best)[3],0.1) for(a in aval){ fit <- fitf1(subx, a=a, m=m, s=s) err <- residuals(fit) rss=sum(err^2, na.rm=TRUE) Fval <- (rss-sum(err.best^2, na.rm=TRUE))/sig^2 tauval <- signx(a-coef(fit.best)[3])*sqrt(Fval) if(!is.na(tauval) && tauval>3 && tauval<5) break #catch bad fits with < 5 if(is.na(tauval)) next val <- rbind(val, c(rss=rss, tau=tauval, coef(fit),a=a)) } val <- val[val[,"tau.a"]> -5 & val[,"tau.a"]< 5,] mina <- max(which(val[,"tau.a"]<qt(level/2,n-3)),1) maxa <- min(which(val[,"tau.a"]>qt(1-level/2,n-3)),nrow(val)) ret <- cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")]) if(ret[1,2]<m) ret[,2] <- NA if(ret[1,1]>m) ret[,2] <- NA return(cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")])) }
# Get the fit getfit <- function(x, posmin=100, maxh=200, fun="f2", s.fix=0.08, sig=0.05, fitit=TRUE, fil=7, sub=1){ x$new.cases.smooth <- stats::filter(x$new.cases, rep(1/fil,fil)) x$new.cases.smooth <- c(rep(NA,floor(fil/2)),zoo::rollapply(x$new.cases, fil, mean, na.rm=TRUE), rep(NA,floor(fil/2))) day1 <- min(which(x$positive>posmin)) x$day <- 1:nrow(x) - day1 + 1 x$x <- log(x$day) subx <- subset(x, positive>posmin)[,c("date", "positive", "region", "new.cases", "new.cases.smooth", "day", "x")] subx <- na.omit(subx) subx$x <- log(subx$day) subx$y <- subx$new.cases.smooth*subx$day maxy <- max(subx$y, na.rm=TRUE) subx$y <- subx$y/maxy subrange <- seq(nrow(subx) %% sub, nrow(subx),sub) subrange <- subrange[subrange!=0] subx <- subx[subrange,] if(!fitit) return(list(x=x, subx=subx)) for(s in c(0.05,.1,1)){ for(m in seq(10,200,25)){ if(fun=="f2") fit<-try(nls(y~f2(x,c(m,s,a)), data=subx, start=list(m=log(m), s=s, a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE) if(fun=="f4") fit<-try(nls(y~f4(x,c(m,a), s.fix), data=subx, start=list(m=log(m), a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE) if(!inherits(fit, "try-error")) break() } if(!inherits(fit, "try-error")) break() } if(inherits(fit, "try-error")) return(list(x=x, pred=NULL, fit=fit)) tmp <- predict(fit, newdata=x)*maxy/x$day nNAs <- nrow(x)-length(tmp) # subx cuts off period where positive < posmin x$fitted <-c(rep(NA, nNAs), tmp) pred <- data.frame( day=max(x$day)+1:maxh, date=max(x$date)+1:maxh) pred$x <- log(pred$day) pred$pred <- predict(fit, newdata=pred)*maxy/pred$day cm1=NA; cm2=NA confval1 <- try(confint(fit, level=0.95), silent=TRUE) if(inherits(confval1, "try-error") || any(is.na(confval1)) ){ confval <- try(getcis(fit.best=fit, subx=subx, level=0.05, sig=sig)) if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,1])))) confval[,1] <- confval1[,1] if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,2])))) confval[,2] <- confval1[,2] }else{ confval <- confval1 } if(!inherits(confval, "try-error")){ if(fun=="f2"){ cm1 <- sum(f2(log(pred$day), confval[,1])*maxy/pred$day) cm2 <- sum(f2(log(pred$day), confval[,2])*maxy/pred$day) pred$pred.low <- f2(log(pred$day), confval[,1])*maxy/pred$day pred$pred.high <- f2(log(pred$day), confval[,2])*maxy/pred$day x$fitted.low <- f2(x$x, confval[,1])*maxy/x$day x$fitted.high <- f2(x$x, confval[,2])*maxy/x$day }else{ cm1 <- sum(f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day) cm2 <- sum(f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day) pred$pred.low <- f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day pred$pred.high <- f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day x$fitted.low <- f4(x$x, confval[,1], s.fix)*maxy/x$day x$fitted.high <- f4(x$x, confval[,2], s.fix)*maxy/x$day } } return(list(x=x, subx=subx, pred=pred, fit=fit, confval=confval, cm=sort(c(cm1, cm2)))) }
myfun <- function(data, reg, regname="", posmin=100, maxh=200, mindate="2020-02-15", maxdate="2020-12-01", bad.reg=NULL, fun="f2", s.fix=0.08, sig=0.05, fitit=FALSE, fil=7, sub=4){ mindate <- as.Date(mindate) maxdate <- as.Date(maxdate) library(dplyr) b <- data %>% subset(region %in% reg) %>% dplyr::group_by(date) %>% dplyr::summarize_if(is.numeric, function(x){ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))}) # This will not have the region column, so we add that back on if(missing(regname)) regname <- paste(reg, collapse="+") b$region <- regname b <- b[!is.na(b$positive),] #b<-subset(data, region==reg) if(max(b$positive, na.rm=TRUE)<500) return() day1 <- min(which(b$positive>posmin)) b$new.cases <- c(NA, diff(b$positive)) b$new.deaths <- c(NA, diff(b$death)) b$new.cases[b$new.cases<0] <- NA #if(identical(reg, "Hubei China")) b$new.cases[b$new.cases>10000]=NA #hack #if(identical(reg, "France")) b$new.cases[b$new.cases>10000]=NA #hack #if(identical(reg, "Hungary")) b$new.cases[b$new.cases>200]=NA #hack cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE) fit <- getfit(b, posmin=posmin, maxh=maxh, fun=fun, s.fix=s.fix, sig=sig, fitit=fitit, fil=fil, sub=sub) if(!fitit || inherits(fit$fit, "try-error") || all(is.na(fit$confval))){ b <- fit$x bm <- 1.05*max(b$new.cases, na.rm=TRUE) popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6, digits = 2) cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE) b2 <- subset(b, months(b$date) == "April") cfr.apr <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE) b2 <- subset(b, months(b$date) == "September") cfr.sep <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE) p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() + ggtitle(paste0(regname, "\n", "CFR Apr = ", round(100*cfr.apr,digits=2), " CFR Sep = ", round(100*cfr.sep,digits=2))) + scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", date_labels="%b") #y=bm lab <- paste( "\n\nDeaths ", max(b$death, na.rm=TRUE), " ", round(max(b$death, na.rm=TRUE)/(popmill*1000), digits=2), "per thousand\n", "population =", popmill, "million") p <- p + annotate("text", x=mindate+1, y=Inf, label=lab, hjust=0) return(list(p=p, cfr=cfr)) } # updated with fitted b <- fit$x pred <- fit$pred bm <- 1.05*max(b$new.cases.smooth, pred$pred, pred$pred.high, na.rm=TRUE) p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point(col="grey") + geom_point(data=fit$subx, col="blue") + geom_line(aes(x=date, y=fitted), data=b) + geom_line(aes(x=date, y=pred), data=pred, color="red") + ggtitle(regname) + scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", date_labels="%b") npos <- max(b$positive, na.rm=TRUE)+sum(pred$pred, na.rm=TRUE) ndeaths.p <- max(b$death, na.rm=TRUE)+ sum(b$new.cases[(nrow(b)-7):nrow(b)], na.rm=TRUE)*cfr ndeaths <- ndeaths.p + sum(pred$pred, na.rm=TRUE)*cfr if(!inherits(fit$confval, "try-error") && !reg%in%bad.reg && !any(is.na(fit$confval))){ #mode of log normal is mode.lims <- sort(round(exp(fit$confval[1,]-fit$confval[2,]^2))) df <- data.frame(x=b$date[1]+day1+mode.lims-1, y=c(bm,bm)) #p <- p + geom_line(data=df, aes(x=x, y=y), size=5, color="grey") if(fun=="f2") cfs <- coef(fit$fit) if(fun=="f4") cfs <- c(coef(fit$fit)[1], s.fix, coef(fit$fit)[2]) popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6) p <- p + ggtitle(paste0(regname, "\nEst at Sept 1: Positives = ", round(npos), " Deaths = ", round(ndeaths), " (", round(ndeaths/(popmill*1000), digits=2), "/thousand)\n", "Estimate range: Deaths low ", round(ndeaths.p+fit$cm[1]*cfr), " to Deaths high ", round(ndeaths.p+fit$cm[2]*cfr), "\n", "CFR Est = ", round(100*cfr,digits=2))) fit$pred$new.cases.smooth <- fit$pred$pred p <- p + geom_ribbon(data=fit$pred, aes(x=date, ymin = pred.low, ymax = pred.high), fill = "pink") + geom_line(data=fit$pred, aes(x=date, y=pred.low), linetype="dashed", color="red") + geom_line(data=fit$pred, aes(x=date, y=pred.high), linetype="dashed", color="red") + geom_line(data=fit$pred, aes(x=date, y=pred), color="red") p <- p + annotate("text", x=mindate+1, y=Inf, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)," ", round(max(b$death, na.rm=TRUE)/(popmill*1000),digits=2), "per thousand"), hjust=0, vjust=1) }else{ cfs <- coef(fit$fit) p <- p + ggtitle(paste(regname, "\nPoint estimates: Positives =", round(npos), "Deaths =", round(ndeaths), "CFR =", round(100*cfr,digits=2), "\n", "s = ", round(cfs[2],3), "sig = ", round(summary(fit$fit)$sigma,3))) p <- p + annotate("text", x=mindate+1, y=bm, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)), hjust=0) } return(list(p=p, fit=fit$fit, confint=fit$confval, data=b, x=fit$x, subx=fit$subx, cfr=cfr)) }
worldplot <- function(reg, posmin=100, maxh=150, fitit=FALSE, ymax=1000){ #popmill <- round(sum(worldpop$PopTotal[match(reg, worldpop$Location)], na.rm=TRUE)/1e6, digits=2) popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2) p <- myfun(world, reg, posmin=posmin, maxh=maxh, fitit=fitit) if(inherits(p, "list")){ p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=c(0,ymax*popmill)) p$p } }
stateplot <- function(reg, posmin=100, maxh=150, fitit=FALSE, data="states", ylims=c(0,1000)){ #regfull <- state.name[match(reg, state.abb)] if(data=="states"){ popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2) p <- myfun(states, reg, posmin=posmin, maxh=maxh, fitit=fitit) } if(data=="world"){ reg2 <- paste(c(state.name, "District of Columbia")[match(reg, c(state.abb, "DC"))], "US") p <- myfun(world, reg2, regname=paste(reg, collapse="+"), posmin=posmin, maxh=maxh) } if(inherits(p, "list")){ p$p <- p$p+geom_hline(yintercept = 750*popmill, col="red"); p$p <- p$p+geom_hline(yintercept = 500*popmill, col="blue"); p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*ylims) # plot(p$p) p$p } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.