knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE, fig.height=6)
library(CoV19)
library(ggplot2)
library(gridExtra)

This post is just focused on the US. See Europe Trends for Europe. Basically trends in Europe continue downward mostly across the board. There are some small outbreaks in a few Eastern European countries but nothing major. This is an update to my regional patterns post 10 days ago: https://eeholmes.github.io/CoV19/updates/2020-05-17-Patterns.html

# estimate mu, sigma and scaling
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  a*exp(-0.5*((x-m)/s)^2)
}
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  x <- x-m
  a*exp(-0.5*(x/s)^2)
}
# fix m
f3<-function(x, theta, m) {
  s<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
# fix s
f4<-function(x, theta, s) {
  m<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
f1<-function(x, theta, a) {
  m<-theta[1]; s<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
fitf1<- function(subx, m=5, a=1, s=0.08, xshift=0){
  # m is actual m not log(day)
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    ifelse(s<0,-1,1)*a*exp(-0.5*((x-m)/s)^2)
  }
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    x <- x-m
    ifelse(s<0,-1,1)*a*exp(-0.5*(x/s)^2)
  }
  for(m in c(m, seq(.5,2,.1)*m)){
    fit1<-try(nls(y~f1(x,c(m,s),a), data=subx, start=list(m=m, s=s), control = list(maxiter = 500), algorithm="port", lower=c(0,0.03)), silent=TRUE)
    if(!inherits(fit1, "try-error")) break
  }
  return(fit1)
}

signx <- function(x) return(ifelse(x<0,-1,1))

getcis <- function(fit.best, subx, m=20, s=0.2, sig=0.05, level=0.05, xshift=0){
  m <- coef(fit.best)[1]
  s <- coef(fit.best)[2]
  sig <- max(summary(fit.best)$sigma, sig)
  err.best <- residuals(fit.best)
  n <- length(err.best)
  val=c()
  aval <- seq(.5, 10*coef(fit.best)[3],0.1)
  for(a in aval){
    fit <- fitf1(subx, a=a, m=m, s=s, xshift=xshift)
    err <- residuals(fit)
    rss=sum(err^2, na.rm=TRUE)
    Fval <- (rss-sum(err.best^2, na.rm=TRUE))/sig^2
    tauval <- signx(a-coef(fit.best)[3])*sqrt(Fval)
    if(!is.na(tauval) && tauval>3 && tauval<5) break #catch bad fits with < 5
    if(is.na(tauval)) next
    val <- rbind(val, c(rss=rss, tau=tauval,
                        coef(fit),a=a))
  }
  val <- val[val[,"tau.a"]> -5 & val[,"tau.a"]< 5,]
  mina <- max(which(val[,"tau.a"]<qt(level/2,n-3)),1)
  maxa <- min(which(val[,"tau.a"]>qt(1-level/2,n-3)),nrow(val))
  ret <- cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")])
  if(ret[1,2]<m) ret[,2] <- NA
  if(ret[1,1]>m) ret[,2] <- NA
  return(cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")]))
}
# Get the fit
getfit <- function(x, posmin=100, maxh=200, xshift=0, fun="f2", s.fix=0.08, sig=0.05){
  fil <- 7 #must be odd
  x$new.cases.smooth <- stats::filter(x$new.cases, rep(1/fil,fil))
  x$new.cases.smooth <- c(rep(NA,floor(fil/2)),zoo::rollapply(x$new.cases, fil, mean, na.rm=TRUE), rep(NA,floor(fil/2)))
  day1 <- min(which(x$positive>posmin))
  x$day <- 1:nrow(x) - day1 + 1

  subx <- subset(x, positive>posmin)
  subx$day <- subx$day+xshift
  subx$x <- log(subx$day)
  subx$y <- subx$new.cases.smooth*subx$day
  maxy <- max(subx$y, na.rm=TRUE)
  subx$y <- subx$y/maxy
  for(s in c(0.05,.1,1)){
    for(m in seq(10,200,25)+xshift){
      if(fun=="f2") fit<-try(nls(y~f2(x,c(m,s,a)), data=subx, start=list(m=log(m), s=s, a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(fun=="f4") fit<-try(nls(y~f4(x,c(m,a), s.fix), data=subx, start=list(m=log(m), a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(!inherits(fit, "try-error")) break()
    }
    if(!inherits(fit, "try-error")) break()
  }
  if(inherits(fit, "try-error")) return(list(x=x, pred=NULL, fit=fit))
  x$fitted <-c(rep(NA, day1-1), predict(fit, newdata=subx)*maxy/subx$day)

  pred <- data.frame(day=((nrow(x)+1):(nrow(x)+maxh))-day1+1+xshift, date=x$date[1]+nrow(x):(nrow(x)+maxh-1))
  pred$x <- log(pred$day)
  pred$pred <- predict(fit, newdata=pred)*maxy/pred$day

  cm1=NA; cm2=NA
  confval1 <- try(confint(fit, level=0.95), silent=TRUE)
  if(inherits(confval1, "try-error") || summary(fit)$sigma<0.03 || any(is.na(confval1)) ){ 
    confval <- try(getcis(fit.best=fit, subx=subx, level=0.05, xshift=xshift, sig=sig))
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,1])))) confval[,1] <- confval1[,1]
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,2])))) confval[,2] <- confval1[,2]
  }else{ confval <- confval1 }

  if(!inherits(confval, "try-error")){
    if(fun=="f2"){
      cm1 <- sum(f2(log(pred$day), confval[,1])*maxy/pred$day)
      cm2 <- sum(f2(log(pred$day), confval[,2])*maxy/pred$day)
      pred$pred.low <- f2(log(pred$day), confval[,1])*maxy/pred$day
      pred$pred.high <- f2(log(pred$day), confval[,2])*maxy/pred$day
      x$fitted.low <-c(rep(NA, day1-1), f2(subx$x, confval[,1])*maxy/subx$day)
      x$fitted.high <-c(rep(NA, day1-1), f2(subx$x, confval[,2])*maxy/subx$day)
    }else{
      cm1 <- sum(f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day)
      cm2 <- sum(f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day)
      pred$pred.low <- f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day
      pred$pred.high <- f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day
      x$fitted.low <-c(rep(NA, day1-1), f4(subx$x, confval[,1], s.fix)*maxy/subx$day)
      x$fitted.high <-c(rep(NA, day1-1), f4(subx$x, confval[,2], s.fix)*maxy/subx$day)
    }

  }

  return(list(x=x, subx=subx, pred=pred, fit=fit, confval=confval, cm=sort(c(cm1, cm2))))
}
myfun <- function(data, reg, regname="", posmin=100, maxh=200, mindate="2020-02-15", maxdate="2020-09-01", xshift=0, bad.reg=NULL, fun="f2", s.fix=0.08, sig=0.05, fitit=TRUE){
  mindate <- as.Date(mindate)
  maxdate <- as.Date(maxdate)
  library(dplyr)
  b <- data %>% 
      subset(region %in% reg) %>%
      dplyr::group_by(date) %>%
      dplyr::summarize_if(is.numeric, function(x){ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))})
    # This will not have the region column, so we add that back on
  if(missing(regname)) regname <- paste(reg, collapse="+")
  b$region <- regname

  #b<-subset(data, region==reg)
  #if(max(b$positive)<500) return()
  day1 <- min(which(b$positive>posmin))

  b$new.cases <- c(NA, diff(b$positive))
  b$new.cases[b$new.cases<0] <- NA
  if(identical(reg, "Hubei China")) b$new.cases[b$new.cases>10000]=NA #hack 
  if(identical(reg, "France")) b$new.cases[b$new.cases>10000]=NA #hack
  if(identical(reg, "Hungary")) b$new.cases[b$new.cases>200]=NA #hack
  cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE)

  fit <- getfit(b, posmin=posmin, maxh=maxh, xshift=xshift,
                fun=fun, s.fix=s.fix, sig=sig)
  if(!fitit | inherits(fit$fit, "try-error")){
    b <- fit$x
    bm <- 1.05*max(b$new.cases, na.rm=TRUE)
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() +
      ggtitle(paste0(regname, "\nTo date deaths ", round(max(b$death, na.rm=TRUE)/(popmill), digits=2), "/million",
                     "\nCFR = ", round(100*cfr,digits=2))) +
      scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                   date_labels="%b")
    return(list(p=p, cfr=cfr))
  }

  # updated with fitted
  b <- fit$x
  pred <- fit$pred
  bm <- 1.05*max(b$new.cases.smooth, pred$pred, pred$pred.high, na.rm=TRUE)

  p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() + 
    geom_point(data=subset(b, positive>posmin), col="blue") +
    geom_line(aes(x=date, y=fitted), data=b) +
    geom_line(aes(x=date, y=pred), data=pred, color="red") +
    ggtitle(paste(regname,
              "\nCFR = ", round(100*cfr,digits=2))) +
    scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                 date_labels="%b")

  npos <- max(b$positive, na.rm=TRUE)+sum(pred$pred, na.rm=TRUE)
  ndeaths.p <- max(b$death, na.rm=TRUE)+
    sum(b$new.cases[(nrow(b)-7):nrow(b)], na.rm=TRUE)*cfr
  ndeaths <- ndeaths.p +
    sum(pred$pred, na.rm=TRUE)*cfr

  if(!inherits(fit$confval, "try-error") && !reg%in%bad.reg){
    #mode of log normal is 
    mode.lims <- sort(round(exp(fit$confval[1,]-fit$confval[2,]^2)))
    df <- data.frame(x=b$date[1]+day1+mode.lims-xshift-1, y=c(bm,bm))
    #p <- p + geom_line(data=df, aes(x=x, y=y), size=5, color="grey")
    if(fun=="f2") cfs <- coef(fit$fit)
    if(fun=="f4") cfs <- c(coef(fit$fit)[1], s.fix, coef(fit$fit)[2])
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    p <- p +
      ggtitle(paste0(regname, 
                    "\nEst Deaths = ", 
                    round(ndeaths/popmill, digits=2), "/million",
              "\nCFR = ", round(100*cfr,digits=2)))
    fit$pred$new.cases.smooth <- fit$pred$pred
    p <- p +
      geom_ribbon(data=fit$pred, aes(x=date, ymin = pred.low, ymax = pred.high), fill = "pink") +
      geom_line(data=fit$pred, aes(x=date, y=pred.low), linetype="dashed", color="red") +
      geom_line(data=fit$pred, aes(x=date, y=pred.high), linetype="dashed", color="red") + 
      geom_line(data=fit$pred, aes(x=date, y=pred), color="red")
   # p <- p + annotate("text", x=mindate+1, y=Inf, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)," ", round(max(b$death, na.rm=TRUE)/(popmill*1000),digits=2), "per thousand"), hjust=0, vjust=1)


  }else{
    cfs <- coef(fit$fit)

    p <- p +
      ggtitle(paste(regname, "\nPoint estimates: Positives =", round(npos),
                    "Deaths =", round(ndeaths), "CFR =",
                    round(100*cfr,digits=2), "\n",
                    "s = ", round(cfs[2],3), "sig = ", round(summary(fit$fit)$sigma,3)))
    p <- p + annotate("text", x=mindate+1, y=bm, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)), hjust=0)

  }

  return(list(p=p, fit=fit$fit, confint=fit$confval, data=b, cfr=cfr))
}

I have added a reference line to the state plots of the estimated daily Covid-19 deaths per day relative to the average US deaths per day. The average deaths per year in the US (all age groups) is roughly 10,000 per million (roughly 1% of population per year), obviously it higher in some states than others but for now I am not downloading the state by state info on that.

That translates to 10000 x pop size in millions / 365 deaths per day. I can translate new cases per day into future deaths per day using CFR. I put a reference line of 5% of normal deaths per day. Note Covid-19 deaths are heavily skewed to those over 70 and deaths rates for that age group is 2-4 times higher than the all-age groups rate. However, the denominator (millions of people age 70+) is even smaller, so actually if I tranlated this just to focus on those age 70+, it would look even worse. Meaning the Covid-19 death rate relative to normal death rate would be higher.

Note, I have heard friends wondering about increases in other causes of death due to side-effects of the lockdowns---avoidance of health care and hospitals, putting off vaccinations, loss of jobs and with it loss of health insurance, increase in depression, etc. https://www.reuters.com/investigates/special-report/health-coronavirus-usa-cost/ On the otherhand, some types of deaths have dropped along with the distancing measures. In particular, the flu season was stopped and this season's flu was unusually lethal to children and teens. https://www.nature.com/articles/d41586-020-01538-8 My goal here is just to explore the patterns in the Covid-19 reported deaths but I am certainly aware that this isn't the only story.

Summary

At its peak, Covid-19 was causing daily death rates that were 70-80% of what you would expect from all other causes in the Northeast region. Obviously this was not uniform over the region; NYC Metropolitan Area and other large urban centers were the epicenters. Now the new cases per day are much lower. Projected death rate from the current daily infections is 10-20% of the normal daily death rate, but new cases still dropping fast.

The other regions that with high death rates (relative to average daily rates) are the upper-eastern mid-west and the southern Atlantic states. Projected death rate from the current daily infections is ca 30% of the normal daily death rate and cases are not yet dropping though maybe they are at the peak.

stateplot <- function(reg, posmin=100, maxh=150, fitit=TRUE){
#regfull <- state.name[match(reg, state.abb)]
popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
#p <- myfun(states, reg, posmin=posmin, maxh=maxh, fitit=fitit)
reg2 <- paste(c(state.name, "District of Columbia")[match(reg, c(state.abb, "DC"))], "US")
p <- myfun(world, reg2, regname=paste(reg, collapse="+"), posmin=posmin, maxh=maxh)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  onetoone <- ((10000*popmill/365)/(p$cfr))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=.05*onetoone, label=paste("5%"), hjust=1, vjust=-1)
  p$p <- p$p+geom_hline(yintercept = .05*onetoone, col="pink"); 
  p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
 p$p
}
}
worldplot <- function(reg, posmin=100, maxh=150, fitit=TRUE){
popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
p <- myfun(world, reg, posmin=posmin, maxh=maxh, fitit=fitit)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  onetoone <- ((10000*popmill/365)/(p$cfr))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=.05*onetoone, label=paste("5%"), hjust=1, vjust=-1)
  p$p <- p$p+geom_hline(yintercept = .05*onetoone, col="pink"); 
  p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
 p$p
}
}
newengland <- stateplot(c("ME","VT","NH","MA","RI","CT"))
midatlantic <- stateplot(c("NY","NJ","PA"))
midwest1 <- stateplot(c("OH", "IN", "IL", "WI", "MI"))
southatlantic <- stateplot(c("DC", "DE", "VA", "WV", "MD"))
grid.arrange(newengland, midatlantic, midwest1, southatlantic, nrow=2)

In the media, at least as served up to me on Google News, there is a lot of chatter about Georgia, Texas, Missouri, Arkansas and Florida and outbreaks due to social gatherings and a breakdown of public health precautions (distancing, limiting number of contacts, staying 6 feet apart, masking if unable to stay distant or if indoors, that sort of thing). We may certainly see that in 4-14 days, but at the moment those states not the ones showing big outbreaks.

There are four states that jump out at me right now outside of the northeastern hot-zone: South Dakota, Mississippi, Minnesota and Illinois. I haven't seen much discussion of these in the regular media (meaning if I don't search for a regional newspaper). The death rates are high enough that I think it means that there is community spread outside of the known high-risk regions: high density agricultural plants (meat, fruit packing, etc), prison, and nursing homes. In South Dakota's case, this might still be cases associated with processing plants---they are small so one big processing plant outbreak can lead to many new cases per capita.

grid.arrange(stateplot("SD"), stateplot("MS"), stateplot("MN"), stateplot("IL"))

There is still a lot of discussion and worry about the southern states, and new cases are still high (though nothing like the northeast) and cases are not really dropping.

grid.arrange(stateplot("FL"), stateplot("AL"), stateplot("GA"), stateplot("MS"))

In my home county, King County Washington, we had an early peak and actually the deaths per day were especially high since the early outbreaks were in nursing and skilled-care homes and the CFR was high (above 10%). The CFR in the plot is the current CFR, which has now dropped to something more in line with the rest of the US. The King Co mask requirement went into effect on May 18 and I am very curious to see if we see an effect of that in another week. Epidemiologists that I've been listening to say mandatory mask wearing will have to be implemented. It works and lockdowns are a emergency control measure only and have too many negative effects. Hopefully Seattlites and King Co residents embrace the masks and we can get more data on the effects of masks on new cases.

In Washington state as a whole, the daily deaths rates never got anywhere near the rates seen in the northeastern states. The Yakima and Tri-Cities outbreaks from 2 weeks ago seem to have passed and cases are going back down again. There continue to be outbreaks in the apple packing plants in Wenatchee and Yakima area (heavily covered in local papers).

reg <- "King Washington US"
king <- worldplot(reg)
grid.arrange(king, stateplot("WA"), nrow=1)

Regional trends

Note: scroll down to the bottom of this page to see state by state data: https://eeholmes.github.io/CoV19/Forecasts.html

Takehome is that across the US, the new case curves (7-day average) continue to show a downward trend at the regional level. Overall, it still looks like the first wave will last well into summer for most of the US (excluding part of the NE) but new cases per million is staying relatively low.

reg <- c("San Diego California US",
         "San Bernardino California US",
         "Riverside California US",
         "Orange California US",
         "Los Angeles California US",
         "Ventura California US",
         "Kern California US",
         "Santa Barbara California US",
         "San Luis Obispo California US")
cacounties <- unique(world$region[str_detect(world$region, "California")])
ncacounties <- cacounties[!cacounties %in% reg & 
                            !cacounties=="California US" & 
                            !cacounties=="Out of CA California US" &
                            !cacounties=="Unassigned California US"]
ncacounties <- as.character(ncacounties)
p <- myfun(world, c("Washington US", "Oregon US", ncacounties), regname="WA+OR+NorCal", posmin=100, maxh=150)
popmill <- sum(statepop$POPESTIMATE2019[match(c("Washington", "Oregon"), statepop$NAME)], na.rm=TRUE)+0.4*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]
popmill <- round(popmill/1e6, digits=2)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  onetoone <- (10000*popmill/365)/(p$cfr)
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=.05*onetoone, label=paste("5%"), hjust=1, vjust=-1)
  p$p <- p$p+geom_hline(yintercept = .05*onetoone, col="pink"); 
  p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
northwest <- p$p
}
reg <- c("San Diego California US",
         "San Bernardino California US",
         "Riverside California US",
         "Orange California US",
         "Los Angeles California US",
         "Ventura California US",
         "Kern California US",
         "Santa Barbara California US",
         "San Luis Obispo California US")
p <- myfun(world, reg, regname="Southern CA", posmin=10, maxh=150)
if(inherits(p, "list")){
popmill <- round(0.6*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]/1e6, digits=2)
    p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  onetoone <- ((10000*popmill/365)/(p$cfr))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=.05*onetoone, label=paste("5%"), hjust=1, vjust=-1)
  p$p <- p$p+geom_hline(yintercept = .05*onetoone, col="pink"); 
  socal <- p$p
}
midwest1 <- stateplot(c("OH", "IN", "IL", "WI", "MI"))
midwest2 <- stateplot(c("MN", "MO", "ND", "SD", "IA", "NE"))
southatlantic <- stateplot(c("DC", "DE", "VA", "WV", "MD"))
eastsouth <- stateplot(c("AL", "MS", "GA", "TN"))
grid.arrange(midwest1, midwest2, southatlantic, eastsouth)
mtnwest <- stateplot(c("CO", "UT", "NM", "AZ"))
southcentral <- stateplot(c("TX", "OK", "KS", "AR"))
grid.arrange(mtnwest, southcentral, northwest, socal)
mtnnorth <- stateplot(c("ID", "MT", "WY"))
grid.arrange(mtnnorth, stateplot("HI"), stateplot("NV"), stateplot("AK"))


eeholmes/CoV19 documentation built on Oct. 19, 2021, 10:59 a.m.