knitr::opts_chunk$set(
  collapse = TRUE,
  echo=FALSE,
  comment = "#>"
)
library(vdhcovid)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(cowplot)

Estimates of effective reproduction rate $R_t$. This is vdhcovid vr packageVersion('vdhcovid'). Testing data is current through r max(vadailytests$date). Case count data is current through r max(vadailycases$date).

Intermediate results

## Exclude the last couple of days of data, as the data (especially the number of tests
## gets a lot of revision in the first couple of days)
datemax <- pmin(max(vadailycases$date), max(vadailytests$date)) - 4

## First, adjust cases counts for the number of tests done.  We only know the test 
## counts at the health district level, so we'll just use that adjustment for all
## localities in the district.
countycases <- 
  select(vadailycases, date, fips, locality, HealthDistrict, cumcases=cases) %>%
  filter(date <= datemax)
countycases <- group_by(countycases, fips) %>% mutate(cases = c(cumcases[1], diff(cumcases)))
districttests <- 
  mutate(vadailytests, fpos = npos / ntest) %>%
  select(date, HealthDistrict, ntest, fpos, npos)
countycases <- 
  left_join(countycases, districttests, by=c('date','HealthDistrict')) %>%
  filter(!is.na(ntest))
countycases$t <- as.numeric(countycases$date - as.Date('2020-01-01'))
adjustment <- 1000/countycases$ntest
countycases$adjusted_cases <- adjustment * countycases$cases
countycases$weight <- 1/adjustment
countycases$ladjcase <- log(pmax(countycases$adjusted_cases, 0.1))
districtcases <- group_by(countycases, HealthDistrict, date) %>%
  summarise(cases=sum(cases), ntest=ntest[1], npos=npos[1], fpos=fpos[1])
districtcases$t <- as.numeric(districtcases$date - as.Date('2020-01-01'))
adjustment <- 1000 / districtcases$ntest
districtcases$adjusted_cases <- adjustment * districtcases$cases
districtcases$weight <- 1/adjustment
districtcases$ladjcase <- log(pmax(districtcases$adjusted_cases, 0.1))

Smoothed adjusted cases

Plots of daily new cases, adjusted for the number of tests performed. Each line is a locality, and localities are grouped by health district. Cases are recorded at the locality level, while number of tests is recorded at the district level, so the units here are cases in the locality per thousand cases in the district. For districts that comprise a single locality, like Arlington, this is equivalent to the test positivity rate in units of permille. For multi-locality districts, the plotted figure will be less than the actual test positivity rate, since the number of cases is being divided by the number of tests done in the whole district, not just the number of tests done in the locality.

## Fit loess smoothing to log of adjusted cases.  We will use this to estimate 
## growth rates.  The span is intended to smooth over roughly a month worth of data.
## However, we want the entire month to have meaningful effect on smoothing.  The 
## loess algorithm rather dramatically deweights points near the edge of the window,
## so we actually set the window to 60 days.  This results in 80% of the total weight
## being on points within 1 month of the central date.
smoothingspan <- 2 * 30 / as.numeric(max(districtcases$date) - min(districtcases$date))
smooth_county <- function(df, group) {
  fit <- loess(ladjcase~t, df, df$weight, span=smoothingspan)
  df$smoothlogcase <- predict(fit)
  df$smoothcase <- exp(df$smoothlogcase)

##  fit <- loess(adjusted_cases~t, df, df$weight, span=smoothingspan)
##  df$smoothcase <- predict(fit)
##  df$smoothlogcase <- log(df$smoothcase)
  df
}
countycases <- 
  group_by(countycases, fips) %>% 
  group_modify(smooth_county)

districtcases <-
  group_by(districtcases, HealthDistrict) %>%
  group_modify(smooth_county)
cat('plots suppressed\n')
##ggplot(countycases, aes(x=date, y=smoothcase, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Smoothed adjusted cases') + theme_bw()

##ggplot(districtcases, aes(x=date, y=smoothcase)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Smoothed adjusted cases') + theme_bw()

Raw growth rates

Growth rates (i.e., derivative of log cases), unadjusted for sample enrichment effects.

gr_window <- 3                  # Number of days +/- for calculating growth rate
county_growth <- function(df, group) {
  N <- nrow(df)
  i <- seq(1, N)
  ilo <- pmax(1, i-gr_window)
  ihi <- pmin(N, i+gr_window)

  ## Days near the edges of the dataset will have fewer than the required number 
  ## of data points.  Widen the window in the other direction.
  iloadjust <- pmax(0, gr_window - (ihi-i))
  ihiadjust <- pmax(0, gr_window - (i-ilo))

  ilo <- ilo - iloadjust
  ihi <- ihi + ihiadjust

  calcgr <- function(i) {
    i1 <- ilo[i]
    i2 <- ihi[i]
    dd <- df[seq(i1,i2),]
    modsm <- lm(smoothlogcase~t, dd)
    modunsm <- lm(ladjcase~t, dd)
    smodsm <- suppressWarnings(summary(modsm))
    smodunsm <- suppressWarnings(summary(modunsm))
    m <- smodsm$coefficients[2,1]
    sig <- 2*smodunsm$coefficients[2, 2]
    c(gr=m, grsig = sig)
  }

  gr <- sapply(i, calcgr)
  df$growthrate <- gr['gr',]
  df$growthratesig <- gr['grsig',]

  # grsm <- (df$smoothlogcase[ihi]-df$smoothlogcase[ilo]) / (df$t[ihi]-df$t[ilo])
  # grunsm <- (df$ladjcase[ihi]-df$ladjcase[ilo]) / (df$t[ihi]-df$t[ilo])
  # calcsig <- function(i) {
  #   i1 <- ilo[i]
  #   i2 <- ihi[i]
  #   sd(grunsm[seq(i1,i2)])
  # }
  # 
  # df$growthrate <- grsm
  # df$growthratesig <- sapply(i, calcsig)

  df
}
countycases <-
  group_by(countycases, fips) %>%
  group_modify(county_growth) %>%
  mutate(pctgr = (exp(growthrate)-1)*100)

districtcases <-
  group_by(districtcases, HealthDistrict) %>%
  group_modify(county_growth) %>%
  mutate(grlo = growthrate-growthratesig, grhi = growthrate+growthratesig)

# ggplot(countycases, aes(x=date, y=pctgr, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Growth Rate (pct)') + theme_bw()
# 
# ggplot(filter(districtcases, HealthDistrict=='Thomas Jefferson'), aes(x=date, y=growthrate)) +
#   geom_line(size=1.2) +
#   geom_ribbon(mapping=aes(ymin=grlo, ymax=grhi), alpha=0.4) +
#   theme_bw()
#   

Adjusted growth rates

These growth rates are adusted for enrichment. We assume that overall prevalence is low enough that we can use the approximation in our paper, for which the adjustment depends only on the observed positive test fraction. We use the positive test fraction for each health district to adjust all of the localities within that district.
In the early days the number of tests was small, and we would sometimes see 1/1 positive tests for a 100% positive test fraction. To deal with this, we cap the positive test fraction at 50%. At no time since the number of tests climbed to more than a hundred a day has it been higher than this anywhere. (Unlike the raw growth rates, these have not been converted to percentages.)

countycases$adjgrowth <- countycases$growthrate / (1 - pmin(0.5, countycases$fpos))
districtcases$adjgrowth <- districtcases$growthrate / (1 - pmin(0.5, districtcases$fpos))
adjgrsig <- districtcases$growthratesig / (1 - pmin(0.5, districtcases$fpos))
districtcases$agrlo <- districtcases$adjgrowth - adjgrsig
districtcases$agrhi <- districtcases$adjgrowth + adjgrsig

# ggplot(districtcases, aes(x=date, y=adjgrowth)) + geom_line(size=1.2) +
#   #geom_ribbon(mapping=aes(ymin=agrlo, ymax=agrhi), alpha=0.4) +
#   facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + 
#   ylab('Growth Rate')+ theme_bw()
# 
# ggplot(countycases, aes(x=date, y=adjgrowth, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Growth Rate') + theme_bw()

Effective reproduction number

We calculate the effective reproduction number from the adjusted growth rates, using the equation $R_t = 1 + kD$, where $D$ is the serial interval. To do this, we have to estimate a value for $D$. Estimates of $D$ vary dramatically in the literature. Here we use $D=6$, which is the value favored by our model and is within the range of uncertainties reported in the literature. Occasionally regions show extremely high growth rates for short periods of time, typically early in the infection when case counts are low. We believe such measurements are artifacts; therefore, we cut off the display for $R_t$ values at 3.5.

District aggregate Rt plots

D <- 6
countycases$rt <- pmax(0, 1 + countycases$adjgrowth * D)
districtcases$rt <- pmax(0, 1 + districtcases$adjgrowth * D)
districtcases$rtlo <- pmax(0, 1 + districtcases$agrlo * D)
districtcases$rthi <- pmax(0, 1 + districtcases$agrhi * D)
alldistricts <- unique(districtcases$HealthDistrict)

### Make plots of locality Rt, grouped by district
### (can also be used for district plots)
plotrtrgn <- function(district, alldata, locality=TRUE)
{
  dat <- filter(alldata, HealthDistrict==district)
  if(locality) {
    pltbase <- ggplot(dat, aes(x=date, y=rt, color=locality))
  }
  else {
    pltbase <- ggplot(dat, aes(x=date, y=rt)) + geom_hline(yintercept=1, color='blue') #+
      #geom_ribbon(mapping=aes(ymin=rtlo, ymax=rthi), alpha=0.4)
  }

  pltbase + geom_line(size=1) + 
    coord_cartesian(ylim=c(0,3.5)) +
    ggtitle(district) +
    theme_bw() 
}

ggplot(districtcases, aes(x=date, y=rt)) + geom_line(size=1) + 
  geom_hline(yintercept=1, color='blue') +
  coord_cartesian(ylim=c(0,3.5)) + 
  facet_wrap(~HealthDistrict) + 
  theme_bw(10)

Blow-ups of the individual districts:

distplots <- lapply(alldistricts, plotrtrgn, alldata=districtcases, locality=FALSE)
for(plt in distplots) {
  print(plt)
}

Locality Rt plots

#alldistricts <- unique(countycases$HealthDistrict)
alldistricts <- 'Thomas Jefferson'
localplots <- lapply(alldistricts, plotrtrgn, alldata=countycases)
for(plt in localplots) {
  print(plt)
}
select(districtcases, HealthDistrict, date, rt, ntest, npos) %>%
  mutate(fpos = 100*signif(npos/ntest, 3)) %>%
  filter(date==max(date)) %>%
  arrange(rt)


rplzzz/vdhcovid documentation built on July 2, 2021, 7:43 a.m.