knitr::opts_chunk$set( collapse = TRUE, echo=FALSE, comment = "#>" )
library(vdhcovid) library(dplyr) library(ggplot2) library(ggthemes) library(cowplot)
Estimates of effective reproduction rate $R_t$. This is vdhcovid vr packageVersion('vdhcovid')
.
Testing data is current through
r max(vadailytests$date)
. Case count data is current through r max(vadailycases$date)
.
## Exclude the last couple of days of data, as the data (especially the number of tests ## gets a lot of revision in the first couple of days) datemax <- pmin(max(vadailycases$date), max(vadailytests$date)) - 4 ## First, adjust cases counts for the number of tests done. We only know the test ## counts at the health district level, so we'll just use that adjustment for all ## localities in the district. countycases <- select(vadailycases, date, fips, locality, HealthDistrict, cumcases=cases) %>% filter(date <= datemax) countycases <- group_by(countycases, fips) %>% mutate(cases = c(cumcases[1], diff(cumcases))) districttests <- mutate(vadailytests, fpos = npos / ntest) %>% select(date, HealthDistrict, ntest, fpos, npos) countycases <- left_join(countycases, districttests, by=c('date','HealthDistrict')) %>% filter(!is.na(ntest)) countycases$t <- as.numeric(countycases$date - as.Date('2020-01-01')) adjustment <- 1000/countycases$ntest countycases$adjusted_cases <- adjustment * countycases$cases countycases$weight <- 1/adjustment countycases$ladjcase <- log(pmax(countycases$adjusted_cases, 0.1))
districtcases <- group_by(countycases, HealthDistrict, date) %>% summarise(cases=sum(cases), ntest=ntest[1], npos=npos[1], fpos=fpos[1]) districtcases$t <- as.numeric(districtcases$date - as.Date('2020-01-01')) adjustment <- 1000 / districtcases$ntest districtcases$adjusted_cases <- adjustment * districtcases$cases districtcases$weight <- 1/adjustment districtcases$ladjcase <- log(pmax(districtcases$adjusted_cases, 0.1))
Plots of daily new cases, adjusted for the number of tests performed. Each line is a locality, and localities are grouped by health district. Cases are recorded at the locality level, while number of tests is recorded at the district level, so the units here are cases in the locality per thousand cases in the district. For districts that comprise a single locality, like Arlington, this is equivalent to the test positivity rate in units of permille. For multi-locality districts, the plotted figure will be less than the actual test positivity rate, since the number of cases is being divided by the number of tests done in the whole district, not just the number of tests done in the locality.
## Fit loess smoothing to log of adjusted cases. We will use this to estimate ## growth rates. The span is intended to smooth over roughly a month worth of data. ## However, we want the entire month to have meaningful effect on smoothing. The ## loess algorithm rather dramatically deweights points near the edge of the window, ## so we actually set the window to 60 days. This results in 80% of the total weight ## being on points within 1 month of the central date. smoothingspan <- 2 * 30 / as.numeric(max(districtcases$date) - min(districtcases$date)) smooth_county <- function(df, group) { fit <- loess(ladjcase~t, df, df$weight, span=smoothingspan) df$smoothlogcase <- predict(fit) df$smoothcase <- exp(df$smoothlogcase) ## fit <- loess(adjusted_cases~t, df, df$weight, span=smoothingspan) ## df$smoothcase <- predict(fit) ## df$smoothlogcase <- log(df$smoothcase) df } countycases <- group_by(countycases, fips) %>% group_modify(smooth_county) districtcases <- group_by(districtcases, HealthDistrict) %>% group_modify(smooth_county)
cat('plots suppressed\n') ##ggplot(countycases, aes(x=date, y=smoothcase, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Smoothed adjusted cases') + theme_bw() ##ggplot(districtcases, aes(x=date, y=smoothcase)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Smoothed adjusted cases') + theme_bw()
Growth rates (i.e., derivative of log cases), unadjusted for sample enrichment effects.
gr_window <- 3 # Number of days +/- for calculating growth rate county_growth <- function(df, group) { N <- nrow(df) i <- seq(1, N) ilo <- pmax(1, i-gr_window) ihi <- pmin(N, i+gr_window) ## Days near the edges of the dataset will have fewer than the required number ## of data points. Widen the window in the other direction. iloadjust <- pmax(0, gr_window - (ihi-i)) ihiadjust <- pmax(0, gr_window - (i-ilo)) ilo <- ilo - iloadjust ihi <- ihi + ihiadjust calcgr <- function(i) { i1 <- ilo[i] i2 <- ihi[i] dd <- df[seq(i1,i2),] modsm <- lm(smoothlogcase~t, dd) modunsm <- lm(ladjcase~t, dd) smodsm <- suppressWarnings(summary(modsm)) smodunsm <- suppressWarnings(summary(modunsm)) m <- smodsm$coefficients[2,1] sig <- 2*smodunsm$coefficients[2, 2] c(gr=m, grsig = sig) } gr <- sapply(i, calcgr) df$growthrate <- gr['gr',] df$growthratesig <- gr['grsig',] # grsm <- (df$smoothlogcase[ihi]-df$smoothlogcase[ilo]) / (df$t[ihi]-df$t[ilo]) # grunsm <- (df$ladjcase[ihi]-df$ladjcase[ilo]) / (df$t[ihi]-df$t[ilo]) # calcsig <- function(i) { # i1 <- ilo[i] # i2 <- ihi[i] # sd(grunsm[seq(i1,i2)]) # } # # df$growthrate <- grsm # df$growthratesig <- sapply(i, calcsig) df } countycases <- group_by(countycases, fips) %>% group_modify(county_growth) %>% mutate(pctgr = (exp(growthrate)-1)*100) districtcases <- group_by(districtcases, HealthDistrict) %>% group_modify(county_growth) %>% mutate(grlo = growthrate-growthratesig, grhi = growthrate+growthratesig) # ggplot(countycases, aes(x=date, y=pctgr, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Growth Rate (pct)') + theme_bw() # # ggplot(filter(districtcases, HealthDistrict=='Thomas Jefferson'), aes(x=date, y=growthrate)) + # geom_line(size=1.2) + # geom_ribbon(mapping=aes(ymin=grlo, ymax=grhi), alpha=0.4) + # theme_bw() #
These growth rates are adusted for enrichment. We assume that overall prevalence
is low enough that we can use the approximation in our paper, for which the adjustment
depends only on the observed positive test fraction. We use the positive test fraction
for each health district to adjust all of the localities within that district.
In the early days the number of tests was small, and we would sometimes see 1/1
positive tests for a 100% positive test fraction. To deal with this, we cap the
positive test fraction at 50%. At no time since the number of tests climbed to
more than a hundred a day has it been higher than this anywhere. (Unlike the raw
growth rates, these have not been converted to percentages.)
countycases$adjgrowth <- countycases$growthrate / (1 - pmin(0.5, countycases$fpos)) districtcases$adjgrowth <- districtcases$growthrate / (1 - pmin(0.5, districtcases$fpos)) adjgrsig <- districtcases$growthratesig / (1 - pmin(0.5, districtcases$fpos)) districtcases$agrlo <- districtcases$adjgrowth - adjgrsig districtcases$agrhi <- districtcases$adjgrowth + adjgrsig # ggplot(districtcases, aes(x=date, y=adjgrowth)) + geom_line(size=1.2) + # #geom_ribbon(mapping=aes(ymin=agrlo, ymax=agrhi), alpha=0.4) + # facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + # ylab('Growth Rate')+ theme_bw() # # ggplot(countycases, aes(x=date, y=adjgrowth, color=locality)) + geom_line(show.legend = FALSE) + facet_wrap(~HealthDistrict, scales='free_y', ncol=4) + ylab('Growth Rate') + theme_bw()
We calculate the effective reproduction number from the adjusted growth rates, using the equation $R_t = 1 + kD$, where $D$ is the serial interval. To do this, we have to estimate a value for $D$. Estimates of $D$ vary dramatically in the literature. Here we use $D=6$, which is the value favored by our model and is within the range of uncertainties reported in the literature. Occasionally regions show extremely high growth rates for short periods of time, typically early in the infection when case counts are low. We believe such measurements are artifacts; therefore, we cut off the display for $R_t$ values at 3.5.
D <- 6 countycases$rt <- pmax(0, 1 + countycases$adjgrowth * D) districtcases$rt <- pmax(0, 1 + districtcases$adjgrowth * D) districtcases$rtlo <- pmax(0, 1 + districtcases$agrlo * D) districtcases$rthi <- pmax(0, 1 + districtcases$agrhi * D) alldistricts <- unique(districtcases$HealthDistrict) ### Make plots of locality Rt, grouped by district ### (can also be used for district plots) plotrtrgn <- function(district, alldata, locality=TRUE) { dat <- filter(alldata, HealthDistrict==district) if(locality) { pltbase <- ggplot(dat, aes(x=date, y=rt, color=locality)) } else { pltbase <- ggplot(dat, aes(x=date, y=rt)) + geom_hline(yintercept=1, color='blue') #+ #geom_ribbon(mapping=aes(ymin=rtlo, ymax=rthi), alpha=0.4) } pltbase + geom_line(size=1) + coord_cartesian(ylim=c(0,3.5)) + ggtitle(district) + theme_bw() } ggplot(districtcases, aes(x=date, y=rt)) + geom_line(size=1) + geom_hline(yintercept=1, color='blue') + coord_cartesian(ylim=c(0,3.5)) + facet_wrap(~HealthDistrict) + theme_bw(10)
Blow-ups of the individual districts:
distplots <- lapply(alldistricts, plotrtrgn, alldata=districtcases, locality=FALSE) for(plt in distplots) { print(plt) }
#alldistricts <- unique(countycases$HealthDistrict) alldistricts <- 'Thomas Jefferson' localplots <- lapply(alldistricts, plotrtrgn, alldata=countycases) for(plt in localplots) { print(plt) }
select(districtcases, HealthDistrict, date, rt, ntest, npos) %>% mutate(fpos = 100*signif(npos/ntest, 3)) %>% filter(date==max(date)) %>% arrange(rt)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.