library('dplyr')
library('ggplot2')
library('ggthemes')
library('vdhcovid')
## VDH case and test reports trickle in over a period of a few days; therefore,
## the last few days of reports are subject to substantial revision.
datemax <- pmin(max(vadailycases$date), max(vadailytests$date)) - 4

## First, adjust cases counts for the number of tests done.  We only know the test 
## counts at the health district level, so we'll just use that adjustment for all
## localities in the district.
countycases <- 
  select(vadailycases, date, fips, locality, HealthDistrict, cumcases=cases) %>%
  filter(HealthDistrict == 'Thomas Jefferson', date <= datemax)
countycases <- group_by(countycases, fips) %>% 
  mutate(cases = c(cumcases[1], diff(cumcases))) %>%
  ungroup()

districttests <- 
  mutate(vadailytests, fpos = npos / ntest) %>%
  select(date, HealthDistrict, ntest, fpos, npos)
countycases <- 
  left_join(countycases, districttests, by=c('date','HealthDistrict')) %>%
  filter(!is.na(ntest))
countycases$t <- as.numeric(countycases$date - as.Date('2020-01-01'))
adjustment <- 1000/countycases$ntest
countycases$adjusted_cases <- adjustment * countycases$cases
countycases$weight <- 1/adjustment
#countycases$weight <- 1
countycases$ladjcase <- log(pmax(countycases$adjusted_cases, 0.1))
## Fit loess smoothing to log of adjusted cases.  We will use this to estimate 
## growth rates.
reclen <- as.numeric(max(countycases$date) - min(countycases$date))
## These spans are intended to smooth over periods of roughly 1 quarter, 1 month,
## and one week.  In practice we double them because the cubic weighting in the loess
## algorithm deweights points near the edge of the window pretty dramatically.
span050 <- 2 * 90 / reclen
span025 <- 2 * 30 / reclen
span015 <- 2 * 7 / reclen
smooth_county <- function(df, group, smoothing_span=span050) {
  fit <- loess(ladjcase~t, df, df$weight, span=smoothing_span)
  df$smoothlogcase <- predict(fit)
  df$smoothcase <- exp(df$smoothlogcase)
  df
}
countycases_s050 <- 
  group_by(countycases, fips) %>% 
  group_modify(smooth_county) %>%
  ungroup()

countycases_s025 <- 
  group_by(countycases, fips) %>% 
  group_modify(function(df, g) {smooth_county(df, g, smoothing_span=span025)}) %>%
  ungroup()

districtcases_base <-
  group_by(countycases, date, t, HealthDistrict) %>%
  summarise(cumcases = sum(cumcases), cases=sum(cases), ntest=mean(ntest),
            fpos=mean(fpos), npos=mean(npos), adjusted_cases=sum(adjusted_cases)) %>%
  mutate(ladjcase = log(pmax(adjusted_cases,1)), weight=ntest/1000) %>%
  ungroup() %>%
  group_by(HealthDistrict)

districtcases_s050 <- 
  group_modify(districtcases_base, smooth_county, smoothing_span=span050) %>%
  ungroup()

districtcases_s025 <-
  group_modify(districtcases_base, function(df, g) {smooth_county(df,g, smoothing_span=span025)}) %>%
  ungroup()

districtcases_s015 <-
  group_modify(districtcases_base, function(df, g) {smooth_county(df,g, smoothing_span=span015)}) %>%
  ungroup()
dayspan <- 7
extraplen <- 21
extrapolate <- function(df, group)
{
  i2 <- which.max(df$date)
  i1 <- i2 - dayspan + 1

  df <- df[seq(i1,i2), ]
  df$t <- as.numeric(df$date - df$date[1])

  mod <- lm(smoothlogcase~t, df)
  m <- mod$coefficients[2]

  extrapx <- seq(1,extraplen)
  n <- nrow(df)
  smoothlogcase <- df$smoothlogcase[n]+ m*extrapx

  extrapt <- df$t[n] + extrapx
  extrapdate <- df$date[n] + extrapx

  tibble(t = extrapt, date = extrapdate, smoothlogcase = smoothlogcase, 
         smoothcase = exp(smoothlogcase))
}

extrapcounty_s050 <- 
  group_by(countycases_s050, locality) %>%
  group_modify(extrapolate)

extrapdist_s050 <- 
  group_by(districtcases_s050, HealthDistrict) %>%
  group_modify(extrapolate)

extrapcounty_s025 <- 
  group_by(countycases_s025, locality) %>%
  group_modify(extrapolate)

extrapdist_s025 <- 
  group_by(districtcases_s025, HealthDistrict) %>%
  group_modify(extrapolate)

extrapdist_s015 <- 
  group_by(districtcases_s015, HealthDistrict) %>%
  group_modify(extrapolate)


## Adjust the district figures to reflect the number of tests actually being done
## recently (past week).
tmax <- max(districtcases_base$t)
t1 <- tmax - 7
ntestmean <- mean(districtcases_base$ntest[districtcases_base$t > t1])
cfac <- ntestmean / 1000
lcfac <- log(cfac)

ntadjust <- function(df) {
  df$smoothcase <- df$smoothcase * cfac
  df$smoothlogcase <- df$smoothlogcase + lcfac
  df  
}
ntdistrictcases_s050 <- ntadjust(districtcases_s050)
ntdistrictcases_s050$adjusted_cases <- districtcases_s050$adjusted_cases * cfac
ntdistrictcases_s050$ladjcase <- districtcases_s050$ladjcase + lcfac

ntdistrictcases_s025 <- ntadjust(districtcases_s025)
ntdistrictcases_s025$adjusted_cases <- districtcases_s025$adjusted_cases * cfac
ntdistrictcases_s025$ladjcase <- districtcases_s025$ladjcase + lcfac

ntdistrictcases_s015 <- ntadjust(districtcases_s015)
ntdistrictcases_s015$adjusted_cases <- districtcases_s015$adjusted_cases * cfac
ntdistrictcases_s015$ladjcase <- districtcases_s015$ladjcase + lcfac

ntextrapdist_s050 <- ntadjust(extrapdist_s050)
ntextrapdist_s025 <- ntadjust(extrapdist_s025)
ntextrapdist_s015 <- ntadjust(extrapdist_s015)



ggplot(mapping=aes(x=date, y=smoothcase)) +
  geom_line(data=ntdistrictcases_s050, mapping=aes(color='Historical'), size=1.2) +
  geom_point(data=ntdistrictcases_s050, mapping=aes(y=adjusted_cases), color='black') +
  geom_line(data=ntextrapdist_s050, mapping=aes(color='Extrapolated'), size=1.2) +
  scale_color_manual(values=c('red','black'), name='Source') +
  ylab('Adjusted daily new cases') +
  ggtitle('Smoothing span = Quarterly') +
  theme_bw()

ggplot(mapping=aes(x=date, y=smoothcase)) +
  geom_line(data=ntdistrictcases_s025, mapping=aes(color='Historical'), size=1.2) +
  geom_point(data=ntdistrictcases_s025, mapping=aes(y=adjusted_cases), color='black') +
  geom_line(data=ntextrapdist_s025, mapping=aes(color='Extrapolated'), size=1.2) +
  scale_color_manual(values=c('red','black'), name='Source') +
  ylab('Adjusted daily new cases') +
  ggtitle('Smoothing span = Monthly') +
  theme_bw()

ggplot(mapping=aes(x=date, y=smoothcase)) +
  geom_line(data=ntdistrictcases_s015, mapping=aes(color='Historical'), size=1.2) +
  geom_point(data=ntdistrictcases_s015, mapping=aes(y=adjusted_cases), color='black') +
  geom_line(data=ntextrapdist_s015, mapping=aes(color='Extrapolated'), size=1.2) +
  scale_color_manual(values=c('red','black'), name='Source') +
  ylab('Adjusted daily new cases') +
  ggtitle('Smoothing span = Weekly') +
  theme_bw()
datemin <- lubridate::today() - 60
distcases_60day <- filter(ntdistrictcases_s025, date >= datemin)
ggplot(distcases_60day, aes(x=date, y=ntest)) +
  geom_line(size=1.0) +
  ggtitle('BRHD daily testing') +
  ylab('number of tests') +
  theme_bw()

ggplot(distcases_60day, aes(x=date)) +
  geom_line(mapping=aes(y=adjusted_cases, color='adjusted'), size=1.2) +
  geom_line(mapping=aes(y=cases, color='raw'), size=1.2) +
  scale_color_manual(values=c(raw='red', adjusted='blue')) +
  ylab('cases') +
  coord_cartesian(ylim=c(0,25)) +
  theme_bw()
choalbcases <- 
  filter(vadailycases, locality %in% c('AlbemarleCounty', 'Charlottesvillecity')) %>%
  group_by(county) %>%
  mutate(newcases = c(NA, diff(cases))) %>%
  filter(date >= datemin)

ggplot(choalbcases, aes(x=date, y=newcases, color=county)) +
  geom_line(size=1.0) +
  scale_color_manual(values=c(Albemarle='blue', Charlottesville='red')) +
  theme_bw()


rplzzz/vdhcovid documentation built on July 2, 2021, 7:43 a.m.