knitr::opts_chunk$set(message=FALSE, warning=FALSE, echo=FALSE)

Intro

The Reich Lab at UMass-Amherst produces weekly forecasts of the percentage of doctors visits that present with influenza-like-illness (ILI) at the state level based on data provided through ILINet, as part of the CDC FluSight Challenge. Our models use historical ILI data to predict future ILI trends using statistical models.

Current forecasts

The plot below shows the current forecasts from our forecasting model (blue dots and line) with 50% and 80% confidence interval around the forecasts. The currently observed ILI data for Massachusetts in the current season is shown in black, while previous seasons are shown as grey lines in the background. The vertical dashed line is drawn at the current week of the season, which is typically several weeks past current available data.

get_ci_from_sample <- function(dist,level){


  total_mass <- 0
  start_index <-   which.max(dist)
  left_index <- start_index
  right_index <- start_index
  while(total_mass <level){
    if (left_index >= 1 & right_index <= length(dist)){
      if (left_index == start_index){
        total_mass <- total_mass + dist[start_index]
      } else{
              total_mass <- total_mass  + dist[left_index] + dist[right_index]
      }
      left_index <- left_index - 1
      right_index <- right_index + 1

    } else if (left_index == 0 & right_index <= length(dist)){
      total_mass <- total_mass + dist[right_index]
   } else if (right_index == length(dist) & left_index >=1){
        total_mass <- total_mass + dist[left_index]
   }
  }
  return (c(left_index/10,right_index/10))
}



library(ggplot2)
library(cdcFlu20182019)
data <- download_and_preprocess_state_flu_data()
data_ma <- data[data$region == "Massachusetts",]

current_epiweek <- tail(data$week,1)
current_season <- tail(data$season,1)

current_year <- as.numeric(ifelse(current_epiweek > 30, substr(current_season,1,4),substr(current_season,6,10)))
current_date <- MMWRweek2Date(current_year,current_epiweek)
model_csv <- read.csv(paste0("../../submissions/state-sarima/EW",current_epiweek,"-KoT-StateILI-",ymd(Sys.Date()),".csv"))
#model_csv <- read.csv(paste0("../../submissions/state-sarima/EW",current_epiweek,"-KoT-StateILI-2019-04-15",".csv"))


k_step_ahead <- c(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "1 wk ahead" & model_csv$Type == "Point",]$Value,
                  model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "2 wk ahead" & model_csv$Type == "Point",]$Value,
                  model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "3 wk ahead" & model_csv$Type == "Point",]$Value,
                  model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "4 wk ahead" & model_csv$Type == "Point",]$Value
                  )

k_step_ahead_quantile_80 <- c(get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "1 wk ahead" & model_csv$Type == "Bin",]$Value,.8),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "2 wk ahead" & model_csv$Type == "Bin",]$Value,.8),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "3 wk ahead" & model_csv$Type == "Bin",]$Value,.8),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "4 wk ahead" & model_csv$Type == "Bin",]$Value,.8)
                  )
upr_80 <- k_step_ahead_quantile_80[seq(0,length(k_step_ahead_quantile_80),by=2)]
lwr_80 <- k_step_ahead_quantile_80[seq(1,length(k_step_ahead_quantile_80),by=2)]


k_step_ahead_quantile_50 <- c(get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "1 wk ahead" & model_csv$Type == "Bin",]$Value,.5),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "2 wk ahead" & model_csv$Type == "Bin",]$Value,.5),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "3 wk ahead" & model_csv$Type == "Bin",]$Value,.5),
                  get_ci_from_sample(model_csv[model_csv$Location=="Massachusetts" & model_csv$Target == "4 wk ahead" & model_csv$Type == "Bin",]$Value,.5)
                  )
upr_50 <- k_step_ahead_quantile_50[seq(0,length(k_step_ahead_quantile_50),by=2)]
lwr_50 <- k_step_ahead_quantile_50[seq(1,length(k_step_ahead_quantile_50),by=2)]


l <- lapply(c(seq(31,52),seq(30)),function(x){
  if(x >= 31){
    return (MMWRweek2Date(2018,x))
  } else{
    return (MMWRweek2Date(2019,x))
  }

})

dates <- (d <- do.call("c", l))
## get current season_week
current_week <- MMWRweek(Sys.Date()) 
current_season_week <- ifelse(
    current_week$MMWRweek <= 30,
    current_week$MMWRweek + MMWRweek(MMWRweek:::start_date(current_week$MMWRyear) - 1)$MMWRweek - 30,
    current_week$MMWRweek - 30
  )
theme_set(theme_bw())
ggplot(data_ma[data_ma$season == "2018/2019",], aes(x=season_week, y=unweighted_ili)) + 
 xlab(NULL) + ylab("ILI (%)") + xlim(0,52) + 
  geom_point() + 
  geom_line() +
  geom_line(data=data.frame(y=c(tail(data_ma$unweighted_ili,1), k_step_ahead)), aes(x=(tail(data$season_week,1)):(tail(data$season_week,1)+4),y=y), col="cornflowerblue") +
  geom_point(data=data.frame(y=c(tail(data_ma$unweighted_ili,1), k_step_ahead)), aes(x=(tail(data$season_week,1)):(tail(data$season_week,1)+4),y=y), col="cornflowerblue") +
  geom_point() + ## adding again so last real data point is black
  geom_ribbon(data=data.frame(lwr=lwr_80,upr=upr_80,y=k_step_ahead), aes(x=(tail(data$season_week,1)+1):(tail(data$season_week,1)+4), ymin=lwr,ymax=upr,y=y),alpha=0.3,fill="cornflowerblue") + 
  geom_ribbon(data=data.frame(lwr=lwr_50,upr=upr_50,y= k_step_ahead), aes(x=(tail(data$season_week,1)+1):(tail(data$season_week,1)+4),ymin=lwr,ymax=upr,y=y), alpha=0.3,fill="cornflowerblue") +
  geom_line(data=data_ma[data_ma$season == "2017/2018",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  geom_line(data=data_ma[data_ma$season == "2016/2017",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  geom_line(data=data_ma[data_ma$season == "2015/2016",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  geom_line(data=data_ma[data_ma$season == "2014/2015",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  geom_line(data=data_ma[data_ma$season == "2013/2014",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  geom_line(data=data_ma[data_ma$season == "2012/2013",],aes(x=season_week,y=unweighted_ili),alpha=.1)+
  scale_x_continuous(labels=dates,breaks=data_ma[data_ma$season == "2017/2018",]$season_week)+ 
  ## vline at current date
  geom_vline(xintercept = current_season_week, linetype="dashed") +
  theme(
    axis.text.x = element_text(angle = 70, hjust = 1), 
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank())


reichlab/2018-2019-cdc-flu-contest documentation built on May 24, 2019, 7:36 a.m.