## inst/data-processing/create-flu-baseline-file.R

## create long flu baseline dataset
## Nicholas Reich
## October 2016

library(tidyr)
library(dplyr)

## Download the wide wILI baseline table. The reshaping below relies on
## read.csv()'s default column names: the region column is referred to as `X`
## and the season columns carry a leading "X" with "." in place of "/".
## stringsAsFactors = FALSE keeps the region labels as plain character
## vectors regardless of the R version's default.
dat <- read.csv("https://raw.githubusercontent.com/cdcepi/FluSight-forecasts/master/wILI_Baseline.csv",
                stringsAsFactors = FALSE)

## Reshape to long format: one row per region/season pair.
## as_tibble() replaces the deprecated as_data_frame().
dat <- as_tibble(dat) %>%
    gather(key = year, value = baseline, -X) %>%
    transmute(region = as.character(X),
              ## characters 2-10 drop the leading "X" of the season name;
              ## gsub() then replaces . with / in the season string
              season = gsub("\\.", "/", substr(year, start = 2, stop = 10)),
              baseline = baseline)

## fill in baselines for seasons before 2007/2008 with means

## canonical region order used for every back-filled season
regions <- c("National", paste0("Region", 1:10))

## mean baseline per region over the seasons present in `dat`,
## rounded to one decimal place
mean_region_baseline <- dat %>%
  group_by(region) %>%
  summarize(baseline = round(mean(baseline), 1))

## Line the means up with `regions` by name instead of by a fixed row
## permutation: the row order produced by group_by() depends on how the
## region labels sort (e.g. "Region10" before "Region2"), so positional
## reordering could silently pair a mean with the wrong region.
region_rows <- match(regions, mean_region_baseline$region)
if (anyNA(region_rows)) {
  stop(
    "no baseline found for region(s): ",
    paste(regions[is.na(region_rows)], collapse = ", "),
    call. = FALSE
  )
}
mean_region_baseline <- mean_region_baseline[region_rows, ]

## Build all back-filled seasons (1997/1998 through 2006/2007) in one step
## rather than growing `dat` inside a loop. Rows are ordered season by
## season, oldest first, each season listing the regions in `regions` order,
## and are placed ahead of the observed baselines.
first_season_years <- 1997:2006
n_seasons <- length(first_season_years)
backfilled_baselines <- data.frame(
  region = rep(regions, times = n_seasons),
  season = rep(
    paste0(first_season_years, "/", first_season_years + 1),
    each = length(regions)
  ),
  baseline = rep(mean_region_baseline$baseline, times = n_seasons),
  stringsAsFactors = FALSE  # keep character columns compatible with `dat`
)
dat <- bind_rows(backfilled_baselines, dat)

## plain data.frame copy of the finished table, stored under the object
## name that is written to the .rdata file below
flu_onset_baselines <- as.data.frame(dat)

## Persist the result twice: a CSV without quoting or row names, and an
## .rdata file holding `flu_onset_baselines`. Both paths are relative to
## the current working directory.
write.csv(
  dat,
  file = "data-raw/flu_onset_baselines.csv",
  quote = FALSE,
  row.names = FALSE
)
save(list = "flu_onset_baselines", file = "data/flu_onset_baselines.rdata")
## reichlab/2017-2018-cdc-flu-contest documentation built on May 24, 2019, 6:17 a.m.