require(dplyr)
require(data.table)
require(dtplyr)
require(cdcfluview)
#' Create targets from historical flu data
#'
#' This function will
#'
#'
#'
create_targets <- function(fluview = TRUE, week_flu = NULL, output=FALSE,
file = NULL) {
# Creates file of seasonal and week ahead targets for current season of
# CDC FluSight influenza forecasting challenge from seasonal ILINet data.
#
# Args:
# fluview: if TRUE, retrieves ILINet data from FluView; if not, uses
# provided ILINet data. Default is TRUE
# week_flu: data frame of weekly ILINet values from this season to use for
# calculating targets. Must be provided if fluview == FALSE.
# Must contain the following columns:
# - location: character with 11 values; "US National" and
# "HHS Region 1" to "HHS Region 10"
# - week: MMWR week
# - season: Influenza season formatted as XXXX/XXXX
# - observation: weighted ILI value for given
# location/week/season combination
# output: if TRUE, outputs target file as CSV; if not, doesn't.
# Default is FALSE
# file: file name for CSV output. Must be provided if output == TRUE
#
# Returns:
# A data frame and CSV of seasonal and week ahead flu targets.
# Return an error if fluview == FALSE and no data frame provided
if (fluview == FALSE & is.null(week_flu)) {
stop("ILINet data required if not fetching data from FluView")
}
# Return an error if fluview == TRUE and data frame provided
if (fluview == TRUE & !is.null(week_flu)) {
stop("Do not provide data if fetching data from ILINet")
}
# Return an error if output == TRUE and no file path provided
if (output == TRUE & is.null(file)) {
stop("Must provide output file path for target file")
}
# Return an error if provided data frame not in proper format
if (!is.null(week_flu)) {
stopifnot(c("location","week","season","observation") %in% names(week_flu))
}
# Read in ILINet results -----------------------------------------------------
if (fluview == TRUE) {
if (!is.null(week_flu))
# Read in ILINet data and rename locations to match template
usflu <- get_flu_data("national", "ilinet", years = 2015:2016) %>%
select(
location = REGION.TYPE,
YEAR,
week = WEEK,
observation = X..WEIGHTED.ILI) %>%
mutate(
location = "US National") %>%
as.data.table()
regionflu <- get_flu_data("HHS", sub_region = 1:10,
"ilinet", years = 2015:2016) %>%
select(
location = REGION,
YEAR,
week = WEEK,
observation = X..WEIGHTED.ILI) %>%
as.data.table()
# Append HHS to beginning of location values for regional data
regionflu$location <- paste("HHS", regionflu$location)
# Join national and HHs regional flu data
week_flu <- rbind(usflu, regionflu)
# Rename seasons to be consistent
week_flu[week < 40, season := paste(YEAR-1,YEAR,sep="/")]
week_flu[week >= 40, season := paste(YEAR,YEAR+1,sep="/")]
week_flu[, YEAR := NULL]
# Round weighted ILINet values to 0.1%
week_flu$observation <- round(week_flu$observation, 1)
}
# Create baselines for each region
# TODO (Craig): Needs to be updated 10/11/2016 with new baselines
baselines <- data.table(location = unique(week_flu$location),
value = c(2.1, 1.3, 2.3, 1.8, 1.6, 1.9, 3.6, 1.7,
1.4, 2.6, 1.1))
# Date first forecasts received
start.date <- as.Date("2015-11-02")
start.wk <- 42 #First week of ILINet data used for forecasts
end.wk <- 18 + 52 #Last week of ILINet data used for forecasts
# Add 52 to weeks in new year to keep weeks in order
week_flu[week < 40, week := as.integer(week + 52)]
# Only keep weeks of interest for weekly forecasts received
week_flu <- week_flu[week_flu$week >= start.wk & week_flu$week <= (end.wk + 4), ]
# Create data shell to add targets to
targets <- data.frame(target = character(),
location = character(),
season = character(),
forecast.date = as.Date(character()),
observation = numeric()) %>%
as.data.table()
# Calculate season long targets if reached ----------------------------------
for (this.location in levels(as.factor(week_flu$location))) {
# Subset data
these.data <- week_flu[location == this.location]
# Season onset -------------------------------------------------------------
# Check to see if 3 weeks above baseline have passed
j <- 0 # Counter for weeks above peak
for (i in
head(these.data$week, n = 1):tail(these.data$week, n = 1)) {
if (these.data$observation[these.data$week == i] >=
baselines$value[baselines$location == this.location]) {
j <- j + 1
} else {
j <- 0
}
if (j == 3) {
onset <- i - 2
break
}
if (i == tail(these.data$week, n = 1)) {
onset <- NA
}
}
# If onset week > 52, reset to MMWR week
if (onset > 52) {
onset <- onset - 52
}
# Peak week and peak percentage --------------------------------------------
pkwk <- these.data$week[these.data$observation ==
max(these.data$observation)]
pkper <- round(max(these.data$observation), 1)
# If peak week > 52, reset to MMWR week
for (i in 1:length(pkwk)) {
if (pkwk[i] > 52) {
pkwk[i] <- pkwk[i] - 52
}
}
# Set to NA if less than 4 weeks following peak
if (tail(these.data$week, n = 1) - max(pkwk) < 4) {
pkwk <- NA
pkper <- NA
}
# Save onset, peak week, and peak percentage -------------------------------
if (length(pkwk) == 1) {
this.pred <- data.frame(target = c("Season onset", "Season peak week",
"Season peak percentage"),
location = this.location,
season = "2015/2016",
observation = c(onset, pkwk, pkper))
} else { # Multiple peak weeks
this.pred <- data.frame(target = c("Season onset", "Season peak week",
"Season peak percentage"),
location = this.location,
season = "2015/2016",
observation = c(onset, pkwk[1], pkper))
for (i in 2:length(pkwk)) {
extra.obs <- data.frame(V1 = c(NA, pkwk[i], NA))
names(extra.obs) <- paste0("observation", i)
this.pred <- cbind(this.pred, extra.obs)
}
}
#Append targets to overall target file
targets <- rbind(targets, this.pred, fill=TRUE)
}
# Calculate weekly targets --------------------------------------------------
these.targets <- c("1 wk ahead", "2 wk ahead", "3 wk ahead", "4 wk ahead")
week.target <- data.frame(target = character(),
location = character(),
season = character(),
forecast.date = as.Date(character()),
observation = integer()) %>%
as.data.table()
for (this.location in levels(as.factor(week_flu$location))) {
for (this.target in these.targets) {
wk <- as.numeric(substr(this.target, 1, 1))
for (this.week in start.wk:end.wk) {
#Set forecast date
forecast.date <- start.date + (this.week - start.wk) * 7
#Set forecast location
this.point <- filter(week_flu, location == this.location &
week == this.week + wk) %>%
mutate(
target = this.target,
forecast.date = forecast.date) %>%
select(
-week)
week.target <- rbind(week.target, this.point)
}
}
}
# Ad hoc forecast date corrections due to Thanksgiving/Christmas/New Years
# TODO (Craig): Needs to be updated to 2016/17 values once code final
week.target[forecast.date == "2015-11-30", forecast.date := as.Date("2015-12-01")]
week.target[forecast.date == "2015-12-28", forecast.date := as.Date("2015-12-30")]
week.target[forecast.date == "2016-01-04", forecast.date := as.Date("2016-01-06")]
# Join weekly targets to season targets
targets <- rbind(targets, week.target, fill = TRUE)
# Export to CSV if requested
if (output == TRUE) {
write.table(targets, file, sep = ",", row.names = FALSE)
}
return(targets)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.