knitr::opts_chunk$set(message = FALSE, warning = FALSE, error = FALSE, fig.align = "center", dev.args = list(pointsize = 10))
options(digits = 4) # for more compact numerical outputs library("HIDDA.forecasting") library("ggplot2") source("setup.R", local = TRUE) # define test periods (OWA, TEST)
In this vignette, we compute naive reference forecasts to be compared with
the more sophisticated modelling approaches presented in the other vignettes.
Our naive approach to predict weekly ILI counts from
r format_period(OWA)
(the OWA
period) is to estimate a log-normal
distribution from the counts observed in the previous years in the same
calendar week. At each week, we estimate the two parameters using maximum
likelihood as implemented in MASS::fitdistr()
.
The corresponding software reference is:
cat("<blockquote>") print(citation(package = "MASS", auto = TRUE), style = "html") cat("</blockquote>\n")
CHILI_calendarweek <- as.integer(strftime(index(CHILI), "%V")) naiveowa <- t(sapply(X = OWA+1, FUN = function (week) { cw <- CHILI_calendarweek[week] index_cws <- which(CHILI_calendarweek == cw) index_prior_cws <- index_cws[index_cws < week] MASS::fitdistr(CHILI[index_prior_cws], "lognormal")$estimate }))
par(mar = c(5,5,1,1), las = 1) .PIT <- plnorm(CHILI[OWA+1], meanlog = naiveowa[,"meanlog"], sdlog = naiveowa[,"sdlog"]) hist(.PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = "", xlab = "PIT") abline(h = 1, lty = 2, col = "grey")
naiveowa_scores <- scores_lnorm( x = CHILI[OWA+1], meanlog = naiveowa[,"meanlog"], sdlog = naiveowa[,"sdlog"], which = c("dss", "logs")) summary(naiveowa_scores)
Note that discretized forecast distributions yield almost identical scores (essentially due to the large counts):
naiveowa_scores_discretized <- scores_lnorm_discrete( x = CHILI[OWA+1], meanlog = naiveowa[,"meanlog"], sdlog = naiveowa[,"sdlog"], which = c("dss", "logs")) summary(naiveowa_scores_discretized)
stopifnot( all.equal(naiveowa_scores_discretized[,"dss"], naiveowa_scores[,"dss"], tolerance = 1e-5), all.equal(naiveowa_scores_discretized[,"logs"], naiveowa_scores[,"logs"], tolerance = 1e-5) )
naiveowa_quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = naiveowa[,"meanlog"], sdlog = naiveowa[,"sdlog"]) par(mar = c(5,5,1,1)) osaplot( quantiles = naiveowa_quantiles, probs = 1:99/100, observed = CHILI[OWA+1], scores = naiveowa, start = OWA[1]+1, xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL) )
With this naive forecasting approach, the long-term forecast for a whole season is simply composed of the sequential one-week-ahead forecasts during that season.
rownames(naiveowa) <- OWA+1 naivefor <- lapply(TEST, function (testperiod) { owas <- naiveowa[as.character(testperiod),,drop=FALSE] list(testperiod = testperiod, observed = as.vector(CHILI[testperiod]), meanlog = owas[,"meanlog"], sdlog = owas[,"sdlog"]) })
par(mar = c(5,5,1,1), mfrow = sort(n2mfrow(length(naivefor))), las = 1) invisible(lapply(naivefor, function (x) { PIT <- plnorm(x$observed, meanlog = x$meanlog, sdlog = x$sdlog) hist(PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = format_period(x$testperiod, fmt = "%Y", collapse = "/")) abline(h = 1, lty = 2, col = "grey") }))
par(mar = c(5,5,1,1)) t(sapply(naivefor, function (x) { quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = x$meanlog, sdlog = x$sdlog) scores <- scores_lnorm(x = x$observed, meanlog = x$meanlog, sdlog = x$sdlog, which = c("dss", "logs")) osaplot(quantiles = quantiles, probs = 1:99/100, observed = x$observed, scores = scores, start = x$testperiod[1], xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL)) colMeans(scores) }))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.