knitr::opts_chunk$set(message = FALSE, warning = FALSE, error = FALSE, fig.align = "center", dev.args = list(pointsize = 10))
options(digits = 4) # for more compact numerical outputs library("HIDDA.forecasting") library("ggplot2") source("setup.R", local = TRUE) # define test periods (OWA, TEST)
In this vignette, we use forecasting methods provided by:
library("prophet")
The corresponding software reference is:
cat("<blockquote>") print(citation(package = "prophet", auto = TRUE), style = "html") cat("</blockquote>\n")
prophet uses a Gaussian likelihood, so we will work with log-counts as in ARIMA.
## which dates correspond to ISO week 52? index(CHILI)[strftime(index(CHILI), "%V") == "52"]
So for consistency with the other models we define holidays from 21 to 28 December each year.
if (packageVersion("prophet") < "0.4") { ## workaround a bug in make_holiday_features(), which needs row_number() library("dplyr") }
set.seed(1411171) christmas <- data.frame( holiday = "Christmas", ds = as.Date(paste0(2000:2016, "-12-21")), lower_window = 0, upper_window = 7 ) prophetfit_control <- prophet( yearly.seasonality = TRUE, weekly.seasonality = FALSE, daily.seasonality = FALSE, holidays = christmas, mcmc.samples = 0, # invokes rstan::optimizing (fast MAP estimation) interval.width = 0.95, fit = FALSE) prophetfit <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI)) ) prophetfitted <- predict(prophetfit)
prophet_plot_components(prophetfit, prophetfitted)
CHILIdat <- fortify(CHILI) CHILIdat[c("prophetfitted","prophetlower","prophetupper")] <- exp(prophetfitted[c("yhat", "yhat_lower", "yhat_upper")])
##plot(prophetfit, prophetfitted) ggplot(CHILIdat, aes(x=Index, ymin=prophetlower, y=prophetfitted, ymax=prophetupper)) + geom_ribbon(fill="orange") + geom_line(col="darkred") + geom_point(aes(y=CHILI), pch=20) + scale_y_sqrt(expand = c(0,0), limits = c(0,NA))
We compute r length(OWA)
one-week-ahead forecasts
from r format_period(OWA)
(the OWA
period).
For each time point, forecasting with prophet
takes about 3 seconds, i.e.,
computing all one-week-ahead forecasts takes approx.
r sprintf("%.1f", length(OWA) * 3/60)
minutes ...
set.seed(1411172) prophetowa <- cross_validation( model = prophetfit, horizon = 1, period = 1, initial = OWA[1] - 1, units = "weeks" ) ## add forecast variance (calculated from prediction interval) prophetowa$sigma <- with(prophetowa, (yhat_upper-yhat_lower)/2/qnorm(0.975)) ## save results save(prophetowa, file = "prophetowa.RData")
load("prophetowa.RData")
## BTW, the error SD can be obtained from a prophet fit as prophetfit$y.scale * prophetfit$params$sigma_obs
prophet
forecasts for the log-counts are approximately Gaussian with mean
yhat
and variance sigma^2
=> back-transformation via exp() is log-normal
par(mar = c(5,5,1,1), las = 1) .PIT <- plnorm(exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma) hist(.PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = "", xlab = "PIT") abline(h = 1, lty = 2, col = "grey")
prophetowa_scores <- scores_lnorm( x = exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma, which = c("dss", "logs")) summary(prophetowa_scores)
Note that discretized forecast distributions yield almost identical scores (essentially due to the large counts):
prophetowa_scores_discretized <- scores_lnorm_discrete( x = exp(prophetowa$y), meanlog = prophetowa$yhat, sdlog = prophetowa$sigma, which = c("dss", "logs")) summary(prophetowa_scores_discretized)
stopifnot( all.equal(prophetowa_scores_discretized[,"dss"], prophetowa_scores[,"dss"], tolerance = 1e-5), all.equal(prophetowa_scores_discretized[,"logs"], prophetowa_scores[,"logs"], tolerance = 1e-5) )
prophetowa_quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = prophetowa$yhat, sdlog = prophetowa$sigma) par(mar = c(5,5,1,1)) osaplot( quantiles = prophetowa_quantiles, probs = 1:99/100, observed = exp(prophetowa$y), scores = prophetowa_scores, start = OWA[1]+1, xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL) )
## try with the first test period TEST1 <- TEST[[1]] fit1 <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI))[1:(TEST1[1]-1),] ) prophetfor1 <- predict(fit1, data.frame(ds = index(CHILI)[TEST1])) ## add forecast variance (calculated from prediction interval) prophetfor1$sigma <- with(prophetfor1, (yhat_upper-yhat_lower)/2/qnorm(0.975))
set.seed(1411173) prophetfor <- lapply(TEST, function (testperiod) { t0 <- testperiod[1] - 1 fit0 <- fit.prophet( m = prophetfit_control, df = data.frame(ds = index(CHILI), y = log(CHILI))[1:t0,] ) fc <- predict(fit0, data.frame(ds = index(CHILI)[testperiod])) ## add forecast variance (calculated from prediction interval) fc$sigma <- with(fc, (yhat_upper-yhat_lower)/2/qnorm(0.975)) list(testperiod = testperiod, observed = as.vector(CHILI[testperiod]), yhat = fc$yhat, sigma = fc$sigma) })
par(mar = c(5,5,1,1), mfrow = sort(n2mfrow(length(prophetfor))), las = 1) invisible(lapply(prophetfor, function (x) { PIT <- plnorm(x$observed, meanlog = x$yhat, sdlog = x$sigma) hist(PIT, breaks = seq(0, 1, 0.1), freq = FALSE, main = format_period(x$testperiod, fmt = "%Y", collapse = "/")) abline(h = 1, lty = 2, col = "grey") }))
par(mar = c(5,5,1,1)) t(sapply(prophetfor, function (x) { quantiles <- sapply(X = 1:99/100, FUN = qlnorm, meanlog = x$yhat, sdlog = x$sigma) scores <- scores_lnorm(x = x$observed, meanlog = x$yhat, sdlog = x$sigma, which = c("dss", "logs")) osaplot(quantiles = quantiles, probs = 1:99/100, observed = x$observed, scores = scores, start = x$testperiod[1], xlab = "Week", ylim = c(0,60000), fan.args = list(ln = c(0.1,0.9), rlab = NULL)) colMeans(scores) }))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.