knitr::opts_chunk$set( warning = FALSE, message = FALSE, collapse = TRUE, comment = "#>" )
The conformalForecast package provides some commonly used conformal prediction methods for time series forecasting.
library(conformalForecast) library(forecast) library(ggplot2) library(dplyr) library(tibble) library(tsibble)
Suppose we are interested in forecasting a time series data generated from an AR(2) model with $\phi_1 = 0.8$, $\phi_2=-0.5$, and $\sigma^2 = 1$.
set.seed(0) series <- arima.sim(n = 1000, list(ar = c(0.8, -0.5)), sd = sqrt(1)) autoplot(series) + labs( title = "Time series generated from an AR(2) model", ylab = "" ) + theme_bw()
We first train a forecasting model AR(2) on a rolling forecast origin to generate forecasts and forecast errors on validation sets.
far2 <- function(x, h, level) { Arima(x, order = c(2, 0, 0)) |> forecast(h = h, level) } fc <- cvforecast(series, forecastfun = far2, h = 3, level = c(80, 95), forward = TRUE, window = 100, initial = 1) summary(fc)
fc |> autoplot() + labs( title = "Forecasts produced using an AR(2) model", ylab = "" ) + theme_bw()
(fc_score <- accuracy(fc, byhorizon = TRUE)) (fc_cov <- coverage(fc, window = 100, level = 95)) (fc_wid <- width(fc, window = 100, level = 95, includemedian = TRUE))
Based on the forecast errors on validation sets, we can train various conformal prediction methods to obtain distribution-free uncertainty estimation.
Here, we perform a SCP method with equal weights in sample quantile estimation.
scpfc <- scp(fc, symmetric = FALSE, ncal = 100, rolling = TRUE, weightfun = NULL, kess = FALSE, quantiletype = 1) (scpfc_score <- accuracy(scpfc, byhorizon = TRUE)) (scpfc_cov <- coverage(scpfc, window = 100, level = 95)) (scpfc_wid <- width(scpfc, window = 100, level = 95, includemedian = TRUE))
The scp() function allows us to include non-equal weights for sample quantile estimation by passing a weight calculation function to the weightfun argument.
expweight <- function(n) 0.99^{n+1-(1:n)} scpfc_exp <- scp(fc, symmetric = FALSE, ncal = 100, rolling = TRUE, weightfun = expweight, kess = FALSE, quantiletype = 1) (scpfc_exp_score <- accuracy(scpfc_exp, byhorizon = TRUE)) (scpfc_exp_cov <- coverage(scpfc_exp, window = 100, level = 95)) (scpfc_exp_wid <- width(scpfc_exp, window = 100, level = 95, includemedian = TRUE))
The ACP method uses an online update of $\alpha$ to perform the calibration so that we can achieve either approximate or exact marginal coverage.
acpfc <- acp(fc, symmetric = FALSE, gamma = 0.005, ncal = 100, rolling = TRUE) (acpfc_score <- accuracy(acpfc, byhorizon = TRUE)) (acpfc_cov <- coverage(acpfc, window = 100, level = 95)) (acpfc_wid <- width(acpfc, window = 100, level = 95, includemedian = TRUE))
The PID method combines three modules (quantile tracking, error integration, and scorecasting) to make an iteration to produce a sequence of quantile estimates used in the prediction sets.
# PID setup Tg <- 1000; delta <- 0.01 Csat <- 2 / pi * (ceiling(log(Tg) * delta) - 1 / log(Tg)) KI <- 2 lr <- 0.1
# PID without scorecaster pidfc_nsf <- pid(fc, symmetric = FALSE, ncal = 100, rolling = TRUE, integrate = TRUE, scorecast = FALSE, lr = lr, KI = KI, Csat = Csat) (pidfc_nsf_score <- accuracy(pidfc_nsf, byhorizon = TRUE)) (pidfc_nsf_cov <- coverage(pidfc_nsf, window = 100, level = 95)) (pidfc_nsf_wid <- width(pidfc_nsf, window = 100, level = 95, includemedian = TRUE))
# PID with a Naive method as the scorecaster naivefun <- function(x, h) { naive(x) |> forecast(h = h) } pidfc <- pid(fc, symmetric = FALSE, ncal = 100, rolling = TRUE, integrate = TRUE, scorecast = TRUE, scorecastfun = naivefun, lr = lr, KI = KI, Csat = Csat) (pidfc_score <- accuracy(pidfc, byhorizon = TRUE)) (pidfc_cov <- coverage(pidfc, window = 100, level = 95)) (pidfc_wid <- width(pidfc, window = 100, level = 95, includemedian = TRUE))
Similar to the PID method, the AcMCP method also integrates three modules (P, I, and D) to form the final iteration. However, instead of performing conformal prediction for each individual forecast horizon $h$ separately, AcMCP employs a combination of an MA($h-1$) model and a linear regression model of $e_{t+h|t}$ on $e_{t+h-1|t},\dots,e_{t+1|t}$ as the scorecaster. This allows the AcMCP method to capture the relationship between the $h$-step ahead forecast error and past errors.
acmcpfc <- acmcp(fc, ncal = 100, rolling = TRUE, integrate = TRUE, scorecast = TRUE, lr = lr, KI = KI, Csat = Csat) (acmcpfc_score <- accuracy(acmcpfc, byhorizon = TRUE)) (acmcpfc_cov <- coverage(acmcpfc, window = 100, level = 95)) (acmcpfc_wid <- width(acmcpfc, window = 100, level = 95, includemedian = TRUE))
Taking the AcMCP result as an example, we now show the average coverage on validation sets.
acmcpfc_cov$rollmean |> as_tsibble() |> mutate(horizon = key, coverage = value) |> update_tsibble(key = horizon) |> select(-c(key, value)) |> ggplot(aes(x = index, y = coverage, group = horizon)) + geom_line() + geom_hline(yintercept = 0.95, linetype = "dashed", color = "blue") + facet_grid(horizon~., scales = "free_y") + xlab("Time") + ylab("Rolling mean coverage for AcMCP") + theme_bw()
We can also show the rolling average interval width on validation sets.
acmcpfc_wid$rollmean |> as_tsibble() |> mutate(horizon = key, width = value) |> update_tsibble(key = horizon) |> select(-c(key, value)) |> ggplot(aes(x = index, y = width, group = horizon)) + geom_line() + facet_grid(horizon~., scales = "free_y") + xlab("Time") + ylab("Rolling mean width for AcMCP") + theme_bw()
We can also combine all the results and show them in one single plot.
candidates <- c("fc", "scpfc", "scpfc_exp", "acpfc", "pidfc_nsf", "pidfc", "acmcpfc") methods <- c("AR", "SCP", "WCP", "ACP", "PI", "PID", "AcMCP") for (i in 1:length(candidates)) { out <- get(paste0(candidates[i], "_cov")) out_pivot <- out$rollmean |> as_tsibble() |> mutate(horizon = key, coverage = value) |> update_tsibble(key = horizon) |> select(-c(key, value)) |> mutate(method = methods[i]) |> as_tibble() assign(paste0(methods[i], "_cov"), out_pivot) } cov <- bind_rows(mget(paste0(methods, "_cov"))) cols <- c( "AR" = "black", "SCP" = "yellow", "WCP" = "#fa9200", "ACP" = "green", "PI" = "blue", "PID" = "purple", "AcMCP" = "red" ) cov |> as_tsibble(index = index, key = c(horizon, method)) |> mutate(method = factor(method, levels = methods)) |> ggplot(aes(x = index, y = coverage, group = method, colour = method)) + geom_line(size = 0.8, alpha = 0.8) + scale_colour_manual(values = cols) + geom_hline(yintercept = 0.95, linetype = "dashed", colour = "gray") + facet_grid(horizon~.) + xlab("Time") + ylab("Rolling mean coverage") + theme_bw()
cov_mean <- lapply(1:length(candidates), function(i) { out_cov <- get(paste0(candidates[i], "_cov")) out_score <- get(paste0(candidates[i], "_score")) out_mean <- data.frame( method = methods[i], covmean = as.vector(out_cov$mean), winkler = as.vector(out_score[, "Winkler_95"]), msis = as.vector(out_score[,"MSIS_95"]) ) |> as_tibble() |> rownames_to_column("horizon") |> mutate(horizon = paste0("h=", horizon)) out_mean }) cov_mean <- do.call(bind_rows, cov_mean) |> mutate(method = factor(method, levels = methods)) |> mutate(covdiff = covmean - 0.95) |> arrange(horizon, method) print(cov_mean, n = nrow(cov_mean))
for (i in 1:length(candidates)) { out <- get(paste0(candidates[i], "_wid")) out_pivot <- out$rollmean |> as_tsibble() |> mutate(horizon = key, width = value) |> update_tsibble(key = horizon) |> select(-c(key, value)) |> mutate(method = methods[i]) |> as_tibble() assign(paste0(methods[i], "_wid"), out_pivot) } wid <- bind_rows(mget(paste0(methods, "_wid"))) wid |> as_tsibble(index = index, key = c(horizon, method)) |> mutate(method = factor(method, levels = methods)) |> ggplot(aes(x = index, y = width, group = method, colour = method)) + geom_line(size = 0.8, alpha = 0.8) + scale_colour_manual(values = cols) + facet_grid(horizon~.) + xlab("Time") + ylab("Rolling mean width") + theme_bw()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.