#' Conformalized Forecasting using Machine Leaning models
#'
#' @param y A numeric vector or time series of class \code{ts}
#' @param h Forecasting horizon
#' @param level Confidence level for prediction intervals
#' @param lags Number of lags of the input time series considered in the regression
#' @param fit_func Fitting function (Statistical/ML model). Default is Ridge regression.
#' @param predict_func Prediction function (Statistical/ML model)
#' @param coeffs Coefficients of the fitted model. If provided, a linear combination with the coefficients is used to compute the prediction.
#' @param type_pi Type of prediction interval
#' @param B Number of bootstrap replications or number of simulations
#' @param agg "mean" or "median" (aggregation method)
#' @param ... additional parameters passed to the fitting function \code{fit_func}
#'
#' @return
#' @export
#'
#' @examples
#'
#' res <- ahead::mlf(USAccDeaths, h=10L, lags=15L, type_pi="surrogate", B=250L)
#' plot(res)
#'
#' res <- ahead::mlf(USAccDeaths, fit_func = glmnet::cv.glmnet, h=15L, lags=15L,
#' type_pi="kde", B=250L)
#' plot(res)
#'
#' (res <- ahead::mlf(USAccDeaths, fit_func = e1071::svm, h=15L, lags=15L,
#' type_pi="kde", B=250L))
#' plot(res)
#'
mlf <- function(y, h = 5, level = 95, lags = 15L,
fit_func = ahead::ridge,
predict_func = predict,
coeffs = NULL,
type_pi = c("kde", "surrogate", "bootstrap"),
B = 250L, agg = c("mean", "median"),
seed = 123,
...)
{
set.seed(seed)
n <- length(y)
freq_x <- frequency(y)
half_n <- ceiling(n/2)
idx_train <- seq_len(half_n)
idx_calib <- setdiff(seq_len(n), idx_train)
splitted_y <- misc::splitts(y)
y_train <- splitted_y$training
y_calib <- splitted_y$testing
type_pi <- match.arg(type_pi)
agg <- match.arg(agg)
y_pred_calibration <- ml_forecast(y = y_train,
h = length(y_calib),
lags=lags,
fit_func = fit_func,
predict_func = predict_func,
coeffs = coeffs,
...)$mean
preds_obj <- ml_forecast(y = y_calib,
h = h,
lags = lags,
fit_func = fit_func,
predict_func = predict_func,
coeffs = coeffs,
...)
preds <- preds_obj$mean
tspx <- tsp(y_calib)
start_preds <- tspx[2] + 1 / tspx[3]
matrix_preds <- replicate(B, preds)
calibrated_raw_residuals <- y_calib - y_pred_calibration
scaled_calib_resids <- base::scale(calibrated_raw_residuals)
xm <- attr(scaled_calib_resids, "scaled:center")
xsd <- attr(scaled_calib_resids, "scaled:scale")
scaled_calibrated_residuals <- base::scale(calibrated_raw_residuals,
center = TRUE,
scale = TRUE)
if (type_pi == "kde") {
simulated_scaled_calibrated_residuals <-
rgaussiandens(
scaled_calibrated_residuals,
n = h,
p = B,
seed = seed
)
sd_calibrated_residuals <- sd(calibrated_raw_residuals)
}
if (type_pi == "surrogate") {
set.seed(seed)
simulated_scaled_calibrated_residuals <-
tseries::surrogate(scaled_calibrated_residuals,
ns =
B)[seq_along(h), ]
sd_calibrated_residuals <- sd(calibrated_raw_residuals)
}
if (type_pi == "bootstrap") {
freq_calibrated_raw_residuals <- frequency(calibrated_raw_residuals)
if (length(calibrated_raw_residuals) <= 2 * freq_calibrated_raw_residuals)
freq_calibrated_raw_residuals <- 1L
block_size <-
ifelse(
freq_calibrated_raw_residuals > 1,
2 * freq_calibrated_raw_residuals,
min(8, floor(
length(calibrated_raw_residuals) / 2
))
)
block_size <-
floor(min(
max(3L, block_size),
length(calibrated_raw_residuals) - 1L
))
set.seed(seed)
simulated_scaled_calibrated_residuals <-
tseries::tsbootstrap(
scaled_calibrated_residuals,
nb =
B,
b = floor(block_size),
type =
"block"
)[seq_along(h), ]
sd_calibrated_residuals <- sd(calibrated_raw_residuals)
}
sims <- matrix_preds
sims <- sims + sd_calibrated_residuals * simulated_scaled_calibrated_residuals
sims <- ts(sims,
start = start_preds,
frequency = frequency(y_train))
preds_lower <-
apply(sims, 1, function(x)
quantile(x, probs = (1 - level / 100) / 2))
preds_upper <-
apply(sims, 1, function(x)
quantile(x, probs = 1 - (1 - level / 100) / 2))
out <- vector("list", 8)
class(out) <- "forecast"
out$mean <- ts(switch(
agg,
median = apply(sims, 1, median),
mean = apply(sims, 1, mean)
),
start = start_preds,
frequency = freq_x)
out$lower <- ts(preds_lower,
start = start_preds,
frequency = freq_x)
out$upper <- ts(preds_upper,
start = start_preds,
frequency = freq_x)
out$x <- y_calib
out$level <- level
out$method <- "conformalized ML"
out$model <- preds_obj$model
out$residuals <- ts(
calibrated_raw_residuals,
start = start(y_calib),
frequency = frequency(y_train)
) # /!\ not the same residuals, beware
out$fitted <- ts(
y_pred_calibration,
start = start(y_calib),
frequency = frequency(y_train)
) # /!\ not the same fitted, beware
out$sims <- sims
return(out)
}
ml_forecast <- function(y, h,
lags=1,
fit_func = ahead::ridge,
predict_func = predict,
coeffs = NULL,
...)
{
df <- as.data.frame(embed(rev(as.numeric(y)), lags + 1L))
ncol_df <- ncol(df)
colnames(df) <- c(paste0("lag", rev(seq_len(lags))), "y")
if(is.null(coeffs))
{
fit <- try(fit_func(y ~ ., data = df, ...), silent=TRUE)
if (inherits(fit, "try-error"))
{
fit <- fit_func(x = as.matrix(df)[, -ncol_df],
y = df$y, ...)
}
for (i in 1:h)
{
newdata <- matrix(df[1, (ncol_df-lags + 1):ncol_df], nrow=1)
colnames(newdata) <- paste0("lag", rev(seq_len(lags)))
newdata_df <- data.frame(matrix(as.numeric((newdata)), ncol=lags))
colnames(newdata_df) <- paste0("lag", rev(seq_len(lags)))
prediction <- as.numeric(predict_func(fit, newdata_df)) # /!\ not the same prediction, beware
df <- data.frame(rbind(as.matrix(cbind(newdata_df, prediction)), as.matrix(df)))
colnames(df) <- c(paste0("lag", rev(seq_len(lags))), "y")
}
} else {
fit <- list(coefficients = coeffs)
for (i in 1:h)
{
newdata <- matrix(df[1, (ncol_df-lags + 1):ncol_df], nrow=1)
colnames(newdata) <- paste0("lag", rev(seq_len(lags)))
newdata_df <- data.frame(matrix(as.numeric((newdata)), ncol=lags))
colnames(newdata_df) <- paste0("lag", rev(seq_len(lags)))
prediction <- as.numeric(fit$coefficients %*% as.matrix(newdata_df)) # /!\ not the same prediction, beware
df <- data.frame(rbind(as.matrix(cbind(newdata_df, prediction)), as.matrix(df)))
colnames(df) <- c(paste0("lag", rev(seq_len(lags))), "y")
}
}
return(list(model = fit, mean = rev(head(df$y, h))))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.