#' chopper
#'
#' A changepoint-aware probabilistic forecasting algorithm for univariate time series.
#' This function decomposes the input series into structurally homogeneous segments,
#' detecting changepoints in both mean and variance. On each segment, three distinct
#' probabilistic forecasting models (TBATS, Theta and ARFIMA) are applied to estimate
#' future trajectories.
#'
#' Instead of relying on a single model, `chopper()` embraces the variety of probabilistic forecasts by merging
#' their outputs through a product-of-experts composition rule. This lets each model contribute to the
#' final distribution in proportion to the confidence it assigns to different outcomes.
#'
#' The final output is a collection of forecast distributions—one for each horizon step—that captures both
#' epistemic uncertainty (from model disagreement) and aleatoric uncertainty (from residual variability).
#'
#' @param ts A numeric vector representing the input time series.
#' @param horizon An integer specifying the number of future time steps to forecast.
#' @param decay A numeric value controlling the exponential decay rate used to softmax-weight the forecasts from each segment (default: 0.5).
#' @param n_samples An integer specifying the number of samples drawn for estimating the forecast distributions (default is 1000).
#' @param alt_mode Logical flag: when TRUE, fits models on the independent segments between changepoints instead of the suffix segments running from each changepoint to the end of the series (default: FALSE).
#' @param reverse Logical flag: when TRUE, inverts the weight vector (default: FALSE).
#'
#' @return A list containing:
#' \describe{
#' \item{pred_funs}{A named list of forecast distributions, one for each time step in the forecast horizon. The contribution of each segment is weighted by the softmax of an exponential decay evaluated at the segment's end position.}
#' \item{plot}{A ggplot visualization of the historical series and the forecast band.}
#' \item{time_log}{The elapsed computation time.}
#' }
#'
#' Each distribution is represented as a list containing:
#' \describe{
#' \item{rfun}{A random sampling function for drawing synthetic future values.}
#' \item{dfun}{A density function over the predicted values.}
#' \item{pfun}{A cumulative distribution function.}
#' \item{qfun}{A quantile function.}
#' }
#'
#' @details
#' This function uses the `changepoint` package to perform offline changepoint detection
#' on the percentage changes of the historical series. For each segment identified, three forecasting models are fitted:
#' \itemize{
#' \item \strong{Theta}: decomposed trend extrapolation with adjusted curvature.
#' \item \strong{TBATS}: based on exponential smoothing, designed for time series data with multiple seasonalities.
#' \item \strong{ARFIMA}: a long-memory extension of ARIMA that captures slowly decaying autocorrelation via fractional differencing.
#' }
#'
#' The output of each model is converted to a forecast density for each horizon step. These are multiplied together
#' (product of experts) and normalized over a shared evaluation grid. A weighted average of these densities is then
#' constructed across segments, with segment-specific weights obtained from a softmax over exponentially decayed segment end positions.
#'
#' @examples
#' \donttest{
#' # Fast example with the tail of a time series
#' forecasted <- chopper(ts = tail(ts_set$AMZN.Close, 70), horizon = 5, n_samples = 100)
#'
#' # Draw samples from forecast for t1 and t5
#' forecasted$pred_funs$t1$rfun(10)
#' forecasted$pred_funs$t5$rfun(10)
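#'
#' # Inspect the forecast plot and the elapsed-time log
#' forecasted$plot
#' forecasted$time_log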
#'
#' }
#'
#' @importFrom changepoint cpt.meanvar
#' @importFrom forecast arfima forecast tbats thetaf
#' @importFrom imputeTS na_kalman
#' @importFrom purrr map map2 pmap flatten map_dbl
#' @importFrom utils head tail
#' @importFrom stats qnorm rnorm dnorm pnorm fft approxfun coef dunif median punif qunif quantile runif sd
#' @importFrom normalp paramp dnormp
#' @importFrom GeneralizedHyperbolic hyperbFit dhyperb
#' @importFrom evd fgev dgev
#' @importFrom ald mleALD dALD
#' @importFrom fGarch sstdFit dsstd
#' @importFrom scales number
#' @importFrom lubridate seconds_to_period
#' @importFrom ggplot2 ggplot geom_line aes geom_ribbon scale_x_date scale_x_continuous scale_y_continuous ylab theme_bw theme element_text
#'
#' @export
chopper <- function(ts, horizon, decay = 0.5, n_samples = 1000, alt_mode = FALSE, reverse = FALSE)
{
start <- Sys.time()
if(anyNA(ts)){ts <- na_kalman(ts)} # impute missing values before segmentation
n_length <- length(ts)
model <- suppressWarnings(cpt.meanvar(dts(ts, 1))) # changepoints in mean/variance of the percentage changes
sections <- c(1, model@cpts + 1)
s_points <- head(sections, -1) # segment start positions
e_points <- tail(sections, -1) # segment end positions
if(!alt_mode){models <- map(s_points, ~ product_of_experts(ts[.x:n_length], horizon, n_samples, alt_mode, ts))}
if(alt_mode){models <- map2(s_points, e_points, ~ product_of_experts(ts[.x:.y], horizon, n_samples, alt_mode, ts))}
weights <- softmax(exp_decay(e_points, decay)) # later segments receive larger weights
pred_funs <- merge_dist(models, horizon, weights, n_samples, reverse)
names(pred_funs) <- paste0("t", 1:horizon)
plot <- plot_graph(ts, pred_funs)
end <- Sys.time()
time_log <- seconds_to_period(round(difftime(end, start, units = "secs"), 0))
out <- list(pred_funs = pred_funs, plot = plot, time_log = time_log)
return(out)
}
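###
# Illustrative sketch (not part of the package API): how chopper() turns the
# changepoint locations reported by cpt.meanvar() into segment bounds. The
# changepoint positions below are hypothetical; cpt.meanvar() reports the
# series end as its last changepoint, so the final end point is length + 1.
.sketch_segments <- function(cpts = c(40, 70, 100))
{
  sections <- c(1, cpts + 1)          # 1, 41, 71, 101
  list(starts = head(sections, -1),   # 1, 41, 71
       ends = tail(sections, -1))     # 41, 71, 101
}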
####
product_of_experts <- function(ts, horizon, n_samples = 1000, alt_mode = FALSE, long_ts = NULL)
{
b_preds <- tbats_predictor(ts, horizon, alt_mode)
f_preds <- arfima_predictor(ts, horizon, alt_mode)
t_preds <- theta_predictor(ts, horizon, alt_mode)
ranges <- lapply(1:horizon, function(h) range(c(b_preds[[h]]$rfun(n_samples),
f_preds[[h]]$rfun(n_samples),
t_preds[[h]]$rfun(n_samples)
)))
values <- map(ranges, ~ seq(.x[1], .x[2], length.out = n_samples))
joint_dens <- pmap(list(b_preds, f_preds, t_preds, values), ~ ..1$dfun(..4) * ..2$dfun(..4) * ..3$dfun(..4)) # product of experts on the shared grid
samples <- map2(values, joint_dens, ~ sample(.x[!is.na(.y)], size = n_samples, prob = .y[!is.na(.y)], replace = TRUE)) # sampling with prob implicitly normalizes the product
if(!alt_mode){pred_funs <- map(samples, ~ mixture(.x))}
if(alt_mode){ground <- tail(long_ts, 1); pred_funs <- map(samples, ~ mixture(ground * (1 + .x)))}
names(pred_funs) <- paste0("t", 1:horizon)
return(pred_funs)
}
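###
# Minimal sketch (illustration only, never called by the package): the
# product-of-experts rule used above, reduced to two normal "experts" on a
# shared grid. The expert parameters are hypothetical.
.sketch_poe <- function(n_samples = 1000)
{
  grid <- seq(-4, 6, length.out = n_samples)   # shared evaluation grid
  d1 <- dnorm(grid, mean = 0, sd = 1)          # expert 1
  d2 <- dnorm(grid, mean = 2, sd = 2)          # expert 2
  joint <- d1 * d2                             # product of experts
  # weighted resampling normalizes the product into a proper distribution
  sample(grid, size = n_samples, prob = joint, replace = TRUE)
}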
###
merge_dist <- function(pred_fun_list, h, w, n_samples = 1000, reverse = FALSE)
{
n_dist <- length(pred_fun_list)
ranges <- lapply(1:h, function(i) range(unlist(lapply(pred_fun_list, function(pfun) pfun[[i]]$rfun(n_samples)))))
values <- map(ranges, ~ seq(.x[1], .x[2], length.out = n_samples))
if(reverse){w <- rev(w)}
joint_dens <- lapply(1:h, function(i) colSums(w * Reduce(rbind, lapply(pred_fun_list, function(pfun) pfun[[i]]$dfun(values[[i]]))))) # weighted mixture: one row of densities per segment
samples <- map2(values, joint_dens, ~ sample(.x[!is.na(.y)], size = n_samples, prob = .y[!is.na(.y)], replace = TRUE))
pred_funs <- map(samples, ~ empfun(.x))
names(pred_funs) <- paste0("t", 1:h)
return(pred_funs)
}
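###
# Minimal sketch (illustration only): the weighted mixture performed by
# merge_dist(), with two hypothetical segment densities and weights.
.sketch_merge <- function(n_samples = 1000, w = c(0.3, 0.7))
{
  grid <- seq(-5, 5, length.out = n_samples)
  dens <- rbind(dnorm(grid, -1, 1), dnorm(grid, 1, 0.5))   # one row per segment
  mixed <- colSums(w * dens)                               # weighted average of densities
  sample(grid, size = n_samples, prob = mixed, replace = TRUE)
}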
###
tbats_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
model <- tbats(ts)
preds <- forecast(model, h = h, level = 0.95)
mean_forecast <- preds$mean
upper_95 <- preds$upper[, 1]
lower_95 <- preds$lower[, 1]
z_score <- qnorm(0.975)
std_dev <- (upper_95 - lower_95) / (2 * z_score)
params <- data.frame(
mean = mean_forecast,
sd = std_dev
)
pred_funs <- function(mean, sd)
{
rfun <- function(n) rnorm(n, mean, sd)
dfun <- function(x) dnorm(x, mean, sd)
pfun <- function(q) pnorm(q, mean, sd)
qfun <- function(p) qnorm(p, mean, sd)
out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
return(out)
}
ground <- tail(ts, 1)
if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}
names(pred_fun) <- paste0("t", 1:h)
return(pred_fun)
}
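###
# Sanity check (illustration only) for the interval-to-sd conversion shared by
# tbats_predictor(), theta_predictor() and arfima_predictor(): recovering the
# standard deviation of a normal distribution from its 95% interval bounds.
.sketch_interval_sd <- function(sd = 2)
{
  upper <- qnorm(0.975, mean = 0, sd = sd)   # upper bound of the 95% interval
  lower <- qnorm(0.025, mean = 0, sd = sd)   # lower bound
  (upper - lower) / (2 * qnorm(0.975))       # returns the original sd
}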
###
theta_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
preds <- thetaf(ts, h = h, level = 0.95)
mean_forecast <- preds$mean
upper_95 <- preds$upper[, 1]
lower_95 <- preds$lower[, 1]
z_score <- qnorm(0.975)
std_dev <- (upper_95 - lower_95) / (2 * z_score)
params <- data.frame(
mean = mean_forecast,
sd = std_dev
)
pred_funs <- function(mean, sd)
{
rfun <- function(n) rnorm(n, mean, sd)
dfun <- function(x) dnorm(x, mean, sd)
pfun <- function(q) pnorm(q, mean, sd)
qfun <- function(p) qnorm(p, mean, sd)
out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
return(out)
}
ground <- tail(ts, 1)
if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}
names(pred_fun) <- paste0("t", 1:h)
return(pred_fun)
}
####
arfima_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
model <- arfima(ts)
preds <- forecast(model, h = h, level = 0.95)
mean_forecast <- preds$mean
upper_95 <- preds$upper[, 1]
lower_95 <- preds$lower[, 1]
z_score <- qnorm(0.975)
std_dev <- (upper_95 - lower_95) / (2 * z_score)
params <- data.frame(
mean = mean_forecast,
sd = std_dev
)
pred_funs <- function(mean, sd)
{
rfun <- function(n) rnorm(n, mean, sd)
dfun <- function(x) dnorm(x, mean, sd)
pfun <- function(q) pnorm(q, mean, sd)
qfun <- function(p) qnorm(p, mean, sd)
out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
return(out)
}
ground <- tail(ts, 1)
if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}
names(pred_fun) <- paste0("t", 1:h)
return(pred_fun)
}
####
mixture <- function(x, n_samp = 1e3) {
if (sd(x) == 0) x <- x + runif(length(x), -1e-6, 1e-6)
dens <- numeric(length(x)) # single accumulator
d <- .safe_dens(function(z) unlist(paramp(z)[c(2,4,5)]), dnormp, x)
dens <- dens + d / ifelse(all(d == 0), 1, max(d))
d <- .safe_dens(function(z) fgev(z, std.err = FALSE)$estimate, dgev, x)
dens <- dens + d / ifelse(all(d == 0), 1, max(d))
d <- .safe_dens(function(z) mleALD(z)$par, dALD, x)
dens <- dens + d / ifelse(all(d == 0), 1, max(d))
d <- .safe_dens(function(z) coef(hyperbFit(z)), dhyperb, x)
dens <- dens + d / ifelse(all(d == 0), 1, max(d))
d <- .safe_dens(function(z) suppressWarnings(sstdFit(z)$estimate), dsstd, x)
dens <- dens + d / ifelse(all(d == 0), 1, max(d))
if (all(!is.finite(dens)) || all(dens == 0)) return(empfun(x))
idx <- sample.int(length(x), n_samp, replace = TRUE, prob = dens)
vals <- x[idx]
empfun(vals)
}
###
.safe_dens <- function(fit_fun, dens_fun, x, ...) {
tryCatch({
p_list <- flatten(as.list(fit_fun(x)))
arg_names <- names( formals( match.fun(dens_fun) ) )
arg_names <- setdiff(arg_names, c("x", "y", "..."))
if (is.null(names(p_list)) || any(names(p_list) == "")) {
names(p_list) <- arg_names[seq_along(p_list)]
}
p_list <- p_list[arg_names[arg_names %in% names(p_list)]]
do.call(dens_fun, c(list(x), p_list))
}, error = function(e) numeric(length(x))) # graceful fallback
}
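###
# Minimal sketch (illustration only): .safe_dens() with a plain normal fit.
# The fitter returns parameters named to match dnorm()'s formals, so the
# density is evaluated; a failing fitter would fall back to zeros instead.
.sketch_safe_dens <- function(x = rnorm(100))
{
  .safe_dens(function(z) c(mean = mean(z), sd = sd(z)), dnorm, x)
}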
###
custom_density <- function(x,
bw = "nrd0",
kernel = c("gaussian","epanechnikov","rectangular","triangular"),
from = NULL,
to = NULL,
n = 1024L,
adjust = 1,
na.rm = TRUE,
useFFT = TRUE) {
call <- match.call()
if (!is.numeric(x)) stop("`x` must be numeric.")
if (na.rm) x <- x[is.finite(x)]
if (length(x) < 2L) stop("Need at least two finite data points.")
kernel <- match.arg(kernel)
h0 <- switch(
as.character(bw),
nrd0 = stats::bw.nrd0(x),
nrd = stats::bw.nrd(x),
{
if (is.numeric(bw) && bw > 0) bw
else stop("`bw` must be 'nrd0', 'nrd', or a positive number.")
}
)
h <- h0 * adjust
data_min <- min(x); data_max <- max(x)
if (is.null(from)) from <- data_min
if (is.null(to)) to <- data_max
if (!is.numeric(from) || !is.numeric(to) || from >= to)
stop("`from` and `to` must be numeric with from < to.")
if (n < 10L) stop("`n` should be at least 10.")
grid_x <- seq(from, to, length.out = n)
m <- length(grid_x)
ux <- unique(x)
fx <- as.vector(tabulate(match(x, ux), nbins = length(ux)))
tot_n <- sum(fx)
Kfun <- switch(kernel,
gaussian = function(u) stats::dnorm(u),
epanechnikov = function(u) { K <- 3/4*(1 - u^2); K[abs(u)>1] <- 0; K },
rectangular = function(u) { K <- 1/2; K[abs(u)>1] <- 0; K },
triangular = function(u) { K <- 1 - abs(u); K[abs(u)>1] <- 0; K }
)
if (useFFT && tot_n > 1000L) {
dx <- grid_x[2] - grid_x[1]
bin_idx <- pmin(pmax(1L, ceiling((x - from)/dx)), m)
counts <- tabulate(bin_idx, nbins = m)
# kernel evaluated in FFT (wrap-around) order so that zero lag sits at
# index 1; a centered kernel would shift the circular convolution by half the grid
lags <- 0:(m - 1)
lags[lags > m / 2] <- lags[lags > m / 2] - m
ker_vals <- Kfun(lags * dx / h)
fcounts <- stats::fft(counts)
fkernel <- stats::fft(ker_vals)
conv <- Re(stats::fft(fcounts * fkernel, inverse = TRUE)) / m
y <- conv / (tot_n * h)
} else {
y <- numeric(m)
for (i in seq_along(ux)) {
y <- y + fx[i] * Kfun((grid_x - ux[i]) / h)
}
y <- y / (tot_n * h)
}
structure(
list(x = grid_x,
y = y,
bw = h,
n = m,
call = call),
class = "density"
)
}
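#####
# Quick usage sketch (illustration only): on small samples custom_density()
# takes the exact (non-FFT) branch and closely mirrors stats::density()
# evaluated over the same support.
.sketch_custom_density <- function(x = rnorm(200))
{
  d1 <- custom_density(x, kernel = "gaussian")
  d2 <- stats::density(x, from = min(x), to = max(x), n = 1024L)
  list(custom = d1, reference = d2)
}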
#####
empfun <- function(values)
{
if(sd(values) <= 1e-6)
{
funs <- list(dfun = function(x) dunif(x, min(values), max(values)),
pfun = function(q) punif(q, min(values), max(values)),
qfun = function(p) qunif(p, min(values), max(values)),
rfun = function(n) runif(n, min(values), max(values)))
}
else
{
values <- values[is.finite(values)]
dens <- custom_density(values)
x_vals <- dens$x
y_vals <- dens$y
fix_idx <- (x_vals >= min(values) & x_vals <= max(values)) & !duplicated(x_vals)
x_vals <- x_vals[fix_idx]
y_vals <- y_vals[fix_idx]
dx <- x_vals[2] - x_vals[1]
pfun_raw <- cumsum(y_vals) * dx
pfun_vals <- pfun_raw / max(pfun_raw)
dfun <- suppressWarnings(approxfun(x_vals, y_vals, rule = 2))
pfun <- suppressWarnings(approxfun(x_vals, pfun_vals, rule = 2, yleft = 0, yright = 1, ties = "ordered"))
qfun <- suppressWarnings(approxfun(pfun_vals, x_vals, rule = 2, yleft = min(x_vals), yright = max(x_vals), ties = "ordered"))
rfun <- function(n) qfun(runif(n))
funs <- list(dfun = dfun, pfun = pfun, qfun = qfun, rfun = rfun)
}
return(funs)
}
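###
# Quick check (illustration only): the empirical functions returned by
# empfun() should approximately invert each other in the interior of the
# distribution.
.sketch_empfun <- function()
{
  funs <- empfun(rnorm(1000))
  p <- c(0.1, 0.5, 0.9)
  funs$pfun(funs$qfun(p))   # approximately 0.1, 0.5, 0.9
}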
###
utils::globalVariables(".data")
###
plot_graph <- function(ts, pred_funs, alpha = 0.05, dates = NULL, line_size = 1.3, label_size = 11,
forcat_band = "seagreen2", forcat_line = "seagreen4", hist_line = "gray43",
label_x = "Horizon", label_y= "Forecasted Var", date_format = "%b-%Y")
{
preds <- Reduce(rbind, map(pred_funs, ~ quantile(.x$rfun(1000), probs = c(alpha, 0.5, (1-alpha)))))
colnames(preds) <- c("lower", "median", "upper")
future <- nrow(preds)
if(is.null(dates)){x_hist <- 1:length(ts)} else {x_hist <- as.Date(as.character(dates))}
if(is.null(dates)){x_forcat <- length(ts) + 1:nrow(preds)} else {x_forcat <- as.Date(as.character(tail(dates, 1)))+ 1:future}
forecast_data <- data.frame(x_forcat = x_forcat, preds)
historical_data <- data.frame(x_all = c(x_hist, x_forcat), y_all = c(ts, preds[, "median"]))
plot <- ggplot()+ geom_line(data = historical_data, aes(x = .data$x_all, y = .data$y_all), color = hist_line, linewidth = line_size)
plot <- plot + geom_ribbon(data = forecast_data, aes(x = .data$x_forcat, ymin = .data$lower, ymax = .data$upper), alpha = 0.3, fill = forcat_band)
plot <- plot + geom_line(data = forecast_data, aes(x = .data$x_forcat, y = .data$median), color = forcat_line, linewidth = line_size)
if(!is.null(dates)){plot <- plot + scale_x_date(name = paste0("\n", label_x), date_labels = date_format)}
if(is.null(dates)){plot <- plot + scale_x_continuous(name = paste0("\n", label_x))}
plot <- plot + scale_y_continuous(name = paste0(label_y, "\n"), labels = number)
plot <- plot + ylab(label_y) + theme_bw()
plot <- plot + theme(axis.text=element_text(size=label_size), axis.title=element_text(size=label_size + 2))
return(plot)
}
###
dts <- function(ts, lag = 1)
{
scaled_ts <- tail(ts, -lag)/head(ts, -lag)-1
scaled_ts[!is.finite(scaled_ts)] <- NA
if(anyNA(scaled_ts)){scaled_ts <- na_kalman(scaled_ts)} # impute NAs introduced by zero denominators
return(scaled_ts)
}
###
softmax <- function(x) exp(x - max(x)) / sum(exp(x - max(x)))
###
exp_decay <- function(x, decay = 0.5) {
raw <- exp(-decay * x)
weights <- 1 - raw / sum(raw) # increasing in x: later positions weigh more
return(weights)
}
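###
# Illustration only: how chopper() turns segment end positions into weights.
# exp_decay() is increasing in x, so the softmax assigns larger weights to
# later (more recent) segments. The end positions below are hypothetical.
.sketch_weights <- function(e_points = c(40, 70, 100), decay = 0.5)
{
  softmax(exp_decay(e_points, decay))   # one weight per segment, summing to 1
}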