R/main.R

#' chopper
#'
#' A changepoint-aware probabilistic forecasting algorithm for univariate time series.
#' This function detects changepoints in both mean and variance and uses them to split
#' the input series into structurally homogeneous segments. On each segment, three distinct
#' probabilistic forecasting models (TBATS, Theta, and ARFIMA) are applied to estimate
#' future trajectories.
#'
#' Instead of relying on a single model, `chopper()` embraces the variety of probabilistic forecasts by merging
#' their output using a product-of-experts composition rule. This method allows each model to contribute to the
#' final distribution proportionally to the confidence it assigns to different outcomes.
#'
#' The final output is a collection of forecast distributions—one for each horizon step—that captures both
#' epistemic uncertainty (from model disagreement) and aleatoric uncertainty (from residual variability).
#'
#' @param ts A numeric vector representing the input time series.
#' @param horizon An integer specifying the number of future time steps to forecast.
#' @param decay A numeric value controlling the exponential decay rate used to compute the softmax weights for the forecasts from each segment (default: 0.5).
#' @param n_samples An integer specifying the number of samples drawn for estimating the forecast distributions (default is 1000).
#' @param alt_mode Logical flag: when TRUE, fits models on independent segments between changepoints instead of cumulative segments up to each changepoint (default: FALSE).
#' @param reverse Logical flag: when TRUE, inverts the weight vector (default: FALSE).
#'
#' @return A list containing \code{pred_funs} (a named list of forecast distributions, one for each
#' time step in the forecast horizon), \code{plot} (a plot of the historical series with forecast bands),
#' and \code{time_log} (the elapsed computation time). Weights for each segment are assigned via the
#' softmax of an exponential decay evaluated at the end position of each segment.
#'
#' Each distribution is represented as a list containing:
#' \describe{
#'   \item{rfun}{A random sampling function for drawing synthetic future values.}
#'   \item{dfun}{A density function over the predicted values.}
#'   \item{pfun}{A cumulative distribution function.}
#'   \item{qfun}{A quantile function.}
#' }
#'
#' @details
#' This function uses the `changepoint` package to perform offline changepoint detection
#' on the historical series. For each segment identified, three forecasting models are fit:
#' \itemize{
#'   \item \strong{Theta}: decomposed trend extrapolation with adjusted curvature.
#'   \item \strong{TBATS}: based on exponential smoothing, designed for time series data with multiple seasonalities.
#'   \item \strong{ARFIMA}: a long-memory extension of ARIMA that captures slowly decaying persistence in the mean.
#' }
#'
#' The output of each model is converted to a forecast density for each horizon step. These are multiplied together
#' (product of experts) and normalized over a shared evaluation grid. A weighted average of these densities is
#' constructed across segments using segment-specific weights based on softmax and exponential decay.
#'
#' @examples
#' \donttest{
#' # Fast example with the tail of a time series
#' forecasted <- chopper(ts = tail(ts_set$AMZN.Close, 70), horizon = 5, n_samples = 100)
#'
#' # Draw samples from forecast for t1 and t5
#' forecasted$pred_funs$t1$rfun(10)
#' forecasted$pred_funs$t5$rfun(10)
#'
#' }
#'
#' @importFrom changepoint cpt.meanvar
#' @importFrom forecast arfima forecast tbats thetaf
#' @importFrom imputeTS na_kalman
#' @importFrom purrr map map2 pmap flatten map_dbl
#' @importFrom utils head tail
#' @importFrom stats qnorm rnorm dnorm pnorm fft approxfun coef dunif median punif qunif runif sd quantile
#' @importFrom normalp paramp dnormp
#' @importFrom GeneralizedHyperbolic hyperbFit dhyperb
#' @importFrom evd fgev dgev
#' @importFrom ald mleALD dALD
#' @importFrom fGarch sstdFit dsstd
#' @importFrom scales number
#' @importFrom lubridate seconds_to_period
#' @importFrom ggplot2 ggplot geom_line aes geom_ribbon scale_x_date scale_x_continuous scale_y_continuous ylab theme_bw theme element_text
#'
#' @export

chopper <- function(ts, horizon, decay = 0.5, n_samples = 1000, alt_mode = FALSE, reverse = FALSE)
{
  start <- Sys.time()

  if(anyNA(ts)){ts <- na_kalman(ts)}
  n_length <- length(ts)
  model <- suppressWarnings(cpt.meanvar(dts(ts, 1)))
  sections <- c(1, model@cpts + 1)
  s_points <- head(sections, -1)
  e_points <- tail(sections, -1)
  if(!alt_mode){models <- map(s_points, ~ product_of_experts(ts[.x:n_length], horizon, n_samples, alt_mode, ts))}
  if(alt_mode){models <- map2(s_points, e_points, ~ product_of_experts(ts[.x:.y], horizon, n_samples, alt_mode, ts))}
  weights <- softmax(exp_decay(tail(sections, -1), decay))
  pred_funs <- merge_dist(models, horizon, weights, n_samples, reverse)
  names(pred_funs) <- paste0("t", 1:horizon)
  plot <- plot_graph(ts, pred_funs)
  end <- Sys.time()
  time_log <- seconds_to_period(round(difftime(end, start, units = "secs"), 0))

  out <- list(pred_funs = pred_funs, plot = plot, time_log = time_log)

  return(out)
}
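
# A minimal sketch (the toy series and seed are illustrative assumptions) of the
# segmentation step inside `chopper()`: changepoints are detected on the
# percentage-change series, and segments ending later receive larger weights.
# Wrapped in `if (FALSE)` so it never runs at package load.
if (FALSE) {
  set.seed(42)
  y <- cumprod(1 + c(rnorm(60, 0, 0.01), rnorm(60, 0, 0.05)))  # price-like series with a volatility break
  cp <- suppressWarnings(changepoint::cpt.meanvar(dts(y, 1)))  # changepoints on one-step returns
  sections <- c(1, cp@cpts + 1)                                # segment boundaries
  softmax(exp_decay(tail(sections, -1), decay = 0.5))          # later segments weigh more
}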

####
product_of_experts <- function(ts, horizon, n_samples = 1000, alt_mode = FALSE, long_ts = NULL)
{
  b_preds <- tbats_predictor(ts, horizon, alt_mode)
  f_preds <- arfima_predictor(ts, horizon, alt_mode)
  t_preds <- theta_predictor(ts, horizon, alt_mode)

  ranges <- lapply(1:horizon, function(h) range(c(b_preds[[h]]$rfun(n_samples),
                                                  f_preds[[h]]$rfun(n_samples),
                                                  t_preds[[h]]$rfun(n_samples)
                                                  )))

  values <- map(ranges, ~ seq(.x[1], .x[2], length.out = n_samples))
  joint_dens <- pmap(list(b_preds, f_preds, t_preds, values), ~ ..1$dfun(..4) * ..2$dfun(..4) * ..3$dfun(..4))
  samples <- map2(values, joint_dens, ~ sample(.x[!is.na(.y)], size = n_samples, prob = .y[!is.na(.y)], replace = TRUE))
  if(!alt_mode){pred_funs <- map(samples, ~ mixture(.x))}
  if(alt_mode){ground <- tail(long_ts, 1); pred_funs <- map(samples, ~ mixture(ground * (1 + .x)))}
  names(pred_funs) <- paste0("t", 1:horizon)
  return(pred_funs)
}
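
# A minimal sketch (two assumed normal "experts") of the product-of-experts rule
# used above: densities are multiplied pointwise on a shared grid, then
# renormalised implicitly by weighted resampling.
if (FALSE) {
  grid <- seq(-4, 8, length.out = 1000)
  d1 <- dnorm(grid, mean = 1, sd = 1)               # confident expert
  d2 <- dnorm(grid, mean = 3, sd = 2)               # diffuse expert
  joint <- d1 * d2                                  # unnormalised product of experts
  draws <- sample(grid, 1000, prob = joint, replace = TRUE)
  mean(draws)                                       # ~1.4, pulled toward the confident expert
}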

###
merge_dist <- function(pred_fun_list, h, w, n_samples = 1000, reverse = FALSE)
{
  n_dist <- length(pred_fun_list)
  ranges <- lapply(1:h, function(i) range(unlist(lapply(pred_fun_list, function(pfun) pfun[[i]]$rfun(n_samples)))))
  values <- map(ranges, ~ seq(.x[1], .x[2], length.out = n_samples))
  if(reverse){w <- rev(w)}
  joint_dens <- lapply(1:h, function(i) colSums(w * Reduce(rbind, lapply(pred_fun_list, function(pfun) pfun[[i]]$dfun(values[[i]])))))
  samples <- map2(values, joint_dens, ~ sample(.x[!is.na(.y)], size = n_samples, prob = .y[!is.na(.y)], replace = TRUE))
  pred_funs <- map(samples, ~ empfun(.x))
  names(pred_funs) <- paste0("t", 1:h)
  return(pred_funs)
}
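
# A minimal sketch (assumed toy weights and segments) of the rule used in
# `merge_dist()`: a linear opinion pool, i.e. a weighted sum of segment
# densities, in contrast with the product rule above.
if (FALSE) {
  grid <- seq(-5, 10, length.out = 1000)
  dens <- rbind(dnorm(grid, 0, 1), dnorm(grid, 5, 1))  # one row per segment
  w <- c(0.2, 0.8)                                     # segment weights
  pooled <- colSums(w * dens)                          # each row scaled by its weight
  draws <- sample(grid, 1000, prob = pooled, replace = TRUE)
}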


###
tbats_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
  model <- tbats(ts)
  preds <- forecast(model, h = h, level = 0.95)

  mean_forecast <- preds$mean
  upper_95 <- preds$upper[, 1]
  lower_95 <- preds$lower[, 1]

  z_score <- qnorm(0.975)
  std_dev <- (upper_95 - lower_95) / (2 * z_score)

  params <- data.frame(
    mean = mean_forecast,
    sd = std_dev
  )

  pred_funs <- function(mean, sd)
  {
    rfun <- function(n) rnorm(n, mean, sd)
    dfun <- function(x) dnorm(x, mean, sd)
    pfun <- function(q) pnorm(q, mean, sd)
    qfun <- function(p) qnorm(p, mean, sd)

    out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
    return(out)
  }

  ground <- tail(ts, 1)

  if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
  if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}

  names(pred_fun) <- paste0("t", 1:h)

  return(pred_fun)
}
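
# A quick sanity check (assumed mu and sigma) of the interval-to-sd conversion
# used by all three predictors: sd = (upper - lower) / (2 * qnorm(0.975)).
if (FALSE) {
  mu <- 10; sigma <- 2
  upper <- qnorm(0.975, mu, sigma)
  lower <- qnorm(0.025, mu, sigma)
  (upper - lower) / (2 * qnorm(0.975))  # recovers sigma = 2
}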

###
theta_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
  preds <- thetaf(ts, h = h, level = 0.95)

  mean_forecast <- preds$mean
  upper_95 <- preds$upper[, 1]
  lower_95 <- preds$lower[, 1]

  z_score <- qnorm(0.975)
  std_dev <- (upper_95 - lower_95) / (2 * z_score)

  params <- data.frame(
    mean = mean_forecast,
    sd = std_dev
  )

  pred_funs <- function(mean, sd)
  {
    rfun <- function(n) rnorm(n, mean, sd)
    dfun <- function(x) dnorm(x, mean, sd)
    pfun <- function(q) pnorm(q, mean, sd)
    qfun <- function(p) qnorm(p, mean, sd)

    out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
    return(out)
  }

  ground <- tail(ts, 1)

  if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
  if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}
  names(pred_fun) <- paste0("t", 1:h)

  return(pred_fun)
}

####
arfima_predictor <- function(ts, h = 5, alt_mode = FALSE)
{
  model <- arfima(ts)
  preds <- forecast(model, h = h, level = 0.95)

  mean_forecast <- preds$mean
  upper_95 <- preds$upper[, 1]
  lower_95 <- preds$lower[, 1]

  z_score <- qnorm(0.975)
  std_dev <- (upper_95 - lower_95) / (2 * z_score)

  params <- data.frame(
    mean = mean_forecast,
    sd = std_dev
  )

  pred_funs <- function(mean, sd)
  {
    rfun <- function(n) rnorm(n, mean, sd)
    dfun <- function(x) dnorm(x, mean, sd)
    pfun <- function(q) pnorm(q, mean, sd)
    qfun <- function(p) qnorm(p, mean, sd)

    out <- list(rfun = rfun, dfun = dfun, pfun = pfun, qfun = qfun)
    return(out)
  }

  ground <- tail(ts, 1)

  if(!alt_mode){pred_fun <- map2(params$mean, params$sd, ~ pred_funs(.x, .y))}
  if(alt_mode){pred_fun <- map2(params$mean/ground - 1, params$sd/ground, ~ pred_funs(.x, .y))}
  names(pred_fun) <- paste0("t", 1:h)

  return(pred_fun)
}

####
mixture <- function(x, n_samp = 1e3) {

  if (sd(x) == 0) x <- x + runif(length(x), -1e-6, 1e-6)

  dens <- numeric(length(x))      # single accumulator

  d <- .safe_dens(function(z) unlist(paramp(z)[c(2,4,5)]), dnormp, x)
  dens <- dens + d / ifelse(all(d == 0), 1, max(d))

  d <- .safe_dens(function(z) fgev(z, std.err = FALSE)$estimate, dgev, x)
  dens <- dens + d / ifelse(all(d == 0), 1, max(d))

  d <- .safe_dens(function(z) mleALD(z)$par, dALD, x)
  dens <- dens + d / ifelse(all(d == 0), 1, max(d))

  d <- .safe_dens(function(z) coef(hyperbFit(z)), dhyperb, x)
  dens <- dens + d / ifelse(all(d == 0), 1, max(d))

  d <- .safe_dens(function(z) suppressWarnings(sstdFit(z)$estimate), dsstd, x)
  dens <- dens + d / ifelse(all(d == 0), 1, max(d))

  if (all(!is.finite(dens)) || all(dens == 0)) return(empfun(x))

  idx <- sample.int(length(x), n_samp, replace = TRUE, prob = dens)
  vals <- x[idx]

  empfun(vals)
}
###
.safe_dens <- function(fit_fun, dens_fun, x, ...) {

  tryCatch({

    p_list <- flatten(as.list(fit_fun(x)))

    arg_names <- names( formals( match.fun(dens_fun) ) )
    arg_names <- setdiff(arg_names, c("x", "y", "..."))

    if (is.null(names(p_list)) || any(names(p_list) == "")) {
      names(p_list) <- arg_names[seq_along(p_list)]
    }

    p_list <- p_list[arg_names[arg_names %in% names(p_list)]]

    do.call(dens_fun, c(list(x), p_list))

  }, error = function(e) numeric(length(x)))   # graceful fallback
}
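
# A minimal sketch (assumed toy fit function) of how `.safe_dens()` matches
# fitted parameter names against the density's formals; a plain `dnorm` works
# as an "expert" here.
if (FALSE) {
  x <- rnorm(200, mean = 3, sd = 2)
  fit_norm <- function(z) c(mean = mean(z), sd = sd(z))  # named parameter vector
  d <- .safe_dens(fit_norm, dnorm, x)                    # calls dnorm(x, mean = ..., sd = ...)
  head(d)
}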


###
custom_density <- function(x,
                           bw         = "nrd0",
                           kernel     = c("gaussian","epanechnikov","rectangular","triangular"),
                           from       = NULL,
                           to         = NULL,
                           n          = 1024L,
                           adjust     = 1,
                           na.rm      = TRUE,
                           useFFT     = TRUE) {
  call <- match.call()

  if (!is.numeric(x)) stop("`x` must be numeric.")
  if (na.rm) x <- x[is.finite(x)]
  if (length(x) < 2L) stop("Need at least two finite data points.")
  kernel <- match.arg(kernel)

  h0 <- switch(
    as.character(bw),
    nrd0 = stats::bw.nrd0(x),
    nrd  = stats::bw.nrd(x),
    {
      if (is.numeric(bw) && bw > 0) bw
      else stop("`bw` must be 'nrd0', 'nrd', or a positive number.")
    }
  )
  h <- h0 * adjust

  data_min <- min(x);  data_max <- max(x)
  if (is.null(from)) from <- data_min
  if (is.null(to))   to   <- data_max
  if (!is.numeric(from) || !is.numeric(to) || from >= to)
    stop("`from` and `to` must be numeric with from < to.")
  if (n < 10L) stop("`n` should be at least 10.")
  grid_x <- seq(from, to, length.out = n)
  m      <- length(grid_x)

  ux <- unique(x)
  fx <- as.vector(tabulate(match(x, ux), nbins = length(ux)))
  tot_n <- sum(fx)

  Kfun <- switch(kernel,
                 gaussian     = function(u) stats::dnorm(u),
                 epanechnikov = function(u) { K <- 3/4*(1 - u^2); K[abs(u)>1] <- 0; K },
                 rectangular  = function(u) { K <- rep(1/2, length(u)); K[abs(u)>1] <- 0; K },
                 triangular   = function(u) { K <- 1 - abs(u); K[abs(u)>1] <- 0; K }
  )

  if (useFFT && tot_n > 1000L) {
    # Bin the observations onto the evaluation grid, then convolve the bin
    # counts with the kernel via the FFT (circular convolution over the grid).
    dx      <- grid_x[2] - grid_x[1]
    bin_idx <- pmin(pmax(1L, ceiling((x - from)/dx)), m)
    counts  <- tabulate(bin_idx, nbins = m)
    u_grid  <- seq(-(m-1)/2, (m-1)/2, length.out = m) * dx / h
    ker_vals <- Kfun(u_grid)
    fcounts <- stats::fft(counts)
    fkernel <- stats::fft(ker_vals)
    conv    <- Re(stats::fft(fcounts * fkernel, inverse = TRUE)) / m
    y       <- conv / (length(x) * h)
  } else {
    # Direct evaluation: weighted sum of kernels centred at the unique data points.
    y <- numeric(m)
    for (i in seq_along(ux)) {
      y <- y + fx[i] * Kfun((grid_x - ux[i]) / h)
    }
    y <- y / (tot_n * h)
  }

  structure(
    list(x    = grid_x,
         y    = y,
         bw   = h,
         n    = m,
         call = call),
    class = "density"
  )
}
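
# A minimal comparison (assumed sample): `custom_density()` mirrors the
# interface of `stats::density()` and returns an object of class "density",
# so the two estimates can be overlaid directly.
if (FALSE) {
  set.seed(1)
  x <- rnorm(500)
  d1 <- custom_density(x, kernel = "epanechnikov")
  d2 <- stats::density(x)
  plot(d2); lines(d1$x, d1$y, col = "red")  # the two curves should roughly agree
}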


#####
empfun <- function(values)
{
  if(sd(values) <= 1e-6)
  {
    funs <- list(dfun = function(x) dunif(x, min(values), max(values)),
                 pfun = function(q) punif(q, min(values), max(values)),
                 qfun = function(p) qunif(p, min(values), max(values)),
                 rfun = function(n) runif(n, min(values), max(values)))
  }

  else
  {
    values <- values[is.finite(values)]
    dens <- custom_density(values)
    x_vals <- dens$x
    y_vals <- dens$y

    fix_idx <- (x_vals >= min(values) & x_vals <= max(values)) & !duplicated(x_vals)
    x_vals <- x_vals[fix_idx]
    y_vals <- y_vals[fix_idx]

    dx <- x_vals[2] - x_vals[1]
    pfun_raw <- cumsum(y_vals) * dx
    pfun_vals <- pfun_raw / max(pfun_raw)

    dfun <- suppressWarnings(approxfun(x_vals, y_vals, rule = 2))
    pfun <- suppressWarnings(approxfun(x_vals, pfun_vals, rule = 2, yleft = 0, yright = 1, ties = "ordered"))
    qfun <- suppressWarnings(approxfun(pfun_vals, x_vals, rule = 2, yleft = min(x_vals), yright = max(x_vals), ties = "ordered"))
    rfun <- function(n) qfun(runif(n))

    funs <- list(dfun = dfun, pfun = pfun, qfun = qfun, rfun = rfun)
  }

  return(funs)
}
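
# A minimal round trip (assumed sample): `empfun()` turns raw draws into a
# consistent set of d/p/q/r functions, with `rfun` implemented via
# inverse-transform sampling.
if (FALSE) {
  set.seed(2)
  funs <- empfun(rnorm(1000, mean = 5))
  funs$qfun(c(0.05, 0.5, 0.95))  # approximate sample quantiles
  funs$pfun(funs$qfun(0.9))      # ~0.9, up to interpolation error
  funs$rfun(5)                   # fresh synthetic draws
}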

###
utils::globalVariables(".data")

###
plot_graph <- function(ts, pred_funs, alpha = 0.05, dates = NULL, line_size = 1.3, label_size = 11,
                       forcat_band = "seagreen2", forcat_line = "seagreen4", hist_line = "gray43",
                       label_x = "Horizon", label_y= "Forecasted Var", date_format = "%b-%Y")
{
  preds <- Reduce(rbind, map(pred_funs, ~ quantile(.x$rfun(1000), probs = c(alpha, 0.5, (1-alpha)))))
  colnames(preds) <- c("lower", "median", "upper")
  future <- nrow(preds)

  if(is.null(dates)){x_hist <- 1:length(ts)} else {x_hist <- as.Date(as.character(dates))}
  if(is.null(dates)){x_forcat <- length(ts) + 1:nrow(preds)} else {x_forcat <- as.Date(as.character(tail(dates, 1)))+ 1:future}

  forecast_data <- data.frame(x_forcat = x_forcat, preds)
  historical_data <- data.frame(x_all = c(x_hist, x_forcat), y_all = c(ts, preds[, "median"]))

  plot <- ggplot()+ geom_line(data = historical_data, aes(x = .data$x_all, y = .data$y_all), color = hist_line, linewidth = line_size)
  plot <- plot + geom_ribbon(data = forecast_data, aes(x = .data$x_forcat, ymin = .data$lower, ymax = .data$upper), alpha = 0.3, fill = forcat_band)
  plot <- plot + geom_line(data = forecast_data, aes(x = .data$x_forcat, y = .data$median), color = forcat_line, linewidth = line_size)
  if(!is.null(dates)){plot <- plot + scale_x_date(name = paste0("\n", label_x), date_labels = date_format)}
  if(is.null(dates)){plot <- plot + scale_x_continuous(name = paste0("\n", label_x))}
  plot <- plot + scale_y_continuous(name = paste0(label_y, "\n"), labels = number)
  plot <- plot + ylab(label_y) + theme_bw()
  plot <- plot + theme(axis.text=element_text(size=label_size), axis.title=element_text(size=label_size + 2))

  return(plot)
}

###
dts <- function(ts, lag = 1)
{
  scaled_ts <- tail(ts, -lag)/head(ts, -lag)-1
  scaled_ts[!is.finite(scaled_ts)] <- NA
  if(anyNA(scaled_ts)){scaled_ts <- na_kalman(scaled_ts)}
  return(scaled_ts)
}
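
# A quick check (assumed prices): `dts()` with lag = 1 returns simple
# one-step percentage changes.
if (FALSE) {
  dts(c(100, 110, 99))  # c(0.10, -0.10)
}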

###
softmax <- function(x) exp(x - max(x)) / sum(exp(x - max(x)))

###
exp_decay <- function(x, decay = 0.5) {
  raw <- exp(-decay * x)
  weights <- 1 - raw / sum(raw)  # inverted so that later positions receive larger weight
  return(weights)
}
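
# Illustrative weights (assumed segment end positions): `exp_decay()` followed
# by `softmax()` gives weights that increase with position and sum to one.
if (FALSE) {
  ends <- c(30, 60, 100)
  softmax(exp_decay(ends, decay = 0.5))  # increasing weights
}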
