Nothing
#' @name stsc
#' @title Signal-Transformed-Subset-Combination (STSC)
#' @description `stsc()` is a time series forecasting method designed to handle
#' vast sets of predictive signals, many of which may be irrelevant or
#' short-lived. This method transforms heterogeneous scalar-valued signals into
#' candidate density forecasts via time-varying coefficient models (TV-C),
#' and subsequently, combines them into an ultimate aggregate density forecast
#' via dynamic subset combinations (DSC).
#' @param y A matrix of dimension `T * 1` or numeric vector of length `T`
#' containing the observations of the target variable.
#' @param X A matrix with `T` rows containing the lagged 'P-signals'
#' in each column. Use `NULL` if no (external) 'P-signal' is to be included.
#' @param Ext_F A matrix with `T` rows containing the (external) 'F-signals'
#' in each column. For 'F-Signals', the slope of the TV-C models is fixed to 1.
#' Use `NULL` if no (external) 'F-signal' is to be included.
#' @param init An integer that denotes the number of observations used
#' to initialize the observational variance and the coefficients' variance
#' in the TV-C models.
#' @param lambda_grid A numeric vector which takes values between 0 and 1
#' denoting the discount factor(s) that control the dynamics of the time-varying
#' coefficients. Each signal in combination with each value of
#' lambda provides a separate candidate forecast.
#' Constant coefficients are nested for the case `lambda = 1`.
#' @param kappa_grid A numeric vector which takes values between 0 and 1
#' to accommodate time-varying volatility in the TV-C models.
#' The observational variance is estimated via
#' Exponentially Weighted Moving Average and kappa denotes the underlying
#' decay factor. Constant variance is nested for the case `kappa = 1`.
#' Each signal in combination with each value of
#' kappa provides a separate candidate density forecast.
#' For the values of kappa, we follow the recommendation
#' of RiskMetrics (Reuters, 1996).
#' @param bias A boolean to indicate whether the TV-C-models
#' allow for a bias correction to F-signals.
#' `TRUE` allows for a time-varying intercept, and `FALSE` sets (and fixes)
#' the intercept to 0.
#' @param gamma_grid A numeric vector containing potential discount factors
#' between 0 and 1 to exponentially down-weight the past predictive performance
#' of the candidate forecasting models. The values of this tuning parameter
#' are chosen in a procedure that amounts to leave-one-out cross-validation,
#' taking into account the time series structure of the data.
#' For details, \emph{see Adaemmer et al. (2023)}.
#' @param psi_grid An integer vector that controls
#' the (possible) sizes of the subsets. The values of this tuning parameter
#' are chosen in a procedure that amounts to leave-one-out cross-validation,
#' taking taking into account the time series structure of the data.
#' For details, \emph{see Adaemmer et al. (2023)}.
#' @param delta A numeric value between 0 and 1 denoting the discount factor
#' applied to down-weight the past predictive performance of the
#' aggregate predictive densities.
#' @param burn_in An integer value `>= 1` that denotes the number of
#' observations used to 'initialize' the rankings.
#' After 'burn_in' observations, the rankings for both,
#' the candidate forecasting models and aggregate predictive densities
#' are reset. `burn_in = 1` means no burn-in period is applied.
#' @param burn_in_dsc An integer value `>= 1` that denotes the number of
#' observations used to 'initialize' the rankings.
#' After 'burn_in_dsc' observations, only the ranking of the
#' aggregate predictive densities is reset.
#' `burn_in_dsc = 1` means no burn-in period is applied.
#' @param metric An integer from the set `1, 2, 3, 4, 5` representing
#' the metric used to rank the candidate forecasting models (TV-C models)
#' and subset combinations based on their predictive performance.
#' The default value is `metric = 5` which ranks them according to the
#' sum of (discounted) Continuous-Ranked-Probability-Scores (CRPS).
#' `metric = 1` uses discounted Predictive Log-Likelihoods,
#' `metric = 2` uses discounted Squared-Errors,
#' `metric = 3` uses discounted Absolute-Errors,
#' `metric = 4` uses discounted Compounded-Returns
#' (in this case the target variable y has to be a time series of
#' financial returns).
#' @param equal_weight A boolean that denotes whether equal weights are used to
#' combine the candidate forecasts within a subset. If `FALSE`, the weights are
#' calculated applying the softmax function on the ranking scores of
#' the candidate forecasting models. The method proposed in
#' Adaemmer et al. (2023) uses equal weights to combine the
#' candidate forecasting models.
#' @param incl An optional integer vector that denotes signals that
#' must be included in the subset combinations. For example, `incl = c(1, 3)`
#' includes all candidate forecasting models generated by
#' the first and third signals. If `NULL`, no signal is forced to be included.
#' @param parallel A boolean indicating whether the function should
#' be parallelized.
#' @param n_threads An integer that denotes the number of cores used
#' for parallelization. Only necessary if `parallel = TRUE`.
#' @param portfolio_params A numeric vector of length 3
#' containing the following elements:
#' \describe{
#' \item{risk_aversion}{
#' A non-negative double representing the investor's
#' risk aversion. Higher values indicate more risk-averse behavior.
#' }
#' \item{min_weight}{
#' A double specifying the minimum weight allocated to the market.
#' A non-negative lower bound effectively rules out short sales.
#' }
#' \item{max_weight}{
#' A double specifying the maximum weight allocated to the market.
#' For example, a value of 2 allows for a maximum leverage ratio of two.
#' }
#' }
#' This parameter is only required if `metric = 4`.
#' @return A list containing:
#' \describe{
#' \item{Forecasts}{A list containing:
#' \describe{
#' \item{Realization}{
#' A vector with the actual values of the target variable.
#' }
#' \item{Point_Forecast}{
#' A vector with the first moments of the aggregate predictive densities
#' of the STSC model.
#' }
#' \item{Variance_Prediction}{
#' A vector with the second moments of the aggregate predictive densities
#' of the STSC model.
#' }
#' }
#' }
#' \item{Tuning_Parameters}{A list containing:
#' \describe{
#' \item{Gamma}{
#' A vector containing the selected values for the tuning parameter gamma.}
#' \item{Psi}{
#' A vector containing the selected values for the tuning parameter psi.}
#' \item{Signals}{
#' A matrix containing the selected signals.}
#' \item{Lambda}{
#' A matrix containing the selected values for the tuning parameter lambda.}
#' \item{Kappa}{
#' A matrix containing the selected values for the tuning parameter kappa.}
#' }
#' }
#' \item{Model}{A list containing:
#' \describe{
#' \item{Lambda_grid}{The grid of lambda values used in the model.}
#' \item{Kappa_grid}{The grid of kappa values used in the model.}
#' \item{Gamma_grid}{The grid of gamma values used in the model.}
#' \item{Psi_grid}{The grid of psi values used in the model.}
#' \item{Delta}{The delta value used in the model.}
#' \item{Init}{The init value used in the model.}
#' \item{Burn_in}{The burn-in period used in the model.}
#' \item{Burn_in_dsc}{The burn-in period used in the model.}
#' \item{metric}{The ranking metric used in the model.}
#' \item{Equal_weight}{A boolean indicating if equal weighting was used.}
#' \item{Bias}{A boolean indicating if bias correction was applied.}
#' \item{Incl}{Additional included parameters.}
#' }
#' }
#' }
#' @seealso \url{https://github.com/lehmasve/hdflex#readme}
#' @author Philipp Adämmer, Sven Lehmann, Rainer Schüssler
#' @references
#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020)
#' "Exchange rate predictability and dynamic bayesian learning."
#' \emph{Journal of Applied Econometrics}, 35 (4): 410–421.
#'
#' Dangl, T. and Halling, M. (2012)
#' "Predictive regressions with time-varying coefficients."
#' \emph{Journal of Financial Economics}, 106 (1): 157–181.
#'
#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016)
#' "Dynamic prediction pools:
#' An investigation of financial frictions and forecasting performance."
#' \emph{Journal of Econometrics}, 192 (2): 391–405.
#'
#' Koop, G. and Korobilis, D. (2012)
#' "Forecasting inflation using dynamic model averaging."
#' \emph{International Economic Review}, 53 (3): 867–886.
#'
#' Koop, G. and Korobilis, D. (2023)
#' "Bayesian dynamic variable selection in high dimensions."
#' \emph{International Economic Review}.
#'
#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010)
#' "Online prediction under model uncertainty via dynamic model averaging:
#' Application to a cold rolling mill."
#' \emph{Technometrics}, 52 (1): 52–66.
#'
#' West, M. and Harrison, J. (1997)
#' "Bayesian forecasting and dynamic models"
#' \emph{Springer}, 2nd edn.
#'
#' @import checkmate
#' @importFrom stats na.omit
#' @export
#' @examples
#' \donttest{
#'
#' #########################################################
#' ######### Forecasting quarterly U.S. inflation ##########
#' #### Please see Koop & Korobilis (2023) for further ####
#' #### details regarding the data & external forecasts ####
#' #########################################################
#'
#' # Load Package
#' library("hdflex")
#' library("ggplot2")
#' library("cowplot")
#'
#' ########## Get Data ##########
#' # Load Package Data
#' inflation_data <- inflation_data
#'
#' # Set Target Variable
#' y <- inflation_data[, 1]
#'
#' # Set 'P-Signals'
#' X <- inflation_data[, 2:442]
#'
#' # Set 'F-Signals'
#' Ext_F <- inflation_data[, 443:462]
#'
#' # Get Dates and Number of Observations
#' tdates <- rownames(inflation_data)
#' tlength <- length(tdates)
#'
#' # First complete observation (no missing values)
#' first_complete <- which(complete.cases(inflation_data))[1]
#'
#' ########## Rolling AR2-Benchmark ##########
#' # Set up matrix for predictions
#' benchmark <- matrix(NA, nrow = tlength,
#' ncol = 1, dimnames = list(tdates, "AR2"))
#'
#' # Set Window-Size (15 years of quarterly data)
#' window_size <- 15 * 4
#'
#' # Time Sequence
#' t_seq <- seq(window_size, tlength - 1)
#'
#' # Loop with rolling window
#' for (t in t_seq) {
#'
#' # Split Data for Training Train Data
#' x_train <- cbind(int = 1, X[(t - window_size + 1):t, 1:2])
#' y_train <- y[(t - window_size + 1):t]
#'
#' # Split Data for Prediction
#' x_pred <- cbind(int = 1, X[t + 1, 1:2, drop = FALSE])
#'
#' # Fit AR-Model
#' model_ar <- .lm.fit(x_train, y_train)
#'
#' # Predict and store in benchmark matrix
#' benchmark[t + 1, ] <- x_pred %*% model_ar$coefficients
#' }
#'
#' ########## STSC ##########
#' # Set TV-C-Parameter
#' init <- 5 * 4
#' lambda_grid <- c(0.90, 0.95, 1.00)
#' kappa_grid <- c(0.94, 0.96, 0.98)
#' bias <- TRUE
#'
#' # Set DSC-Parameter
#' gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
#' 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
#' n_tvc <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
#' psi_grid <- c(1:100, sapply(1:4, function(i) floor(i * n_tvc / 4)))
#' delta <- 0.95
#' burn_in <- first_complete + init / 2
#' burn_in_dsc <- 1
#' metric <- 5
#' equal_weight <- TRUE
#' incl <- NULL
#' parallel <- FALSE
#' n_threads <- NULL
#'
#' # Apply STSC-Function
#' results <- hdflex::stsc(y,
#' X,
#' Ext_F,
#' init,
#' lambda_grid,
#' kappa_grid,
#' bias,
#' gamma_grid,
#' psi_grid,
#' delta,
#' burn_in,
#' burn_in_dsc,
#' metric,
#' equal_weight,
#' incl,
#' parallel,
#' n_threads,
#' NULL)
#'
#' ########## Evaluation ##########
#' # Define Evaluation Period (OOS-Period)
#' eval_period <- which(tdates >= "1991-04-01" & tdates <= "2021-12-01")
#'
#' # Get Evaluation Summary for STSC
#' eval_results <- summary(obj = results, eval_period = eval_period)
#'
#' # Calculate (Mean-)Squared-Errors for AR2-Benchmark
#' se_ar2 <- (y[eval_period] - benchmark[eval_period, 1])^2
#' mse_ar2 <- mean(se_ar2)
#'
#' # Create CSSED-Plot
#' cssed <- cumsum(se_ar2 - eval_results$MSE[[2]])
#' plot_cssed <- ggplot(
#' data = data.frame(eval_period, cssed),
#' aes(x = eval_period, y = cssed)
#' ) +
#' geom_line() +
#' ylim(-0.0008, 0.0008) +
#' ggtitle("Cumulative Squared Error Differences") +
#' xlab("Time Index") +
#' ylab("CSSED") +
#' geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") +
#' theme_minimal(base_size = 15) +
#' theme(
#' panel.grid.major = element_blank(),
#' panel.grid.minor = element_blank(),
#' panel.border = element_rect(colour = "black", fill = NA),
#' axis.ticks = element_line(colour = "black"),
#' plot.title = element_text(hjust = 0.5)
#' )
#'
#' # Show Plots
#' options(repr.plot.width = 15, repr.plot.height = 15)
#' plots_list <- eval_results$Plots
#' plots_list <- c(list(plot_cssed), plots_list)
#' cowplot::plot_grid(plotlist = plots_list,
#' ncol = 2,
#' nrow = 3,
#' align = "hv")
#'
#' # Relative MSE
#' print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4)))
#' }
### STSC-Function
stsc <- function(y,
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,
parallel = FALSE,
n_threads = parallel::detectCores() - 2,
portfolio_params = NULL) {
# Convert y to matrix
y <- as.matrix(y, ncol = 1)
########################################################
### Checkmate
# Check if numeric vector without missing or infinite values
assertNumeric(y, min.len = 1, any.missing = FALSE, finite = TRUE)
# Either X or Ext_F must not be null
assert(checkMatrix(X), checkMatrix(Ext_F), combine = "or")
# Check if numeric matrices and have same length as y
assertMatrix(X, mode = "numeric", nrows = nrow(y), null.ok = TRUE)
assertMatrix(Ext_F, mode = "numeric", nrows = nrow(y), null.ok = TRUE)
# Check if integer between 2 and the length of y
assertInt(init, lower = 2, upper = nrow(y))
# Check if numeric vectors with values between exp(-10) and 1
assertNumeric(lambda_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE)
assertNumeric(kappa_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE)
assertNumeric(gamma_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE)
# Check if boolean
assertLogical(bias, len = 1, any.missing = FALSE)
assertLogical(equal_weight, len = 1, any.missing = FALSE)
# Check if integer vector with values between 1 and J
assertIntegerish(
psi_grid,
lower = 1,
upper = (
ncol(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)) *
length(lambda_grid) * length(kappa_grid)
),
min.len = 1,
any.missing = FALSE,
null.ok = FALSE
)
# Check if numeric value between exp(-10) and 1
assertNumber(delta, lower = exp(-10), upper = 1, finite = TRUE, na.ok = FALSE, null.ok = FALSE)
# Check if integers between 1 and the length of y
assertInt(burn_in, lower = 1, upper = nrow(y), na.ok = FALSE, null.ok = FALSE)
assertInt(burn_in_dsc, lower = 1, upper = nrow(y), na.ok = FALSE, null.ok = FALSE)
# Check if element of the set {1, 2, 3, 4, 5}
assertChoice(metric, c(1, 2, 3, 4, 5), null.ok = FALSE)
# Additional checks if metric == 4
if (metric == 4) {
# Check if "returns"
assertNumeric(y, lower = -1)
# Check if numeric vector of length 3
assertNumeric(portfolio_params, len = 3, any.missing = FALSE)
# Extract values from portfolio_params
risk_aversion <- portfolio_params[1]
min_weight <- portfolio_params[2]
max_weight <- portfolio_params[3]
# Check if numeric value and at least 0.0
assertNumber(risk_aversion, lower = 0, upper = Inf)
# Check if min_weight is a number smaller than max_weight
assertNumber(min_weight, lower = -Inf, upper = max_weight)
# Check if max_weight is a number greater than min_weight
assertNumber(max_weight, lower = min_weight, upper = Inf)
}
# Check if there are only NA values in any row
all_na <- any(
apply(
cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F), 1,
function(x) sum(is.na(x)) == length(x)
)
)
assertFALSE(all_na)
# Check if there are any non-consecutive NA values
non_consec_na <- any(
apply(
is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)), 2,
function(x) sum(abs(diff(x))) > 1 | sum(diff(x)) == 1
)
)
assertFALSE(non_consec_na)
# Check if nullable integer vector
assertIntegerish(
incl,
lower = 1,
upper = (
ncol(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)) *
length(lambda_grid) * length(kappa_grid)
),
null.ok = TRUE
)
# Check if minimum psi matches the keep argument
if (!is.null(incl)) {
assertTRUE(
min(psi_grid) >= length(incl) * length(lambda_grid) * length(kappa_grid)
)
}
# Check if there are no NA values in the keep columns
if (!is.null(incl)) {
assertFALSE(
any(
is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)[, incl, drop = FALSE])
)
)
}
# Check if any column in X is constant for the first observations
if (!is.null(X)) {
if (any(apply(X, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) {
print("One or more columns in X are constant for the first 1:init observations.")
}
}
# Check if any column in Ext_F is constant for the first observations
if (!is.null(Ext_F)) {
if (any(apply(Ext_F, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) {
print("One or more columns in Ext_F are constant for the first 1:init observations.")
}
}
########################################################
# Parallel or Single Core
if (parallel) {
if (is.null(n_threads)) {
n_threads <- parallel::detectCores() - 2
}
# Apply Multi-Core-Rcpp-Function
stsc_results <- stsc_loop_par_(y,
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,
n_threads,
portfolio_params)
} else {
# Apply Single-Core-Rcpp-Function
stsc_results <- stsc_loop_(y,
X,
Ext_F,
init,
lambda_grid,
kappa_grid,
bias,
gamma_grid,
psi_grid,
delta,
burn_in,
burn_in_dsc,
metric,
equal_weight,
incl,
portfolio_params)
}
# Assign Results
stsc_forecast <- as.numeric(stsc_results[[1]])
stsc_variance <- as.numeric(stsc_results[[2]])
stsc_comb_mod <- as.integer(stsc_results[[3]])
stsc_cand_mod <- stsc_results[[4]]
# Remove
rm(list = c("stsc_results"))
# Get Values for Gamma & Psi
para_grid <- expand.grid(psi_grid, gamma_grid)
chosen_psi <- para_grid[stsc_comb_mod + 1, 1]
chosen_gamma <- para_grid[stsc_comb_mod + 1, 2]
# P-Signal / F-Signal Names
x_names <- if (!is.null(X)) if (!is.null(colnames(X))) colnames(X) else paste0("X", seq_len(ncol(X)))
f_names <- if (!is.null(Ext_F)) if (!is.null(colnames(Ext_F))) colnames(Ext_F) else paste0("Ext_F", seq_len(ncol(Ext_F)))
signal_names <- c(if (exists("x_names")) x_names, if (exists("f_names")) f_names)
# Create Signal-Parameter-Grid
signal_grid <- expand.grid(signal_names,
kappa_grid,
lambda_grid,
stringsAsFactors = FALSE)
# Set up matrix for selected signals
chosen_signals <- matrix(0, nrow = nrow(y),
ncol = length(signal_names),
dimnames = list(NULL, signal_names))
# Set up matrix for selected lambda
chosen_lambda <- matrix(0, nrow = nrow(y),
ncol = length(lambda_grid),
dimnames = list(NULL, lambda_grid))
# Set up matrix for selected kappa
chosen_kappa <- matrix(0, nrow = nrow(y),
ncol = length(kappa_grid),
dimnames = list(NULL, kappa_grid))
# Fill matrices
for (t in seq(max(burn_in, burn_in_dsc) + 1, nrow(y))) {
# Select Signals
col_names <- signal_grid[stsc_cand_mod[[t]] + 1, 1]
chosen_signals[t, col_names] <- 1
# Select Lambda
lambda_values <- as.matrix(table(signal_grid[stsc_cand_mod[[t]] + 1, 3]))
chosen_lambda[t, rownames(lambda_values)] <- lambda_values
# Select Kappa
kappa_values <- as.matrix(table(signal_grid[stsc_cand_mod[[t]] + 1, 2]))
chosen_kappa[t, rownames(kappa_values)] <- kappa_values
}
# Return Results
return(
structure(
list(
Forecasts = list(
Realization = y,
Point_Forecasts = stsc_forecast,
Variance_Forecasts = stsc_variance
),
Tuning_Parameters = list(
Gamma = chosen_gamma,
Psi = chosen_psi,
Signals = chosen_signals,
Lambda = chosen_lambda,
Kappa = chosen_kappa
),
Model = list(
Lambda_grid = lambda_grid,
Kappa_grid = kappa_grid,
Gamma_grid = gamma_grid,
Psi_grid = psi_grid,
Delta = delta,
Init = init,
Burn_in = burn_in,
Burn_in_dsc = burn_in_dsc,
Metric = metric,
Equal_weight = equal_weight,
Bias = bias,
Incl = incl
)
),
class = "stsc_obj"
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.