# R/comb_WA.R
# In GeomComb: (Geometric) Forecast Combination Methods

#' @title Winsorized Mean Forecast Combination
#'
#' @description Computes a \sQuote{combined forecast} from a pool of individual model forecasts using the winsorized mean at each point in time.
#'
#' @details
#' Suppose \eqn{y_t} is the variable of interest and there are \eqn{N} not perfectly collinear predictors,
#' \eqn{\mathbf{f}_t = (f_{1t}, \ldots, f_{Nt})'}{f_t = (f_{1t}, \ldots, f_{Nt})'}. For each point in time, the ordered forecasts are
#' computed:
#'
#' \deqn{\mathbf{f}_t^{ord} = (f_{(1)t}, \ldots, f_{(N)t})'}{(f_t)_ord = (f_{(1)t}, \ldots, f_{(N)t})'}
#'
#' Using a trim factor \eqn{\lambda} (i.e., the top/bottom \eqn{100\lambda \%} of the forecasts are winsorized), and setting \eqn{K = N\lambda},
#' the combined forecast is calculated as (Jose and Winkler, 2008):
#'
#' \deqn{\hat{y}_t = \frac{1}{N} \left[Kf_{(K+1)t} + \sum_{i=K+1}^{N-K} f_{(i)t} + Kf_{(N-K)t}\right]}{\hat{y}_t = 1/N *[Kf_{(K+1)t} + \sum_{i=K+1}^{N-K} f_{(i)t} + Kf_{(N-K)t}]}
#'
#' Like the trimmed mean, the winsorized mean is a robust statistic that is less sensitive to outliers than the simple average.
#' It is less extreme about handling outliers than the trimmed mean and preferred by Jose and Winkler (2008) for this reason.
#'
#' This method allows the user to select \eqn{\lambda} (by specifying \code{trim_factor}), or to leave the selection to
#' an optimization algorithm -- in which case the optimization criterion has to be selected (one of "MAE", "MAPE", or "RMSE").
#'
#' @param x An object of class \code{foreccomb}. Contains training set (actual values + matrix of model forecasts) and optionally a test set.
#' @param trim_factor numeric. Must be between 0 and 0.5. If \code{NULL} (default), the trim factor is chosen by the optimization algorithm.
#' @param criterion If \code{trim_factor} is not specified, an optimization criterion for automated trimming needs to be defined. One of
#' "MAE", "MAPE", or "RMSE" (default).
#'
#' @return Returns an object of class \code{foreccomb_res} with the following components:
#' \item{Method}{Returns the used forecast combination method.}
#' \item{Models}{Returns the individual input models that were used for the forecast combinations.}
#' \item{Weights}{Returns the combination weights obtained by applying the combination method to the training set.}
#' \item{Trim_Factor}{Returns the trim factor, \eqn{\lambda}.}
#' \item{Fitted}{Returns the fitted values of the combination method for the training set.}
#' \item{Accuracy_Train}{Returns range of summary measures of the forecast accuracy for the training set.}
#' \item{Forecasts_Test}{Returns forecasts produced by the combination method for the test set. Only returned if input included a forecast matrix for the test set.}
#' \item{Accuracy_Test}{Returns range of summary measures of the forecast accuracy for the test set. Only returned if input included a forecast matrix and a vector of actual values for the test set.}
#' \item{Input_Data}{Returns the data forwarded to the method.}
#'
#' @author Christoph E. Weiss and Gernot R. Roetzer
#'
#' @examples
#' obs <- rnorm(100)
#' preds <- matrix(rnorm(1000, 1), 100, 10)
#' train_o<-obs[1:80]
#' train_p<-preds[1:80,]
#' test_o<-obs[81:100]
#' test_p<-preds[81:100,]
#'
#' ## User-selected trim factor:
#' data<-foreccomb(train_o, train_p, test_o, test_p)
#' comb_WA(data, trim_factor=0.1)
#'
#' ## Algorithm-optimized trim factor:
#' data<-foreccomb(train_o, train_p, test_o, test_p)
#' comb_WA(data, criterion="RMSE")
#'
#' @seealso
#' \code{\link[psych]{winsor.mean}},
#' \code{\link{foreccomb}},
#' \code{\link{plot.foreccomb_res}},
#' \code{\link{summary.foreccomb_res}},
#' \code{\link{comb_SA}},
#' \code{\link{comb_TA}},
#' \code{\link[forecast]{accuracy}}
#'
#' @references
#' Jose, V. R. R., and Winkler, R. L. (2008). Simple Robust Averages of Forecasts: Some Empirical Results. \emph{International Journal of Forecasting}, \bold{24(1)}, 163--169.
#'
#' @keywords models
#'
#' @import forecast
#' @importFrom psych winsor.mean
#'
#' @export
comb_WA <- function(x, trim_factor = NULL, criterion = "RMSE") {
    # inherits() also works for objects that carry more than one class,
    # where a `class(x) != ...` comparison yields a vector condition.
    if (!inherits(x, "foreccomb"))
        stop("Data must be class 'foreccomb'. See ?foreccomb, to bring data in correct format.", call. = FALSE)
    observed_vector <- x$Actual_Train
    prediction_matrix <- x$Forecasts_Train
    modelnames <- x$modelnames

    # Winsorized mean across the models (columns) for every time point (row).
    winsorize_rows <- function(forecasts, trim) {
        apply(forecasts, 1, function(row) winsor.mean(row, trim = trim, na.rm = TRUE))
    }

    if (!is.null(trim_factor)) {
        # User-selected trim factor: validate before it reaches winsor.mean().
        if (!is.numeric(trim_factor))
            stop("Trim Factor must be numeric.", call. = FALSE)
        if (length(trim_factor) != 1 || is.na(trim_factor))
            stop("Trim Factor must be a single non-missing number.", call. = FALSE)
        # Negative values are rejected too (the previous abs() check let them through).
        if (trim_factor < 0 || trim_factor > 0.5)
            stop("Trim Factor must be between 0 and 0.5.", call. = FALSE)
        trimf <- trim_factor
    } else {
        # Automatic selection: grid search over 0, 0.01, ..., 0.5 minimising
        # the requested in-sample accuracy measure.
        if (is.null(criterion))
            stop("Automatic optimization of trim factor requires selection of 'criterion'.", call. = FALSE)
        criteria <- c("MAE", "MAPE", "RMSE")
        # Same matching rule as before (pattern must hit exactly one measure),
        # but resolved to the canonical name so it can select the accuracy column.
        matched <- if (is.character(criterion) && length(criterion) == 1 && !is.na(criterion)) {
            grep(criterion, criteria, value = TRUE)
        } else {
            character(0)
        }
        if (length(matched) != 1)
            stop("Criterion for trim factor optimization must be 'MAE', 'MAPE', or 'RMSE'.", call. = FALSE)
        criterion <- matched

        trim_grid <- (0:50) / 100
        message("Optimization algorithm chooses trim factor for winsorized mean approach...")
        # Pick the column named by `criterion`; the old positional `[2]`
        # always optimised RMSE regardless of the user's choice.
        scores <- vapply(trim_grid, function(trim) {
            accuracy(winsorize_rows(prediction_matrix, trim), observed_vector)[1, criterion]
        }, numeric(1))
        # which.min() returns the first minimum and skips NA/NaN scores.
        best <- which.min(scores)
        if (length(best) == 0)
            stop("Trim factor optimization failed: criterion could not be evaluated for any trim factor.", call. = FALSE)
        trimf <- trim_grid[best]
        message(paste0("Algorithm finished. Optimized trim factor: ", trimf))
    }

    weights <- "Weights of the individual forecasts differ over time with winsorized mean"
    fitted <- winsorize_rows(prediction_matrix, trimf)
    accuracy_insample <- accuracy(fitted, observed_vector)
    rownames(accuracy_insample) <- "Training Set"

    # Components shared by every result; test-set components are appended below
    # so that the element order matches the documented return value.
    result <- list(Method = "Winsorized Mean", Models = modelnames, Weights = weights, Trim_Factor = trimf,
        Fitted = fitted, Accuracy_Train = accuracy_insample)
    input_data <- list(Actual_Train = x$Actual_Train, Forecasts_Train = x$Forecasts_Train)

    # Test actuals without test forecasts cannot be evaluated, so that case
    # falls back to the training-only result instead of failing.
    if (!is.null(x$Forecasts_Test)) {
        pred <- winsorize_rows(x$Forecasts_Test, trimf)
        result$Forecasts_Test <- pred
        if (!is.null(x$Actual_Test)) {
            accuracy_outsample <- accuracy(pred, x$Actual_Test)
            rownames(accuracy_outsample) <- "Test Set"
            result$Accuracy_Test <- accuracy_outsample
            input_data$Actual_Test <- x$Actual_Test
        }
        input_data$Forecasts_Test <- x$Forecasts_Test
    }

    result$Input_Data <- input_data
    structure(result, class = c("foreccomb_res"))
}