Nothing
#' Hybrid time series modeling
#'
#' Create a hybrid time series model with two to five component models.
#'
#' @export
#' @import forecast
#' @import thief
#' @import stats
#' @import graphics
#' @import zoo
#' @param y A numeric vector or time series.
#' @param lambda
#' Box-Cox transformation parameter.
#' Ignored if NULL. Otherwise, data transformed before model is estimated.
#' @param models A character string of up to seven characters indicating which contributing
#' models to use:
#' a (\code{\link[forecast]{auto.arima}}), e (\code{\link[forecast]{ets}}),
#' f (\code{\link{thetam}}), n (\code{\link[forecast]{nnetar}}),
#' s (\code{\link[forecast]{stlm}}), t (\code{\link[forecast]{tbats}}),
#' and z (\code{\link[forecast]{snaive}}).
#' @param a.args an optional \code{list} of arguments to pass to \code{\link[forecast]{auto.arima}}.
#' See details.
#' @param e.args an optional \code{list} of arguments to pass to \code{\link[forecast]{ets}}.
#' See details.
#' @param n.args an optional \code{list} of arguments to pass to \code{\link[forecast]{nnetar}}.
#' See details.
#' @param s.args an optional \code{list} of arguments to pass to \code{\link[forecast]{stlm}}.
#' See details.
#' @param t.args an optional \code{list} of arguments to pass to \code{\link[forecast]{tbats}}.
#' See details.
#' @param z.args an optional \code{list} of arguments to pass to \code{\link[forecast]{snaive}}.
#' See details.
#' @param weights method for weighting the forecasts of the various contributing
#' models. Defaults to \code{equal}, which has shown to be robust and better
#' in many cases than giving more weight to models with better in-sample performance.
#' Cross validated errors--implemented with \code{link{cvts}}
#' should produce the best forecast, but the model estimation is also the slowest. Note that
#' extra arguments
#' passed in \code{a.args}, \code{e.args}, \code{n.args}, \code{s.args}, and \code{t.args} are
#' not used during cross validation. See further explanation in \code{\link{cvts}}.
#' Weights utilizing in-sample errors are also available but not recommended.
#' @param errorMethod method of measuring accuracy to use if weights are not
#' to be equal.
#' Root mean square error (\code{RMSE}), mean absolute error (\code{MAE})
#' and mean absolute scaled error (\code{MASE})
#' are supported.
#' @param parallel a boolean indicating if parallel processing should be used between models.
#' Parallelization will still occur within individual models that support it and can be controlled
#' using \code{a.args} and \code{t.args}.
#' @param num.cores If \code{parallel=TRUE}, how many cores to use.
#' @param cvHorizon If \code{weights = "cv.errors"}, this controls which forecast to horizon to use
#' for the error calculations.
#' @param rolling If \code{weights = "cv.errors"}, this controls the use of rolling cross validation
#' in \code{cvts()}
#' @param windowSize length of the window to build each model, only used when
#' \code{weights = "cv.errors"}.
#' @param horizonAverage If \code{weights = "cv.errors"}, setting this to \code{TRUE} will average
#' all forecast horizons up to \code{cvHorizon} for calculating the errors instead of using
#' the single horizon given in \code{cvHorizon}.
#' @param verbose Should the status of which model is being fit/cross validated be printed
#' to the terminal?
#' @seealso \code{\link{forecast.hybridModel}}, \code{\link[forecast]{auto.arima}},
#' \code{\link[forecast]{ets}}, \code{\link{thetam}}, \code{\link[forecast]{nnetar}},
#' \code{\link[forecast]{stlm}}, \code{\link[forecast]{tbats}}
#' @return An object of class hybridModel.
#' The individual component models are stored inside of the object
#' and can be accessed for all the regular manipulations available in the forecast package.
#' @details The \code{hybridModel} function fits multiple individual model specifications
#' to allow easy creation of ensemble forecasts. While default settings for the individual
#' component models work quite well in most cases, fine control can be exerted by passing detailed
#' arguments to the component models in the
#' \code{a.args}, \code{e.args}, \code{n.args}, \code{s.args}, and \code{t.args} lists.
#' Note that if \code{xreg} is passed to the \code{a.args}, \code{n.args}, or
#' \code{s.args} component models it must now be passed as a matrix. In "forecastHybrid"
#' versions earlier than 4.0.15 it would instead be passed in as a dataframe, but for consistency
#' with "forecast" v8.5 we now require a matrix with colnames
#' \cr
#' \cr
#' Characteristics of the input series can cause problems for certain types of models
#' and parameters. For example, \code{\link[forecast]{stlm}} models require that the input
#' series be seasonal; furthermore, the data must include at least two seasons
#' of data (i.e. \code{length(y) >= 2 * frequency(y)}) for the decomposition to succeed.
#' If this is not the case, \code{hybridModel()}
#' will remove the \code{stlm} model so an error does not occur.
#' Similarly, \code{nnetar} models require that
#' \code{length(y) >= 2 * frequency(y)}, so these models will be removed if the condition
#' is not satisfied. The \code{\link[forecast]{ets}} model does not handle
#' a series well with a seasonal period longer than 24 and will ignore the seasonality.
#' In this case, \code{hybridModel()} will also drop the \code{ets} model from the ensemble.
#'
#' @examples
#' \dontrun{
#'
#' # Fit an auto.arima, ets, thetam, nnetar, stlm, and tbats model
#' # on the time series with equal weights
#' mod1 <- hybridModel(AirPassengers)
#' plot(forecast(mod1))
#'
#' # Use an auto.arima, ets, and tbats model with weights
#' # set by the MASE in-sample errors
#' mod2 <- hybridModel(AirPassengers, models = "aet",
#' weights = "insample.errors", errorMethod = "MASE")
#'
#' # Pass additional arguments to auto.arima() to control its fit
#' mod3 <- hybridModel(AirPassengers, models = "aens",
#' a.args = list(max.p = 7, max.q = 7, approximation = FALSE))
#'
#' # View the component auto.arima() and stlm() models
#' mod3$auto.arima
#' mod3$stlm
#' }
#'
#' @author David Shaub
#'
hybridModel <- function(y, # nolint
models = "aefnst",
lambda = NULL, # nolint
a.args = NULL, # nolint
e.args = NULL, # nolint
n.args = NULL, # nolint
s.args = NULL, # nolint
t.args = NULL, # nolint
z.args = NULL, # nolint
weights = c("equal", "insample.errors", "cv.errors"),
errorMethod = c("RMSE", "MAE", "MASE"),
rolling = FALSE,
cvHorizon = frequency(y),
windowSize = 84,
horizonAverage = FALSE,
parallel = FALSE,
num.cores = 2L, # nolint
verbose = TRUE) {
##############################################################################
# Validate input
##############################################################################
modelArguments <- list("a" = a.args, "e" = e.args, "f" = NULL, "n" = n.args,
"s" = s.args, "t" = t.args, "z" = z.args)
# Validate and clean the input timeseries
y <- prepareTimeseries(y = y)
# using weights = "insample.errors" will tend to overfit and favor complex models, so it is
# not recommended
weights <- match.arg(weights)
if (weights == "insample.errors") {
wrnMsg <- paste0("Using insample.error weights is not recommended for accuracy and ",
"may be deprecated in the future.")
warning(wrnMsg)
}
errorMethod <- match.arg(errorMethod)
# Match the specified models
expandedModels <- sort(unique(tolower(unlist(strsplit(models, split = "")))))
# Check models and data length to ensure enough data: remove models that require more data
expandedModels <- removeModels(y = y, models = expandedModels)
# Check the parallel arguments
checkParallelArguments(parallel = parallel, num.cores = num.cores)
# Check a.args/t.args/e.args/n.args/s.args
checkModelArgs(modelArguments = modelArguments, models = expandedModels)
if (weights == "cv.errors" && errorMethod == "MASE") {
warning("cv errors currently do not support MASE. Reverting to RMSE.")
errorMethod <- "RMSE"
}
##############################################################################
# Fit models
##############################################################################
# Parallel execuation
if (parallel) {
if (.Platform$OS.type == "unix") {
cl <- parallel::makeForkCluster(num.cores)
} else {
cl <- parallel::makeCluster(num.cores)
}
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
# Parallel processing won't be used by default since the benefit only occurs on long series
# with large frequency
currentModel <- NULL
fitModels <- foreach::foreach(modelCode = expandedModels,
.packages = c("forecast", "forecastHybrid")) %dopar% {
# thetam() currently does not handle arguments
if (modelCode == "f") {
fitModel <- thetam(y)
} else { # All other models handle lambda and additional arguments
argsAdditional <- modelArguments[[modelCode]]
if (is.null(argsAdditional)) {
argsAdditional <- list(lambda = lambda)
} else if (is.null(argsAdditional$lambda)) {
argsAdditional$lambda <- lambda
}
currentModel <- purrr::partial(getModel(modelCode), y = y)
fitModel <- do.call(currentModel, argsAdditional)
}
fitModel
}
modelResults <- unwrapParallelModels(fitModels, expandedModels)
} else {# serial execution
modelResults <- list()
for (modelCode in expandedModels) {
modelName <- getModelName(modelCode)
if (verbose) {
message("Fitting the ", modelName, " model")
}
# thetam() currently does not handle arguments
if (modelCode == "f") {
modelResults[[modelName]] <- thetam(y)
} else { # All other models handle lambda and additional arguments
argsAdditional <- modelArguments[[modelCode]]
if (is.null(argsAdditional)) {
argsAdditional <- list(lambda = lambda)
} else if (is.null(argsAdditional$lambda)) {
argsAdditional$lambda <- lambda
}
currentModel <- purrr::partial(getModel(modelCode), y = y)
modelResults[[modelName]] <- do.call(currentModel, argsAdditional)
}
}
}
##############################################################################
# Determine model weights
##############################################################################
# Set the model weights
includedModels <- names(modelResults)
numModels <- length(expandedModels)
if (weights == "equal") {
modelResults$weights <- rep(1 / numModels, numModels)
} else if (weights %in% c("insample.errors", "cv.errors")) {
modelResults$weights <- rep(0, numModels)
modResults <- modelResults
if (weights == "cv.errors") {
for (modelCode in expandedModels) {
modelName <- getModelName(modelCode)
currentModel <- getModel(modelCode)
if (verbose) {
message("Cross validating the ", modelName, " model")
}
modResults[[modelName]] <- cvts(y, FUN = currentModel,
maxHorizon = cvHorizon,
horizonAverage = horizonAverage,
rolling = rolling,
verbose = FALSE,
windowSize = windowSize,
num.cores = num.cores)
}
}
# If horizonAverage == TRUE, the resulting accuracy object will have only one row
cvHorizon <- ifelse(horizonAverage, 1, cvHorizon)
cvHorizon <- ifelse(weights != "cv.errors", 1, cvHorizon)
# Set the weights
weightFunction <- function(x) accuracy(modResults[[getModelName(x)]])[cvHorizon, errorMethod]
modelResults$weights <- sapply(expandedModels, weightFunction)
# Scale the weights
inverseErrors <- 1 / modelResults$weights
modelResults$weights <- inverseErrors / sum(inverseErrors)
}
# Check for valid weights when weights = "insample.errors" and submodels produce perfect fits
if (is.element(NaN, modelResults$weights) & weights %in% c("insample.errors", "cv.errors")) {
wrnMsg <- paste0("At least one model perfectly fit the series, so accuracy measures cannot",
" be used for weights. Reverting to weights = \"equal\".") # nolint
warning(wrnMsg)
modelResults$weights <- rep(1 / length(includedModels),
length(includedModels))
}
names(modelResults$weights) <- includedModels
##############################################################################
# Prepare hybridModel object
##############################################################################
# Apply the weights to construct the fitted values
fits <- sapply(includedModels, FUN = function(x) fitted(modelResults[[x]]))
fitsWeightsMatrix <- matrix(rep(modelResults$weights[includedModels],
times = nrow(fits)),
nrow = nrow(fits), byrow = TRUE)
fits <- rowSums(fits * fitsWeightsMatrix)
resid <- y - fits
tsp(fits) <- tsp(y)
# Save which models used xreg
xregs <- list()
if ("a" %in% expandedModels) {
xregs$auto.arima <- ifelse("xreg" %in% names(a.args) && !is.null(a.args$xreg), TRUE, FALSE) # nolint
}
if ("n" %in% expandedModels) {
xregs$nnetar <- ifelse("xreg" %in% names(n.args) && !is.null(n.args$xreg), TRUE, FALSE)
}
if ("s" %in% expandedModels) {
methodArima <- "method" %in% names(s.args) && s.args$method == "arima"
xregs$stlm <- ifelse("xreg" %in% names(s.args) && !is.null(s.args$xreg) && methodArima,
TRUE, FALSE)
}
# Prepare the hybridModel object
class(modelResults) <- "hybridModel"
modelResults$frequency <- frequency(y)
modelResults$x <- y
modelResults$xreg <- xregs
modelResults$models <- includedModels
modelResults$fitted <- fits
modelResults$residuals <- resid
return(modelResults)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.