Nothing
#'Train an univariate time series forecasting model
#'
#'This function trains a model from the historical values of a time series using
#'an autoregressive approach: the targets are the historical values and the
#'features of the targets their lagged values.
#'
#'The functions used to build and train the model are:
#' * KNN: In this case no model is built and the function [FNN::knn.reg()] is
#'used to predict the future values of the time series. By default, k is equal
#'to 3.
#' * Linear models: Function [stats::lm()] to build the model and the method
#'[stats::predict.lm()] associated with the trained model to forecast the future
#'values of the time series.
#' * Regression trees: Function [rpart::rpart()] to build the model and the
#'method [rpart::predict.rpart()] associated with the trained model to forecast
#'the future values of the time series.
#' * Model trees: Function [Cubist::cubist()] to build the model and the
#'method [Cubist::predict.cubist()] associated with the trained model to
#'forecast the future values of the time series. By default, the parameter
#'`committees` is set to 5.
#' * Bagging: Function [ipred::bagging()] to build the model and the
#'method [ipred::predict.regbagg()] associated with the trained model to
#'forecast the future values of the time series.
#' * Random forest: Function [ranger::ranger()] to build the model and the
#'method [ranger::predict.ranger()] associated with the trained model to
#'forecast the future values of the time series.
#' * Extreme gradient boosting: Function [xgboost::xgboost()] to build the model and the
#'method [xgboost::predict.xgboost()] associated with the trained model to
#'forecast the future values of the time series.
#'
#'@param timeS A time series of class `ts` or a numeric vector.
#'@param lags An integer vector, in increasing order, expressing the lags used
#' as autoregressive variables. If the default value (`NULL`) is provided, a
#' suitable vector is chosen.
#'@param method A string indicating the method used for training and
#' forecasting. Allowed values are:
#' * `"knn"`: k-nearest neighbors (the default)
#' * `"lm"`: linear regression
#' * `"rt"`: regression trees
#' * `"mt"`: model trees
#' * `"bagging"`
#' * `"rf"`: random forests
#' * `"xgboost"`: extreme gradient boosting
#'
#' See details for a brief explanation of the models. It is also possible to
#' use your own regression model, in that case a function explaining how to
#' build your model must be provided, see the vignette for further details.
#'
#'@param trend A character indicating the type of preprocessing applied to the
#' time series in order to deal with trending series, see the vignette for
#' details.
#'
#'@param nfd In the case that the parameter `trend` has the value "differences",
#' it specifies the order of first differences to be applied. If the default
#' (-1) is used, the order of first differences needed by the time series will
#' be estimated by the [forecast::ndiffs()] function.
#'
#'@param transform_features A logical value indicating whether the training
#' features are also transformed if the additive or multiplicative
#' transformation has been used as preprocessing to deal with trending series.
#'
#'@param ... Parameters for the underlying function that builds the model. If no
#' parameters are provided, the model is normally fitted with its default
#' parameters. See details for the functions used to train the models.
#'
#'@returns An S3 object of class `utsf`, basically a list with, at least, the
#' following components: \item{`ts`}{The time series being forecast.}
#' \item{`features`}{A data frame with the features of the training set. The
#' column names of the data frame indicate the autoregressive lags.}
#' \item{`targets`}{A vector with the targets of the training set.}
#' \item{`lags`}{An integer vector with the autoregressive lags.}
#' \item{`model`}{The regression model used recursively to make the forecast.}
#'@export
#'
#' @examples
#' ## Build model using k-nearest neighbors
#' create_model(AirPassengers, method = "knn")
#'
#' ## Using k-nearest neighbors changing the default k value
#' create_model(AirPassengers, method = "knn", k = 5)
#'
#' ## Using your own regression model
#'
#' # Function to build the regression model
#' my_knn_model <- function(X, y, param) {
#' structure(list(X = X, y = y), class = "my_knn")
#'}
#' # Function to predict a new example
#' predict.my_knn <- function(object, new_value) {
#' FNN::knn.reg(train = object$X, test = new_value, y = object$y)$pred
#' }
#' create_model(AirPassengers, method = my_knn_model)
#'
create_model <- function(timeS,
lags = NULL,
method = c("knn", "lm", "rt", "mt", "bagging", "rf", "xgboost"),
trend = c("additive", "multiplicative", "differences", "none"),
nfd = -1,
transform_features = TRUE,
...) {
# Check timeS parameter
if (! (stats::is.ts(timeS) || is.vector(timeS, mode = "numeric")))
stop("timeS parameter should be of class ts or a numeric vector")
if (! stats::is.ts(timeS))
timeS <- stats::as.ts(timeS)
trend <- match.arg(trend)
# Check lags parameter
lagsc <- lags
if (! (is.null(lagsc) || is.vector(lagsc, mode = "numeric"))) {
stop("lags parameter should be NULL or numeric")
}
if (!is.null(lagsc) && !all(lagsc == floor(lagsc))) {
stop("lag values should be integer")
}
if (is.null(lagsc)) {
if (stats::frequency(timeS) > 1) {
lagsc <- 1:stats::frequency(timeS)
} else {
partial <- stats::pacf(timeS, plot = FALSE)
lagsc <- which(partial$acf > 2/ sqrt(length(timeS)))
if (length(lagsc) == 0 ||
(length(lagsc) == 1 &&
trend %in% c("additive", "multiplicative"))) {
lagsc <- 1:5
}
}
}
if (is.unsorted(lagsc)) lagsc <- sort(lagsc)
if (lagsc[1] < 1) stop("lag values should be greater than zero")
if ((length(lagsc) == 1 && trend %in% c("additive", "multiplicative")) && transform_features) {
stop("It does not make sense to use only 1 autoregressive lag with the additive or multiplicative transformation of features")
}
if (utils::tail(lagsc, 1) >= length(timeS)) {
stop("Maximum lag cannot be greater or equal to the length of the series")
}
# Check method parameter
tryCatch(method <- match.arg(method),
error = function(cond) {
if (length(method) != 1)
stop("parameter method cannot be a vector")
if (! class(method) %in% "function")
stop("parameter method should be a function or valid method")
method <- method
}
)
# Check nfd parameter
if (trend == "differences") {
if (! (is.numeric(nfd) && length(nfd) == 1 && nfd >= -1 && floor(nfd) == nfd))
stop("nfd parameter should be an integer scalar value >= -1")
}
# Check transform_features parameter
if (! (is.logical(transform_features) && length(transform_features) == 1))
stop("transform_features parameter should be a logical value")
# Create training set and targets / transformations / preprocessing
if (trend == "differences") {
preprocessing_fd <- fd_preprocessing(timeS, nfd)
if (preprocessing_fd$differences == 0) {
out <- build_examples(timeS, rev(lagsc))
} else {
out <- build_examples(preprocessing_fd$preprocessed, rev(lagsc))
}
out$differences <- preprocessing_fd
} else {
out <- build_examples(timeS, rev(lagsc))
if (trend == "additive") {
means <- rowMeans(out$features)
if (transform_features) {
out$features <- sapply(1:nrow(out$features),
function(row) out$features[row, ] - means[row])
out$features <- t(out$features)
}
out$features <- as.data.frame(out$features)
out$targets <- out$targets - means
# means <- rowMeans(out$features[, 1:length(lagsc)])
# out$features[ , 1:length(lagsc)] <- sapply(1:nrow(out$features),
# function(row) out$features[row, 1:length(lagsc)] - means[row])
# out$features <- as.data.frame(out$features)
# out$targets <- out$targets - means
} else if (trend == "multiplicative") {
means <- rowMeans(out$features)
if (transform_features) {
out$features <- sapply(1:nrow(out$features),
function(row) out$features[row, ] / means[row])
out$features <- t(out$features)
}
out$features <- as.data.frame(out$features)
out$targets <- out$targets / means
}
}
if (!is.data.frame(out$features)) out$features <- as.data.frame(out$features)
# Add other information to the output object
out$call <- match.call()
out$ts <- timeS
out$lags <- lagsc
out$trend <- trend
if (trend == "differences") {
out$nfd <- nfd
}
if (trend %in% c("additive", "multiplicative")) {
out$transform_features <- transform_features
}
out$param <- list(...) # param
# Create/train the model
if (inherits(method, "function")) {
# model provided by the user
args <- c(list(X = out$features, y = out$targets), list(...))
out$model <- do.call(method, args = args)
} else {
# model supported by the package
out$model <- build_model(out$features, out$targets, method, list(...))
}
out$method <- method
class(out) <- "utsf"
out
}
#'Forecasting a time series
#'
#'@param object an object of class `utsf` embedding a forecasting model for a
#' time series.
#'@param h A positive integer. Number of values to be forecast into the future,
#' i.e., forecast horizon.
#'@param PI If TRUE, prediction intervals are produced using simulation and
#' assuming normally distributed errors.
#'@param level Confidence level for predictions intervals.
#'@param ... Other arguments passed to methods
#'
#'@returns an object of class `utsf_forecast` with the same components of the
#' model received as first argument, plus several components:
#' \item{`pred`}{The forecast as an `ts` object.}
#' \item{`lower`}{Lower limits for prediction interval.}
#' \item{`upper`}{Upper limits for prediction interval.}
#' \item{`level`}{Confidence value associated with the prediction interval}
#' @examples
#' ## Forecast time series using k-nearest neighbors
#' m <- create_model(USAccDeaths, method = "knn")
#' f <- forecast(m, h = 12)
#' f$pred
#' library(ggplot2)
#' autoplot(f)
#'
#' ## Using k-nearest neighbors changing the default k value
#' m <- create_model(USAccDeaths, method = "knn", k = 5)
#' forecast(m, h = 12)
#'
#' ## Using your own regression model
#'
#' # Function to build the regression model
#' my_knn_model <- function(X, y, param) {
#' structure(list(X = X, y = y), class = "my_knn")
#'}
#' # Function to predict a new example
#' predict.my_knn <- function(object, new_value) {
#' FNN::knn.reg(train = object$X, test = new_value, y = object$y)$pred
#' }
#' m <- create_model(USAccDeaths, method = my_knn_model)
#' forecast(m, h = 12)
#'@export
forecast.utsf <- function(object, h, PI = FALSE, level = 90, ...) {
# Check h parameter
if (! (is.numeric(h) && length(h) == 1 && h >= 1 && floor(h) == h))
stop("h parameter should be an integer scalar greater than zero")
# Check PI parameter
if (! (is.logical(PI) && length(PI) == 1))
stop("PI parameter should be a logical value")
# Check level parameter
if (! (is.numeric(level) && length(level) == 1 && level > 0 && level <= 100))
stop("Confidence limit out of range")
if (level < 1) level <- level * 100
out <- object
out$pred <- recursive_prediction(out, h = h)
if (out$trend == "differences" && out$differences$differences > 0) {
out$pred <- fd_unpreprocessing(out$pred, out$differences)
}
if (PI) {
s <- try(simulations(object, h, level), silent = TRUE)
if (inherits(s, "try-error")) {
warning("Series too short to compute prediction intervals.")
} else {
out$lower <- stats::ts(s[1, ], start = stats::start(out$pred), frequency = stats::frequency(out$pred))
out$upper <- stats::ts(s[2, ], start = stats::start(out$pred), frequency = stats::frequency(out$pred))
out$level <- level
}
}
class(out) <- "utsf_forecast"
out
}
# @param object S3 object of class utsf
predict_one_value_transforming <- function(object, example) {
if (object$trend == "additive") {
mean_ <- mean(example)
if (object$transform_features) {
example <- example - mean_
}
# mean_ <- mean(head(example, length(object$lags))) # añadido
# example[seq_along(object$lags)] <- example[seq_along(object$lags)] - mean_ # añadido
} else if (object$trend == "multiplicative") {
mean_ <- mean(example)
if (object$transform_features) {
example <- example / mean_
}
}
example <- as.data.frame(matrix(example, ncol = length(example)))
colnames(example) <- colnames(object$features)
# print(example)
if (inherits(object$method,"character")) {
r <- stats::predict(object, example)
} else {
r <- stats::predict(object$model, example)
}
if (object$trend == "additive") {
r <- r + mean_
} else if (object$trend == "multiplicative") {
r <- r * mean_
}
r
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.