R/forecastOOP.R

Defines functions model_est

Documented in model_est

#' Estimate Direct Forecasting Model
#'
#' Given \code{y} and \code{xreg}, this function estimates a direct forecasting model as
#' described below, in section "Details".
#'
#' @details Condiser the following regression:
#' \itemize{
#' \item \eqn{y_{i,t+h} = a_0 + y_{i,t + 1 - pstart} + ... + y_{i,t + 1 - pend} + x_{i,t + 1 - qstart} + ... + x_{i,t + 1 - qend} + e_{i,t + h}}.
#' }
#' \code{model_est} estimates the above regression for all indexes in the
#' data frame \code{lags} (\code{p_start} \eqn{\ge 1} and \code{q_start} \eqn{\ge 1}). Forecasting from a model with this
#' structure is called direct forecast, where \eqn{h}
#' denotes the horizon over which \eqn{y} is to be forecast. This
#' approach can be used to forecast several steps ahead.
#'
#' A measure of core inflation (\eqn{\pi^*}) should
#' improve forecasting of headline inflation (\eqn{\pi}). A way of test
#' this property is running a regression like that one above:
#' \itemize{
#' \item \eqn{\pi_{t+h} = \alpha_0 + \sum_{i = 1}^p\beta_i\pi_{t-i+1} +
#'  \sum_{i = 1}^q\gamma_i\pi^*_{t-i+1} e_{t + h}}.
#' }
#' Using \code{recursive = FALSE} (default) in function \code{\link{lags}} is
#' equivalent to estimate the previous equation for headline inflation.
#' If \code{recursive = TRUE},
#' \eqn{i} is not necessarily initiated in 1, i.e, \code{p_start}
#' and \code{q_start}  also changes in
#' the first equation. Allowing \code{p_start} and \code{q_start} to vary,
#' there are more models to be estimated because it opens up
#' a lot of new possibilities.
#'
#'
#' @param y A numeric vector or time series.
#' @param xreg A numeric vector for the exogenous variable.
#' @param h An integer. The horizon of prediction.
#' @param lags A data frame generated by the function \code{\link{lags}}.
#'
#' @return A list with following components:
#' \item{model}{A \code{lm} object.}
#' \item{n_obs}{Number of observations of \code{y}.}
#' \item{h}{The horizon from which y is being forecasted.}
#'
#' @export
#'
#' @seealso \code{\link{model_est_single}} \code{\link{model_fcast}}
#'
#' @importFrom rlang :=
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#' inf_corems <- coreinf_br[["ipcams"]]
#' pq <- lags(2, 1)
#' model_est(inf_head, inf_corems, 2, pq)
model_est <- function(y, xreg, h, lags) {
  yname <- rlang::enexpr(y)
  if (!is.name(yname)) {
    yname <- "y"
  }
  xregname <- rlang::enexpr(xreg)
  if (!is.name(xregname)) {
    xregname <- "x"
  }
  models <- purrr::pmap(lags, function(...) {
    lagp <- (..1:..2) + (h - 1)
    names(lagp) <- paste0(rlang::expr(!!yname), "_lag", lagp)
    lagq <- (..3:..4) + (h - 1)
    names(lagq) <- paste0(rlang::expr(!!xregname), "_lag", lagq)
    ylag <- purrr::map_df(lagp, ~ dplyr::lag(as.vector(y), .))
    ylag <- tibble::add_column(ylag, !!yname := y, .before = 1)
    xlag <- purrr::map_df(lagq, ~ dplyr::lag(as.vector(xreg), .))
    data <- dplyr::bind_cols(ylag, xlag)
    model <- comb_regr(data[(max(lags) + h):(length(y) - (h)), ])
    return(list(data = data, model = model, n_obs = length(y), h = h))
  })
  models
}


#' Direct Autoregressive Forecast Model
#'
#' Estimates a direct autoregressive forecast model.
#'
#' @param y A vector or a time series.
#' @param h An integer, the horizon from which \code{y} is being forecasted.
#' @param lags A data frame with lags to begin and to stop. These
#' lags can be indirectly obtaneid by the function \code{lags} as example
#' below shows.
#'
#' @details Condiser the following regression:
#' \itemize{
#' \item \eqn{y_{i,t+h} = a_0 + y_{i,t + 1 - pstart} + ... + y_{i,t + 1 - pend}  + e_{i,t + h}}.
#' }
#' \code{model_est_single} estimates the above regression for all indexes in the
#' data frame \code{lags} (\code{p_start} \eqn{\ge 1}).
#'
#' Details section for the function \code{\link{model_est}} provides
#' more information. Forecasting obtainded by the direct autoregressive model
#' can be compared with forecasting where lags of the core inflation
#' measure are considered in the model as in \code{\link{model_est}} function.
#'
#' @return A list of the following components:
#' \item{model}{A lm object.}
#' \item{n_obs}{Number of observations.}
#' \item{h}{The horizon.}
#'
#' @export
#' @seealso \code{\link{model_est}} \code{\link{model_fcast}}
#' @examples
#' x <- arima.sim(n = 30, list(ar = c(0.8897, -0.4858)))
#' p <- unique(lags(2, 1)[, 1:2])
#' model_est_single(x, 2, p)
model_est_single <- function(y, h, lags) {
  yname <- rlang::enexpr(y)
  if (!is.name(yname)) {
    yname <- "y"
  }
  models <- purrr::pmap(lags, function(...) {
    lagp <- (..1:..2) + (h - 1)
    names(lagp) <- paste0(rlang::expr(!!yname), "_lag", lagp)
    ylag <- purrr::map_df(lagp, ~ dplyr::lag(as.vector(y), .))
    ylag <- tibble::add_column(ylag, !!yname := y, .before = 1)
    data <- ylag
    model <- comb_regr(data[(max(lags) + h):(length(y) - (h)), ])
    return(list(data = data, model = model, n_obs = length(y), h = h))
  })
  models
}

#' Select Best Model
#'
#' A function for selecting the best model fitted by the
#' function \code{\link{model_est}} or \code{\link{model_est_single}}.
#'
#' @param models An object generated by the funcion
#' \code{\link{model_est}} or \code{\link{model_est_single}}.
#' @param ic Information criterion: \code{BIC} or \code{AIC}.
#'
#' @return A list containing the following elements:
#' \item{data}{Tibble with the data used in fitting the model.}
#' \item{model}{An \code{lm} object.}
#' \item{n_obs}{Number of observations of the dependent variable.}
#' \item{h}{Horizon used in direct estimation.}
#' @export
#' @seealso \code{\link{model_est}}, \code{\link{model_est_single}}
#' @importFrom stats BIC
#'
#' @examples
#'
#' inf_head <- coreinf_br[["ipca"]]
#' inf_corems <- coreinf_br[["ipcams"]]
#' pq <- lags(2, 1)
#' model1 <- model_est(inf_head, inf_corems, 2, pq)
#' model_best(model1)
#'
#' p <- unique(lags(2, 1)[, 1:2])
#' model2 <- model_est_single(inf_head, 2, p)
#' model_best(model2, ic = AIC)
model_best <- function(models, ic = BIC) {
  inf_crit <- purrr::map(purrr::map(models, ~ `[[`(.x, "model")), ~ ic(.x))
  best_model <- models[[which.min(inf_crit)]]
  rm(models)
  best_model
}


#' Forecast from the Best Model
#'
#' \code{model_fcast} is a function for forecasting \code{h} step ahead from a model selected by
#' the function \code{\link{model_best}}.
#'
#' @param best_model A model generated by the function \code{\link{model_best}}.
#'
#' @return A prediction value for the horizon \code{h} considered in the
#' function \code{\link{model_best}}.
#'
#' @seealso \code{\link{model_best}}
#'
#' @export
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#' inf_corems <- coreinf_br[["ipcams"]]
#' pq <- lags(2, 1)
#' model1 <- model_est(inf_head, inf_corems, 2, pq)
#' best <- model_best(model1)
#' model_fcast(best)
model_fcast <- function(best_model) {
  h <- best_model[["h"]]
  n <- best_model[["n_obs"]]
  new_data <- best_model[["data"]][(n - (h - 1)):n, ]
  model_pred <- eval(best_model$model[["call"]], best_model[["data"]][1:(n - h), ])
  fcast <- forecast::forecast(model_pred, newdata = new_data, h = h)$mean[h]
  fcast
}


#' Error Prediction Two Variables
#'
#' This function calculates the root mean square error or mean absolute error of
#' models estimated by the function \code{\link{model_fcast}}. This function
#' is appropriate for forecasting evaluation of a
#' core inflation measure that is never revised.
#'
#' @param k An integer to determine the number of observations to include
#' in the out-of-sample set.
#' @param y A numeric vector or time series of the dependet variable; tipplically
#' the headline inflation.
#' @param xreg A numeric vector or time series of the independent variable; a measure
#' of core inflation that is never revised.
#' @param lags A data frame generated by the function \code{\link{lags}}.
#' @param h An integer to set the desired horizon.
#' @param RMSE Logical: if \code{TRUE} returns \code{RMSE}, if \code{FALSE} returns \code{MAE}.
#'
#' @return An integer contaning the calculated error.
#'
#' @seealso \code{\link{model_est}}, \code{\link{model_best}},
#' \code{\link{model_fcast}}.
#'
#' @export
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#' inf_corems <- coreinf_br[["ipcams"]]
#' pq <- lags(2, 1)
#' model_error(24, inf_head, inf_corems, pq, 2, RMSE = TRUE)
#' purrr::map_dbl(1:12, ~ model_error(24, inf_head, inf_corems, pq, .x, RMSE = FALSE))
model_error <- function(k, y, xreg, lags, h, RMSE = TRUE) {
  l <- length(y)
  pred <- purrr::map_dbl(k:1, function(.x) {
    y2 <- y[1:(l - (.x - 1))]
    xreg2 <- xreg[1:(l - (.x - 1))]
    model_fcast(model_best(model_est(y2, xreg2, h, lags)))
  })
  if (RMSE == TRUE) {
    error_mean <- sqrt(mean((pred - y[(l - (k - 1)):l])^2))
  } else {
    error_mean <- mean(abs(pred - y[(l - (k - 1)):l]))
  }
  error_mean
}


#' Error Prediction Single Variable
#'
#' Calculates the root mean square error or mean absolute error of
#' univariate models estimated by the function \code{\link{model_fcast}} or
#' \code{\link{model_est_single}}.
#'
#' @inheritParams model_error
#'
#' @return An integer contaning the calculated error.
#'
#' @seealso \code{\link{model_est_single}}, \code{\link{model_best}},
#' \code{\link{model_fcast}}.
#'
#' @export
#'
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#' pq <- lags(2, 1)
#' purrr::map_dbl(1:12, ~ model_error_single(24, inf_head, unique(pq[, 1:2]), .x, RMSE = FALSE))
model_error_single <- function(k, y, lags, h, RMSE = TRUE) {
  l <- length(y)
  pred <- purrr::map_dbl(k:1, function(.x) {
    y2 <- y[1:(l - (.x - 1))]
    model_fcast(model_best(model_est_single(y2, h, lags)))
  })
  if (RMSE == TRUE) {
    error_mean <- sqrt(mean((pred - y[(l - (k - 1)):l])^2))
  } else {
    error_mean <- mean(abs(pred - y[(l - (k - 1)):l]))
  }
  error_mean
}


#' Error Prediction Wavelet Models
#'
#' \code{model_error_wave} estimates the prediction error of models
#' fitted by the function \code{\link{model_est}} when the \code{x} variable
#' is a wavelet-based signal estimation of \code{y}.
#' For each new observation \code{k} included in the
#' out-of-sample set, the wavelet-based signal is reestimated.
#' This function supports
#' several specifications of wavelet models being a generalization
#' of \code{\link{model_error_wave_single}}.
#'
#'
#' @param x An object of class \code{args_wshr}, \code{args_ebthr} or \code{args_wthr}.
#' @param y A numeric vector or a time series for wavelet-based signal estimation.
#' Headline inflation for estimation of a wavelet core inflation measure.
#' @param h An integer to set the desired horizon.
#' @param lags A data frame generated by the function \code{\link{lags}}.
#' @param k An integer to determine the number of observations to include in the out-of-sample set.
#' @param RMSE logical: if \code{TRUE} returns \code{RMSE}, if \code{FALSE} returns \code{MAE}.
#'
#' @return A vector of errors for every wavelet model considered.
#'
#' @seealso \code{\link{wav_smooth}}, \code{\link{model_est}},
#' \code{\link{model_best}}, \code{\link{model_fcast}}
#'
#' @export
#'
#'
#'
#' @examples
#' wthr_wd <- list(
#'   filter.number = 4,
#'   type = c("wavelet", "station")
#' )
#'
#' wthr_thr <- list(
#'   type = c("soft", "hard"),
#'   policy = c(
#'     "universal",
#'     "BayesThresh"
#'   )
#' )
#'
#' wthr_args <- wav_args_wthr(wthr_wd, wthr_thr, 4:5)
#'
#' h <- 1:3
#' names(h) <- paste0("h", h)
#'
#' pq <- lags(2, 1)
#'
#' inf_head <- coreinf_br[["ipca"]]
#'
#' # h = 2
#'
#' model_error_wave(wthr_args, inf_head, h = 2, pq, 12, RMSE = TRUE)
#'
#' # h = 1:3 (mean of the errors for each wavelet model estimated)
#' \dontrun{
#'
#' pred_wthr <- purrr::map_df(
#'   h,
#'   ~ model_error_wave(wthr_args, inf_head, .x, pq, 12, RMSE = TRUE)
#' ) %>%
#'   dplyr::mutate(pred = purrr::possibly(rowMeans, NA)(.)) %>%
#'   dplyr::select(pred)
#'
#' pred_wthr
#' }
#'
model_error_wave <- function(x, y, h, lags, k, RMSE = TRUE) {
  purrr::pmap_dbl(x[[1]], function(...) {
    if (class(x) == "args_wshr") {
      args_wave <- list(args_tbl = tibble::tibble(...))
    } else {
      args_wave <- list(args_tbl = tibble::tibble(...), nwt_args = x[[2]])
    }
    class(args_wave) <- class(x)
    l <- length(y)
    pred <- purrr::map_dbl(k:1, function(.x) {
      y2 <- y[1:(l - (.x - 1))]
      xreg <- wav_smooth(y2, args_wave)[["x_sm"]][[1]]
      model_est <- purrr::possibly(model_est, NA)(y2, xreg, h, lags)
      model_best <- purrr::possibly(model_best, NA)(model_est)
      purrr::possibly(model_fcast, NA)(model_best)
    })
    if (RMSE == TRUE) {
      error_mean <- sqrt(mean((pred - y[(l - (k - 1)):l])^2))
    } else {
      error_mean <- mean(abs(pred - y[(l - (k - 1)):l]))
    }
    error_mean
  })
}


#' Summarize Error Prediction Wavelet Models
#'
#' This function calculates the average forecast errors over forecasting horizons.
#' Those errors are estimated by the function \code{\link{model_error_wave}}.
#'
#' @param h A positive integer that controls which maximum horizon is being used in forecasting (\eqn{1, 2, ..., h}).
#' @param ... Additional parameters to pass to the function
#' \code{\link{model_error_wave}}.
#'
#' @return A tibble with the average forecast errors for all horizon \code{h} considered and
#' for all  wavelet models estimated.
#'
#' @seealso \code{\link{model_error_wave}}
#'
#' @export
#'
#' @importFrom rlang .data
#'
#' @importFrom dplyr %>%
#' @importFrom purrr %||%
#'
#'
#' @examples
#' wthr_wd <- list(
#'   filter.number = 2:4,
#'   type = c("wavelet", "station")
#' )
#'
#' wthr_thr <- list(
#'   type = c("soft", "hard"),
#'   policy = c(
#'     "universal",
#'     "BayesThresh"
#'   )
#' )
#'
#' wthr_args <- wav_args_wthr(wthr_wd, wthr_thr, 3:5)
#'
#' pq <- lags(2, 1)
#'
#' inf_head <- coreinf_br[["ipca"]]
#'
#' error_wave_summary(2, x = wthr_args, y = inf_head, lags = pq, k = 3, RMSE = TRUE)
error_wave_summary <- function(h, ...) {
  args <- list(...)
  h <- 1:h
  names(h) <- paste0("h", h)
  error_mean <- purrr::map_df(h, ~ do.call(model_error_wave, c(args, h = .x))) %>%
    dplyr::mutate(error = purrr::possibly(rowMeans, NA)(.)) %>%
    dplyr::select(.data$error)
  error_mean
}


#' Insert Date Column
#'
#' Inserts a data column in an object generated by the function \code{\link{wav_smooth}}.
#'
#' @param x A tibble generated by the function \code{\link{wav_smooth}}.
#' @param ts A time series vector.
#'
#' @return A tibble.
#' @export
#'
#' @examples
#' library(lubridate)
#'
#' wshr_obj <- wav_args_wshr(list(
#'   wavelet = c("haar", "d4", "d6", "d8", "s8"),
#'   n.level = 1:4
#' ))
#'
#' inf_head <- coreinf_br[["ipca"]]
#'
#' date_start <- coreinf_br[["date"]][1]
#' ts_start <- c(year(date_start), month(date_start))
#' inf_head_ts <- ts(inf_head, start = ts_start, frequency = 12)
#'
#' core_wavelet <- wav_smooth(inf_head, wshr_obj)
#'
#' wcore_table(core_wavelet, inf_head_ts)
wcore_table <- function(x, ts) {
  x %>%
    tibble::add_column(
      date = rep(list(as.vector(stats::time(ts))))
    )
}




#' Wavelet Denoising
#'
#' The basic idea behind \code{smooth_wavelet} is to denoise a vector
#' of data using wavelet methods that are available in 3 \code{R} packages:
#' \code{wmtsa, EbayesThresh} and \code{wavethresh}. Do not mix parameters
#' related to different methods.
#'
#' @param x A numeric vector. If the data is not of length
#' \eqn{2 ^ J} for some integer \eqn{J}, the function
#' \code{WiSEBoot::\link[WiSEBoot]{padVector}} increases the length of data to
#' achieve the particular length requirement based on reflection type.
#' See the help page of the mentioned function for details. After the
#' signal computation, the length is again adjusted for the orginal length of \code{x}.
#' @param thfun A function to denoise \code{x}. This should be a method
#' available in packages \code{wmtsa}, \code{EbayesThresh} and \code{wavethresh}.
#' The options are \code{\link[wmtsa]{wavShrink}},\cr
#' \code{\link[EbayesThresh]{ebayesthresh.wavelet}} and
#' \code{\link[wavethresh]{threshold}}. Default: \code{\link[wmtsa]{wavShrink}}.
#' @param wtfun A wavelet transform function: \code{\link[waveslim]{dwt}} or
#' \code{\link[waveslim]{modwt}} if
#' \code{thfun = \link[EbayesThresh]{ebayesthresh.wavelet}}.
#' \code{\link[wavethresh]{wd}} if \code{thfun =  \link[wavethresh]{threshold}}.
#' If \code{thfun = \link[wmtsa]{wavShrink}}, \code{wtfun = NULL} (default).
#' @param wtfunlist A named list of parameters to pass to functions \code{\link[waveslim]{dwt}},
#' \code{\link[waveslim]{modwt}} or \code{\link[wavethresh]{wd}}.
#' @param ... Additional arguments to pass to functions:
#' \itemize{
#' \item{}{\code{wmtsa::\link[wmtsa]{wavShrink}}}
#' \item{}{\code{EbayesThresh::\link[EbayesThresh]{ebayesthresh.wavelet}}}
#' \item{}{\code{wavethresh::\link[wavethresh]{threshold}}}
#' }
#'
#' @return A numeric vector with the wavelet-based signal estimation.
#'
#' @seealso \code{\link[wmtsa]{wavShrink}},
#' \code{\link[EbayesThresh]{ebayesthresh.wavelet}},
#' \code{\link[wavethresh]{threshold}}
#'
#' @export
#'
#' @importFrom wmtsa wavShrink
#' @importFrom waveslim modwt
#' @importFrom waveslim dwt
#' @importFrom wavethresh wd
#'
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#'
#' # From package wmtsa (default):
#' smooth_wavelet(inf_head)
#'
#' # From package EbayesThresh:
#' smooth_wavelet(inf_head, ebayesthresh.wavelet, modwt, list(wf = "haar"),
#'                a = NA, vscale = "independent")
#'
#' # From package wavethresh:
#' smooth_wavelet(inf_head, threshold, wd, list(filter.number = 8), policy = "cv")
#'
#'
smooth_wavelet <- function(x, thfun = wavShrink, wtfun = NULL, wtfunlist = list(), ...) {
  fun_thr <- rlang::enexpr(thfun)
  fun_wt <- rlang::enexpr(wtfun)
  if (!is.null(wtfun)) {
    if (!(fun_thr == "wavShrink") & !(fun_wt == "modwt")) {
      lx <- length(x)
      x <- WiSEBoot::padVector(x, pad.direction = "rear", replaceLinearTrend = TRUE)[[1]]
    }
  }
  if (!is.null(wtfun)) {
    if (fun_wt == "wd") {
      args <- c(list(data = x), wtfunlist)
    } else {
      args <- c(list(x = x), wtfunlist)
    }
  } else {
    args <- NULL
  }
  if (fun_thr == "wavShrink") {
    eval(rlang::expr((!!fun_thr)(x, ...)))
  } else if (fun_thr == "ebayesthresh.wavelet") {
    wt <- do.call(match.fun(wtfun), args)
    if (fun_wt == "modwt") {
      waveslim::imodwt(EbayesThresh::ebayesthresh.wavelet(wt, ...))
    } else {
      waveslim::idwt(EbayesThresh::ebayesthresh.wavelet(wt, ...))[1:lx]
    }
  } else {
    wt <- do.call(match.fun(wtfun), args)
    if (wt$type == "wavelet") {
      wavethresh::wr(wavethresh::threshold(wt, ...))[1:lx]
    } else {
      wavethresh::AvBasis(wavethresh::convert(wavethresh::threshold(wt, ...)))[1:lx]
    }
  }
}



#' Error Prediction for Single Wavelet Model
#'
#' \code{model_error_wave_single} estimates the prediction
#' error of models fitted by the
#' function \code{\link{model_est}} when the \code{x} variable is
#' \code{y} smoothed by wavelet models. For each new observation
#' \code{k} inlucluded in the out-of-sample set,
#' the wavelet-based signal is reestimated.
#' This function computes
#' the prediction error for a single wavelet specification and
#' therefore is similar to \code{\link{model_error_wave}}
#' that is used for computing the error for several specifications of
#' wavelet models.
#'
#' @inheritParams model_error_wave
#' @param ... Additional parameters to pass to function \code{\link{smooth_wavelet}}.
#'
#' @return A numeric vector of length one.
#'
#' @seealso \code{\link{smooth_wavelet}, \link{model_error_wave}},
#' \code{\link{model_fcast}, \link{model_best}, \link{model_est}}
#'
#' @export
#'
#' @examples
#' inf_head <- coreinf_br[["ipca"]]
#' pq <- lags(2, 1)
#'
#' # h = 2
#' model_error_wave_single(
#' 36, inf_head, pq, 2, RMSE = FALSE,
#' thfun = ebayesthresh.wavelet,
#' wtfun = "dwt",
#' wtfunlist = list(wf = "d6", n.levels = 4),
#' a = 0.5, vscale = "level", prior = "cauchy")
#'
#' # h = 1:6; k = 20
#'
#' purrr::map_dbl(1:6, ~ model_error_wave_single(
#' 20, inf_head, pq, .x, RMSE = FALSE,
#' thfun = ebayesthresh.wavelet,
#' wtfun = "dwt",
#' wtfunlist = list(wf = "d6", n.levels = 4),
#' a = 0.5, vscale = "level", prior = "cauchy"))
model_error_wave_single <- function(k, y, lags, h, RMSE = TRUE, ...) {
  l <- length(y)
  pred <- purrr::map_dbl(k:1, function(.x) {
    y2 <- y[1:(l - (.x - 1))]
    xreg2 <- smooth_wavelet(y2, ...)
    model_fcast(model_best(model_est(y2, xreg2, h, lags)))
  })
  if (RMSE == TRUE) {
    error_mean <- sqrt(mean((pred - y[(l - (k - 1)):l])^2))
  } else {
    error_mean <- mean(abs(pred - y[(l - (k - 1)):l]))
  }
  error_mean
}
nelson16silva/wavcoreinf documentation built on Feb. 17, 2025, 7:10 p.m.