R/tslarsP.R

Defines functions tslarsPFit rtslarsP.default rtslarsP.formula rtslarsP tslarsP.default tslarsP.formula tslarsP

Documented in rtslarsP rtslarsP.default rtslarsP.formula tslarsP tslarsP.default tslarsP.formula

# --------------------------------------
# Author: Andreas Alfons
#         Erasmus Universiteit Rotterdam
#
# based on code by Sarah Gelper
# --------------------------------------

#' (Robust) least angle regression for time series data with fixed lag length
#'
#' (Robustly) sequence groups of candidate predictors and their respective
#' lagged values according to their predictive content and find the optimal
#' model along the sequence.  Note that lagged values of the response are
#' included as a predictor group as well.
#'
#' @aliases print.tslarsP
#'
#' @param formula  a formula describing the full model.
#' @param data  an optional data frame, list or environment (or object coercible
#' to a data frame by \code{\link{as.data.frame}}) containing the variables in
#' the model.  If not found in data, the variables are taken from
#' \code{environment(formula)}, typically the environment from which
#' \code{tslarsP} or \code{rtslarsP} is called.
#' @param x  a numeric matrix or data frame containing the candidate predictor
#' series.
#' @param y  a numeric vector containing the response series.
#' @param h  an integer giving the forecast horizon (defaults to 1).
#' @param p  an integer giving the number of lags in the model (defaults to 2).
#' @param sMax  an integer giving the number of predictor series to be
#' sequenced.  If it is \code{NA} (the default), predictor groups are sequenced
#' as long as there are twice as many observations as predictor variables.
#' @param centerFun  a function to compute a robust estimate for the center
#' (defaults to \code{\link[stats]{median}}).
#' @param scaleFun  a function to compute a robust estimate for the scale
#' (defaults to \code{\link[stats]{mad}}).
#' @param regFun  a function to compute robust linear regressions that can be
#' interpreted as weighted least squares (defaults to
#' \code{\link[robustbase]{lmrob}}).
#' @param regArgs  a list of arguments to be passed to \code{regFun}.
#' @param combine  a character string specifying how to combine the data
#' cleaning weights from the robust regressions with each predictor group.
#' Possible values are \code{"min"} for taking the minimum weight for each
#' observation, \code{"euclidean"} for weights based on Euclidean distances
#' of the multivariate set of standardized residuals (i.e., multivariate
#' winsorization of the standardized residuals assuming independence), or
#' \code{"mahalanobis"} for weights based on Mahalanobis distances of the
#' multivariate set of standardized residuals (i.e., multivariate winsorization
#' of the standardized residuals).
#' @param winsorize  a logical indicating whether to clean the data by
#' multivariate winsorization.
#' @param const  numeric; tuning constant for multivariate winsorization to be
#' used in the initial corralation estimates based on adjusted univariate
#' winsorization (defaults to 2).
#' @param prob  numeric; probability for the quantile of the
#' \eqn{\chi^{2}}{chi-squared} distribution to be used in multivariate
#' winsorization (defaults to 0.95).
#' @param fit  a logical indicating whether to fit submodels along the sequence
#' (\code{TRUE}, the default) or to simply return the sequence (\code{FALSE}).
#' @param s  an integer vector of length two giving the first and last
#' step along the sequence for which to compute submodels.  The default
#' is to start with a model containing only an intercept (step 0) and
#' iteratively add all series along the sequence (step \code{sMax}).  If
#' the second element is \code{NA}, predictor groups are added to the
#' model as long as there are twice as many observations as predictor
#' variables.  If only one value is supplied, it is recycled.
#' @param crit  a character string specifying the optimality criterion to be
#' used for selecting the final model.  Currently, only \code{"BIC"} for the
#' Bayes information criterion is implemented.
#' @param ncores  a positive integer giving the number of processor cores to be
#' used for parallel computing (the default is 1 for no parallelization).  If
#' this is set to \code{NA}, all available processor cores are used.  For
#' obtaining the data cleaning weights and for fitting models along the
#' sequence, parallel computing is implemented on the \R level using package
#' \pkg{parallel}.  Otherwise parallel computing for some of of the more
#' computer-intensive computations in the sequencing step is implemented on the
#' C++ level via OpenMP (\url{https://www.openmp.org/}).
#' @param cl  a \pkg{parallel} cluster for parallel computing as generated by
#' \code{\link[parallel]{makeCluster}}.  This is preferred over \code{ncores}
#' for tasks that are parallelized on the \R level, in which case \code{ncores}
#' is only used for tasks that are parallelized on the C++ level.
#' @param seed  optional initial seed for the random number generator
#' (see \code{\link{.Random.seed}}), which is useful because many robust
#' regression functions (including \code{\link[robustbase]{lmrob}}) involve
#' randomness.  On parallel \R worker processes, random number streams are
#' used and the seed is set via \code{\link{clusterSetRNGStream}}.
#' @param model  a logical indicating whether the model data should be included
#' in the returned object.
#' @param \dots  additional arguments to be passed down.
#'
#' @return
#' If \code{fit} is \code{FALSE}, an integer vector containing the indices of
#' the sequenced predictor series.
#'
#' Otherwise an object of class \code{"tslarsP"} (inheriting from classes
#' \code{"grplars"} and \code{"seqModel"}) with the following components:
#' \describe{
#'   \item{\code{active}}{an integer vector containing the sequence of predictor
#' series.}
#'   \item{\code{s}}{an integer vector containing the steps for which submodels
#' along the sequence have been computed.}
#'   \item{\code{coefficients}}{a numeric matrix in which each column contains the
#' regression coefficients of the corresponding submodel along the sequence.}
#'   \item{\code{fitted.values}}{a numeric matrix in which each column contains
#' the fitted values of the corresponding submodel along the sequence.}
#'   \item{\code{residuals}}{a numeric matrix in which each column contains
#' the residuals of the corresponding submodel along the sequence.}
#'   \item{\code{df}}{an integer vector containing the degrees of freedom of the
#' submodels along the sequence (i.e., the number of estimated coefficients).}
#'   \item{\code{robust}}{a logical indicating whether a robust fit was computed.}
#'   \item{\code{scale}}{a numeric vector giving the robust residual scale
#' estimates for the submodels along the sequence (only returned for a robust
#' fit).}
#'   \item{\code{crit}}{an object of class \code{"bicSelect"} containing the BIC
#' values and indicating the final model (only returned if argument \code{crit}
#' is \code{"BIC"} and argument \code{s} indicates more than one step along the
#' sequence).}
#'   \item{\code{muX}}{a numeric vector containing the center estimates of the
#' predictor variables.}
#'   \item{\code{sigmaX}}{a numeric vector containing the scale estimates of the
#' predictor variables.}
#'   \item{\code{muY}}{numeric; the center estimate of the response.}
#'   \item{\code{sigmaY}}{numeric; the scale estimate of the response.}
#'   \item{\code{x}}{the matrix of candidate predictor series (if \code{model} is
#' \code{TRUE}).}
#'   \item{\code{y}}{the response series (if \code{model} is \code{TRUE}).}
#'   \item{\code{assign}}{an integer vector giving the predictor group to which
#' each predictor variable belongs.}
#'   \item{\code{w}}{a numeric vector giving the data cleaning weights (only
#' returned for a robust fit).}
#'   \item{\code{h}}{the forecast horizon.}
#'   \item{\code{p}}{the number of lags in the model.}
#'   \item{\code{call}}{the matched function call.}
#' }
#'
#' @note The predictor group of lagged values of the response is indicated by
#' the index 0.
#'
#' @author Andreas Alfons, based on code by Sarah Gelper
#'
#' @references
#' Alfons, A., Croux, C. and Gelper, S. (2016) Robust groupwise least angle
#' regression. \emph{Computational Statistics & Data Analysis}, \bold{93},
#' 421--435. \doi{10.1016/j.csda.2015.02.007}
#'
#' @seealso \code{\link[=coef.seqModel]{coef}},
#' \code{\link[=fitted.seqModel]{fitted}},
#' \code{\link[=plot.seqModel]{plot}},
#' \code{\link[=predict.seqModel]{predict}},
#' \code{\link[=residuals.seqModel]{residuals}},
#' \code{\link[=rstandard.seqModel]{rstandard}},
#' \code{\link{tslars}}, \code{\link[robustbase]{lmrob}}
#'
#' @keywords regression robust ts
#'
#' @export

tslarsP <- function(x, ...) UseMethod("tslarsP")


#' @rdname tslarsP
#' @method tslarsP formula
#' @export

tslarsP.formula <- function(formula, data, ...) {
  ## initializations
  call <- match.call()  # get function call
  call[[1]] <- as.name("tslarsP")
  # prepare model frame
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  if(is.empty.model(mt)) stop("empty model")
  # extract response and candidate predictors from model frame
  y <- model.response(mf, "numeric")
  x <- model.matrix(mt, mf)
  # remove first column for intercept, if existing
  if(attr(mt, "intercept")) x <- x[, -1, drop=FALSE]
  ## call default method
  out <- tslarsP.default(x, y, ...)
  if(inherits(out, "tslarsP")) {
    out$call <- call  # add call to return object
    out$terms <- mt   # add model terms to return object
  }
  out
}


#' @rdname tslarsP
#' @method tslarsP default
#' @export

tslarsP.default <- function(x, y, h = 1, p = 2, sMax = NA, fit = TRUE,
                            s = c(0, sMax), crit = "BIC", ncores = 1,
                            cl = NULL, model = TRUE, ...) {
  ## call fit function with classical functions for center, scale,
  ## correlation and regression
  call <- match.call()  # get function call
  call[[1]] <- as.name("tslarsP")
  out <- tslarsPFit(x, y, h=h, p=p, sMax=sMax, robust=FALSE, centerFun=mean,
                    scaleFun=sd, fit=fit, s=s, crit=crit, ncores=ncores, cl=cl,
                    model=model)
  if(inherits(out, "tslarsP")) out$call <- call  # add call to return object
  out
}


#' @rdname tslarsP
#' @export

rtslarsP <- function(x, ...) UseMethod("rtslarsP")


#' @rdname tslarsP
#' @method rtslarsP formula
#' @export

rtslarsP.formula <- function(formula, data, ...) {
  ## initializations
  call <- match.call()  # get function call
  call[[1]] <- as.name("rtslarsP")
  # prepare model frame
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  if(is.empty.model(mt)) stop("empty model")
  # extract response and candidate predictors from model frame
  y <- model.response(mf, "numeric")
  x <- model.matrix(mt, mf)
  # remove first column for intercept, if existing
  if(attr(mt, "intercept")) x <- x[, -1, drop=FALSE]
  ## call default method and modify return object
  out <- rtslarsP.default(x, y, ...)
  if(inherits(out, "tslarsP")) {
    out$call <- call  # add call to return object
    out$terms <- mt   # add model terms to return object
  }
  out
}


#' @rdname tslarsP
#' @method rtslarsP default
#' @export

rtslarsP.default <- function(x, y, h = 1, p = 2, sMax = NA, centerFun = median,
                             scaleFun = mad, regFun = lmrob, regArgs = list(),
                             combine = c("min", "euclidean", "mahalanobis"),
                             winsorize = FALSE, const = 2, prob = 0.95,
                             fit = TRUE, s = c(0, sMax), crit = "BIC",
                             ncores = 1, cl = NULL, seed = NULL,
                             model = TRUE, ...) {
  ## call fit function with classical functions for center, scale,
  ## correlation and regression
  call <- match.call()  # get function call
  call[[1]] <- as.name("rtslarsP")
  out <- tslarsPFit(x, y, h=h, p=p, sMax=sMax, robust=TRUE,
                    centerFun=centerFun, scaleFun=scaleFun, regFun=regFun,
                    regArgs=regArgs, combine=combine, winsorize=winsorize,
                    const=const, prob=prob, fit=fit, s=s, crit=crit,
                    ncores=ncores, cl=cl, seed=seed, model=model)
  if(inherits(out, "tslarsP")) out$call <- call  # add call to return object
  out
}


## fit function for fixed lag length that allows to specify functions for
## center, scale, correlation and regression
tslarsPFit <- function(x, y, h = 1, p = 2, sMax = NA,
                       robust = FALSE, centerFun = mean, scaleFun = sd,
                       regFun = lm.fit, regArgs = list(),
                       combine = c("min", "euclidean", "mahalanobis"),
                       winsorize = FALSE, const = 2, prob = 0.95, fit = TRUE,
                       s = c(0, sMax), crit = "BIC", ncores = 1, cl = NULL,
                       seed = NULL, model = TRUE) {
  ## initializations
  n <- length(y)
  x <- as.matrix(x)
  if(nrow(x) != n) stop(sprintf("'x' must have %d rows", n))
  m <- ncol(x)
  assign <- rep(seq_len(m+1), each=p)
  crit <- match.arg(crit)
  model <- isTRUE(model)
  ## call workhorse function and modify return object
  out <- grouplars(fitBlocks(x, y, h, p), y[(p+h):n], sMax=sMax, assign=assign,
                   robust=robust, centerFun=centerFun, scaleFun=scaleFun,
                   regFun=regFun, regArgs=regArgs, combine=combine,
                   winsorize=winsorize, const=const, prob=prob, fit=fit, s=s,
                   crit=crit, ncores=ncores, cl=cl, seed=seed, model=model)
  # modify return object (lagged response should have index 0)
  if(inherits(out, "grplars")) {
    out[c("active", "assign", "h", "p")] <-
      list(out$active - 1, out$assign - 1, h, p)
    # include original data rather than derived data
    if(isTRUE(model)) out[c("x", "y")] <- list(x=x, y=y)
    class(out) <- c("tslarsP", class(out))  # inherits from "grplars"
  } else out <- out - 1
  out
}
aalfons/robustHD documentation built on Sept. 30, 2023, 10:39 p.m.