VECM Models Estimation

Documented in fitVAR

#' @title Multivariate VAR estimation
#'
#' @description A function to estimate a (possibly high-dimensional)
#' multivariate VAR time series using penalized least squares methods,
#' such as ENET, SCAD or MC+.
#'
#' @usage fitVAR(data, p = 1, penalty = "ENET", method = "cv", ...)
#'
#' @param data the data from the time series: variables in columns and
#' observations in rows
#' @param p order of the VAR model
#' @param penalty the penalty function to use. Possible values
#' are \code{"ENET"}, \code{"SCAD"} or \code{"MCP"}
#' @param method possible values are \code{"cv"} or \code{"timeSlice"}
#' @param ... the options for the estimation. Global options are:
#' \code{threshold}: if \code{TRUE} all the entries smaller than the oracle
#' threshold are set to zero;
#' \code{scale}: scale the data (default = FALSE)?
#' \code{nfolds}: the number of folds used for cross validation (default = 10);
#' \code{parallel}: if \code{TRUE} use multicore backend (default = FALSE);
#' \code{ncores}: if \code{parallel} is \code{TRUE}, specify the number
#' of cores to use for parallel evaluation. Options for ENET estimation:
#' \code{alpha}: the value of alpha to use in elastic net
#' (0 is Ridge regression, 1 is LASSO (default));
#' \code{type.measure}: the measure to use for error evaluation
#' (\code{"mse"} or \code{"mae"});
#' \code{nlambda}: the number of lambdas to use in the cross
#' validation (default = 100);
#' \code{leaveOut}: in the time slice validation leave out the
#' last \code{leaveOutLast} observations (default = 15);
#' \code{horizon}: the horizon to use for estimating mse/mae (default = 1);
#' \code{picasso}: use picasso package for estimation (only available
#' for \code{penalty = "SCAD"} and \code{method = "timeSlice"}).
#'
#' @return \code{A} the list (of length \code{p}) of the estimated matrices
#' of the process
#' @return \code{fit} the results of the penalized LS estimation
#' @return \code{mse} the mean square error of the cross validation
#' @return \code{time} elapsed time for the estimation
#' @return \code{residuals} the time series of the residuals
#'
#' @export
fitVAR <- function(data, p = 1, penalty = "ENET", method = "cv", ...) {
  opt <- list(...)

  # convert data to matrix
  if (!is.matrix(data)) {
    data <- as.matrix(data)
  }

  cnames <- colnames(data)

  if (method == "cv") {

    # use CV to find lambda
    opt$method <- "cv"
    out <- cvVAR(data, p, penalty, opt)
  } else if (method == "timeSlice") {

    # use timeslice to find lambda
    opt$method <- "timeSlice"
    out <- timeSliceVAR(data, p, penalty, opt)
  } else {

    # error: unknown method
    stop("Unknown method. Possible values are \"cv\" or \"timeSlice\"")
  }

  # Add the names of the variables to the matrices
  if (!is.null(cnames)) {
    for (k in 1:length(out$A)) {
      colnames(out$A[[k]]) <- cnames
      rownames(out$A[[k]]) <- cnames
    }
  }

  return(out)
}

cvVAR <- function(data, p, penalty = "ENET", opt = NULL) {
  nc <- ncol(data)
  nr <- nrow(data)

  picasso <- ifelse(!is.null(opt$picasso), opt$picasso, FALSE)
  threshold <- ifelse(!is.null(opt$threshold), opt$threshold, FALSE)

  threshold_type <- ifelse(!is.null(opt$threshold_type),
    opt$threshold_type, "soft"
  )

  return_fit <- ifelse(!is.null(opt$return_fit), opt$return_fit, FALSE)

  if (picasso) {
    stop("picasso available only with timeSlice method.")
  }
  # transform the dataset
  tr_dt <- transformData(data, p, opt)

  if (penalty == "ENET") {

    # fit the ENET model
    t <- Sys.time()
    fit <- cvVAR_ENET(tr_dt$X, tr_dt$y, nvar = nc, opt)
    elapsed <- Sys.time() - t

    # extract what is needed
    lambda <- ifelse(is.null(opt$lambda), "lambda.min", opt$lambda)

    # extract the coefficients and reshape the matrix
    Avector <- stats::coef(fit, s = lambda)
    A <- matrix(Avector[2:length(Avector)],
      nrow = nc, ncol = nc * p,
      byrow = TRUE
    )

    mse <- min(fit$cvm)
  } else if (penalty == "SCAD") {

    # convert from sparse matrix to std matrix (SCAD does not work with sparse
    # matrices)
    tr_dt$X <- as.matrix(tr_dt$X)

    # fit the SCAD model
    t <- Sys.time()
    fit <- cvVAR_SCAD(tr_dt$X, tr_dt$y, opt)
    elapsed <- Sys.time() - t

    # extract the coefficients and reshape the matrix
    Avector <- stats::coef(fit, s = "lambda.min")
    A <- matrix(Avector[2:length(Avector)],
      nrow = nc, ncol = nc * p,
      byrow = TRUE
    )
    mse <- min(fit$cve)
  } else if (penalty == "MCP") {

    # convert from sparse matrix to std matrix (MCP does not work with sparse
    # matrices)
    tr_dt$X <- as.matrix(tr_dt$X)

    # fit the MCP model
    t <- Sys.time()
    fit <- cvVAR_SCAD(tr_dt$X, tr_dt$y, opt)
    elapsed <- Sys.time() - t

    # extract the coefficients and reshape the matrix
    Avector <- stats::coef(fit, s = "lambda.min")
    A <- matrix(Avector[2:length(Avector)],
      nrow = nc, ncol = nc * p,
      byrow = TRUE
    )
    mse <- min(fit$cve)
  } else {

    # Unknown penalty error
    stop("Unkown penalty. Available penalties are: ENET, SCAD, MCP.")
  }

  # If threshold = TRUE then set to zero all the entries that are smaller than
  # the threshold
  if (threshold == TRUE) {
    A <- applyThreshold(A, nr, nc, p, type = threshold_type)
  }

  # Get back the list of VAR matrices (of length p)
  A <- splitMatrix(A, p)

  # Now that we have the matrices compute the residuals
  res <- computeResiduals(tr_dt$series, A)

  # To extract the sd of mse
  if (penalty == "ENET") {
    ix <- which(fit$cvm == min(fit$cvm))
    mse_sd <- fit$cvsd[ix]
  } else {
    ix <- which(fit$cve == min(fit$cve))
    mse_sd <- fit$cvse[ix]
  }

  # Create the output
  output <- list()
  output$mu <- tr_dt$mu
  output$A <- A

  # Do you want the fit?
  if (return_fit == TRUE) {
    output$fit <- fit
  }

  # Return the "best" lambda
  output$lambda <- fit$lambda.min

  output$mse <- mse
  output$mse_sd <- mse_sd
  output$time <- elapsed
  output$series <- tr_dt$series
  output$residuals <- res

  # Variance/Covariance estimation
  output$sigma <- estimateCovariance(res)

  output$penalty <- penalty
  output$method <- "cv"
  attr(output, "class") <- "var"
  attr(output, "type") <- "fit"
  return(output)
}

cvVAR_ENET <- function(X, y, nvar, opt) {
  a <- ifelse(is.null(opt$alpha), 1, opt$alpha)
  nl <- ifelse(is.null(opt$nlambda), 100, opt$nlambda)
  tm <- ifelse(is.null(opt$type.measure), "mse", opt$type.measure)
  nf <- ifelse(is.null(opt$nfolds), 10, opt$nfolds)
  parall <- ifelse(is.null(opt$parallel), FALSE, opt$parallel)
  ncores <- ifelse(is.null(opt$ncores), 1, opt$ncores)

  # Vector of lambdas to work on
  if (!is.null(opt$lambdas_list)) {
    lambdas_list <- opt$lambdas_list
  } else {
    lambdas_list <- c(0)
  }

  # Assign ids to the CV-folds (useful for replication of results)
  if (is.null(opt$folds_ids)) {
    folds_ids <- numeric(0)
  } else {
    nr <- nrow(X)
    folds_ids <- rep(sort(rep(seq(nf), length.out = nr / nvar)), nvar)
  }

  if (parall == TRUE) {
    if (ncores < 1) {
      stop("The number of cores must be > 1")
    } else {
      cl <- doParallel::registerDoParallel(cores = ncores)

      if (length(folds_ids) == 0) {
        if (length(lambdas_list) < 2) {
          cvfit <- glmnet::cv.glmnet(X, y,
            alpha = a, nlambda = nl,
            type.measure = tm, nfolds = nf,
            parallel = TRUE, standardize = FALSE
          )
        } else {
          cvfit <- glmnet::cv.glmnet(X, y,
            alpha = a, lambda = lambdas_list,
            type.measure = tm, nfolds = nf,
            parallel = TRUE, standardize = FALSE
          )
        }
      } else {
        if (length(lambdas_list) < 2) {
          cvfit <- glmnet::cv.glmnet(X, y,
            alpha = a, nlambda = nl,
            type.measure = tm, foldid = folds_ids,
            parallel = TRUE, standardize = FALSE
          )
        } else {
          cvfit <- glmnet::cv.glmnet(X, y,
            alpha = a, lambda = lambdas_list,
            type.measure = tm, foldid = folds_ids,
            parallel = TRUE, standardize = FALSE
          )
        }
      }
    }
  } else {
    if (length(folds_ids) == 0) {
      if (length(lambdas_list) < 2) {
        cvfit <- glmnet::cv.glmnet(X, y,
          alpha = a, nlambda = nl,
          type.measure = tm, nfolds = nf,
          parallel = FALSE, standardize = FALSE
        )
      } else {
        cvfit <- glmnet::cv.glmnet(X, y,
          alpha = a, lambda = lambdas_list,
          type.measure = tm, nfolds = nf,
          parallel = FALSE, standardize = FALSE
        )
      }
    } else {
      if (length(lambdas_list) < 2) {
        cvfit <- glmnet::cv.glmnet(X, y,
          alpha = a, nlambda = nl,
          type.measure = tm, foldid = folds_ids,
          parallel = FALSE, standardize = FALSE
        )
      } else {
        cvfit <- glmnet::cv.glmnet(X, y,
          alpha = a, lambda = lambdas_list,
          type.measure = tm, foldid = folds_ids,
          parallel = FALSE, standardize = FALSE
        )
      }
    }
  }

  return(cvfit)
}

cvVAR_SCAD <- function(X, y, opt) {
  e <- ifelse(is.null(opt$eps), 0.01, opt$eps)
  nf <- ifelse(is.null(opt$nfolds), 10, opt$nfolds)
  parall <- ifelse(is.null(opt$parallel), FALSE, opt$parallel)
  ncores <- ifelse(is.null(opt$ncores), 1, opt$ncores)
  picasso <- ifelse(is.null(opt$picasso), FALSE, TRUE)

  if (!picasso) {
    if (parall == TRUE) {
      if (ncores < 1) {
        stop("The number of cores must be > 1")
      } else {
        cl <- parallel::makeCluster(ncores)
        cvfit <- ncvreg::cv.ncvreg(X, y,
          nfolds = nf, penalty = "SCAD",
          eps = e, cluster = cl
        )
        parallel::stopCluster(cl)
      }
    } else {
      cvfit <- ncvreg::cv.ncvreg(X, y, nfolds = nf, penalty = "SCAD", eps = e)
    }
  } else {
    cvfit <- picasso::picasso(X, y, method = "scad")
  }

  return(cvfit)
}

cvVAR_MCP <- function(X, y, opt) {
  e <- ifelse(is.null(opt$eps), 0.01, opt$eps)
  nf <- ifelse(is.null(opt$nfolds), 10, opt$nfolds)
  parall <- ifelse(is.null(opt$parallel), FALSE, opt$parallel)
  ncores <- ifelse(is.null(opt$ncores), 1, opt$ncores)

  if (parall == TRUE) {
    if (ncores < 1) {
      stop("The number of cores must be > 1")
    } else {
      cl <- parallel::makeCluster(ncores)
      cvfit <- ncvreg::cv.ncvreg(X, y,
        nfolds = nf, penalty = "MCP",
        eps = e, cluster = cl
      )
      parallel::stopCluster(cl)
    }
  } else {
    cvfit <- ncvreg::cv.ncvreg(X, y, nfolds = nf, penalty = "MCP", eps = e)
  }

  return(cvfit)
}