CovEstim: (High-dimensional) Covariance Estimation

Documented in cov_estim_glasso

#' Graphical Lasso Covariance Estimation
#'
#' Computes the Graphical Lasso (GLASSO) estimator of the covariance matrix.
#'
#' @param data an nxp data matrix.
#' @param rho a double or a sequence, the non-negative regularization parameter \eqn{\rho} for lasso.
#' \eqn{\rho=0} means no regularization. Can be a scalar (usual) or a symmetric p by p matrix, or a vector of length p.
#' In the latter case, the penalty matrix has jkth element \eqn{\sqrt{\rho_j*\rho_k}}. Default value is NULL and
#' an optimal regularization is computed with the cross-validation (CV) procedure
#' as in \insertCite{cvglassopackage;textual}{covestim}.
#' @param type a character, the type of matrix to be estimated.
#' Possible values are c("cor", "cov"). Default value is "cor" for the correlation matrix.
#' @param nfolds an integer, indicating the number of folds for the CV. Default value is 5.
#' @param crit a character, indicating which selection criterion within the CV.
#' Possible values are "loglik", "AIC" and "BIC". Default is set to "BIC".
#' @param pendiag_log a logical, indicating whether the diagonal of the sample covariance matrix
#' is to be penalized (TRUE) or not (FALSE). Default value is FALSE.
#' @param start a character, specifying the start type of the glasso algorithm.
#' Possible values are "warm" or "cold". Default value is "cold".
#' @param tol a double, indicating the tolerance for the glasso algorithm. Default value is set to 1e-05.
#' @param maxit an integer, indicating the maximum number of iterations for the glasso algorithm.
#' Default value is set to 10000.
#' @param cores an integer, indicating how many cores should be used for the CV.
#' Default value is 1. cores cannot be higher than the maximum number of cores of the processor in use.
#' @param seed an integer, the seed for the performed cross-validation. Default value is 1234.
#'
#' @return a list with the following entries
#' \itemize{
#' \item a pxp estimated covariance matrix.
#' \item an estimation specific tuning parameter, here the lasso penalty.
#' }
#'
#' @details The GLASSO estimator is elaborated in detail in \insertCite{friedman2008sparse;textual}{covestim}.
#' More information on the functionality can be found in
#' \insertCite{glassopackage;textual}{covestim} and \insertCite{cvglassopackage;textual}{covestim}.
#'
#'
#' @examples
#' data(rets_m)
#' sigma_glasso <- cov_estim_glasso(rets_m, type = "cov", rho = 0.0001)[[1]]
#'
#' @import glasso CVglasso parallel doParallel foreach Rdpack
#' @references
#' \insertAllCited
#'
#' @export cov_estim_glasso
#'
cov_estim_glasso <-
  function(data,
           rho = NULL,
           type = "cor",
           nfolds = 5,
           crit = "loglik",
           pendiag_log = FALSE,
           start = "warm",
           tol = 1e-04,
           maxit = 10000,
           cores = 1,
           seed = 1234) {
    data <- as.matrix(data)
    names_data <- colnames(data)

    if (type == "cov") {
      sigma_sample <- stats::var(data, na.rm = TRUE)

      if (is.null(rho)) {
        sigma_sample_diag0 <- sigma_sample
        diag(sigma_sample_diag0) <- 0
        rho_max <- max(abs(sigma_sample_diag0))
        rho_min <- 0.001 * rho_max
        rho <-
          10^seq(log10(rho_min), log10(rho_max), length = 100)
        rm(data, sigma_sample, sigma_sample_diag0)
        gc()
      }
      results <-
        CVglasso::CVglasso(
          X = data,
          lam = rho,
          K = nfolds,
          crit.cv = crit,
          diagonal = pendiag_log,
          start = start,
          tol = tol,
          maxit = maxit,
          cores = cores,
          trace = "none"
        )
      rho <- results$Tuning[2]
      sigma_mat <- results$Sigma
      rownames(sigma_mat) <- names_data
      colnames(sigma_mat) <- names_data
    } else if (type == "cor") {
      if (is.null(rho)) {
        rho <- sort(exp(seq(log(1 / 100), log(1), length.out = 100)))
      }

      results <-
        cvglasso_model(
          X = data,
          lam = rho,
          seed = seed,
          K = nfolds,
          crit.cv = crit,
          start = start,
          tol = tol,
          maxit = maxit,
          cores = cores,
          trace = "none"
        )

      rho <- results$Tuning[2]
      vola_mat <- diag(apply(data, 2, stats::sd, na.rm = TRUE))
      sigma_mat <- vola_mat %*% results$Sigma %*% vola_mat

      rownames(sigma_mat) <- names_data
      colnames(sigma_mat) <- names_data
    } else {
      stop("Type not recognized. Please, enter either cov or cor.")
    }

    list(sigma_mat, rho)
  }


cvglasso_model <- function(X = NULL,
                           lam = NULL,
                           seed = 1234,
                           path = FALSE,
                           tol = 1e-04,
                           maxit = 10000,
                           adjmaxit = NULL,
                           K = 5,
                           crit.cv = c("loglik", "AIC", "BIC"),
                           start = c("warm", "cold"),
                           cores = 1,
                           trace = c("progress", "print", "none")) {
  if (is.null(X)) {
    stop("Must provide entry for X!")
  }
  if (!all(lam > 0)) {
    stop("lam must be positive!")
  }
  if (!(all(c(tol, maxit, adjmaxit, K, cores) > 0))) {
    stop("Entry must be positive!")
  }
  if (!(all(sapply(
    c(
      tol, maxit, adjmaxit, K, cores
    ),
    length
  ) <= 1))) {
    stop("Entry must be single value!")
  }
  if (all(c(maxit, adjmaxit, K, cores) %% 1 != 0)) {
    stop("Entry must be an integer!")
  }
  if (cores < 1) {
    stop("Number of cores must be positive!")
  }
  if (cores > 1 && path) {
    cat("\nParallelization not possible when producing solution path. Setting cores = 1...")
    cores <- 1
  }

  K <- ifelse(path, 1, K)

  if (cores > K) {
    cat("\nNumber of cores exceeds K... setting cores = K")
    cores <- K
  }
  if (is.null(adjmaxit)) {
    adjmaxit <- maxit
  }

  # match values
  crit.cv <- match.arg(crit.cv)
  start <- match.arg(start)
  trace <- match.arg(trace)
  call <- match.call()
  MIN.error <- AVG.error <- CV.error <- NULL
  n <- nrow(X)

  # compute sample correlation matrix
  C <- stats::cor(X)
  Cminus <- C
  diag(Cminus) <- 0

  # compute grid of lam values, if necessary
  if (is.null(lam)) {
    lam <- sort(exp(seq(log(1 / 100), log(1), length.out = 100)))
  } else {
    # sort lambda values
    lam <- sort(lam)
  }

  # perform cross validation, if necessary
  if ((length(lam) > 1) && (!is.null(X) || path)) {
    # run CV in parallel?
    if (cores > 1) {
      # execute CVP
      GLASSO <- CVP_mod(
        X = X,
        lam = lam,
        tol = tol,
        maxit = maxit,
        adjmaxit = adjmaxit,
        K = K,
        crit.cv = crit.cv,
        start = start,
        cores = cores,
        trace = trace
      )
      MIN.error <- GLASSO$min.error
      AVG.error <- GLASSO$avg.error
      CV.error <- GLASSO$cv.error
    } else {
      GLASSO <- CV_mod(
        X = X,
        lam = lam,
        path = path,
        tol = tol,
        maxit = maxit,
        adjmaxit = adjmaxit,
        K = K,
        crit.cv = crit.cv,
        start = start,
        trace = trace
      )
      MIN.error <- GLASSO$min.error
      AVG.error <- GLASSO$avg.error
      CV.error <- GLASSO$cv.error
      Path <- GLASSO$path
    }

    # print warning if lam on boundary
    if ((GLASSO$lam == lam[1]) && (length(lam) != 1) && !path) {
      cat(
        "\nOptimal tuning parameter on boundary... consider providing a smaller lam value or decreasing lam.min.ratio!"
      )
    }

    # provide estimate that is pd and dual feasible
    alpha <- min(c(GLASSO$lam / max(abs(Cminus)), 1))
    init <- (1 - alpha) * C
    diag(init) <- diag(C)

    # compute final estimate at best tuning parameters
    lam_ <- GLASSO$lam
    GLASSO <- glasso::glasso(
      s = C,
      rho = lam_,
      thr = tol,
      maxit = maxit,
      penalize.diagonal = FALSE,
      start = "warm",
      w.init = init,
      wi.init = diag(ncol(C)),
      trace = FALSE
    )
    GLASSO$lam <- lam_
  } else {
    # execute ADMM_sigmac
    if (length(lam) > 1) {
      stop("Must set specify X, set path = TRUE, or provide single value for lam.")
    }

    # provide estimate that is pd and dual feasible
    alpha <- min(c(lam / max(abs(Cminus)), 1))
    init <- (1 - alpha) * C
    diag(init) <- diag(C)

    GLASSO <- glasso::glasso(
      s = C,
      rho = lam,
      thr = tol,
      maxit = maxit,
      penalize.diagonal = FALSE,
      start = "warm",
      w.init = init,
      wi.init = diag(ncol(C)),
      trace = FALSE
    )
    GLASSO$lam <- lam
  }

  diag_pen <- 1 - diag(ncol(C))

  # compute penalized loglik
  loglik <- sum(GLASSO$wi * C) - determinant(GLASSO$wi, logarithm = TRUE)$modulus[1]
  +GLASSO$lam * sum(abs(diag_pen * GLASSO$wi))

  # return values
  tuning <- matrix(c(log10(GLASSO$lam), GLASSO$lam), ncol = 2)
  colnames(tuning) <- c("log10(lam)", "lam")
  if (!path) {
    Path <- NULL
  }

  returns <- list(
    Call = call,
    Iterations = GLASSO$niter,
    Tuning = tuning,
    Lambdas = lam,
    maxit = maxit,
    omega_mat = GLASSO$wi,
    Sigma = GLASSO$w,
    Path = Path,
    Loglik = loglik,
    MIN.error = MIN.error,
    AVG.error = AVG.error,
    CV.error = CV.error
  )

  class(returns) <- "CVglasso"
  return(returns)
}

CV_mod <- function(X = NULL,
                   lam = NULL,
                   seed = 1234,
                   path = FALSE,
                   tol = 1e-04,
                   maxit = 10000,
                   adjmaxit = NULL,
                   K = 5,
                   crit.cv = c("loglik", "AIC", "BIC"),
                   start = c("warm", "cold"),
                   cores = 1,
                   trace = c("progress", "print", "none")) {
  set.seed(seed)

  # match values
  crit.cv <- match.arg(crit.cv)
  start <- match.arg(start)
  trace <- match.arg(trace)
  lam <- sort(lam)

  # initialize
  Path <- NULL
  initmaxit <- maxit

  # compute sample correlation matrix
  C <- stats::cor(X)
  C.train <- C.valid <- C

  CV_errors <- array(0, c(length(lam), K))

  # set progress bar
  if (trace == "progress") {
    progress <- utils::txtProgressBar(max = K * length(lam), style = 3)
  }

  # no need to create folds if K = 1
  if (K == 1) {
    # initialize Path, if necessary
    if (path) {
      Path <- array(0, c(ncol(C), ncol(C), length(lam)))
    }
  } else {
    # designate folds and shuffle -- ensures randomized folds
    n <- nrow(X)
    ind <- sample(n)
  }

  # parse data into folds and perform CV
  for (k in 1:K) {
    if (K > 1) {
      # training set
      leave.out <- ind[(1 + floor((k - 1) * n / K)):floor(k *
        n / K)]
      X.train <- X[-leave.out, , drop = FALSE]
      X_bar <- apply(X.train, 2, mean)
      X.train <- scale(X.train, center = X_bar, scale = FALSE)

      # validation set
      X.valid <- X[leave.out, , drop = FALSE]
      X.valid <- scale(X.valid, center = X_bar, scale = FALSE)

      C.train <- stats::cor(X.train)
      C.valid <- stats::cor(X.valid)
    }
    rm(X.valid, X.train, X_bar)
    gc()

    # re-initialize values for each fold
    maxit <- initmaxit
    init <- C.train
    initOmega <- diag(ncol(C.train))

    # provide estimate that is pd and dual feasible
    Cminus <- C.train
    diag(Cminus) <- 0
    alpha <- min(c(lam[1] / max(abs(Cminus)), 1))
    init <- (1 - alpha) * C.train
    diag(init) <- diag(C.train)


    # loop over all tuning parameters
    for (i in seq_along(lam)) {
      # set temporary tuning parameter
      lam_ <- lam[i]

      # compute the penalized likelihood precision matrix estimator
      GLASSO <- glasso::glasso(
        s = C.train,
        rho = lam_,
        thr = tol,
        maxit = maxit,
        penalize.diagonal = FALSE,
        start = "warm",
        w.init = init,
        wi.init = initOmega,
        trace = FALSE
      )

      if (start == "warm") {
        # option to save initial values for warm starts
        init <- GLASSO$w
        initOmega <- GLASSO$wi
        maxit <- adjmaxit
      }

      CV_errors[i, k] <- sum(GLASSO$wi * C.valid) - determinant(GLASSO$wi, logarithm = TRUE)$modulus[1]

      # update for crit.cv, if necessary
      if (crit.cv == "AIC") {
        CV_errors[i, k] <- CV_errors[i, k] + sum(GLASSO$wi !=
          0)
      }
      if (crit.cv == "BIC") {
        CV_errors[i, k] <- CV_errors[i, k] + sum(GLASSO$wi !=
          0) * log(n) / 2
      }

      # save estimate if path = TRUE
      if (path) {
        Path[, , i] <- GLASSO$wi
      }

      # update progress bar
      if (trace == "progress") {
        utils::setTxtProgressBar(progress, i + (k - 1) * length(lam))

        # if not quiet, then print progress lambda
      } else if (trace == "print") {
        cat("\nFinished lam = ", paste(lam_, sep = ""))
      }
    }

    # if not quiet, then print progress kfold
    if (trace == "print") {
      cat("\nFinished fold ", paste(k, sep = ""))
    }
  }

  # determine optimal tuning parameters
  AVG <- apply(CV_errors, 1, mean)
  best_lam <- lam[which.min(AVG)]
  error <- min(AVG)


  # return best lam and alpha values
  return(
    list(
      lam = best_lam,
      path = Path,
      min.error = error,
      avg.error = AVG,
      cv.error = CV_errors
    )
  )
}


CVP_mod <- function(X = NULL,
                    lam = exp(seq(log(1 / 100), log(1), length.out = 100)),
                    seed = 1234,
                    tol = 1e-04,
                    maxit = 10000,
                    adjmaxit = NULL,
                    K = 5,
                    crit.cv = c("loglik", "AIC", "BIC"),
                    start = c("warm", "cold"),
                    cores = 1,
                    trace = c("progress", "print", "none")) {
  set.seed(seed)

  # match values
  crit.cv <- match.arg(crit.cv)
  start <- match.arg(start)
  trace <- match.arg(trace)
  lam <- sort(lam)

  # make cluster and register cluster
  num_cores <- parallel::detectCores()
  if (cores > num_cores) {
    cat("\nOnly detected", paste(num_cores, "cores...", sep = " "))
  }
  if (cores > K) {
    cat("\nNumber of cores exceeds K... setting cores = K")
    cores <- K
  }

  cluster <- parallel::makeCluster(cores, type = "FORK")
  doParallel::registerDoParallel(cluster)

  # use cluster for each fold in CV
  n <- nrow(X)
  ind <- sample(n)
  k <- NULL
  CV <- foreach::foreach(
    k = 1:K,
    .packages = "glasso",
    .combine = "cbind",
    .inorder = FALSE
  ) %dopar% {
    # set progress bar
    if (trace == "progress") {
      progress <- utils::txtProgressBar(max = length(lam), style = 3)
    }

    # training set
    leave.out <- ind[(1 + floor((k - 1) * n / K)):floor(k * n / K)]
    X.train <- X[-leave.out, , drop = FALSE]
    X_bar <- apply(X.train, 2, mean)
    X.train <- scale(X.train, center = X_bar, scale = FALSE)
    n.train <- nrow(X.train)

    # validation set
    X.valid <- X[leave.out, , drop = FALSE]
    X.valid <- scale(X.valid, center = X_bar, scale = FALSE)
    n.valid <- nrow(X.valid)

    C.train <- stats::cor(X.train)
    C.valid <- stats::cor(X.valid)

    # initial estimates
    init <- C.train
    initOmega <- diag(ncol(C.train))
    CV_error <- array(0, length(lam))


    # provide estimate that is pd and dual feasible
    Cminus <- C.train
    diag(Cminus) <- 0
    alpha <- min(c(lam[1] / max(abs(Cminus)), 1))
    init <- (1 - alpha) * C.train
    diag(init) <- diag(C.train)

    # loop over all tuning parameters
    for (i in seq_along(lam)) {
      # set temporary tuning parameter
      lam_ <- lam[i]

      # compute the penalized likelihood precision matrix estimator
      GLASSO <- glasso::glasso(
        s = C.train,
        rho = lam_,
        thr = tol,
        maxit = maxit,
        penalize.diagonal = FALSE,
        start = "warm",
        w.init = init,
        wi.init = initOmega,
        trace = FALSE
      )

      if (start == "warm") {
        # option to save initial values for warm starts
        init <- GLASSO$w
        initOmega <- GLASSO$wi
        maxit <- adjmaxit
      }

      # compute the observed negative validation loglikelihood
      CV_error[i] <- sum(GLASSO$wi * C.valid) - determinant(GLASSO$wi, logarithm = TRUE)$modulus[1]

      # update for crit.cv, if necessary
      if (crit.cv == "AIC") {
        CV_error[i] <- CV_error[i] + sum(GLASSO$wi != 0)
      }
      if (crit.cv == "BIC") {
        CV_error[i] <- CV_error[i] + sum(GLASSO$wi != 0) *
          log(n) / 2
      }

      # update progress bar
      if (trace == "progress") {
        utils::setTxtProgressBar(progress, i)

        # if not quiet, then print progress lambda
      } else if (trace == "print") {
        cat("\nFinished lam = ", paste(lam_, sep = ""))
      }
    }

    # return CV errors
    return(CV_error)
  }

  # determine optimal tuning parameters
  AVG <- apply(CV, 1, mean)
  best_lam <- lam[which.min(AVG)]
  error <- min(AVG)

  # stop cluster
  parallel::stopCluster(cluster)

  # return best lam and alpha values
  return(list(
    lam = best_lam,
    min.error = error,
    avg.error = AVG,
    cv.error = CV
  ))
}