CovEstim: (High-dimensional) Covariance Estimation

Documented in is_posdef is_sparse make_posdef near_posdef sigma_sim sqrt_root_mat_calc

#' Simulated Covariance Matrix
#'
#' Simulates a covariance matrix with specific properties
#'
#' @param p an integer, indicating the number of variables.
#' @param corr_min a double, indicating the minimum possible correlation across the p variables.
#' @param corr_max a double, indicating the maximum possible correlation across the p variables
#' (except the correlation with self of 1).
#' @param vola_min a double, indicating the minimum possible volatility of the p variables.
#' @param vola_max a double, indicating the maximum possible volatility of the p variables.
#' @return a pxp covariance matrix.
#' @details The correlation and volatility values are drown from a uniform distribution.
#' A condition for positive-definiteness of the resulting covariance matrix is not implemented.
#'
#' @examples
#' sim_sigma <- sigma_sim(100, corr_min = 0.01, corr_max = 0.40, vola_min = 0.05, vola_max = 0.30)
#'
#' @export sigma_sim
#'
sigma_sim <- function(p, corr_min, corr_max, vola_min, vola_max) {
  corr_mat <- diag(0.5, p)
  corr_mat[lower.tri(corr_mat)] <-
    stats::runif((p^2 - p) / 2, min = corr_min, max = corr_max)
  corr_mat <- corr_mat + t(corr_mat)
  vola_mat <- diag(stats::runif(p, min = vola_min, max = vola_max))

  sigma_mat <- as.matrix(vola_mat %*% corr_mat %*% vola_mat)

  sigma_mat
}


#' Matrix Sparsity Check
#'
#' Performs a sparsity check on a data matrix.
#'
#' @param mat an nxp data matrix.
#' @return a logical, indicating whether the matrix is sparse (TRUE) or not (FALSE).
#'
#' @examples
#' data(rets_m)
#' sparse_check <- is_sparse(rets_m)
#'
#' @export is_sparse
#'
is_sparse <- function(mat) {
  mat <- as.matrix(mat)
  n <- dim(mat)[1]
  p <- dim(mat)[2]
  check <- length(which(mat == 0)) > (n * p) / 2
  check
}


#' Matrix Positive-Definiteness Check
#'
#' Performs a tolerance check for positive-definiteness of a symmetric matrix.
#'
#' @param mat a pxp matrix.
#' @param tol a double with tolerance for the eigenvalues. Default value is 1e-8.
#' @return a logical. TRUE, if the matrix mat is positive-definite.
#'
#' @examples
#' data(rets_m)
#' ml_sigma <- cov_estim_wrapper2(rets_m, "ML")
#' is_posdef(ml_sigma)
#'
#' ml_sigma <- cov_estim_wrapper2(rets_m[1:100, ], "ML")
#' is_posdef(ml_sigma)
#'
#' @export is_posdef
#'
is_posdef <- function(mat, tol = 1e-8) {
  eigenval <- eigen(mat)$values
  n <- dim(mat)[1]
  for (i in 1:n) {
    if (abs(eigenval[i]) < tol) {
      eigenval[i] <- 0
    }
  }
  check <- !(any(eigenval <= 0))
  check
}


#' Matrix Positive-Definiteness
#'
#' Produces a positive-definite matrix (to a certain tolerance).
#' Originally found under \insertCite{corpcorpackage;textual}{covestim}.
#'
#' @param mat a pxp matrix.
#' @param tol a double, the tolerance for the relative positiveness of eigenvalues compared to the largest eigenvalue.
#' Default value is tol=1e-8. If NULL, a machine tolerance approximation is applied.
#' @return a positive-definite (to the tolerance of tol) matrix.
#'
#' @examples
#' data(rets_m)
#' ml_sigma <- cov_estim_wrapper2(rets_m, "ML")
#' is_posdef(ml_sigma)
#' ml_sigma_posdef <- make_posdef(ml_sigma)
#' is_posdef(ml_sigma_posdef)
#'
#' @importFrom Rdpack reprompt
#' @references
#' \insertRef{corpcorpackage}{covestim}
#'
#' @export make_posdef
#'
make_posdef <- function(mat, tol = 1e-8) {
  p <- dim(mat)[1]
  eigen_tmp <- eigen(mat, symmetric = TRUE)
  eigenval <- eigen_tmp$values
  eigenvec <- eigen_tmp$vectors
  rm(eigen_tmp)
  gc()
  if (is.null(tol)) {
    tol <- p * max(abs(eigenval)) * .Machine$double.eps
  }
  delta <- 2 * tol
  tau <- pmax(0, delta - eigenval)
  dm <- eigenvec %*% diag(tau, p) %*% t(eigenvec)
  mat_posdef <- mat + dm

  mat_posdef
}


#' Nearest Positive-Definite Matrix
#'
#' Implements the algorithm of \insertCite{higham2002computing;textual}{covestim}
#' to compute the nearest positive-definite matrix to an approximate one,
#' typically a correlation or variance-covariance matrix.
#'
#'
#' @param mat a pxp matrix.
#' @param corr a logical, indicating if the result should be a correlation matrix. Default value is FALSE.
#' @param keep_diag a logical, indicating if the result should have the same diagonal as the original matrix mat.
#' @param do_dykstra a logical, indicating if Dykstra's correlation is to be used. Default value is TRUE.
#' @param eig_tol a double, defining the relative positiveness of eigenvalues
#' compared to the largest eigenvalue. Default value is 1e-6.
#' @param conv_tol a double, defining the convergence tolerance for the Higham algorithm. Default value is 1e-7.
#' @param posd_tol a double, defining the tolerance for enforcing positive definiteness. Default value is 1e-8.
#' @param maxit maximum number of iterations. Default value is 100.
#' @param trace a logical, indicating whether iterations are to be traced (printed out). Default value is FALSE.
#' @return a positive-definite matrix.
#' @details Note that setting corr = TRUE just sets diag(.) <- 1 within the algorithm.
#' The near_posdef() is originally found under \insertCite{matrixpackage;textual}{covestim}.
#' @examples
#'
#' data(rets_m)
#' ml_sigma <- cov_estim_wrapper2(rets_m, "ML")
#' ml_sigma_near_posdef <- near_posdef(ml_sigma, eig_tol = 1e-8)
#'
#' @importFrom Rdpack reprompt
#' @references
#' \insertAllCited
#'
#'
#' @export near_posdef
#'
near_posdef <-
  function(mat,
           corr = FALSE,
           keep_diag = FALSE,
           do_dykstra = TRUE,
           eig_tol = 1e-6,
           conv_tol = 1e-7,
           posd_tol = 1e-8,
           maxit = 100,
           trace = FALSE) {
    n <- dim(mat)[2]
    if (keep_diag) {
      diag_mat0 <- diag(mat)
    }
    if (do_dykstra) {
      ds_mat <- matrix(0, n, n)
    }

    iter <- 0
    converged <- FALSE
    conv <- Inf

    while (iter < maxit && !converged) {
      mat_y <- mat
      if (do_dykstra) {
        r_mat <- mat_y - ds_mat
      }

      ## project onto PSD matrices  X_k  =  P_S (R_k)
      eigen_tmp <-
        eigen(if (do_dykstra) {
          r_mat
        } else {
          mat_y
        }, symmetric = TRUE)
      eigenvec <- eigen_tmp$vectors
      eigenval <- eigen_tmp$values

      ## create mask from relative positive eigenvalues
      eigenval_log <- eigenval > eig_tol * eigenval[1]
      if (!any(eigenval_log)) {
        stop("Matrix seems negative semi-definite")
      }

      ## use p mask to only compute 'positive' part
      eigenvec <- eigenvec[, eigenval_log, drop = FALSE]
      mat <-
        tcrossprod(eigenvec * rep(eigenval[eigenval_log], each = nrow(eigenvec)), eigenvec)

      if (do_dykstra) {
        ## update Dykstra's correction D_S = \Delta S_k
        ds_mat <- mat - r_mat
      }

      if (corr) {
        diag(mat) <- 1
      } else if (keep_diag) {
        diag(mat) <- diag_mat0
      }

      conv <- norm(mat_y - mat, "I") / norm(mat_y, "I")
      iter <- iter + 1

      if (trace) {
        cat(sprintf(
          "iter %3d : #{p}=%d, ||Y-X|| / ||Y||= %11g\n",
          iter,
          sum(eigenval),
          conv
        ))
      }

      converged <- (conv <= conv_tol)
    }

    if (!converged) {
      warning(gettextf("'nearPosDef()' did not converge in %d iterations", iter),
        domain = NA
      )
    }

    eigen_tmp <- eigen(mat, symmetric = TRUE) # e
    eigenval <- eigen_tmp$values
    eps <- posd_tol * abs(eigenval[1])
    if (eigenval[n] < eps) {
      eigenval[eigenval < eps] <- eps
      eigenvec <- eigen_tmp$vectors
      o_diag <- diag(mat)
      mat <- eigenvec %*% (eigenval * t(eigenvec))
      d_mat <- sqrt(pmax(eps, o_diag) / diag(mat))
      mat[] <- d_mat * mat * rep(d_mat, each = n)
    }

    if (corr) {
      diag(mat) <- 1
    } else if (keep_diag) {
      diag(mat) <- diag_mat0
    }
    sigma_mat <- as.matrix(mat)

    return(sigma_mat)
  }


#' Squared Root of a Matrix
#'
#' Calculates the squared root of a matrix.
#'
#' @param mat a positive-definite pxp matrix.
#' @return a pxp matrix, the squared root of mat.
#'
#' @details The squared root of a positive-definite symmetric matrix X is calculated with the following formula:
#' \deqn{\sqrt{X}=\Delta \sqrt{\Lambda}\Delta^{-1},}
#' where \eqn{\Delta} is the matrix of eigenvectors and
#' \eqn{\sqrt{\Lambda}} is a diagonal matrix with the squared roots of the eigenvalues of X.
#'
#' @examples
#' data(rets_m)
#' sigma_ml <- cov_estim_wrapper2(rets_m, "ML")
#' sigma_ml_sqrt_root <- sqrt_root_mat_calc(sigma_ml)
#'
#' @export sqrt_root_mat_calc
#'
sqrt_root_mat_calc <- function(mat) {
  eigen_tmp <- eigen(mat)
  eigenval <- eigen_tmp$values
  eigenvec <- eigen_tmp$vectors
  sqrt_roor_mat <-
    eigenvec %*% diag(sqrt(eigenval)) %*% solve(eigenvec)

  sqrt_roor_mat
}

antshi/CovEstim documentation built on June 10, 2025, 3:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

antshi/CovEstim
(High-dimensional) Covariance Estimation

R/cov_help_funcs.R
In antshi/CovEstim: (High-dimensional) Covariance Estimation

Defines functions sqrt_root_mat_calc near_posdef make_posdef is_posdef is_sparse sigma_sim

Documented in is_posdef is_sparse make_posdef near_posdef sigma_sim sqrt_root_mat_calc

R Package Documentation

Browse R Packages

We want your feedback!

antshi/CovEstim (High-dimensional) Covariance Estimation

R/cov_help_funcs.R In antshi/CovEstim: (High-dimensional) Covariance Estimation

Defines functions sqrt_root_mat_calc near_posdef make_posdef is_posdef is_sparse sigma_sim

Documented in is_posdef is_sparse make_posdef near_posdef sigma_sim sqrt_root_mat_calc

R Package Documentation

Browse R Packages

We want your feedback!

antshi/CovEstim
(High-dimensional) Covariance Estimation

R/cov_help_funcs.R
In antshi/CovEstim: (High-dimensional) Covariance Estimation