# R/helper-cov.r In sparsediscrim: Sparse and Regularized Discriminant Analysis

#### Documented in cov_eigencov_listcov_mlecov_poolcov_shrink_diagrda_covrda_weights

#' Computes the maximum likelihood estimator for the sample covariance matrix
#' under the assumption of multivariate normality.
#'
#' For a sample matrix, x, we compute the sample covariance matrix of the
#' data as the maximum likelihood estimator (MLE) of the population covariance
#' matrix.
#'
#' If the diag option is set to TRUE, then we assume the population
#' covariance matrix is diagonal, and the MLE is computed under this assumption.
#' In this case, we return a vector of length p instead.
#'
#' @export
#' @importFrom stats var cov
#' @param x data matrix with n observations and p feature vectors
#' @param diag logical value. If TRUE, assumes the population covariance matrix
#' is diagonal. By default, we assume that diag is FALSE.
#' @return sample covariance matrix of size \eqn{p \times p}. If diag is
#' TRUE, then a vector of length p is returned instead.
cov_mle <- function(x, diag = FALSE) {
x <- as.matrix(x)
n <- nrow(x)
if (diag) {
(n - 1) / n * apply(x, 2, var)
} else {
(n - 1) / n * cov(x)
}
}

#' Computes the pooled maximum likelihood estimator (MLE) for the common
#' covariance matrix
#'
#' For the matrix x, we compute the MLE for the population covariance
#' matrix under the assumption that the data are sampled from \eqn{K}
#' multivariate normal populations having equal covariance matrices.
#'
#' @export
#' @importFrom stats cov
#' @param x data matrix with n observations and p feature vectors
#' @param y class labels for observations (rows) in x
#' @return pooled sample covariance matrix of size \eqn{p \times p}
#' @examples
#' cov_pool(iris[, -5], iris$Species) cov_pool <- function(x, y) { x <- as.matrix(x) y <- as.factor(y) n <- length(y) scatter_matrices <- tapply(seq_len(n), y, function(i) { (length(i) - 1) * cov(as.matrix(x[i, ])) }) as.matrix(Reduce("+", scatter_matrices) / n) } #' Computes the covariance-matrix maximum likelihood estimators for each class #' and returns a list. #' #' For a sample matrix, x, we compute the MLE for the covariance matrix #' for each class given in the vector, y. #' @export #' @param x data matrix with n observations and p feature vectors #' @param y class labels for observations (rows) in x #' @return list of the sample covariance matrices of size \eqn{p \times p} for #' each class given in y. cov_list <- function(x, y) { x <- as.matrix(x) y <- as.factor(y) tapply(seq_along(y), y, function(i) { cov_mle(x[i, ]) }) } #' Computes the eigenvalue decomposition of the maximum likelihood estimators #' (MLE) of the covariance matrices for the given data matrix #' #' For the classes given in the vector y, we compute the eigenvalue #' (spectral) decomposition of the class sample covariance matrices (MLEs) using #' the data matrix x. #' #' If the fast argument is selected, we utilize the so-called Fast #' Singular Value Decomposition (SVD) to quickly compute the eigenvalue #' decomposition. To compute the Fast SVD, we use the [corpcor::fast.svd()] #' function, which employs a well-known trick for tall data (large n, #' small p) and wide data (large p, small n) to compute the #' SVD corresponding to the nonzero singular values. For more information about #' the Fast SVD, see [corpcor::fast.svd()]. #' #' @importFrom corpcor fast.svd #' @export #' @param x data matrix with n observations and p feature vectors #' @param y class labels for observations (rows) in x #' @param pool logical. Should the sample covariance matrices be pooled? #' @param fast logical. Should the Fast SVD be used? See details. #' @param tol tolerance value below which the singular values of x are #' considered zero. #' @return a list containing the eigendecomposition for each class. If #' pool = TRUE, then a single list is returned. #' @examples #' cov_eigen(x = iris[, -5], y = iris[, 5]) #' cov_eigen(x = iris[, -5], y = iris[, 5], pool = TRUE) #' cov_eigen(x = iris[, -5], y = iris[, 5], pool = TRUE, fast = TRUE) #' #' # Generates a data set having fewer observations than features. #' # We apply the Fast SVD to compute the eigendecomposition corresponding to the #' # nonzero eigenvalues of the covariance matrices. #' set.seed(42) #' n <- 5 #' p <- 20 #' num_classes <- 3 #' x <- lapply(seq_len(num_classes), function(k) { #' replicate(p, rnorm(n, mean = k)) #' }) #' x <- do.call(rbind, x) #' colnames(x) <- paste0("x", 1:ncol(x)) #' y <- gl(num_classes, n) #' cov_eigen(x = x, y = y, fast = TRUE) #' cov_eigen(x = x, y = y, pool = TRUE, fast = TRUE) cov_eigen <- function(x, y, pool = FALSE, fast = FALSE, tol = 1e-6) { x <- as.matrix(x) y <- as.factor(y) if (fast) { # First, we center each class. x_centered <- center_data(x = x, y = y) if (pool) { n <- nrow(x_centered) svd_out <- fast.svd(x_centered) eigenvals <- svd_out$d^2 / n
# We retain only the 'q' eigenvalues greater than 'tol'. Effectively, 'q'
# is our estimate for the rank of the covariance matrix.
q <- sum(eigenvals > tol)
seq_q <- seq_len(q)
eigen_out <- list(vectors = svd_out$v[, seq_q], values = eigenvals[seq_q]) } else { eigen_out <- tapply(seq_along(y), y, function(i) { n <- length(i) svd_out <- fast.svd(x_centered[i, ]) eigenvals <- svd_out$d^2 / n
# We retain only the 'q' eigenvalues greater than 'tol'. Effectively, 'q'
# is our estimate for the rank of the covariance matrix.
q <- sum(eigenvals > tol)
seq_q <- seq_len(q)
list(vectors = svd_out\$v[, seq_q], values = eigenvals[seq_q])
})
}
} else {
if (pool) {
eigen_out <- eigen(cov_pool(x = x, y = y), symmetric = TRUE)
} else {
eigen_out <- lapply(cov_list(x = x, y = y), eigen, symmetric = TRUE)
}
}
eigen_out
}

#' Computes the observation weights for each class for the HDRDA classifier
#'
#' This function calculates the weight for each observation in the data matrix
#' x in order to calculate the covariance matrices employed in the HDRDA
#' classifier, implemented in [rda_high_dim()].
#'
#' @inheritParams lda_diag
#' @param y vector of class labels for each training observation
#' @param lambda the RDA pooling parameter. Must be between 0 and 1, inclusively.
#' @return list containing the observations for each class given in y
#'
#' @references Ramey, J. A., Stein, C. K., and Young, D. M. (2013),
#' "High-Dimensional Regularized Discriminant Analysis."
rda_weights <- function(x, y, lambda = 1) {
x <- as.matrix(x)
y <- as.factor(y)
N <- length(y)
tapply(seq_along(y), y, function(i) {
n_k <- length(i)
weights_k <- lambda / N + (1 - lambda) / n_k
replace(rep(lambda / N, N), i, weights_k)
})
}

#' Calculates the RDA covariance-matrix estimators for each class
#'
#' For the classes given in the vector y, this function calculates the
#' class covariance-matrix estimators employed in the HDRDA classifier,
#' implemented in [rda_high_dim()].
#'
#' @inheritParams lda_diag
#' @param y vector of class labels for each training observation
#' @param lambda the RDA pooling parameter. Must be between 0 and 1, inclusively.
#' @return list containing the RDA covariance-matrix estimators for each class
#' given in y
#'
#' @references Ramey, J. A., Stein, C. K., and Young, D. M. (2013),
#' "High-Dimensional Regularized Discriminant Analysis."
rda_cov <- function(x, y, lambda = 1) {
x_centered <- center_data(x = x, y = y)
weights <- rda_weights(x = x, y = y, lambda = lambda)
lapply(weights, function(weights_k) {
x_scaled <- sqrt(weights_k) * x_centered
crossprod(x_scaled)
})
}

#' Computes a shrunken version of the maximum likelihood estimator for the
#' sample covariance matrix under the assumption of multivariate normality.
#'
#' For a sample matrix, x, we compute the sample covariance matrix as the
#' maximum likelihood estimator (MLE) of the population covariance matrix and
#' shrink it towards its diagonal.
#'
#' Let \eqn{\widehat{\Sigma}} be the MLE of the covariance matrix \eqn{\Sigma}.
#' Then, we shrink the MLE towards its diagonal by computing
#' \deqn{\widehat{\Sigma}(\gamma) = \gamma \widehat{\Sigma} + (1 - \gamma)
#' \widehat{\Sigma} \circ I_p,} where \eqn{\circ} denotes the Hadamard product
#' and \eqn{\gamma \in [0,1]}.
#'
#' For \eqn{\gamma < 1}, the resulting shrunken covariance matrix estimator is
#' positive definite, and for \eqn{\gamma = 1}, we simply have the MLE, which can
#' potentially be positive semidefinite (singular).
#'
#' The estimator given here is based on Section 18.3.1 of the Hastie et
#' al. (2008) text.
#'
#' @export
#' @param x data matrix with n observations and p feature vectors
#' @param gamma the shrinkage parameter. Must be between 0 and 1, inclusively.
#' By default, the shrinkage parameter is 1, which simply yields the MLE.
#' @return shrunken sample covariance matrix of size \eqn{p \times p}
#'
#' @references Hastie, T., Tibshirani, R., and Friedman, J. (2008), "The
#' Elements of Statistical Learning: Data Mining, Inference, and Prediction,"
#' 2nd edition. \url{http://web.stanford.edu/~hastie/ElemStatLearn/}
cov_shrink_diag <- function(x, gamma = 1) {
if (gamma < 0 || gamma > 1) {
rlang::abort("The value of 'gamma' must be between 0 and 1, inclusively.")
}
cov_x <- cov_mle(x)
gamma * cov_x + (1 - gamma) * diag(diag(cov_x))
}


## Try the sparsediscrim package in your browser

Any scripts or data that you put into this service are public.

sparsediscrim documentation built on July 1, 2021, 9:07 a.m.