# R/data-block-autocorrelation.r In sparsediscrim: Sparse and Regularized Discriminant Analysis

#### Documented in cov_autocorrelationcov_block_autocorrelationgenerate_blockdiag

#' Generates data from K multivariate normal data populations, where each
#' population (class) has a covariance matrix consisting of block-diagonal
#' autocorrelation matrices.
#'
#' This function generates K multivariate normal data sets, where each
#' class is generated with a constant mean vector and a covariance matrix
#' consisting of block-diagonal autocorrelation matrices. The data are returned
#' as a single matrix x along with a vector of class labels y that
#' indicates class membership.
#'
#' For simplicity, we assume that a class mean vector is constant for each
#' feature. That is, we assume that the mean vector of the \eqn{k}th class is
#' \eqn{c_k * j_p}, where \eqn{j_p} is a \eqn{p \times 1} vector of ones and
#' \eqn{c_k} is a real scalar.
#'
#' The \eqn{k}th class covariance matrix is defined as
#' \deqn{\Sigma_k = \Sigma^{(\rho)} \oplus \Sigma^{(-\rho)} \oplus \ldots
#' \oplus \Sigma^{(\rho)},} where \eqn{\oplus} denotes the direct sum and the
#' \eqn{(i,j)}th entry of \eqn{\Sigma^{(\rho)}} is
#' \deqn{\Sigma_{ij}^{(\rho)} = \{ \rho^{|i - j|} \}.}
#'
#' The matrix \eqn{\Sigma^{(\rho)}} is referred to as a block. Its dimensions
#' are provided in the block_size argument, and the number of blocks are
#' specified in the num_blocks argument.
#'
#' Each matrix \eqn{\Sigma_k} is generated by the
#' [cov_block_autocorrelation()] function.
#'
#' The number of classes K is determined with lazy evaluation as the
#' length of n.
#'
#' The number of features p is computed as \code{block_size * num_blocks}.
#'
#' @importFrom mvtnorm rmvnorm
#' @export
#' @param n vector of the sample sizes of each class. The length of n
#' determines the number of classes K.
#' @param mu matrix containing the mean vectors for each class. Expected to have
#' p rows and K columns.
#' @param num_blocks the number of block matrices. See details.
#' @param block_size the dimensions of the square block matrix. See details.
#' @param rho vector of the values of the autocorrelation parameter for each
#' class covariance matrix. Must equal the length of n (i.e., equal to
#' K).
#' @param sigma2 vector of the variance coefficients for each class covariance
#' matrix. Must equal the length of n (i.e., equal to K).
#' @return named list with elements:
#' \itemize{
#'   \item x: matrix of observations with n rows and p
#' columns
#'   \item y: vector of class labels that indicates class membership for
#' each observation (row) in x.
#' }
#' @examples
#' # Generates data from K = 3 classes.
#' means <- matrix(rep(1:3, each=9), ncol=3)
#' data <- generate_blockdiag(n = c(15, 15, 15), block_size = 3, num_blocks = 3,
#' rho = seq(.1, .9, length = 3), mu = means)
#' data$x #' data$y
#'
#' # Generates data from K = 4 classes. Notice that we use specify a variance.
#' means <- matrix(rep(1:4, each=9), ncol=4)
#' data <- generate_blockdiag(n = c(15, 15, 15, 20), block_size = 3, num_blocks = 3,
#' rho = seq(.1, .9, length = 4), mu = means)
#' data$x #' data$y
generate_blockdiag <- function(n, mu, num_blocks, block_size, rho,
sigma2 = rep(1, K)) {
n <- as.integer(n)
block_size <- as.integer(block_size)
num_blocks <- as.integer(num_blocks)
p <- block_size * num_blocks
rho <- as.numeric(rho)
mu <- as.matrix(mu)

K <- length(n)

if (length(rho) != K) {
rlang::abort("The length of 'rho' must equal the length of 'n'.")
} else if(nrow(mu) != p) {
rlang::abort("The matrix 'mu' must have 'p' rows.")
} else if(ncol(mu) != K) {
rlang::abort("The matrix 'mu' must have 'K' columns.")
}

x <- lapply(seq_len(K), function(k) {
if (n[k] > 0) {
rmvnorm(n = n[k], mean = mu[, k],
sigma = cov_block_autocorrelation(num_blocks = num_blocks,
block_size = block_size, rho = rho[k], sigma2 = sigma2[k]))
}
})
x <- do.call(rbind, x)
y <- factor(rep(seq_along(n), n))

list(x = x, y = y)
}

#' Generates a \eqn{p \times p} block-diagonal covariance matrix with
#' autocorrelated blocks.
#'
#' This function generates a \eqn{p \times p} covariance matrix with
#' autocorrelated blocks. The autocorrelation parameter is rho.
#' There are num_blocks blocks each with size, block_size.
#' The variance, sigma2, is constant for each feature and defaulted to 1.
#'
#' The autocorrelated covariance matrix is defined as:
#' \deqn{\Sigma = \Sigma^{(\rho)} \oplus \Sigma^{(-\rho)} \oplus \ldots \oplus
#' \Sigma^{(\rho)},} where \eqn{\oplus} denotes the direct sum and the
#' \eqn{(i,j)}th entry of \eqn{\Sigma^{(\rho)}} is \deqn{\Sigma_{ij}^{(\rho)} =
#' \{ \rho^{|i - j|} \}.}
#'
#' The matrix \eqn{\Sigma^{(\rho)}} is the autocorrelated block discussed above.
#'
#' The value of rho must be such that \eqn{|\rho| < 1} to ensure that
#' the covariance matrix is positive definite.
#'
#' The size of the resulting matrix is \eqn{p \times p}, where
#' p = num_blocks * block_size.
#'
#' @export
#' @importFrom bdsmatrix bdsmatrix
#' @param num_blocks the number of blocks in the covariance matrix
#' @param block_size the size of each square block within the covariance matrix
#' @param rho the autocorrelation parameter. Must be less than 1 in absolute
#' value.
#' @param sigma2 the variance of each feature
#' @return autocorrelated covariance matrix
cov_block_autocorrelation <- function(num_blocks, block_size, rho, sigma2 = 1) {
cov_block <- cov_autocorrelation(p = block_size, rho = rho, sigma2 = sigma2)
cov_block <- as.vector(cov_block)
as.matrix(bdsmatrix(blocksize = rep(block_size, num_blocks),
blocks = replicate(num_blocks, cov_block)))
}

#' Generates a \eqn{p \times p} autocorrelated covariance matrix
#'
#' This function generates a \eqn{p \times p} autocorrelated covariance matrix
#' with autocorrelation parameter rho. The variance sigma2 is
#' constant for each feature and defaulted to 1.
#'
#' The autocorrelated covariance matrix is defined as:
#' The \eqn{(i,j)}th entry of the autocorrelated covariance matrix is defined as:
#' \eqn{\rho^{|i - j|}}.
#'
#' The value of rho must be such that \eqn{|\rho| < 1} to ensure that
#' the covariance matrix is positive definite.
#'
#' @export
#' @param p the size of the covariance matrix
#' @param rho the autocorrelation parameter. Must be less than 1 in absolute
#' value.
#' @param sigma2 the variance of each feature
#' @return autocorrelated covariance matrix
cov_autocorrelation <- function(p, rho, sigma2 = 1) {
p <- as.integer(p)
rho <- as.numeric(rho)
sigma2 <- as.numeric(sigma2)

if (abs(rho) >= 1) {
rlang::abort("The value of 'rho' must be between (1 - p)^(-1) and 1, exclusively.")
}
if (sigma2 <= 0) {
rlang::abort("The value of 'sigma2' must be positive.")
}
x <- diag(p)
sigma2 * rho^abs(row(x) - col(x))
}


## Try the sparsediscrim package in your browser

Any scripts or data that you put into this service are public.

sparsediscrim documentation built on July 1, 2021, 9:07 a.m.