R/data-block-autocorrelation.r

Defines functions generate_blockdiag cov_block_autocorrelation cov_autocorrelation

Documented in cov_autocorrelation cov_block_autocorrelation generate_blockdiag

#' Generates data from \code{K} multivariate normal data populations, where each
#' population (class) has a covariance matrix consisting of block-diagonal
#' autocorrelation matrices.
#'
#' This function generates \code{K} multivariate normal data sets, where each
#' class is generated with a constant mean vector and a covariance matrix
#' consisting of block-diagonal autocorrelation matrices. The data are returned
#' as a single matrix \code{x} along with a vector of class labels \code{y} that
#' indicates class membership.
#'
#' For simplicity, we assume that a class mean vector is constant for each
#' feature. That is, we assume that the mean vector of the \eqn{k}th class is
#' \eqn{c_k * j_p}, where \eqn{j_p} is a \eqn{p \times 1} vector of ones and
#' \eqn{c_k} is a real scalar.
#'
#' The \eqn{k}th class covariance matrix is defined as
#' \deqn{\Sigma_k = \Sigma^{(\rho)} \oplus \Sigma^{(-\rho)} \oplus \ldots
#' \oplus \Sigma^{(\rho)},} where \eqn{\oplus} denotes the direct sum and the
#' \eqn{(i,j)}th entry of \eqn{\Sigma^{(\rho)}} is
#' \deqn{\Sigma_{ij}^{(\rho)} = \{ \rho^{|i - j|} \}.}
#'
#' The matrix \eqn{\Sigma^{(\rho)}} is referred to as a block. Its dimensions
#' are provided in the \code{block_size} argument, and the number of blocks are
#' specified in the \code{num_blocks} argument.
#'
#' Each matrix \eqn{\Sigma_k} is generated by the
#' \code{\link{cov_block_autocorrelation}} function.
#'
#' The number of classes \code{K} is determined with lazy evaluation as the
#' length of \code{n}.
#'
#' The number of features \code{p} is computed as \code{block_size *
#' num_blocks}.
#'
#' @importFrom mvtnorm rmvnorm
#' @export
#' @param n vector of the sample sizes of each class. The length of \code{n}
#' determines the number of classes \code{K}.
#' @param mu matrix containing the mean vectors for each class. Expected to have
#' \code{p} rows and \code{K} columns.
#' @param num_blocks the number of block matrices. See details.
#' @param block_size the dimensions of the square block matrix. See details.
#' @param rho vector of the values of the autocorrelation parameter for each
#' class covariance matrix. Must equal the length of \code{n} (i.e., equal to
#' \code{K}).
#' @param sigma2 vector of the variance coefficients for each class covariance
#' matrix. Must equal the length of \code{n} (i.e., equal to \code{K}).
#' @return named list with elements:
#' \itemize{
#'   \item \code{x}: matrix of observations with \code{n} rows and \code{p}
#' columns
#'   \item \code{y}: vector of class labels that indicates class membership for
#' each observation (row) in \code{x}.
#' }
#' @examples
#' # Generates data from K = 3 classes.
#' means <- matrix(rep(1:3, each=9), ncol=3)
#' data <- generate_blockdiag(n = c(15, 15, 15), block_size = 3, num_blocks = 3,
#' rho = seq(.1, .9, length = 3), mu = means)
#' data$x
#' data$y
#'
#' # Generates data from K = 4 classes. Notice that we use specify a variance.
#' means <- matrix(rep(1:4, each=9), ncol=4)
#' data <- generate_blockdiag(n = c(15, 15, 15, 20), block_size = 3, num_blocks = 3,
#' rho = seq(.1, .9, length = 4), mu = means)
#' data$x
#' data$y
generate_blockdiag <- function(n, mu, num_blocks, block_size, rho,
                               sigma2 = rep(1, K)) {
  n <- as.integer(n)
  block_size <- as.integer(block_size)
  num_blocks <- as.integer(num_blocks)
  p <- block_size * num_blocks
  rho <- as.numeric(rho)
  mu <- as.matrix(mu)

  K <- length(n)

  if (length(rho) != K) {
    stop("The length of 'rho' must equal the length of 'n'.")
  } else if(nrow(mu) != p) {
    stop("The matrix 'mu' must have 'p' rows.")
  } else if(ncol(mu) != K) {
    stop("The matrix 'mu' must have 'K' columns.")
  }

  x <- lapply(seq_len(K), function(k) {
    if (n[k] > 0) {
      rmvnorm(n = n[k], mean = mu[, k],
              sigma = cov_block_autocorrelation(num_blocks = num_blocks,
              block_size = block_size, rho = rho[k], sigma2 = sigma2[k]))
    }
  })
  x <- do.call(rbind, x)
  y <- factor(rep(seq_along(n), n))

  list(x = x, y = y)
}

#' Generates a \eqn{p \times p} block-diagonal covariance matrix with
#' autocorrelated blocks.
#'
#' This function generates a \eqn{p \times p} covariance matrix with
#' autocorrelated blocks. The autocorrelation parameter is \code{rho}.
#' There are \code{num_blocks} blocks each with size, \code{block_size}.
#' The variance, \code{sigma2}, is constant for each feature and defaulted to 1.
#'
#' The autocorrelated covariance matrix is defined as:
#' \deqn{\Sigma = \Sigma^{(\rho)} \oplus \Sigma^{(-\rho)} \oplus \ldots \oplus
#' \Sigma^{(\rho)},} where \eqn{\oplus} denotes the direct sum and the
#' \eqn{(i,j)}th entry of \eqn{\Sigma^{(\rho)}} is \deqn{\Sigma_{ij}^{(\rho)} =
#' \{ \rho^{|i - j|} \}.}
#'
#' The matrix \eqn{\Sigma^{(\rho)}} is the autocorrelated block discussed above.
#'
#' The value of \code{rho} must be such that \eqn{|\rho| < 1} to ensure that
#' the covariance matrix is positive definite.
#'
#' The size of the resulting matrix is \eqn{p \times p}, where
#' \code{p = num_blocks * block_size}.
#'
#' @export
#' @importFrom bdsmatrix bdsmatrix
#' @param num_blocks the number of blocks in the covariance matrix
#' @param block_size the size of each square block within the covariance matrix
#' @param rho the autocorrelation parameter. Must be less than 1 in absolute
#' value.
#' @param sigma2 the variance of each feature
#' @return autocorrelated covariance matrix
cov_block_autocorrelation <- function(num_blocks, block_size, rho, sigma2 = 1) {
  cov_block <- cov_autocorrelation(p = block_size, rho = rho, sigma2 = sigma2)
  cov_block <- as.vector(cov_block)
  as.matrix(bdsmatrix(blocksize = rep(block_size, num_blocks),
                      blocks = replicate(num_blocks, cov_block)))
}

#' Generates a \eqn{p \times p} autocorrelated covariance matrix
#'
#' This function generates a \eqn{p \times p} autocorrelated covariance matrix
#' with autocorrelation parameter \code{rho}. The variance \code{sigma2} is
#' constant for each feature and defaulted to 1.
#'
#' The autocorrelated covariance matrix is defined as:
#' The \eqn{(i,j)}th entry of the autocorrelated covariance matrix is defined as:
#' \eqn{\rho^{|i - j|}}.
#'
#' The value of \code{rho} must be such that \eqn{|\rho| < 1} to ensure that
#' the covariance matrix is positive definite.
#'
#' @export
#' @param p the size of the covariance matrix
#' @param rho the autocorrelation parameter. Must be less than 1 in absolute
#' value.
#' @param sigma2 the variance of each feature
#' @return autocorrelated covariance matrix
cov_autocorrelation <- function(p, rho, sigma2 = 1) {
  p <- as.integer(p)
  rho <- as.numeric(rho)
  sigma2 <- as.numeric(sigma2)

  if (abs(rho) >= 1) {
    stop("The value of 'rho' must be between (1 - p)^(-1) and 1, exclusively.")
  }
  if (sigma2 <= 0) {
    stop("The value of 'sigma2' must be positive.")
  }
  x <- diag(p)
  sigma2 * rho^abs(row(x) - col(x))
}
ramhiser/sparsediscrim documentation built on May 26, 2019, 10:14 p.m.