Nothing
#' Generates data from \code{K} multivariate normal data populations having the
#' covariance structure from Guo et al. (2007).
#'
#' We generate \eqn{n_k} observations \eqn{(k = 1, \ldots, K)} from each of
#' \eqn{K} multivariate normal distributions. Let the \eqn{k}th population have
#' a \eqn{p}-dimensional multivariate normal distribution, \eqn{N_p(\mu_k,
#' \Sigma_k)} with mean vector \eqn{\mu_k} and positive-definite covariance
#' matrix \eqn{\Sigma_k}. Each covariance matrix \eqn{\Sigma_k} consists of
#' block-diagonal autocorrelation matrices.
#'
#' The \eqn{k}th class covariance matrix is defined as
#' \deqn{\Sigma_k = \Sigma^{(\rho)} \oplus \Sigma^{(-\rho)} \oplus \ldots
#' \oplus \Sigma^{(\rho)},} where \eqn{\oplus} denotes the direct sum and the
#' \eqn{(i,j)}th entry of \eqn{\Sigma^{(\rho)}} is
#' \deqn{\Sigma_{ij}^{(\rho)} = \{ \rho^{|i - j|} \}.}
#'
#' The matrix \eqn{\Sigma^{(\rho)}} is referred to as a block. Its dimensions
#' are provided in the \code{block_size} argument, and the number of blocks are
#' specified in the \code{num_blocks} argument.
#'
#' Each matrix \eqn{\Sigma_k} is generated by the
#' \code{\link{cov_block_autocorrelation}} function.
#'
#' The number of populations, \code{K}, is determined from the length of the
#' vector of sample sizes, code{n}. The mean vectors can be given in a list of
#' length \code{K}. If one mean is given (as a vector or a list having 1
#' element), then all populations share this common mean.
#'
#' The block sizes can be given as a numeric vector or a single value, in which
#' case the degrees of freedom is replicated \code{K} times. The same logic
#' applies to \code{num_blocks}, \code{rho}, and \code{sigma2}.
#'
#' For each class, the number of features, \code{p}, is computed as
#' \code{block_size * num_blocks}. The values for \code{p} must agree for each
#' class.
#'
#' The block-diagonal covariance matrix with autocorrelated blocks was
#' popularized by Guo et al. (2007) for studying classification of
#' high-dimensional data.
#'
#' @references Guo, Y., Hastie, T., & Tibshirani, R. (2007). "Regularized linear
#' discriminant analysis and its application in microarrays," Biostatistics, 8,
#' 1, 86-100.
#' @importFrom mvtnorm rmvnorm
#' @export
#' @param n a vector (of length K) of the sample sizes for each population
#' @param mean a vector or a list (of length K) of mean vectors
#' @param block_size a vector (of length K) of the sizes of the square block
#' matrices for each population. See details.
#' @param num_blocks a vector (of length K) giving the number of block matrices
#' for each population. See details.
#' @param rho a vector (of length K) of the values of the autocorrelation
#' parameter for each class covariance matrix
#' @param sigma2 a vector (of length K) of the variance coefficients for each
#' class covariance matrix
#' @param seed seed for random number generation (If \code{NULL}, does not set
#' seed)
#' @return named list containing:
#' \describe{
#' \item{x:}{A matrix whose rows are the observations generated and whose
#' columns are the \code{p} features (variables)}
#' \item{y:}{A vector denoting the population from which the observation in
#' each row was generated.}
#' }
#' @examples
#' # Generates 10 observations from two multivariate normal populations having
#' # a block-diagonal autocorrelation structure.
#' block_size <- 3
#' num_blocks <- 3
#' p <- block_size * num_blocks
#' means_list <- list(seq_len(p), -seq_len(p))
#' data <- simdata_guo(n = c(10, 10), mean = means_list, block_size = block_size,
#' num_blocks = num_blocks, rho = 0.9, seed = 42)
#' dim(data$x)
#' table(data$y)
#'
#' # Generates 15 observations from each of three multivariate normal
#' # populations having block-diagonal autocorrelation structures. The
#' # covariance matrices are unequal.
#' p <- 16
#' block_size <- c(2, 4, 8)
#' num_blocks <- p / block_size
#' rho <- c(0.1, 0.5, 0.9)
#' sigma2 <- 1:3
#' mean_list <- list(rep.int(-5, p), rep.int(0, p), rep.int(5, p))
#'
#' set.seed(42)
#' data2 <- simdata_guo(n = c(15, 15, 15), mean = mean_list,
#' block_size = block_size, num_blocks = num_blocks,
#' rho = rho, sigma2 = sigma2)
#' dim(data2$x)
#' table(data2$y)
simdata_guo <- function(n, mean, block_size, num_blocks, rho, sigma2 = 1,
seed = NULL) {
n <- as.integer(n)
block_size <- as.integer(block_size)
num_blocks <- as.integer(num_blocks)
rho <- as.numeric(rho)
# The number of populations
K <- length(n)
# If only one block size is given, replicate K times
if (length(block_size) == 1) {
block_size <- rep.int(block_size, times = K)
} else if (length(block_size) != K) {
stop("The length of 'block_size' must equal K.")
}
# If only one num_blocks is given, replicate K times
if (length(num_blocks) == 1) {
num_blocks <- rep.int(num_blocks, times = K)
} else if (length(num_blocks) != K) {
stop("The length of 'num_blocks' must equal K.")
}
# If only one rho is given, replicate K times
if (length(rho) == 1) {
rho <- rep.int(rho, times = K)
} else if (length(rho) != K) {
stop("The length of 'rho' must equal K.")
}
# If only one sigma2 is given, replicate K times
if (length(sigma2) == 1) {
sigma2 <- rep.int(sigma2, times = K)
} else if (length(sigma2) != K) {
stop("The length of 'sigma2' must equal K.")
}
# The number of features for each class. These should be equal.
p <- block_size * num_blocks
if (!all_equal(p)) {
stop("The number of features 'p' must be the same for all classes.")
}
if (!is.list(mean)) {
mean <- list(mean)
}
# If only one mean is given, replicate K times
if (length(mean) == 1) {
mean <- replicate(K, mean)
} else if (length(mean) != K) {
stop("The number of 'mean' vectors must equal 'K'.")
}
if (!all(sapply(mean, length) == p)) {
stop("The length of the 'mean' vectors must equal 'p'.")
}
# Construct a list of blockdiagonal autocorrelated covariance matrices
cov_list <- lapply(seq_len(K), function(k) {
cov_block_autocorrelation(num_blocks = num_blocks[k],
block_size = block_size[k],
rho = rho[k],
sigma2 = sigma2[k])
})
# Generate data via simdata_normal
simdata_normal(n = n, mean = mean, cov = cov_list, seed = seed)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.