Nothing
#' @title Generate matrix composed of a sparse matrix and low-rank matrix
#'
#' @description Generate simulated matrix that is the superposition of
#' a low-rank component and a sparse component.
#'
#' @param n The number of observations.
#' @param p The number of predictors of interest.
#' @param rank The rank of low-rank matrix.
#' @param support.size The number of nonzero coefficients in the underlying regression
#' model. Can be omitted if \code{beta} is supplied.
#' @param beta The coefficient values in the underlying regression model.
#' If it is supplied, \code{support.size} would be omitted.
#' @param snr A positive value controlling the signal-to-noise ratio (SNR).
#' A larger SNR implies the identification of sparse matrix is much easier.
#' Default \code{snr = Inf} enforces no noise exists.
#' @param sigma A numerical value supplied the variance of the gaussian noise.
#' Default \code{sigma = NULL} implies it is determined by \code{snr}.
#' @param seed random seed. Default: \code{seed = 1}.
#'
#'
#' @return A \code{list} object comprising:
#' \item{x}{An \eqn{n}-by-\eqn{p} matrix.}
#' \item{L}{The latent low rank matrix.}
#' \item{S}{The latent sparse matrix.}
#'
#' @details
#' The low rank matrix \eqn{L} is generated by \eqn{L = UV}, where
#' \eqn{U} is an \eqn{n}-by-\eqn{rank} matrix and
#' \eqn{V} is a \eqn{rank}-by-\eqn{p} matrix.
#' Each element in \eqn{U} (or \eqn{V}) are i.i.d. drawn from \eqn{N(0, 1/n)}.
#'
#' The sparse matrix \eqn{S} is an \eqn{n}-by-\eqn{rank} matrix.
#' It is generated by choosing a support set of size
#' \code{support.size} uniformly at random.
#' The non-zero entries in \eqn{S} are independent Bernoulli (-1, +1) entries.
#'
#' The noise matrix \eqn{N} is an \eqn{n}-by-\eqn{rank} matrix,
#' the elements in \eqn{N} are i.i.d. gaussian random variable
#' with standard deviation \eqn{\sigma}.
#'
#' The SNR is defined as
#' as the variance of vectorized matrix \eqn{L + S} divided
#' by \eqn{\sigma^2}.
#'
#' The matrix \eqn{x} is the superposition of \eqn{L}, \eqn{S}, \eqn{N}:
#' \deqn{x = L + S + N.}
#'
#' @author Jin Zhu
#'
#' @export
#'
#' @examples
#' # Generate simulated data
#' n <- 30
#' p <- 20
#' dataset <- generate.matrix(n, p)
#' \donttest{
#' stats::heatmap(as.matrix(dataset[["S"]]),
#' Rowv = NA,
#' Colv = NA,
#' scale = "none",
#' col = grDevices::cm.colors(256),
#' frame.plot = TRUE,
#' margins = c(2.4, 2.4)
#' )
#' }
generate.matrix <- function(n,
p,
rank = NULL,
support.size = NULL,
beta = NULL,
snr = Inf,
sigma = NULL,
seed = 1) {
set.seed(seed)
stopifnot(length(n) == 1)
stopifnot(is.numeric(n))
check_integer_warning_variable(n, "n")
n <- as.integer(n)
stopifnot(length(p) == 1)
stopifnot(is.numeric(p))
check_integer_warning_variable(p, "p")
p <- as.integer(p)
if (is.null(rank)) {
L_rank <- round(0.05 * n)
} else {
stopifnot(rank <= min(n, p))
check_integer_warning_variable(rank, "rank")
L_rank <- as.integer(rank)
}
x <- matrix(rnorm(n * L_rank, sd = 1 / sqrt(n)), nrow = n)
y <- matrix(rnorm(p * L_rank, sd = 1 / sqrt(p)), ncol = p)
L <- x %*% y
if (is.null(support.size)) {
k <- round(0.05 * n * p)
} else {
check_integer_warning_variable(support.size, "support.size")
k <- as.integer(support.size)
}
index <- sample(0:(n * p - 1), size = k, replace = FALSE)
row_index <- floor(index / p)
col_index <- index %% p
value <- sample(c(-1, 1), size = k, replace = TRUE)
S <- Matrix::sparseMatrix(
dims = c(n, p),
i = row_index,
j = col_index,
x = value,
index1 = FALSE
)
x <- L + S
if (is.null(sigma)) {
signal_var <- stats::var(as.vector(x))
noise_var <- sqrt(signal_var / snr)
} else {
noise_var <- sigma
}
noise <-
matrix(rnorm(n = n * p, sd = noise_var),
nrow = n,
ncol = p
)
x <- x + noise
x <- Matrix::as.matrix(x)
set.seed(NULL)
colnames(x) <- paste0("x", 1:p)
return(list(
x = x,
L = L,
S = S,
rank = L_rank,
support.size = k
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.