# R/simuPCA.R In chavent/sparsePCA: sparse and group-sparse PCA

#### Documented in simuPCA

#'@title  Simulation from a sparse PCA model
#'@description Simulation of a data matrix  drawn from a sparse PCA model.
#'The data matrix A of dimension n times p is drawn from a centered multivariate normal
#'distribution. The m < p leading eigenvectors of the covariance matrix of this normal distribution are sparse.
#'@param n the number of observations.
#'@param Ztrue a numerical matrix of dimension p times m. The columns of Ztrue are m sparse and othogonal vectors.
#'They will be normalized and used as the m leading eigenvectors of the covariance matrix.
#'@param valp a numerical vector of size p. The values are the eigenvalues of the covariance matrix.
#'@param seed a seed value to replicate the simulation. By default, seed=FALSE.
#'@return Returns a data matrix A of dimension n times p.
#'@details This function generates data with a covariance matrix C having sparse eigenvectors V. C is synthesized
#'through the eigenvalue decomposition C=VDV^T where D=diag(valp). The first m columns of V are the sparse and orthogonal
#'columns of Ztrue.  The remaining p-m eigenvectors are not specified to be sparse and are generated as follows. A matrix U of
#'dimension n times (p-m) is randomly dawn from U(0,1) and V=Q from the QR decomposition  [Ztrue,U]=QR. The data matrix A is
#'then generated by drawing n samples from a zero-mean distribution with covariance matrix C.
#'
#'@export
#'
#'@references
#'\itemize{
#'\item M. Chavent and G. Chavent, Group-sparse block PCA and explained variance, arXiv:1705.00461
#'\item M. Journee and al., Generalized Power Method for Sparse Principal Component Analysis, Journal of Machine Learning Research 11 (2010) 517-553.
#'\item H. Shen and J.Z. Huang, Journal of Multivariate Analysis 99 (2008) 1015-1034.
#'}
#'@examples
#' # Example from Journee & al. 2010
#'  v1 <- c(rep(1/sqrt(10),10),rep(0,490))
#'  v2 <- c(rep(0,10), rep(1/sqrt(10),10),rep(0,480))
#'  valp <- c(400,300,rep(1,498))
#'  n <- 100
#'  A <- simuPCA(n,cbind(v1,v2),valp,seed=1)
#'  svd(A)$d[1:3]^2 #eigenvalues of the empirical covariance matrix. #' #' # Example from Shen & Huang 2008 #' v1 <- c(1,1,1,1,0,0,0,0,0.9,0.9) #' v2 <- c(0,0,0,0,1,1,1,1,-0.3,0.3) #' valp <- c(200,100,50,50,6,5,4,3,2,1) #' n <- 100 #' A <- simuPCA(n,cbind(v1,v2),valp,seed=1) #' svd(A)$d^2 #eigenvalues of the empirical covariance matrix (times n).
#'
#' # Example from Chavent & Chavent 2017
#'  valp <- c(c(200,180,150,130),rep(1,16))
#'  data("Ztrue")
#'  n <- 100
#'  A <-simuPCA(n,Ztrue,valp,seed=1)
#'  svd(A)\$d^2 #eigenvalues of the empirical covariance matrix.
#'
simuPCA <- function(n=30, Ztrue, valp, seed=FALSE)
{
norm2 <- function(x) sqrt(sum(x^2))
p <- nrow(Ztrue)
m <- ncol(Ztrue)
if (m > p) stop("the number of loadings (number of columns in Ztrue)
must be smaller of equal to the number of variables (rowns)",call.=FALSE)
for (j in 1:ncol(Ztrue))
Ztrue[,j] <- Ztrue[,j]/norm2(Ztrue[,j])
if (seed != FALSE) set.seed(seed)

if (length(valp)!=p)
stop("the length of valp must be equal to the number of rows in Ztrue",call. = FALSE)
U <- matrix(stats::runif(p*(p-m)),p,(p-m))
V <- qr.Q(qr(cbind(Ztrue,U)))
C <- V%*%diag(valp)%*%t(V)
if (seed != FALSE) set.seed(seed)
A <- mvtnorm::rmvnorm(n,sigma=C) /sqrt(n)
return(A)
}

chavent/sparsePCA documentation built on July 2, 2017, 1:14 a.m.