#' @title Sparse PCA
#' @export
#'
#' @description This function performs sparse principal component analysis (PCA)
#' using a block optimisation algorithm or an iterative deflation algorithm.
#'
#' @param A a numerical data matrix of size n by p (observations by variables)
#' @param m the number of components
#' @param lambda a numerical vector of size m giving the reduced sparsity
#' parameters, expressed relative to their theoretical upper bound.
#' Each reduced sparsity parameter is a value between 0 and 1.
#' @param block either 0 or 1. If block=0, deflation is used when more than
#' one component is requested. If block=1 (the default), a block optimisation
#' algorithm computes the m components at once.
#' @param mu a numerical vector of size m with the mu parameters (required for the block algorithm only).
#' By default, mu_j=1/j.
#' @param center a logical value indicating whether the variables should be
#' shifted to be zero centered.
#' @param scale a logical value indicating whether the variables should be
#' scaled to have unit variance.
#' @param iter_max maximum number of admissible iterations.
#' @param epsilon accuracy of the stopping criterion.
#'
#' @return \item{Z}{a p by m numerical matrix with the m sparse loading vectors}
#' @return \item{Y}{a n by m numerical matrix with the m principal components}
#' @return \item{B}{the numerical data matrix, centered (if center=TRUE) and scaled
#' (if scale=TRUE)}
#' @return \item{gamma}{a numerical vector of size m with the absolute sparsity
#' parameters deduced from \code{lambda}}
#' @return \item{X}{a n by m numerical matrix with the m normalized components}
#'
#' @details This function implements an optimal projected variance sparse
#' block PCA algorithm, as well as a deflation algorithm that applies the block
#' algorithm with a single component iteratively to each deflated data matrix.
#'
#' The block algorithm uses a numerical vector of parameters \code{mu}, usually
#' chosen either strictly decreasing (mu_j=1/j) or all equal (mu_j=1 for all j).
#' Strictly decreasing parameters relieve the underdetermination which arises
#' in some situations and lead to a solution close to the PCA solution.
#'
#' The principal components are defined by Y=BZ where B is the centered (if
#' center=TRUE) and scaled (if scale=TRUE) data matrix and where Z is the sparse loading matrix.
#'
#'@references
#'M. Chavent and G. Chavent, Optimal projected variance group-sparse block PCA,
#'submitted, 2020.
#'@references
#'M. Journee, Y. Nesterov, P. Richtarik, and R. Sepulchre. Generalized power
#'method for sparse principal component analysis. Journal of Machine Learning
#'Research, 11:517-553, 2010.
#'
#'@seealso \code{\link{groupsparsePCA}}, \code{\link{pev}},
#'\code{\link{explainedVar}}
#'
#' @examples
#' # Simulated data
#' v1 <- c(1,1,1,1,0,0,0,0,0.9,0.9)
#' v2 <- c(0,0,0,0,1,1,1,1,-0.3,0.3)
#' valp <- c(200,100,50,50,6,5,4,3,2,1)
#' A <- simuPCA(50,cbind(v1,v2),valp,seed=1)
#'
#' # Three sparse PCA algorithms
#' Z <- sparsePCA(A,2,c(0.5,0.5),block=0)$Z #deflation
#' Z <- sparsePCA(A,2,c(0.5,0.5),block=1)$Z #block different mu
#' Z <- sparsePCA(A,2,c(0.5,0.5),block=1,mu=c(1,1))$Z #block same mu
#'
#' # Example of the protein data
#' data("protein")
#' Z <- sparsePCA(protein,2,c(0.5,0.5))$Z #block different mu
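#'
#' # Check numerically that the returned elements satisfy Y = B Z (see Details)
#' res <- sparsePCA(protein,2,c(0.5,0.5))
#' max(abs(res$Y - res$B %*% res$Z)) # should be numerically zero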
#'
#'
sparsePCA <- function(A, m, lambda, block=1, mu=1/1:m,
                      center=TRUE, scale=TRUE, iter_max=1000, epsilon=0.0001)
{
n <- nrow(A)
p <- ncol(A)
if (m > min(n-1,p))
stop("m must not exceed min(n-1, p)",call. = FALSE)
if (length(lambda)!=m)
stop("lambda must be a vector of size m",call. = FALSE)
if ((max(lambda) > 1) || (min(lambda) < 0))
stop("Values in lambda must be in [0,1]",call. = FALSE)
norm2 <- function(x) sqrt(sum(x^2))
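# polar(x) returns the orthonormal polar factor u v' of the SVD x = u d v',
# i.e. the projection of x onto the set of matrices with orthonormal columns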
polar <- function(x)
{
obj <- svd(x)
obj$u %*% t(obj$v)
}
Z <- matrix(0,p,m)
rownames(Z) <- colnames(A)
A <- as.matrix(A)
if (center)
A <- scale(A,center=TRUE, scale=FALSE) # center the data
if (scale)
A <- scale(A,center=FALSE,scale=TRUE)*sqrt(n/(n-1)) # scale the data: the factor sqrt(n/(n-1)) gives unit variance with the 1/n convention
if ((m==1) || (m>1 && block==0)) # deflation is used if m>1
{
B <- A
gamma <- rep(NA,m)
X <- matrix(NA,n,m)
for (comp in 1:m) #loop on the components
{
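# gamma_max is the theoretical upper bound on the sparsity parameter:
# the largest column norm of the current (deflated) data matrix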
i_max <- which.max(apply(B,2,norm2))
gamma_max <- norm2(B[,i_max])
gamma[comp] <- lambda[comp]*gamma_max
x <- svd(B)$u[,1] # initialization with the leading left singular vector
f <- matrix(0,iter_max,1)
iter <- 1
repeat
{
Bx <- t(B)%*%x
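# soft-thresholding of B'x at level gamma[comp]: entries below the threshold
# are set to zero, the remaining ones are shrunk towards zero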
tresh <- sign(Bx)*(abs(Bx)>=gamma[comp])*(abs(Bx)-gamma[comp])
f[iter] <- sum(tresh^2) #cost function
if (f[iter]==0) #the sparsity parameter is too high: all entries of the loading vector are zero
break
grad <- B %*% tresh
x <- grad/norm2(grad)
if (((iter>=2) && ((f[iter]-f[iter-1])/f[iter-1] < epsilon)) || (iter >= iter_max)) #stopping criterion
{
if (iter >= iter_max) warning("Maximum number of iterations reached",call. = FALSE)
break
}
iter <- iter+1
}
Bx <- t(B)%*%x
#pattern <- abs(Bx)-gamma >0
z <- sign(Bx)*(abs(Bx)>=gamma[comp])*(abs(Bx)-gamma[comp])
if (max(abs(z))>0)
z <- z/norm2(z)
#if (post==TRUE) z <- pattern_filling(B,z)
y <- B%*%z
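# deflation: remove the contribution of the current component from B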
B <- B-y%*%t(z)
#B <- scale(B,center=TRUE,scale=FALSE) # center the matrix of residuals
#B <- scale(B)*sqrt(n/(n-1)) # standardize the matrix of residuals
Z[,comp]<-z
X[,comp] <- y/norm2(y)
}
}
if (m>1 && block==1) # block algorithm
{
e <- svd(A)
d <- e$d[1:m]
u <- e$u[,1:m]
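# the theoretical upper bound gamma_max is scaled for each component by the
# corresponding singular value ratio d_j/d[1]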
i_max <- which.max(apply(A,2,norm2))
gamma_max <- norm2(A[,i_max])
gamma <- lambda*gamma_max*d/d[1]
x <- u # initialization with the m leading left singular vectors
f <- matrix(0,iter_max,1)
iter <- 1
repeat
{
Ax <- t(A)%*%x
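# columnwise soft-thresholding of A'x, each column j with its own threshold gamma[j]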
tresh <- sign(Ax)*sweep(abs(Ax),2,gamma,">=")*sweep(abs(Ax),2,gamma,"-")
#f[iter] <-Tr(crossprod(tresh)%*%diag(mu^2)) #cost function
f[iter] <- sum(apply(tresh,2,norm2)^2*mu^2) #cost function
if (f[iter]==0) #the sparsity parameter is too high: all entries of the loading matrix are zero
break
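# gradient of the weighted cost, followed by projection onto the set of
# matrices with orthonormal columns (polar decomposition)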
grad <- 2*A %*% tresh%*%diag(mu^2)
x <- polar(grad)
if (((iter>=2) && ((f[iter]-f[iter-1])/f[iter-1] < epsilon)) || (iter >= iter_max)) #stopping criterion
{
if (iter >= iter_max) warning("Maximum number of iterations reached",call. = FALSE)
break
}
iter <- iter+1
}
Ax <- t(A)%*%x
z <- sign(Ax)*sweep(abs(Ax),2,gamma,">=")*sweep(abs(Ax),2,gamma,"-")
for (j in 1:m)
{
if (max(abs(z[,j]))>0)
Z[,j] <- z[,j]/norm2(z[,j])
}
#if (post==TRUE) Z <- pattern_filling(A,Z,mu)
X <- x
}
Y <- A %*% Z
res <- list(Z=Z,Y=Y,gamma=gamma,B=A,X=X)
class(res) <- "sparsePCA"
return(res)
}