##------------------------------------------------------------##
## Author Giovanni Merola please acknowledge my work
## email giovanni.merola@xjtlu.edu.cn
##------------------------------------------------------------##
#' @title Computes LS SPCA components on selected groups of variables
#'
#' @description Each component, the variables are selected so as to explain
#' a percentage \emph{alpha} of the variance explained by the corresponding principal component.
#' \emph{blocks_list} is a list containing the indeies for each component,
#' Subsets can be overlapping and need not be exhaustive.
#'
#' @usage lsspca_blocked(X, alpha = 0.95, blocks_list = list(),
#' ncomps_per_block = 1, blocks_names = NA, maxcard = 0,
#' spcaMethod = c("u", "c", "p"), scalex = FALSE,
#' variableSelection = c("exhaustive", "seqrep", "backward", "forward", "lasso"),
#' lsspca_forLasso = FALSE, lasso_penalty = 0.5, rtn_all_spca = FALSE)
#'
#' @param X the data matrix.
#' @param alpha Real (or vector) in [0,1]. percentage of variance of the PCs explained by the sparse component.
#' @param blocks_list a list of indices or a vector or factor of codes for each block
#' @param ncomps_per_block number of components per block, integer or vector of. Default = 1.
#' @param blocks_names names of each block, if possible names are taken for the names of blocks_list
#' @param maxcard a vector or an integer of maximal cardinality for each block, if USPCA selected
#' maxcard[j] cannot be < j
#' @param spcaMethod char how lsspca is computed "u" = USPCA (default), "c" = CSPCA, "p" = PSPCA
#' @param scalex Logical, if TRUE variables are scaled to unit variance.default FALSE
#' Variables are automatically centered to zero if they aren't already.
#' @param variableSelection how the variables in each components are selected
#' "exhaustive" = all subsets, "seqrep" = stepwise, "backward", "forward", "lasso".
#' See documentation in packages \code{leaps}, and \code{glmnet} for lasso.
#' @param lasso_penalty real between 0 and 1, , 0-> ridge regression, 1 -> lasso
#' @param lsspca_forLasso use lsspca with indeies selected with lasso, otherwise
#' just the lasso regression
#' @param rtn_all_spca logical, should the lsspca results for each blovck be returned?
#'
#' @details Differently from the lsspca function, the components are computed so as
#' to explain a given percentage of the variance explained by the PCs of the blocks
#' of variables under consideration.
#'
#' @return a list
#' \describe{
#' \item{loadings}{Matrix with the loadings scaled to unit \eqn{L_2} norm.}
#' \item{contributions}{Matrix of loadings scaled to unit \eqn{L_1} norm.}
#' \item{ncomps}{integer number of components computed. Default is 4.}
#' \item{cardinality}{Vector with the cardinalities of each loadings.}
#' \item{ind}{List with the indices of the non-zero loadings for each component.}
#' \item{loadingslist}{A list with only the nonzero ladings for each component.}
#' \item{vexp}{Vector with the \% variance explained by each component.}
#' \item{vexpPC}{Vector with the \% variance explained by each principal component.}
#' \item{cvexp}{Vector with the \% cumulative variance explained by each component.}
#' \item{rcvexp}{Vector with the \% proportion of cumulative variance explained by each component to that explained by the PCs.}
#' \item{scores}{the SPCs scores.}
#' \item{PCloadings}{Matrix with the PCs loadings scaled to unit \eqn{L_2} norm. }
#' \item{PCscores}{the PCs scores.}
#' \item{method}{method used to compute the loadings}
#' \item{corComp}{Matrix of correlations among the sparse components. Only if ncomps > 1.}
#' \item{Call}{The called with its arguments.}
#' }
#'
#' @references Giovanni M. Merola. 2014. \emph{Least Squares Sparse Principal
#' Component Analysis: a Backward Elimination approach to attain large
#' loadings.} Austr.&NZ Jou. Stats. 57, pp 391-429\cr\cr
#' Giovanni M. Merola and Gemai Chen. 2019. \emph{Sparse Principal Component Analysis: an
#' efficient Least Squares approach.} Jou. Multiv. Analysis 173, pp 366--382
#' \url{http://arxiv.org/abs/1406.1381}
#'
#' @seealso \code{\link{lsspca}}
#' @export
lsspca_blocked = function(X, alpha = 0.95, blocks_list = list(),
ncomps_per_block = 1, blocks_names = NA,
maxcard = 0, spcaMethod = c("u", "c", "p"), scalex = FALSE,
variableSelection = c("exhaustive", "seqrep", "backward", "forward", "lasso"),
lsspca_forLasso = FALSE, lasso_penalty = 0.5, rtn_all_spca = FALSE){
p = ncol(X)
n = nrow(X)
##-------------------------------------------##
## Creating the environment and error checking
##-------------------------------------------##
nblocks = length(blocks_list)
if(nblocks <= 1){
stop("must pass at least of 2 sets of indeies\ otherwise, use lsspca")
}
if(!all(sapply(blocks_list, function(x) all(is.integer(x))))){
if(is.vector(blocks_list) | is.factor(blocks_list))
blocks_list <- tapply(1:length(blocks_list), blocks_list, function(x) x)
else
stop("please pass a list of indeies or a vector or factor of codes as block_list")
}
if ((ncomps_per_block[1] == 0) | any(ncomps_per_block > sapply(blocks_list, length))){
stop("ncomps_per_block cannot be zero or larger than block size")
}
else{
ncomps_per_block = makevec(vec = ncomps_per_block, n = nblocks)
}
if (is.na(ncomps_per_block[1]))
ncomps_per_block = rep(1, nblocks)
else{
ncomps_per_block = makevec(vec = ncomps_per_block, n = nblocks)
}
if(is.na(blocks_names[1])){
if (!is.null(names(blocks_list)))
blocks_names = rep(names(blocks_list), times = ncomps_per_block)
else
blocks_names = paste0("Block", rep(1:nblocks, times = ncomps_per_block))
}
else
if (length(blocks_names) == length(blocks_list))
blocks_names = rep(blocks_names, times = ncomps_per_block)
else{
warning("something wring with blocks_names")
blocks_names = paste0("Block", rep(1:nblocks, times = ncomps_per_block))
}
maxcard = makevec(maxcard, nblocks)
if (any(maxcard == 0)){
maxcard[maxcard == 0] = sapply(blocks_list[maxcard == 0], length)
# message("0 maxcard set to maximum")
}
alpha = makevec(alpha, nblocks)
variableSelection = switch(stringr::str_sub(variableSelection[1], 1, 1), "e" = "exhaustive",
"b"= "backward", "f"= "forward", "s"= "seqrep", "l" = "lasso")
if (is.null(variableSelection))
stop("need to pass a valid search direction")
spcaMethod = spcaMethod[1]
##-------------------------------------------------------------------##
## create objects for output
##-------------------------------------------------------------------##
ncomps <- sum(ncomps_per_block)
card = rep(0, ncomps)
ind = as.list(ncomps)
vexp = rep(0, ncomps)
cvexp = rep(0, ncomps)
loadings = matrix(0, p, ncomps)
contributions = loadings
loadingslist = as.list(1:ncomps)
scores = matrix(0, n, ncomps)
if (is.null(colnames(X)))
namx = paste("Var", 1:p)
else
namx = colnames(X)
if (scalex == TRUE)
X = scale(X)
else
if (any(abs(colMeans(X)) > 10^-6)){
message("Centering columns of X to zero mean")
X = scale(X, scale = FALSE)
}
spca_list = list()
ind_list = list()
loadingslist = list()
ncompsdone = 0
##-------------------------------------------------------------------##
## compute of the components
##-------------------------------------------------------------------##
for (j in 1:nblocks){
spca_list[[j]] <- lsspca(X[, blocks_list[[j]]], alpha = alpha[j],
maxcard = maxcard[j], ncomps = ncomps_per_block[j],
spcaMethod = spcaMethod, scalex = FALSE,
variableSelection = variableSelection,
force.in = NULL, force.out = NULL, selectfromthese = NULL,
lsspca_forLasso = lsspca_forLasso, lasso_penalty = lasso_penalty)
ind = lapply(spca_list[[j]]$ind, function(x, ii) ii[x], ii = blocks_list[[j]])
ind_list = c(ind_list, ind)
loadingslist = c(loadingslist, spca_list[[j]]$loadingslist)
scores[, (ncompsdone + 1):(ncompsdone + ncomps_per_block[j])] = spca_list[[j]]$scores
loadings[blocks_list[[j]], (ncompsdone + 1):(ncompsdone + ncomps_per_block[j])] =
spca_list[[j]]$loadings
ncompsdone = ncompsdone + ncomps_per_block[j]
}
##-------------------------------------------------------------------##
## create output
##-------------------------------------------------------------------##
contributions = sweep(loadings, 2, colSums(abs(loadings)), "/")
rownames(contributions) = namx
colnames(contributions) = blocks_names
rownames(loadings) = namx
colnames(loadings) = blocks_names
names(ind_list) = blocks_names
names(loadingslist) = blocks_names
Rx = crossprod(X)
r_ee = eigen(Rx)
PCscores = X %*% r_ee$vec[, 1:ncomps]
vexpPC = r_ee$val
totvar = sum(r_ee$val)
vexpPC = r_ee$val[1: ncomps]/totvar
vexp = makevexp(scores, X)/totvar
cvexp = cumsum(vexp)
out = list(loadings = loadings, contributions = contributions,
vexp = vexp, vexpPC = vexpPC[1:ncompsdone],
cvexp = cvexp,
rcvexp = cvexp/cumsum(vexpPC[1:ncompsdone]),
scores = scores,
ncomps = ncomps,
ind = ind_list, cardinality = sapply(ind_list, length),
loadingslist = loadingslist, spcaMethod = spcaMethod)
out$corComp = cor(scores)
if (rtn_all_spca)
out$all_spca = spca_list
out$Call = match.call()
return(out)
}
makevec = function(vec, n){
le = length(vec)
if(le == 0)
stop("need to pass a vector of lenght > 0")
if(le < n){
m = n - le
v = c(vec, rep(vec[le], m))
}
if (le == n)
v = vec
return(v)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.