#' GSVD
#'
#' @description The generalized singular value decomposition (GSVD) generalizes the standard SVD (see \code{\link{svd}}) procedure through addition of (optional) constraints applied to the rows and/or columns of a matrix.
#' @details A package specifically designed for the generalized eigen decomposition, generalized singular value decomposition, and generalized partial least squares-singular value decomposition.
#' Each decomposition allows for the use of weights (or constraints) applied to the columns and/or rows of each data matrix.
#' This package provides these decompositions as the core for a very large list of standard statistical---and typically multivariate---approaches, including but not limited to principal components analysis, metric multidimensional scaling, standard and multiple correspondence analysis, partial least squares, canonical correlation analysis, and multivariate linear regression/reduced rank regression/redundancy analysis.
#' @seealso \code{\link{gsvd}}, \code{\link{geigen}}, \code{\link{gplssvd}}
#'
#' @references
#' Abdi, H. (2007). Singular Value Decomposition (SVD) and Generalized Singular Value Decomposition (GSVD). In N.J. Salkind (Ed.): \emph{Encyclopedia of Measurement and Statistics}. Thousand Oaks (CA): Sage. pp. 907-912.\cr
#' Beaton, D., Fatt, C. R. C., & Abdi, H. (2014). An ExPosition of multivariate analysis with the singular value decomposition in R. \emph{Computational Statistics & Data Analysis}, 72, 176-189. \cr
#' Cherry, S. (1996). Singular Value Decomposition Analysis and Canonical Correlation Analysis. Journal of Climate, 9(9), 2003–2009. Retrieved from JSTOR. \cr
#' Gower, J. C., Gardner-Lubbe, S., & Le Roux, N. J. DATA ANALYSIS: GOOD BUT…. Statistica Applicata - Italian Journal of Applied Statistics, 29. \cr
#' Greenacre, M. (1984). Theory and applications of correspondence analysis. \emph{Academic Press.}\cr
#' Holmes, S. (2008). Multivariate data analysis: the French way. \emph{In Probability and statistics: Essays in honor of David A. Freedman (pp. 219-233)}. Institute of Mathematical Statistics. \cr
#' Holmes, S., & Josse, J. (2017). Discussion of “50 Years of Data Science”. \emph{Journal of Computational and Graphical Statistics}, 26(4), 768-769. \cr
#' Husson, F., Josse, J., & Saporta, G. (2016). Jan de Leeuw and the French school of data analysis. \emph{Journal of Statistical Software}, 73(6), 1-17. \cr
#' Jolliffe, I. T., & Cadima, J. (2016). Principal component analysis: A review and recent developments. Philosophical Transactions. Series A, Mathematical, Physical, and Engineering Sciences, 374(2065).\cr
#' Lebart L., Morineau A., & Warwick K. (1984). Multivariate Descriptive Statistical Analysis. \emph{J. Wiley}, New York. \cr
#' Nguyen, L. H., & Holmes, S. (2019). Ten quick tips for effective dimensionality reduction. PLOS Computational Biology, 15(6), e1006907. https://doi.org/10.1371/journal.pcbi.1006907 \cr
#' Yanai, H., Takeuchi, K., & Takane, Y. (2011). Projection Matrices, Generalized Inverse Matrices, and Singular Value Decomposition. \emph{Springer-Verlag, New-York.}\cr
#'
#' @keywords internal
#'
"_PACKAGE"
#' @export
#'
#' @title Generalized singular value decomposition
#'
#' @description
#' \code{gsvd} takes in left (\code{LW}) and right (\code{RW}) constraints (usually diagonal matrices, but any positive semi-definite matrix is fine) that are applied to the data (\code{X}).
#' Left and right constraints are used for the orthogonality conditions.
#'
#' @param X a data matrix to decompose
#' @param LW \bold{L}eft \bold{W}eights -- the constraints applied to the left side (rows) of the matrix and thus left singular vectors.
#' @param RW \bold{R}ight \bold{W}eights -- the constraints applied to the right side (columns) of the matrix and thus right singular vectors.
#' @param k total number of components to return though the full variance will still be returned (see \code{d_full}). If 0, the full set of components are returned.
#' @param tol default is .Machine$double.eps. A tolerance level for eliminating effectively zero (small variance), negative, imaginary eigen/singular value components.
#'
#' @return A list with ten elements:
#' \item{d_full}{A vector containing the singular values of X above the tolerance threshold (based on eigenvalues).}
#' \item{l_full}{A vector containing the eigen values of X above the tolerance threshold (\code{tol}).}
#' \item{d}{A vector of length \code{min(length(d_full), k)} containing the retained singular values of X}
#' \item{l}{A vector of length \code{min(length(l_full), k)} containing the retained eigen values of X}
#' \item{u}{Left (rows) singular vectors. Dimensions are \code{nrow(X)} by k.}
#' \item{p}{Left (rows) generalized singular vectors. Dimensions are \code{nrow(X)} by k.}
#' \item{fi}{Left (rows) component scores. Dimensions are \code{nrow(X)} by k.}
#' \item{v}{Right (columns) singular vectors. Dimensions are \code{ncol(X)} by k.}
#' \item{q}{Right (columns) generalized singular vectors. Dimensions are \code{ncol(X)} by k.}
#' \item{fj}{Right (columns) component scores. Dimensions are \code{ncol(X)} by k.}
#'
#' @seealso \code{\link{tolerance_svd}}, \code{\link{geigen}} and \code{\link{gplssvd}}
#'
#' @examples
#'
#' data(wine, package="GSVD")
#' wine.objective <- wine$objective
#' ## Principal components analysis: "covariance"
#' ## "covariance" PCA
#' cov.pca.data <- scale(wine.objective,scale=FALSE)
#' cov.pca.res <- gsvd(cov.pca.data)
#'
#' ## Principal components analysis: "correlation"
#' cor.pca.data <- scale(wine.objective,scale=TRUE)
#' cor.pca.res <- gsvd(cor.pca.data)
#'
#' ## Principal components analysis: "correlation" with the covariance matrix and constraints
#' cor.pca.res2 <- gsvd(cov.pca.data,RW=1/apply(wine.objective,2,var))
#'
#'
#' ## Correspondence analysis
#' data(authors, package="GSVD")
#' Observed <- authors/sum(authors)
#' row.w <- rowSums(Observed)
#' col.w <- colSums(Observed)
#' Expected <- row.w %o% col.w
#' Deviations <- Observed - Expected
#' ca.res <- gsvd(Deviations,1/row.w,1/col.w)
#'
#'
#' ## Multiple correspondence analysis
#' data("snps.druguse", package="GSVD")
#' X <- model.matrix(~ .,
#' data=snps.druguse$DATA1,
#' contrasts.arg = lapply(snps.druguse$DATA1, contrasts, contrasts=FALSE))[,-1]
#' Observed <- X/sum(X)
#' row.w <- rowSums(Observed)
#' col.w <- colSums(Observed)
#' Expected <- row.w %o% col.w
#' Deviations <- Observed - Expected
#' ca.res <- gsvd(Deviations,1/row.w,1/col.w)
#'
#' @author Derek Beaton
#' @keywords multivariate
gsvd <- function(X, LW, RW, k = 0, tol = .Machine$double.eps){

  # Preliminaries: capture dimensions before any coercion or weighting.
  X_dimensions <- dim(X)

  ## input sanity check adapted from MASS::ginv()
  if (length(X_dimensions) > 2 || !(is.numeric(X) || is.complex(X))) {
    stop("gsvd: 'X' must be a numeric or complex matrix")
  }
  if (!is.matrix(X)) {
    X <- as.matrix(X)
  }

  # Stopping conditions for the left weights (LW):
  #   * a matrix LW must be square, conformable with nrow(X), and not all zeros
  #   * a vector LW must have length nrow(X) with strictly positive values
  LW_is_missing <- missing(LW)
  if (!LW_is_missing) {
    LW_is_vector <- is.vector(LW)
    if (!LW_is_vector) {
      if (nrow(LW) != ncol(LW) || nrow(LW) != X_dimensions[1]) {
        stop("gsvd: nrow(LW) does not equal ncol(LW) or nrow(X)")
      }
      # if you gave me all zeros, I'm stopping.
      if (is_empty_matrix(LW)) {
        stop("gsvd: LW is empty (i.e., all 0s)")
      }
    } else {
      if (length(LW) != X_dimensions[1]) {
        stop("gsvd: length(LW) does not equal nrow(X)")
      }
      # if you gave me all zeros (or negatives), I'm stopping.
      if (!are_all_values_positive(LW)) {
        stop("gsvd: LW is not strictly positive values")
      }
    }
  }

  # Stopping conditions for the right weights (RW); mirrors the LW checks
  # but conformability is against ncol(X).
  RW_is_missing <- missing(RW)
  if (!RW_is_missing) {
    RW_is_vector <- is.vector(RW)
    if (!RW_is_vector) {
      if (nrow(RW) != ncol(RW) || nrow(RW) != X_dimensions[2]) {
        stop("gsvd: nrow(RW) does not equal ncol(RW) or ncol(X)")
      }
      # if you gave me all zeros, I'm stopping.
      if (is_empty_matrix(RW)) {
        stop("gsvd: RW is empty (i.e., all 0s)")
      }
    } else {
      if (length(RW) != X_dimensions[2]) {
        stop("gsvd: length(RW) does not equal ncol(X)")
      }
      # if you gave me all zeros (or negatives), I'm stopping.
      if (!are_all_values_positive(RW)) {
        stop("gsvd: RW is not strictly positive values")
      }
    }
  }

  ## convenience checks *could* be removed* if problematic
  # Convenience conversions meant to minimize LW's memory footprint:
  #   * a diagonal weight matrix collapses to its diagonal vector
  #   * unit weights are equivalent to no weights, so drop them entirely
  if (!LW_is_missing) {
    if (!LW_is_vector && is_diagonal_matrix(LW)) {
      LW <- diag(LW)
      LW_is_vector <- TRUE  # now it's a vector
    }
    if (LW_is_vector && all(LW == 1)) {
      LW_is_missing <- TRUE
      LW <- substitute()  # neat! this makes it go missing
    }
  }
  # Same conversions for RW.
  if (!RW_is_missing) {
    if (!RW_is_vector && is_diagonal_matrix(RW)) {
      RW <- diag(RW)
      RW_is_vector <- TRUE  # now it's a vector
    }
    if (RW_is_vector && all(RW == 1)) {
      RW_is_missing <- TRUE
      RW <- substitute()  # neat! this makes it go missing
    }
  }

  # Weight the rows of X: X <- LW^(1/2) %*% X.
  if (!LW_is_missing) {
    if (LW_is_vector) {
      sqrt_LW <- sqrt(LW)
      # column-wise recycling replaces sweep(X, 1, sqrt_LW, "*")
      X <- X * sqrt_LW
    } else {
      LW <- as.matrix(LW)
      X <- sqrt_psd_matrix(LW) %*% X
    }
  }
  # Weight the columns of X: X <- X %*% RW^(1/2).
  if (!RW_is_missing) {
    if (RW_is_vector) {
      sqrt_RW <- sqrt(RW)
      # t() trick replaces sweep(X, 2, sqrt_RW, "*")
      X <- t(t(X) * sqrt_RW)
    } else {
      RW <- as.matrix(RW)
      X <- X %*% sqrt_psd_matrix(RW)
    }
  }

  # The decomposition itself; k <= 0 means "return all components".
  if (k <= 0) {
    k <- min(X_dimensions)
  }
  res <- tolerance_svd(X, nu = k, nv = k, tol = tol)
  res$d_full <- res$d
  res$l_full <- res$d_full^2
  # safety check: the effective rank (above tol) may be smaller than k
  components.to.return <- min(length(res$d_full), k)
  keep <- seq_len(components.to.return)  # seq_len() is safe even when 0 survive
  res$d <- res$d_full[keep]
  res$l <- res$d^2
  res$u <- res$u[, keep, drop = FALSE]
  res$v <- res$v[, keep, drop = FALSE]

  # Back-transform to generalized singular vectors (p, q) and component
  # scores (fi, fj) according to the weights:
  #   p = LW^(-1/2) u,  fi = LW p diag(d);  q = RW^(-1/2) v,  fj = RW q diag(d)
  if (!LW_is_missing) {
    if (LW_is_vector) {
      # replaces sweep(res$u, 1, 1/sqrt_LW, "*")
      res$p <- res$u / sqrt_LW
      # replaces sweep(sweep(res$p, 1, LW, "*"), 2, res$d, "*")
      res$fi <- t(t(res$p * LW) * res$d)
    } else {
      res$p <- invsqrt_psd_matrix(LW) %*% res$u
      res$fi <- t(t(LW %*% res$p) * res$d)
    }
  } else {
    # no left weights: generalized vectors equal the singular vectors
    res$p <- res$u
    res$fi <- t(t(res$p) * res$d)
  }
  if (!RW_is_missing) {
    if (RW_is_vector) {
      res$q <- res$v / sqrt_RW
      res$fj <- t(t(res$q * RW) * res$d)
    } else {
      res$q <- invsqrt_psd_matrix(RW) %*% res$v
      res$fj <- t(t(RW %*% res$q) * res$d)
    }
  } else {
    # no right weights: generalized vectors equal the singular vectors
    res$q <- res$v
    res$fj <- t(t(res$q) * res$d)
  }

  # Carry the data's dimnames onto the row- and column-side results.
  rownames(res$fi) <- rownames(res$u) <- rownames(res$p) <- rownames(X)
  rownames(res$fj) <- rownames(res$v) <- rownames(res$q) <- colnames(X)

  class(res) <- c("gsvd", "GSVD", "list")
  return(res)
}
# NOTE(review): the two lines below are stray website-embed text from a
# documentation scrape, not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.