rSVD: Randomized Singular Value Decomposition

Documented in rsvd

#' @title  Randomized Singular Value Decomposition (rsvd).
#
#' @description The randomized SVD computes the near-optimal low-rank approximation of a rectangular matrix
#' using a fast probablistic algorithm.
#
#' @details
#' The singular value decomposition (SVD) plays an important role in data analysis, and scientific computing.
#' Given a rectangular \eqn{(m,n)} matrix \eqn{A}, and a target rank \eqn{k << min(m,n)}, 
#' the SVD factors the input matrix \eqn{A} as
#' 
#' \deqn{ A  =  U_{k} diag(d_{k}) V_{k}^\top }{ A = U diag(d) t(V)}
#' 
#' The \eqn{k} left singular vectors are the columns of the
#' real or complex unitary matrix \eqn{U}. The \eqn{k} right singular vectors are the columns
#' of the real or complex unitary matrix \eqn{V}. The \eqn{k} dominant singular values are the 
#' entries of \eqn{d}, and non-negative and real numbers.
#'
#' \eqn{p} is an oversampling parameter to improve the approximation.
#' A value of at least 10 is recommended, and \eqn{p=10} is set by default.
#'
#' The parameter \eqn{q} specifies the number of power (subspace) iterations
#' to reduce the approximation error. The power scheme is recommended,
#' if the singular values decay slowly. In practice, 2 or 3 iterations
#' achieve good results, however, computing power iterations increases the
#' computational costs. The power scheme is set to \eqn{q=2} by default.
#'
#' If \eqn{k > (min(n,m)/4)}, a deterministic partial or truncated \code{\link{svd}}
#' algorithm might be faster.
#'
#'
#' @param A       array_like; \cr
#'                a real/complex \eqn{(m, n)} input matrix (or data frame) to be decomposed.
#'
#' @param k       integer; \cr
#'                specifies the target rank of the low-rank decomposition. \eqn{k} should satisfy \eqn{k << min(m,n)}.
#'
#' @param nu      integer, optional; \cr
#'                number of left singular vectors to be returned. \eqn{nu} must be between \eqn{0} and \eqn{k}.
#'
#' @param nv      integer, optional; \cr
#'                number of right singular vectors to be returned. \eqn{nv} must be between \eqn{0} and \eqn{k}.
#'
#' @param p       integer, optional; \cr
#'                oversampling parameter (by default \eqn{p=10}).
#'
#' @param q       integer, optional; \cr
#'                number of additional power iterations (by default \eqn{q=2}).
#'
#' @param sdist   string \eqn{c( 'unif', 'normal', 'rademacher')}, optional; \cr
#'                specifies the sampling distribution of the random test matrix: \cr
#'                		\eqn{'unif'} :  Uniform `[-1,1]`. \cr
#'                		\eqn{'normal}' (default) : Normal `~N(0,1)`. \cr
#'                		\eqn{'rademacher'} : Rademacher random variates. \cr
#'
#'@return \code{rsvd} returns a list containing the following three components:
#'\describe{
#'\item{d}{  array_like; \cr
#'           singular values; vector of length \eqn{(k)}.
#'}
#'
#'\item{u}{  array_like; \cr
#'           left singular vectors; \eqn{(m, k)} or \eqn{(m, nu)} dimensional array.
#'}
#'
#'\item{v}{  array_like; \cr
#'           right singular vectors; \eqn{(n, k)} or \eqn{(n, nv)} dimensional array. \cr
#'}
#'}
#'
#' @note The singular vectors are not unique and only defined up to sign
#' (a constant of modulus one in the complex case). If a left singular vector
#' has its sign changed, changing the sign of the corresponding right vector
#' gives an equivalent decomposition.
#'
#'
#' @references
#' \itemize{
#'  \item [1] N. B. Erichson, S. Voronin, S. L. Brunton and J. N. Kutz. 2019.
#'        Randomized Matrix Decompositions Using {R}. 
#'        Journal of Statistical Software, 89(11), 1-48.
#'        \url{http://doi.org/10.18637/jss.v089.i11}.
#' 
#'  \item  [2] N. Halko, P. Martinsson, and J. Tropp.
#'          "Finding structure with randomness: probabilistic
#'          algorithms for constructing approximate matrix
#'          decompositions" (2009).
#'          (available at arXiv \url{http://arxiv.org/abs/0909.4061}).
#' }
#'
#' @author N. Benjamin Erichson, \email{erichson@berkeley.edu}
#' 
#' @seealso \code{\link{svd}}, \code{\link{rpca}}
#' 
#' @examples
#'library('rsvd')
#'
#'# Create a n x n Hilbert matrix of order n,
#'# with entries H[i,j] = 1 / (i + j + 1).
#'hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
#'H <- hilbert(n=50)
#'
#'# Low-rank (k=10) matrix approximation using rsvd
#'k=10
#'s <- rsvd(H, k=k)
#'Hre <- s$u %*% diag(s$d) %*% t(s$v) # matrix approximation
#'print(100 * norm( H - Hre, 'F') / norm( H,'F')) # percentage error

#'# Compare to truncated base svd
#'s <- svd(H)
#'Hre <- s$u[,1:k] %*% diag(s$d[1:k]) %*% t(s$v[,1:k]) # matrix approximation
#'print(100 * norm( H - Hre, 'F') / norm( H,'F')) # percentage error
#'

#' @export
rsvd <- function(A, k=NULL, nu=NULL, nv=NULL, p=10, q=2, sdist="normal") UseMethod("rsvd")

#' @export
rsvd.default <- function(A, k=NULL, nu=NULL, nv=NULL, p=10, q=2, sdist="normal") {
    #*************************************************************************
    #***        Author: N. Benjamin Erichson <nbe@st-andrews.ac.uk>        ***
    #***                              <2015>                               ***
    #***                       License: BSD 3 clause                       ***
    #*************************************************************************

    #Dim of input matrix
    m <- nrow(A)
    n <- ncol(A)

    #Flipp matrix, if wide
    if(m < n){
      A <- H(A)
      m <- nrow(A)
      n <- ncol(A)
      flipped <- TRUE
    } else flipped <- FALSE

    #Set target rank
    if(is.null(k)) k = n
    if(k > n) k <- n
    if(is.character(k)) stop("Target rank is not valid!")
    if(k < 1) stop("Target rank is not valid!")

    #Set oversampling parameter
    l <- round(k) + round(p)
    if(l > n) l <- n
    if(l < 1) stop("Target rank is not valid!")

    #Check if array is real or complex
    if(is.complex(A)) {
      isreal <- FALSE
    } else {
      isreal <- TRUE
    }

    #Set number of singular vectors
    if(is.null(nu)) nu <- k
    if(is.null(nv)) nv <- k
    if(nu < 0) nu <- 0
    if(nv < 0) nv <- 0
    if(nu > k) nu <- k
    if(nv > k) nv <- k
    if(flipped==TRUE) {
      temp <- nu
      nu <- nv
      nv <- temp
    }


    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Generate a random sampling matrix O
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    O <- switch(sdist,
                normal = matrix(stats::rnorm(l*n), n, l),
                unif = matrix(stats::runif(l*n), n, l),
                rademacher = matrix(sample(c(-1,1), (l*n), replace = TRUE, prob = c(0.5,0.5)), n, l),
                stop("Selected sampling distribution is not supported!"))

    if(isreal==FALSE) {
      O <- O + switch(sdist,
                      normal = 1i * matrix(stats::rnorm(l*n), n, l),
                      unif = 1i * matrix(stats::runif(l*n), n, l),
                      rademacher = 1i * matrix(sample(c(-1,1), (l*n), replace = TRUE, prob = c(0.5,0.5)), n, l),
                      stop("Selected sampling distribution is not supported!"))
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Build sample matrix Y : Y = A * O
    #Note: Y should approximate the range of A
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Y <- A %*% O
    remove(O)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Orthogonalize Y using economic QR decomposition: Y=QR
    #If q > 0 perfrom q subspace iterations
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if( q > 0 ) {
        for( i in 1:q) {
            Y <- qr.Q( qr(Y, complete = FALSE) , complete = FALSE )
            Z <- crossprod_help(A , Y )
            Z <- qr.Q( qr(Z, complete = FALSE) , complete = FALSE )
            Y <- A %*% Z
        }#End for
        remove(Z)
    }#End if

    Q <- qr.Q( qr(Y, complete = FALSE) , complete = FALSE )
    remove(Y)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Project the data matrix a into a lower dimensional subspace
    #B := Q.T * A
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    B <- crossprod_help(Q , A )

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Singular Value Decomposition
    #Note: B =: U * S * Vt
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    rsvdObj <- svd(B, nu=nu, nv=nv) # Compute SVD
    rsvdObj$d <- rsvdObj$d[1:k] # Truncate singular values
    
    if(nu != 0) rsvdObj$u <- Q %*% rsvdObj$u # Recover left singular vectors

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Flip SVD back
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if(nu == 0){ rsvdObj$u <- NULL}
    if(nv == 0){ rsvdObj$v <- NULL}
    if(flipped == TRUE) {
        u_temp <- rsvdObj$u
        rsvdObj$u <- rsvdObj$v
        rsvdObj$v <- u_temp
    }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #Return
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    class(rsvdObj) <- "rsvd"
    return(rsvdObj) 
    
} # End rsvd