R/ssvdEN.R
In MOSS: Multi-Omic Integration via Sparse Singular Value Decomposition

Documented in ssvdEN

#' Sparse Singular Value Decomposition via Elastic Net.
#'
#' This function performs sparse singular value decomposition (SVD)
#' on a matrix 'x' via Elastic Net types of penalties.
#' For one PC (rank 1 case), the algorithm finds left and right Eigenvectors
#' (u and w, respectively), that minimize:
#'  ||x - u w'||_F^2 +
#'  lambda_w (alpha_w||w||_1 +
#'  (1 - alpha_w)||w||_F^2) +
#'  lambda_u (alpha||u||_1 + (1 - alpha_u)||u||_F^2)
#' such that ||u|| = 1.
#' The right Eigen vector is obtained from v = w / ||w|| and the
#'  corresponding Eigen value = u^T x v.
#' The penalties lambda_u and lambda_w are mapped from
#' specified desired degree of sparsity
#' (dg.spar.features & dg.spar.subjects).
#'
#' The function allows the use of the base svd function
#'  for relatively small problems.
#' For larger problems, functions for fast-partial SVD
#' (irlba and big_randomSVD, from irlba and bigstatsr packages) are used.
#'
#' @note When elastic net is used ('alpha.s' or 'alpha.f' in the (0,1)
#' interval), the resulting number of non-zero subjects or features
#' is larger than the 'dg.spar.subjects' or 'dg.spar.features' values.
#'  This allows to rapidly increase the number of non-zero elements when
#'   tuning the degrees of sparsity with function ssvdEN_sol_path. 
#' In order to get exact values for the degrees
#'  of sparsity at subjects or features levels, the user needs to
#' set the value of 'exact.dg' parameter from 'FALSE' (the default)
#' to 'TRUE'.
#'
#' @param O Numeric matrix of n subjects (rows) and p features (columns).
#'  It can be a File-backed Big Matrix.
#' @param n.PC Number of desired principal axes. Numeric. Defaults to 1.
#' @param dg.spar.features Degree of sparsity at the features level.
#'  Numeric. Defaults to NULL.
#' @param dg.spar.subjects Degree of sparsity at the subjects level.
#'  Numeric. Defaults to NULL.
#' @param maxit Maximum number of iterations for the sparse SVD algorithm.
#'  Numeric. Defaults to 500.
#' @param tol Convergence tolerance for the sparse SVD algorithm.
#'  Numeric. Defaults to 0.001.
#' @param alpha.f Elastic net mixture parameter at the features level.
#'  Measures the compromise between lasso (alpha = 1) and
#'  ridge (alpha = 0) types of sparsity. Numeric. Defaults to 1.
#' @param alpha.s Elastic net mixture parameter at the subjects level.
#' Defaults to alpha.s = 1.
#' @param center.arg Should O be centered? Logical. Defaults to TRUE.
#' @param scale.arg Should O be scaled? Logical. Defaults to TRUE.
#' @param approx.arg Should we use standard SVD or random approximations?
#'  Defaults to FALSE. If TRUE & is(O,'matrix') == TRUE, irlba is called.
#'   If TRUE & is(O, "FBM") == TRUE, big_randomSVD is called.
#' @param svd.0 List containing an initial SVD. Defaults to NULL.
#' @param s.values Should the singular values be calculated? Logical.
#' Defaults to TRUE.
#' @param ncores Number of cores used by big_randomSVD.
#' Default does not use parallelism. Ignored when class(O)!=FBM.
#' @param exact.dg Should we compute exact degrees of sparsity? Logical.
#' Defaults to FALSE. Only relevant When alpha.s or alpha.f are
#' in the (0,1) interval and exact.dg = TRUE.
#' @return A list with the results of the (sparse) SVD, containing:
#' \itemize{
#' \item u: Matrix with left eigenvectors.
#' \item v: Matrix with right eigenvectors.
#' \item d: Matrix with singular values.
#' }
#' @export
#' @references
#'  \itemize{
#'    \item Shen, Haipeng, and Jianhua Z. Huang. 2008.
#'    Sparse Principal Component Analysis via Regularized Low
#'    Rank Matrix Approximation.
#'    Journal of Multivariate Analysis 99 (6).
#'     Academic Press:1015_34.
#'    \item Baglama, Jim, Lothar Reichel, and B W Lewis. 2018.
#'    Irlba: Fast Truncated Singular Value Decomposition and
#'    Principal Components Analysis for Large Dense and Sparse Matrices.
#'  }
#' @examples
#' library("MOSS")
#'
#' # Extracting simulated omic blocks.
#' sim_blocks <- simulate_data()$sim_blocks
#'
#' X <- sim_blocks$`Block 3`
#'
#' # Equal to svd solution: exact singular vectors and values.
#' out <- ssvdEN(X, approx.arg = FALSE)
#' \donttest{
#' # Uses irlba to get approximated singular vectors and values.
#' library(irlba)
#' out <- ssvdEN(X, approx.arg = TRUE)
#' # Uses bigstatsr to get approximated singular vectors and values
#' # of a Filebacked Big Matrix.
#' library(bigstatsr)
#' out <- ssvdEN(as_FBM(X), approx.arg = TRUE)
#'
#' # Sampling a number of subjects and features for a fix sparsity degree.
#' s.u <- sample(1:nrow(X), 1)
#' s.v <- sample(1:ncol(X), 1)
#'
#' # Lasso penalties.
#' all.equal(sum(ssvdEN(X, dg.spar.features = s.v)$v != 0), s.v)
#' all.equal(
#'   unique(colSums(ssvdEN(X, dg.spar.features = s.v, n.PC = 5)$v
#'   != 0)),
#'   s.v
#' )
#'
#' all.equal(sum(ssvdEN(X, dg.spar.subjects = s.u)$u != 0), s.u)
#' all.equal(
#'   unique(colSums(ssvdEN(X, dg.spar.subjects = s.u, n.PC = 5)$u
#'   != 0)),
#'   s.u
#' )
#'
#' out <- ssvdEN(X, dg.spar.features = s.v, dg.spar.subjects = s.u)
#' all.equal(sum(out$u != 0), s.u)
#' all.equal(sum(out$v != 0), s.v)
#'
#' out <- ssvdEN(X,
#'   dg.spar.features = s.v, dg.spar.subjects = s.u,
#'   n.PC = 10
#' )
#' all.equal(unique(colSums(out$u != 0)), s.u)
#' all.equal(unique(colSums(out$v != 0)), s.v)
#'
#' # Ridge penalties.
#' all.equal(
#'   sum(ssvdEN(X, dg.spar.features = s.v, alpha.f = 0)$v != 0),
#'   ncol(X)
#' )
#' all.equal(
#'   unique(colSums(ssvdEN(X,
#'     dg.spar.features = s.v, n.PC = 5,
#'     alpha.f = 0
#'   )$v != 0)),
#'   ncol(X)
#' )
#'
#' all.equal(
#'   sum(ssvdEN(X, dg.spar.subjects = s.u, alpha.s = 0)$u != 0),
#'   nrow(X)
#' )
#' all.equal(
#'   unique(colSums(ssvdEN(X,
#'     dg.spar.subjects = s.u, n.PC = 5,
#'     alpha.s = 0
#'   )$u != 0)),
#'   nrow(X)
#' )
#'
#' out <- ssvdEN(X,
#'   dg.spar.features = s.v, dg.spar.subjects = s.u,
#'   alpha.f = 0, alpha.s = 0
#' )
#' all.equal(sum(out$u != 0), nrow(X))
#' all.equal(sum(out$v != 0), ncol(X))
#'
#' out <- ssvdEN(X,
#'   dg.spar.features = s.v, dg.spar.subjects = s.u,
#'   n.PC = 10, alpha.f = 0,
#'   alpha.s = 0
#' )
#' all.equal(unique(colSums(out$u != 0)), nrow(X))
#' all.equal(unique(colSums(out$v != 0)), ncol(X))
#'
#' # Elastic Net penalties.
#' sum(ssvdEN(X, dg.spar.features = s.v, alpha.f = 0.5)$v != 0) >= s.v
#' all(unique(colSums(ssvdEN(X,
#'   dg.spar.features = s.v, n.PC = 5,
#'   alpha.f = 0.5
#' )$v != 0)) >= s.v)
#'
#' sum(ssvdEN(X, dg.spar.subjects = s.u, alpha.s = 0.5)$u != 0) >= s.u
#' all(unique(colSums(ssvdEN(X,
#'   dg.spar.subjects = s.u, n.PC = 5,
#'   alpha.s = 0.5
#' )$u != 0)) >= s.u)
#'
#' # Elastic Net penalties with exact degrees of sparsity.
#' sum(ssvdEN(X,
#'   dg.spar.features = s.v, alpha.f = 0.5,
#'   exact.dg = TRUE
#' )$v != 0) == s.v
#' all(unique(colSums(ssvdEN(X,
#'   dg.spar.features = s.v, n.PC = 5,
#'   alpha.f = 0.5, exact.dg = TRUE
#' )$v != 0)) == s.v)
#'
#' sum(ssvdEN(X,
#'   dg.spar.subjects = s.u, alpha.s = 0.5,
#'   exact.dg = TRUE
#' )$u != 0) == s.u
#' all(unique(colSums(ssvdEN(X,
#'   dg.spar.subjects = s.u, n.PC = 5,
#'   alpha.s = 0.5, exact.dg = TRUE
#' )$u != 0)) == s.u)
#' }
ssvdEN <- function(O, n.PC = 1, dg.spar.features = NULL,
                   dg.spar.subjects = NULL, maxit = 500, tol = 0.001,
                   scale.arg = TRUE, center.arg = TRUE,
                   approx.arg = FALSE, alpha.f = 1, alpha.s = 1,
                   svd.0 = NULL, s.values = TRUE,
                   ncores = 1, exact.dg = FALSE) {

  # Checking if the right packages are present to handle approximated SVDs.
  if (approx.arg == TRUE) {
    if (inherits(O, "FBM") == TRUE) {
      if (!requireNamespace("bigstatsr",
        quietly = TRUE
      )) {
        stop("Package bigstatsr needs to be installed to handle FBM objects.")
      }
    }
    else {
      if (!requireNamespace("irlba", quietly = TRUE)) {
        stop("Package irlba needs to be installed to get fast 
           truncated SVD solutions.")
      }
    }
  }

  # Getting matrix dimensions.
  n <- nrow(O)
  p <- ncol(O)

  # If dg.spar.subjects = NULL, get full subjects' scores.
  if (is.null(dg.spar.subjects) == TRUE) dg.spar.subjects <- n

  # If dg.spar.features = NULL, get full features' scores.
  if (is.null(dg.spar.features) == TRUE) dg.spar.features <- p

  if (is.null(svd.0) == TRUE) {
    if (approx.arg == TRUE) {
      if (inherits(O, "FBM") == TRUE) {
        s <- bigstatsr::big_randomSVD(O,
          fun.scaling = bigstatsr::big_scale(
            center = center.arg,
            scale = scale.arg
          ),
          k = n.PC, ncores = ncores
        )
      } else {
        O <- scale(O, center = center.arg, scale = scale.arg)
        s <- irlba::irlba(O, nu = n.PC, nv = n.PC)[c("u", "v", "d")]
      }
    }
    else {
      O <- scale(O, center = center.arg, scale = scale.arg)
      s <- svd(O, nu = n.PC, nv = n.PC)
      s$d <- s$d[1:n.PC]
    }
  }
  else {
    s <- svd.0
  }

  if (dg.spar.features <= p || dg.spar.subjects <= n) {
    dg.spar.features <- p - dg.spar.features
    dg.spar.subjects <- n - dg.spar.subjects

    if (length(dg.spar.features) != n.PC) {
      dg.spar.features <- rep(dg.spar.features, length.out = n.PC)
    }
    if (length(dg.spar.subjects) != n.PC) {
      dg.spar.subjects <- rep(dg.spar.subjects, length.out = n.PC)
    }

    # Elastic net solutions.
    softEN <- function(y, dg.spar, alpha) {
      a <- abs(y)
      z <- sort(a)

      # Mapping dg to lambda scale.arg
      lambda <- z[dg.spar]
      b <- a - lambda * alpha
      y.out <- sign(y) * ifelse(b > 0, b, 0) / (1 + (1 - alpha)
      * lambda)

      return(y.out)
    }
    for (j in 1:n.PC) s$v[, j] <- s$d[j] * s$v[, j]

    # Coordinate descend: iterates until convergence or until
    # reaching maxit.
    iter <- 1
    delta_u <- Inf
    shrink.features <- seq_len(length(dg.spar.features))
    shrink.features[which(dg.spar.features == 0)] <- 0
    shrink.subjects <- seq_len(length(dg.spar.subjects))
    shrink.subjects[which(dg.spar.subjects == 0)] <- 0

    while (delta_u > tol && iter < maxit) {
      u <- s$u
      usx <- t(s$u) %*% O
      s$v <- qr.Q(qr(t(usx)))

      # Solving v for fixed u.
      if (sum(shrink.features) > 0) {
        for (j in shrink.features) {
          s$v[, j] <- softEN(
            s$v[, j],
            dg.spar.features[j],
            alpha.f
          )
          if (exact.dg == TRUE & alpha.f > 0 & alpha.f < 1) {
            if (sum(s$v[, j] != 0) > (p - dg.spar.features[j])) {
              aux <- rep(0, p)
              tmp <- order(abs(s$v[, j]), decreasing = T)[1:(p - dg.spar.features[j])]
              aux[tmp] <- s$v[tmp, j]
              s$v[, j] <- aux
            }
          }
        }
      }
      norm. <- stats::sd(s$v)
      if (norm. == 0) norm. <- 1
      s$v <- s$v / norm.

      # Getting orthogonal subject's scores.
      xsv <- O %*% s$v
      s$u <- qr.Q(qr(xsv))

      # Solving u for fixed v.
      if (sum(shrink.subjects) > 0) {
        for (j in shrink.subjects) {
          s$u[, j] <- softEN(
            s$u[, j],
            dg.spar.subjects[j],
            alpha.s
          )
          if (exact.dg == TRUE & alpha.s > 0 & alpha.s < 1) {
            if (sum(s$u[, j] != 0) > (n - dg.spar.subjects[j])) {
              aux <- rep(0, n)
              tmp <- order(abs(s$u[, j]), decreasing = T)[1:(n - dg.spar.subjects[j])]
              aux[tmp] <- s$u[tmp, j]
              s$u[, j] <- aux
            }
          }
        }
      }
      norm. <- stats::sd(s$u)
      if (norm. == 0) norm. <- 1
      s$u <- s$u / norm.

      # Fixing direction
      s$u <- sweep(
        s$u, 2,
        apply(xsv, 2, function(x) sign(utils::head(O[O[, 1] != 0, 1], 1))) /
          apply(s$u, 2, function(x) sign(utils::head(O[O[, 1] != 0, 1], 1))),
        `*`
      )

      # Calculating convergence rate in terms of Frobenious norm.
      delta_u <- norm(u - s$u, "f") / (n * n.PC)
      iter <- iter + 1
    }
    if (iter >= maxit) warning("Maximum number of iterations reached 
 before convergence: solution may not be optimal. 
                               Consider increasing 'maxit'.")

    # Getting scales ("Eigen" values).
    s$v <- tcrossprod(s$v, diag(
      1 / sqrt(apply(s$v, 2, crossprod)),
      ncol(s$v), ncol(s$v)
    ))
    if (s.values == TRUE) {
      s$d <- crossprod(s$u, O %*% s$v)
    } else {
      s$d <- NULL
    }
  }
  return(s)
}