LSMF: Least squares matrix factorization

Documented in rand.init

#' Random initialization of factor models
#'
#' @description Initialize factor models "W" and "H" by sampling values from A, simulating comparable random distributions, or specifying uniform distribution bounds 
#'
#' @details
#' Methods support positive, non-zero initializations only.
#' \itemize{
#'    \item random: samples values from a random normal distribution with the same mean as non-zero values in A and the same standard deviation
#'    \item sample: samples non-zero values in A within \code{n.sd} standard deviations of the mean of non-zero values in A
#'    \item bounded: samples values from a random uniform distribution bounded between values specified in "bounds"
#' }
#'
#' Values in W and H will automatically be scaled to approximate the distribution of A upon multiplication. Possible scenarios:
#' \itemize{
#'    \item W.method and H.method = "sample" or "random":  W and H are the square roots of their respective sampled values
#'    \item W.method and H.method = "bounded" or other given distribution, no normalization is applied
#'    \item W.method = "sample" or "random" and H.method = "bounded" or other given distribution (or the reverse): H is assigned first and is held constant, values in W are raised to the power of log(mean(A)/mean(H))/log(mean(W)) such that mean(W) * mean(H) approximates mean(A)
#' }
#'
#' Basic usage:
#' \itemize{
#'    \item For initialization of most models, "random" should suffice and is the ideal method for fast convergence to an accurate and robust local minima
#'    \item For one-sided bernoulli factorization, initiate the bernoulli matrix bounded between for example \code{bounds = c(0.4, 0.6)} and the non-bounded matrix with "random"
#'    \item For one-sided multinomial factorization, initiate the multinomial matrix with the multinomial distribution or bounds corresponding to the limits of the distribution, and the non-bounded matrix with "random"
#'    \item For models where "A" is comprised of many different values, "sample" may be a good option which is truer to the original distribution than a simple normal "random" method
#' }
#' 
#' If these initialization methods do not satisfy, LSMF also takes matrices as initializations for "W" and "H".
#'
#' @param A input dgCMatrix
#' @param k rank
#' @param W.method One of c("sample", "random", "bounded")
#' @param H.method One of c("sample", "random", "bounded")
#' @param n.sd number of standard deviations from the mean over which to sample values in A (excluding zeros). Applies only to method = c("sample", "random")
#' @param bounds sample values between bounds if method = "bounded"
#' @param seed never ever not remember to set the seed. It's NULL by default
#' @return list of matrices "W" and "H"
#' @seealso \code{\link{nndsvd}}, \code{\link{lsmf}}
rand.init <- function(A, k, W.method = "sample", H.method = "sample", n.sd = 1, bounds = c(1e-5, 1), seed = NULL) {
  if (!is.null(seed)) set.seed(seed)

  nvals.W <- k * nrow(A)
  nvals.H <- k * ncol(A)

  # generate values for random population using one of several methods
  if (W.method %in% c("sample", "random") || H.method %in% c("sample", "random")) {
    # sampling from non-zero values in sparse matrix A
    # use the @x slot to access all non-zero values
    # select only values within "n.sd" standard deviations of the mean of A@x
    Ax <- A@x[A@x > 0]
    sd.Ax <- sd(Ax)
    mean.Ax <- mean(Ax)
    if (W.method == "sample" || H.method == "sample") {
      vals <- Ax[which(abs(Ax - mean.Ax) < (sd.Ax * n.sd))]
    }
  }

  if (W.method == "sample") {
    W.vals <- sample(vals, nvals.W, replace = TRUE)
  } else if (W.method == "random") {
    W.vals <- rnorm(nvals.W, mean = mean.Ax, sd = sd.Ax)
    # convert negative values to values uniformly distributed between 0 and mean.Ax
    if (sum(W.vals <= 0)) {
      W.vals[W.vals < 0] <- runif(sum(W.vals <= 0), min = 1e-5, max = mean.Ax)
    }
    W.vals <- sample(W.vals, nvals.W)
  } else if (W.method == "bounded") {
    W.vals <- runif(nvals.W, min = bounds[1], max = bounds[2])
  }
  W <- matrix(W.vals, nrow = nrow(A), ncol = k)

  if (H.method == "sample") {
    H.vals <- sample(vals, nvals.H, replace = TRUE)
  } else if (H.method == "random") {
    H.vals <- rnorm(nvals.H, mean = mean.Ax, sd = sd.Ax)
    # convert negative values to values uniformly distributed between 0 and mean.Ax
    H.vals[H.vals < 0] <- runif(sum(H.vals <= 0), min = 1e-5, max = mean.Ax)
  } else if (H.method == "bounded") {
    H.vals <- runif(nvals.H, min = bounds[1], max = bounds[2])
  }
  H <- matrix(H.vals, ncol = ncol(A), nrow = k)

  if (W.method %in% c("sample", "random") && H.method %in% c("sample", "random")) {
    W <- sqrt(W)
    H <- sqrt(H)
  } else if (W.method %in% c("sample", "random")) {
    W <- W * mean.Ax / (mean(W) * mean(H))
  } else if (H.method %in% c("sample", "random")) {
    H <- H * mean.Ax / (mean(W) * mean(H))
  }

  return(list("W" = W, "H" = H))
}