#' Random initialization of factor models
#'
#' @description Initialize factor models "W" and "H" by sampling values from A, simulating comparable random distributions, or specifying uniform distribution bounds
#'
#' @details
#' Methods support positive, non-zero initializations only.
#' \itemize{
#' \item random: samples values from a random normal distribution with the same mean as non-zero values in A and the same standard deviation
#' \item sample: samples non-zero values in A within \code{n.sd} standard deviations of the mean of non-zero values in A
#' \item bounded: samples values from a random uniform distribution bounded between values specified in "bounds"
#' }
#'
#' Values in W and H will automatically be scaled to approximate the distribution of A upon multiplication. Possible scenarios:
#' \itemize{
#' \item W.method and H.method = "sample" or "random": W and H are the square roots of their respective sampled values
#' \item W.method and H.method = "bounded" or other given distribution, no normalization is applied
#' \item W.method = "sample" or "random" and H.method = "bounded" or other given distribution (or the reverse): H is assigned first and is held constant, values in W are raised to the power of log(mean(A)/mean(H))/log(mean(W)) such that mean(W) * mean(H) approximates mean(A)
#' }
#'
#' Basic usage:
#' \itemize{
#' \item For initialization of most models, "random" should suffice and is the ideal method for fast convergence to an accurate and robust local minima
#' \item For one-sided bernoulli factorization, initiate the bernoulli matrix bounded between for example \code{bounds = c(0.4, 0.6)} and the non-bounded matrix with "random"
#' \item For one-sided multinomial factorization, initiate the multinomial matrix with the multinomial distribution or bounds corresponding to the limits of the distribution, and the non-bounded matrix with "random"
#' \item For models where "A" is comprised of many different values, "sample" may be a good option which is truer to the original distribution than a simple normal "random" method
#' }
#'
#' If these initialization methods do not satisfy, LSMF also takes matrices as initializations for "W" and "H".
#'
#' @param A input dgCMatrix
#' @param k rank
#' @param W.method One of c("sample", "random", "bounded")
#' @param H.method One of c("sample", "random", "bounded")
#' @param n.sd number of standard deviations from the mean over which to sample values in A (excluding zeros). Applies only to method = c("sample", "random")
#' @param bounds sample values between bounds if method = "bounded"
#' @param seed never ever not remember to set the seed. It's NULL by default
#' @return list of matrices "W" and "H"
#' @seealso \code{\link{nndsvd}}, \code{\link{lsmf}}
rand.init <- function(A, k, W.method = "sample", H.method = "sample", n.sd = 1, bounds = c(1e-5, 1), seed = NULL) {
if (!is.null(seed)) set.seed(seed)
nvals.W <- k * nrow(A)
nvals.H <- k * ncol(A)
# generate values for random population using one of several methods
if (W.method %in% c("sample", "random") || H.method %in% c("sample", "random")) {
# sampling from non-zero values in sparse matrix A
# use the @x slot to access all non-zero values
# select only values within "n.sd" standard deviations of the mean of A@x
Ax <- A@x[A@x > 0]
sd.Ax <- sd(Ax)
mean.Ax <- mean(Ax)
if (W.method == "sample" || H.method == "sample") {
vals <- Ax[which(abs(Ax - mean.Ax) < (sd.Ax * n.sd))]
}
}
if (W.method == "sample") {
W.vals <- sample(vals, nvals.W, replace = TRUE)
} else if (W.method == "random") {
W.vals <- rnorm(nvals.W, mean = mean.Ax, sd = sd.Ax)
# convert negative values to values uniformly distributed between 0 and mean.Ax
if (sum(W.vals <= 0)) {
W.vals[W.vals < 0] <- runif(sum(W.vals <= 0), min = 1e-5, max = mean.Ax)
}
W.vals <- sample(W.vals, nvals.W)
} else if (W.method == "bounded") {
W.vals <- runif(nvals.W, min = bounds[1], max = bounds[2])
}
W <- matrix(W.vals, nrow = nrow(A), ncol = k)
if (H.method == "sample") {
H.vals <- sample(vals, nvals.H, replace = TRUE)
} else if (H.method == "random") {
H.vals <- rnorm(nvals.H, mean = mean.Ax, sd = sd.Ax)
# convert negative values to values uniformly distributed between 0 and mean.Ax
H.vals[H.vals < 0] <- runif(sum(H.vals <= 0), min = 1e-5, max = mean.Ax)
} else if (H.method == "bounded") {
H.vals <- runif(nvals.H, min = bounds[1], max = bounds[2])
}
H <- matrix(H.vals, ncol = ncol(A), nrow = k)
if (W.method %in% c("sample", "random") && H.method %in% c("sample", "random")) {
W <- sqrt(W)
H <- sqrt(H)
} else if (W.method %in% c("sample", "random")) {
W <- W * mean.Ax / (mean(W) * mean(H))
} else if (H.method %in% c("sample", "random")) {
H <- H * mean.Ax / (mean(W) * mean(H))
}
return(list("W" = W, "H" = H))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.