R/ls.project.R

Defines functions ls.project

Documented in ls.project

#' Least squares projection
#'
#' @description Least squares projection (H) of samples (A) onto a feature model (W)
#'
#' @details Arguments are validated in R and the projection itself is delegated
#'   to the compiled routine \code{c_lsproject}, which receives a zero-filled
#'   \code{k} x \code{ncol(samples)} matrix together with the transposed factor
#'   model. When \code{k} is smaller than \code{ncol(W)}, only the first
#'   \code{k} columns of \code{W} are used so that the model and the result
#'   matrix agree on the rank.
#'
#' @param samples dgCMatrix of samples (columns) by features (rows) to be projected onto "W"
#' @param W factor model of features (rows) by factors (columns) of class "matrix"
#' @param mask.zeros treat zeros as missing values
#' @param n.threads number of threads/CPUs to use. Negative values are reset to 0; with 0 (the default) all available threads are used, as decided by OpenMP
#' @param H.nonneg constrain mapping to positive values
#' @param H.L1 lasso regularization
#' @param H.L2 ridge regularization
#' @param H.angular angular regularization
#' @param k rank of projection, a single number between 1 and \code{ncol(W)}. By default, k = ncol(W).
#' @param inner.rel.tol Default value should suffice. Stopping criterion for sequential coordinate descent least squares solver between two successive iterations
#' @param inner.max.iter Default value should suffice. Maximum number of permitted iterations for sequential coordinate descent least squares solver if inner.rel.tol is not met.
#' @return a sample embeddings matrix of samples (columns) by factor coefficients (rows).
#'   Columns are named after \code{colnames(samples)} and rows are named
#'   \code{project_1}, ..., \code{project_k}.
#' @examples
#' \dontrun{
#' data(moca7k)
#' # calculate a model for 1000 cells and then project all 7500 onto that model
#' model <- lsmf(moca7k[,1:1000], k = 20)
#' H.all <- ls.project(moca7k, model$W)
#' 
#' # compare projection to the original weights for the first 1000 cells
#' plot(H.all[,1:1000], model$H)
#' # just about perfect!
#' }
ls.project <- function(samples, W, n.threads = 0, k = ncol(W),
                       mask.zeros = FALSE, H.nonneg = TRUE,
                       H.L1 = 0, H.L2 = 0, H.angular = 0,
                       inner.rel.tol = 1e-8, inner.max.iter = 100) {
  # inherits() handles S4 classes and does not depend on the ordering of the
  # class vector the way `class(x)[1]` does.
  if (!inherits(samples, "dgCMatrix")) {
    stop("lsproject requires a dgCMatrix as input for samples")
  }
  if (!is.matrix(W)) {
    stop("lsproject requires a dense input for W, of class 'matrix'")
  }
  if (n.threads < 0) {
    n.threads <- 0
  }
  if (nrow(W) != samples@Dim[1]) {
    stop("Number of rows in W are not equal to number of rows in samples matrix")
  }

  # k sizes the result matrix and its row names, so it has to be a usable
  # dimension that does not exceed the number of factors available in W.
  k_ok <- is.numeric(k) && length(k) == 1L && !is.na(k) &&
    k >= 1 && k <= ncol(W)
  if (!k_ok) {
    stop("k must be a single number between 1 and ncol(W)")
  }
  k <- as.integer(k)

  # Keep the model and the result matrix at the same rank: a k below ncol(W)
  # projects onto the first k factors only. W is passed through untouched in
  # the default case.
  if (k < ncol(W)) {
    W <- W[, seq_len(k), drop = FALSE]
  }

  H <- matrix(0, k, ncol(samples))
  # NOTE(review): H.nonneg is sent as numeric while mask.zeros is sent as
  # integer; both conversions are kept as they were because the signature of
  # c_lsproject is not visible from this file.
  H <- c_lsproject(
    H,
    t(W),
    samples,
    as.integer(n.threads),
    as.integer(mask.zeros),
    as.numeric(H.nonneg),
    as.double(H.L1),
    as.double(H.L2),
    as.double(H.angular),
    as.double(inner.rel.tol),
    as.integer(inner.max.iter)
  )

  colnames(H) <- colnames(samples)
  rownames(H) <- paste0("project_", seq_len(k))
  H
}
zdebruine/LSMF documentation built on Jan. 1, 2021, 1:50 p.m.