R/average_expression.R

Defines functions average.expression

Documented in average.expression

#' Average gene expression in clusters
#'
#' @description
#' Average gene expression by ident class in a Seurat object to find cluster centers using fast sparse matrix methods.
#'
#' @details
#' By default, \code{average.expression} averages gene counts by grouping cells by the active identity in the active assay of the counts slot of the Seurat object.
#' The assay, the identity, and the slot can be specified if other than the active ones or the count slot.
#'
#' If return.seurat is specified as TRUE, \code{average.expression} returns the Seurat object with cluster centers in a dimensional reduction object.
#' By default, the \code{\@reductions} slot is named with the parameter \code{reduction.name = "dclus"}.
#'
#' Additional statistics will be calculated on cluster centers if \code{details = TRUE}, specifically the cells per center, the number of cells in each center, and the cosine distances of cells in that cluster to the cluster center.
#'
#' \code{average.expression} uses \code{Matrix::rowMeans} to perform sparse matrix averaging of each cluster.
#'
#' @param object Seurat object
#' @param ident Ident with sample clustering information (default is the \code{\@active.ident})
#' @param assay Assay to average (default is the \code{\@active.assay})
#' @param slot Slot to average (default is "counts")
#' @param verbose Boolean, show progress bar (default is TRUE)
#' @param details Boolean, calculate additional statistics about each cluster (default is FALSE)
#' @param return.seurat Return the input Seurat object with the cluster centers as a new dimensional reduction object (default = FALSE for consistency with Seurat AverageExpression())
#' @param reduction.name Name of \code{\@reduction} object if return.seurat = TRUE, other than the default "dclus" name
#' @return dense matrix of cluster centers if settings are default. If details = TRUE, returns a list of cluster centers matrix, cells.per.center, cells.in.center, and a vector of cosine distances for all cells to their assigned center
#' @examples
#' \dontrun{
#' library(SeuratData)
#' library(scNMF)
#' InstallData(pbmc3k)
#' pbmc3k <- NormalizeData(pbmc3k)
#' pbmc3k <- cluster.divisive(pbmc3k)
#'
#' cluster.centers <- average.expression(pbmc3k)
#' # or
#' cluster.centers <- average.expression(pbmc3k, details = TRUE, slot = "logcounts", assay = "RNA", ident = "leaf.idents") # but you probably don't want to use logcounts
#' or
#' pbmc3k <- average.expression(pbmc3k, return.seurat = TRUE)
#' #or
#' pbmc3k <- average.expression(pbmc3k, return.seurat = TRUE, reduction.name = "div_clust")
#' }
average.expression <- function(object, ident = "active.ident", assay = "active.assay", slot = "counts", verbose = TRUE, details = FALSE, return.seurat = FALSE, reduction.name = "dclus") {
  require(Matrix)
  require(qlcMatrix)

  if (assay == "active.assay") assay <- object@active.assay
  data <- GetAssayData(object, assay = assay, slot = slot)

  # convert data to a sparse matrix if it isn't already
  if (class(data)[1] != "dgCMatrix") data <- Matrix(data, sparse = TRUE)

  # get the idents class over which to average
  ifelse(ident == "active.ident", idents <- as.vector(Idents(object)), idents <- as.vector(object@meta.data[[ident]]))

  # loop through all idents, averaging them in data
  ident.names <- unique(idents)

  if (verbose > 0) message("\nAveraging expression of ", length(ident.names), " clusters")

  if (verbose > 0) pb <- txtProgressBar(char = "=", style = 3, max = length(ident.names), width = 50)
  m <- list()
  cells.in.center <- list()
  cos.dists <- c()
  counts <- c()
  for (i in 1:length(ident.names)) {
    indices <- which(idents == ident.names[i])
    m[[i]] <- Matrix::rowMeans(data[, indices])
    if (details == TRUE) {
      cells.in.center[[ident.names[i]]] <- colnames(data)[indices]
      counts <- c(counts, length(indices))
      cosine.distances <- 1 - qlcMatrix::cosSparse(cbind(m[[i]], m[[i]]), data[, indices])[1,]
      cos.dists <- c(cos.dists, cosine.distances)
    }
    if (verbose > 0) setTxtProgressBar(pb = pb, value = i)
  }
  result <- do.call(cbind, m)
  colnames(result) <- ident.names
  if (details == TRUE && return.seurat == FALSE) {
    names(counts) <- ident.names
    return(list("centers" = result, "cells.per.center" = counts, "cells.in.center" = cells.in.center, "cos.dists" = cos.dists))
  } else if (return.seurat == TRUE) {
    embeddings <- matrix(0, nrow = length(ident.names), ncol = ncol(data))
    colnames(embeddings) <- colnames(data)
    rownames(embeddings) <- colnames(result) <- paste0(reduction.name, "_", ident.names)
    for (i in 1:length(ident.names)) {
      indices <- which(idents == ident.names[i])
      embeddings[i, indices] <- 1
    }
    if (details == TRUE) {
      dr <- CreateDimReducObject(
        embeddings = t(embeddings),
        loadings = result,
        key = paste0(reduction.name, "_"),
        assay = assay,
        misc = list("cells.per.center" = counts, "cells.in.center" = cells.in.center, "cos.dists" = cos.dists)
     )
    } else if (details == FALSE) {
      dr <- CreateDimReducObject(embeddings = t(embeddings), loadings = result, key = paste0(reduction.name, "_"), assay = assay)
    }
    object[[reduction.name]] <- dr
    return(object)
  } else return(result)
}
zdebruine/scNMF documentation built on Jan. 1, 2021, 1:50 p.m.