# R/quminorm_user_function.R

#' @title Quantile normalization of non-UMI single-cell gene expression
#' @rdname quminorm
#' @description Normalizes read counts (without UMIs) or
#'   transcripts per million (TPM) such as those generated by smart-seq2
#'   to match a discrete quasi-UMI target distribution. The resulting QUMI
#'   counts can be analyzed as if they were UMI counts.
#'
#' @param object A \code{SingleCellExperiment}, \code{SummarizedExperiment},
#'   \code{matrix} or sparse \code{Matrix} of non-negative values
#'   (read counts or TPM).
#' @param assayName In case object is a SingleCellExperiment or
#'   SummarizedExperiment, the assay from which
#'   the quasi-UMIs should be calculated is specified here.
#' @param shape Positive scalar, a fixed shape parameter for the target
#'   distribution. The shape parameter represents sigma for the
#'   Poisson-lognormal target distribution. See \code{\link[sads]{dpoilog}}.
#' @param mc.cores Positive integer indicating the number of cores to use for
#'   parallel processing. See \code{\link[parallel]{mclapply}}.
#'
#' @return An object of the same class as the input object
#'   but with the nonzero values normalized
#'   to match the target quasi-UMI distribution. In case of a
#'   SingleCellExperiment or SummarizedExperiment,
#'   the quasi-UMIs are added as a new assay named \code{qumi_poilog_[shape]},
#'   where [shape] is the user-specified numeric shape parameter.
#'
#' @details The default assay name "tpm" refers to transcripts per million
#'   (TPM) values that have been summarized to the gene level.
#'   The algorithm in its current form has not been
#'   evaluated for quasi-UMI generation from transcript counts, as full-length
#'   data with UMIs have not until very recently been available.
#'
#'   If NA values appear in the quminorm output, this is likely caused by
#'   numerical errors in computing the Poisson-lognormal cumulative distribution
#'   function. In our experience it is more common in cells with very high
#'   zero fractions.
#'   Consider either discarding these cells or using a smaller quminorm shape
#'   parameter to avoid having the NAs.
#'
#' @references Townes FW and Irizarry RA (2020). Quantile normalization of
#'   single-cell RNA-seq read counts without unique molecular identifiers.
#'   \emph{Genome Biology}
#'   \url{https://doi.org/10.1186/s13059-020-02078-0}
#'
#' @examples
#' library(scRNAseq)
#' library(SingleCellExperiment)
#' sce <- ReprocessedAllenData("rsem_tpm",location=FALSE)
#' sce <- quminorm(sce[,1:4],assayName="rsem_tpm",mc.cores=2)
#' assayNames(sce)
#'
#' @export
setMethod(
  f = "quminorm",
  signature = signature(object = "Matrix"),
  definition = function(object, shape = 2, mc.cores = 1) {
    # Inputs of class "Matrix" are forwarded unchanged to the sparse
    # implementation; its result is returned as-is.
    quminorm_sparse(object, shape, mc.cores = mc.cores)
  }
)

#' @rdname quminorm
#' @export
setMethod(
  f = "quminorm",
  signature = signature(object = "matrix"),
  definition = function(object, shape = 2, mc.cores = 1) {
    # Base R matrices are forwarded unchanged to the dense implementation;
    # its result is returned as-is.
    quminorm_dense(object, shape, mc.cores = mc.cores)
  }
)

#' @rdname quminorm
#' @importFrom SummarizedExperiment assay assay<-
#' @export
setMethod(
  f = "quminorm",
  signature = signature(object = "SummarizedExperiment"),
  definition = function(object, assayName = "tpm", shape = 2,
                        mc.cores = 1) {
    # Pull out the assay that is to be normalized.
    input_assay <- assay(object, assayName)

    # Output assay name, e.g. "qumi_poilog_2" for shape = 2.
    qumi_name <- paste("qumi_poilog", shape, sep = "_")

    # Re-dispatch quminorm() on the extracted assay so that the method
    # matching its class handles the normalization, then store the result
    # as an additional assay.
    assay(object, qumi_name) <- quminorm(
      input_assay, shape,
      mc.cores = mc.cores
    )

    # Return the input object with the new assay attached.
    object
  }
)
# willtownes/quminorm documentation built on March 13, 2021, 2:16 a.m.