# R/reducedMNN.R

#' MNN correction in reduced dimensions
#'
#' @param ... One or more matrices of low-dimensional representations where rows are cells and columns are dimensions.
#' Each object should contain the same number of columns, corresponding to the same dimensions.
#' These should have been generated by a single call to \code{\link{multiBatchPCA}}.
#'
#' If multiple objects are supplied, each object is assumed to contain all and only cells from a single batch.
#' If a single object is supplied, \code{batch} should also be specified.
#'
#' Alternatively, any number of lists of such objects can be supplied.
#' These are flattened as if the objects inside each list were passed directly to \code{...}.
#' @inheritParams fastMNN
#'
#' @return
#' A \linkS4class{DataFrame} is returned where each row corresponds to a cell, containing:
#' \itemize{
#' \item \code{corrected}, the matrix of corrected low-dimensional coordinates for each cell.
#' \item \code{batch}, the Rle specifying the batch of origin for each row.
#' }
#' Cells in the output object are always ordered in the same manner as supplied in \code{...}.
#' The metadata on this object is the same as that in the output of \code{\link{fastMNN}}.
#'
#' @details
#' \code{reducedMNN} performs the same operations as \code{\link{fastMNN}} but assumes that the PCA has already been performed.
#' This is useful as the PCA (via \code{\link{multiBatchPCA}}) is often the most time-consuming step.
#' By performing the PCA once, \code{reducedMNN} allows the MNN correction to be quickly repeated with different parameters.
#'
#' \code{reducedMNN} operates on the same principles as \code{\link{fastMNN}}, 
#' so users are referred to the documentation for the latter for more details on the effect of each of the arguments.
#' Obviously, any arguments pertaining to gene-based steps in \code{\link{fastMNN}} are not relevant here.
#' 
#' Note that \code{\link{multiBatchPCA}} will not perform cosine-normalization, 
#' so it is the responsibility of the user to cosine-normalize each batch beforehand with \code{\link{cosineNorm}} to recapitulate results of \code{\link{fastMNN}} with \code{cos.norm=TRUE}.
#' In addition, \code{\link{multiBatchPCA}} must be run on all samples at once, to ensure that all cells are projected to the same low-dimensional space.
#'
#' @author Aaron Lun
#' @examples
#' B1 <- matrix(rnorm(10000), nrow=50) # Batch 1 
#' B2 <- matrix(rnorm(10000), nrow=50) # Batch 2
#'
#' # Corrected values equivalent to fastMNN().
#' cB1 <- cosineNorm(B1)
#' cB2 <- cosineNorm(B2)
#' pcs <- multiBatchPCA(cB1, cB2)
#' mnn.out <- reducedMNN(pcs[[1]], pcs[[2]])
#'
#' mnn.out
#'
#' @seealso
#' \code{\link{multiBatchPCA}}, to obtain the values to be corrected.
#'
#' \code{\link{fastMNN}}, for the version that operates on gene-expression values.
#'
#' \code{\link{clusterMNN}}, for an application on cluster centroids.
#'
#' @export
#' @importFrom BiocNeighbors KmknnParam
#' @importFrom BiocParallel SerialParam bpstart bpstop 
#' @importClassesFrom S4Vectors DataFrame
#' @importFrom scuttle .bpNotSharedOrUp .unpackLists
reducedMNN <- function (..., batch=NULL, k=20, prop.k=NULL, restrict=NULL, ndist=3,
    merge.order=NULL, auto.merge=FALSE, min.batch.skip=0,
    BNPARAM=KmknnParam(), BPPARAM=SerialParam())
{
    # Lists supplied in '...' are flattened so that 'batches' is a single
    # list of low-dimensional matrices (cells in rows, dimensions in columns).
    batches <- .unpackLists(...)

    # Input checks are delegated to the shared helpers; 'restrict' is replaced
    # by whatever checkRestrictions() returns for row-wise cells.
    checkBatchConsistency(batches, cells.in.columns=FALSE)
    restrict <- checkRestrictions(batches, restrict, cells.in.columns=FALSE)

    # Setting up the parallelization environment: the backend is started here
    # only when .bpNotSharedOrUp() reports TRUE, and in that case it is
    # stopped again when this function exits.
    if (.bpNotSharedOrUp(BPPARAM)) {
        bpstart(BPPARAM)
        on.exit(bpstop(BPPARAM), add=TRUE)
    }

    # Arguments forwarded unchanged to .fast_mnn() in both branches below.
    args <- list(k=k, prop.k=prop.k, ndist=ndist, 
        merge.order=merge.order, auto.merge=auto.merge, 
        min.batch.skip=min.batch.skip, BNPARAM=BNPARAM, BPPARAM=BPPARAM)

    if (length(batches)==1L) {
        # A single object holds the cells of every batch, so it is split by
        # 'batch' (row-wise) before correction.
        divided <- divideIntoBatches(batches[[1]], batch=batch, restrict=restrict[[1]], byrow=TRUE)
        output <- do.call(.fast_mnn, c(list(batches=divided$batches, restrict=divided$restrict), args))

        # Permute the rows with 'divided$reorder' (presumably restoring the
        # order in which cells were supplied) and re-index the recorded MNN
        # pairs with the same permutation so they still refer to the same rows.
        d.reo <- divided$reorder
        output <- output[d.reo,,drop=FALSE]
        metadata(output)$merge.info$pairs <- .reindex_pairings(metadata(output)$merge.info$pairs, d.reo)
    } else {
        # One object per batch: pass them through as they are.
        output <- do.call(.fast_mnn, c(list(batches=batches, restrict=restrict), args))
    }

    # Row names of the result are taken from the corrected coordinate matrix.
    rownames(output) <- rownames(output$corrected)
    output 
}
# LTLA/batchelor documentation built on Jan. 19, 2024, 6:33 p.m.