R/splitModule.R

Defines functions .splitModule .splitModuleWithSeed

#' @title Split celda feature module
#' @description Manually select a celda feature module to split into 2 or
#'  more modules. Useful for splitting up modules that show divergent
#'  expression of features in multiple cell clusters.
#' @param x A \linkS4class{SingleCellExperiment} object
#'  with the matrix located in the assay slot under \code{useAssay}.
#'  Rows represent features and columns represent cells.
#' @param useAssay A string specifying which \link{assay}
#'  slot to use for \code{x}. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param module Integer. The module to be split.
#' @param n Integer. How many modules should \code{module} be split into.
#'  Default 2.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param ... Ignored. Placeholder to prevent check warning.
#' @return A updated \linkS4class{SingleCellExperiment} object with new
#'  feature modules stored in column \code{celda_feature_module} in
#'  \code{\link{rowData}(x)}.
#' @export
setGeneric("splitModule",
    function(x, ...) {
        standardGeneric("splitModule")
    })


#' @rdname splitModule
#' @examples
#' data(sceCeldaCG)
#' # Split module 5 into 2 new modules.
#' sce <- splitModule(sceCeldaCG, module = 5)
#' @export
setMethod("splitModule", signature(x = "SingleCellExperiment"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        module,
        n = 2,
        seed = 12345) {

        altExp <- SingleCellExperiment::altExp(x, altExpName)

        if (!module %in% celdaModules(x, altExpName = altExpName)) {
            stop("Module ", module, " is not found in celdaModules(x,",
                " altExpName = altExpName).",
                " Please specify a valid module.")
        }

        celdaGMod <- .splitModuleWithSeed(x = altExp,
            useAssay = useAssay,
            module = module,
            n = n,
            seed = seed)

        S4Vectors::metadata(altExp)[["celda_parameters"]]$L <-
            params(celdaGMod)$L
        S4Vectors::metadata(altExp)[["celda_parameters"]]$finalLogLik <-
            celdaGMod@finalLogLik
        S4Vectors::metadata(altExp)[["celda_parameters"]]$featureModuleLevels <-
            sort(unique(celdaClusters(celdaGMod)$y))
        SummarizedExperiment::rowData(altExp)["celda_feature_module"] <-
            celdaClusters(celdaGMod)$y
        SingleCellExperiment::altExp(x, altExpName) <- altExp
        return(x)
    }
)


.splitModuleWithSeed <- function(x,
    useAssay,
    module,
    n,
    seed) {

    if (is.null(seed)) {
        celdaGMod <- .splitModule(x, useAssay, module, n)
    } else {
        with_seed(seed, celdaGMod <- .splitModule(x, useAssay, module, n))
    }
    return(celdaGMod)
}


.splitModule <- function(x, useAssay, module, n) {
    counts <- SummarizedExperiment::assay(x, i = useAssay)
    counts <- .processCounts(counts)
    .validateCounts(counts)
    ix <- SummarizedExperiment::rowData(x)$celda_feature_module == module

    if (sum(ix) > 1) {
        tempModel <- .celda_G(
            counts = counts[ix, , drop = FALSE],
            L = n,
            yInitialize = "random",
            splitOnIter = -1,
            splitOnLast = FALSE,
            nchains = 1,
            verbose = FALSE)

        splitY <- celdaClusters(tempModel)$y
        splitIx <- celdaClusters(tempModel)$y > 1
        splitY[splitIx] <- S4Vectors::metadata(x)$celda_parameters$L +
            splitY[splitIx] - 1
        splitY[!splitIx] <- module

        newY <- SummarizedExperiment::rowData(x)$celda_feature_module
        newY[ix] <- splitY
        newL <- max(newY)

        newLl <- .logLikelihoodcelda_G(
            counts = counts,
            y = newY,
            L = newL,
            beta = S4Vectors::metadata(x)$celda_parameters$beta,
            delta = S4Vectors::metadata(x)$celda_parameters$delta,
            gamma = S4Vectors::metadata(x)$celda_parameters$gamma)
        model <- methods::new(
            "celda_G",
            clusters = list(y = newY),
            params = list(
                L = newL,
                beta = S4Vectors::metadata(x)$celda_parameters$beta,
                delta = S4Vectors::metadata(x)$celda_parameters$delta,
                gamma = S4Vectors::metadata(x)$celda_parameters$gamma,
                countChecksum = .createCountChecksum(counts)
            ),
            names = list(row = rownames(x),
                column = colnames(x),
                sample = x@metadata$celda_parameters$sampleLevels),
            finalLogLik = newLl
        )
    } else {
        stop("Module ", module, "contains <= 1 feature. No additional",
            " splitting was able to be performed.")
    }
    return(model)
}

Try the celda package in your browser

Any scripts or data that you put into this service are public.

celda documentation built on Nov. 8, 2020, 8:24 p.m.