R/sparseDC-simulate.R

#' SparseDC simulation
#'
#' Simulate counts from cluster in two conditions using the SparseDC method.
#'
#' @param params SparseDCParams object containing simulation parameters.
#' @param verbose logical. Whether to print progress messages
#' @param ... any additional parameter settings to override what is provided in
#'        \code{params}.
#'
#' @details
#' This function is just a wrapper around
#' \code{\link[SparseDC]{sim_data}} that takes a
#' \code{\link{SparseDCParams}}, runs the simulation then converts the
#' output from log-expression to counts and returns a
#' \code{\link[SingleCellExperiment]{SingleCellExperiment}} object. The original
#' simulated log-expression values are returned in the \code{LogExprs} assay.
#' See \code{\link[SparseDC]{sim_data}} and the SparseDC paper for
#' more details about how the simulation works.
#'
#' @return SingleCellExperiment containing simulated counts
#'
#' @references
#' Campbell K, Yau C. Uncovering genomic trajectories with heterogeneous genetic
#' and environmental backgrounds across single-cells and populations. bioRxiv
#' (2017).
#'
#' Barron M, Zhang S, Li J. A sparse differential clustering algorithm for
#' tracing cell type changes via single-cell RNA-sequencing data.
#' Nucleic Acids Research (2017).
#'
#' Paper: \url{10.1093/nar/gkx1113}
#'
#' @examples
#' sim <- sparseDCSimulate()
#'
#' @export
#' @importFrom SingleCellExperiment SingleCellExperiment
sparseDCSimulate <- function(params = newSparseDCParams(),
                             verbose = TRUE, ...) {

    checkmate::assertClass(params, "SparseDCParams")
    params <- setParams(params, ...)

    # Set random seed
    seed <- getParam(params, "seed")
    set.seed(seed)

    # Get the parameters we are going to use
    nCells <- getParam(params, "nCells")
    nGenes <- getParam(params, "nGenes")
    markers.n <- getParam(params, "markers.n")
    markers.shared <- getParam(params, "markers.shared")
    markers.same <- getParam(params, "markers.same")
    clusts.c1 <- getParam(params, "clusts.c1")
    clusts.c2 <- getParam(params, "clusts.c2")
    mean.lower <- getParam(params, "mean.lower")
    mean.upper <- getParam(params, "mean.upper")

    if (verbose) {message("Simulating counts...")}
    sparsedc.sim <- SparseDC::sim_data(genes = nGenes,
                                       cells = nCells,
                                       sig.genes = markers.n,
                                       sig.genes.s = markers.shared,
                                       clus.t1 = clusts.c1,
                                       clus.t2 = clusts.c2,
                                       same.sig = markers.same,
                                       u.l = mean.lower,
                                       u.h = mean.upper)

    if (verbose) {message("Creating final dataset...")}
    cell.names <- paste0("Cell", seq_len(2 * nCells))
    gene.names <- paste0("Gene", seq_len(nGenes))

    exprs <- cbind(sparsedc.sim$dat.1, sparsedc.sim$dat.2)
    counts <- exp(exprs) - 1
    counts[counts < 0] <- 0
    counts <- round(counts)
    rownames(counts) <- gene.names
    colnames(counts) <- cell.names

    cells <- data.frame(Cell = cell.names,
                        Condition = factor(paste0("Condition",
                                                  rep(1:2, each = nCells))),
                        Cluster = factor(paste0("Cluster",
                                                c(sparsedc.sim$clusters1,
                                                  sparsedc.sim$clusters2))),
                        stringsAsFactors = FALSE)
    rownames(cells) <- cell.names

    features <- data.frame(Gene = gene.names,
                           BaseLogMean = sparsedc.sim$gene.means,
                           stringsAsFactors = FALSE)

    for (i in seq_len(ncol(sparsedc.sim$sig.gene.mat.1))) {
        col.name <- paste0("Condition1Cluster", i, "Marker")
        features[[col.name]] <- sparsedc.sim$sig.gene.mat.1[, i] == 1
    }

    for (i in seq_len(ncol(sparsedc.sim$sig.gene.mat.2))) {
        col.name <- paste0("Condition2Cluster", i, "Marker")
        features[[col.name]] <- sparsedc.sim$sig.gene.mat.2[, i] == 1
    }

    for (i in seq_len(ncol(sparsedc.sim$clus.gene.means))) {
        col.name <- paste0("Cluster", i, "LogMean")
        features[[col.name]] <- sparsedc.sim$clus.gene.means[, i]
    }

    rownames(features) <- gene.names

    sim <- SingleCellExperiment(assays = list(counts = counts,
                                              LogExprs = exprs),
                                rowData = features,
                                colData = cells,
                                metadata = list(Params = params))

    return(sim)
}
Granoia/splatter-mod documentation built on May 28, 2019, 12:31 a.m.