#' Estimate Parameters From Real Datasets by Kersplat
#'
#' This function is used to estimate useful parameters from a real dataset by
#' using `kersplatEstimate` function in Splatter package.
#'
#' @param ref_data A count matrix. Each row represents a gene and each column
#' represents a cell.
#' @param verbose Logical. Whether to return messages or not.
#' @param seed An integer of a random seed.
#' @importFrom peakRAM peakRAM
#' @importFrom splatter kersplatEstimate
#'
#' @return A list containing the estimated parameters and the results of
#' execution detection.
#' @export
#' @references
#' Zappia L, Phipson B, Oshlack A. Splatter: simulation of single-cell RNA sequencing data. Genome biology, 2017, 18(1): 1-15. <https://doi.org/10.1186/s13059-017-1305-0>
#'
#' Bioconductor URL: <https://bioconductor.org/packages/release/bioc/html/splatter.html>
#'
#' Github URL: <https://github.com/Oshlack/splatter>
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#' # Estimate parameters
#' estimate_result <- Kersplat_estimation(ref_data = ref_data,
#' verbose = TRUE,
#' seed = 111)
#' estimate_result <- estimate_result[["estimate_result"]]
#' ## Check the class
#' class(estimate_result) == "KersplatParams"
#' }
#'
Kersplat_estimation <- function(ref_data,
                                verbose = FALSE,
                                seed
){
  # Estimate Kersplat parameters from `ref_data` with
  # splatter::kersplatEstimate() and return them together with the
  # peakRAM::peakRAM() report for the estimation call.
  ##############################################################################
  ####                               Check                                   ###
  ##############################################################################
  # Anything that is not already a base matrix is coerced with as.matrix().
  if (!is.matrix(ref_data)) {
    ref_data <- as.matrix(ref_data)
  }
  ##############################################################################
  ####                            Estimation                                 ###
  ##############################################################################
  if (verbose) {
    message("Estimating parameters using Kersplat")
  }
  # Seed
  set.seed(seed)
  # Estimation. The assignment is written inside the peakRAM() call so that the
  # estimation itself is what gets profiled; `estimate_result` is read again
  # below, so this relies on peakRAM evaluating the expression in this frame.
  # `verbose` is forwarded so the caller's choice also controls the messages
  # emitted by kersplatEstimate() (it used to be hard-coded to FALSE).
  estimate_detection <- peakRAM::peakRAM(
    estimate_result <- splatter::kersplatEstimate(ref_data,
                                                  verbose = verbose)
  )
  ##############################################################################
  ####                              Output                                   ###
  ##############################################################################
  estimate_output <- list(estimate_result = estimate_result,
                          estimate_detection = estimate_detection)
  return(estimate_output)
}
#' Simulate Datasets by Kersplat
#'
#' This function is used to simulate datasets from learned parameters by `kersplatSimulate`
#' function in Splatter package.
#'
#' @param parameters An object generated by [splatter::kersplatEstimate()]
#' @param other_prior A list with names of certain parameters. Some methods need
#' extra parameters to execute the estimation step, so you must input them. In
#' the simulation step, the number of cells, genes, groups, batches, the percent
#' of DEGs and other variables are usually customized, so you must specify them
#' before simulating a dataset. See `Details` below for more information.
#' @param return_format A character. Alternative choices: list, SingleCellExperiment,
#' Seurat, h5ad. If you select `h5ad`, you will get a path where the .h5ad file saves to.
#' @param verbose Logical. Whether to return messages or not.
#' @param seed A random seed.
#' @importFrom splatter kersplatSimulate
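#'
#' @return A list containing the simulated data (`simulate_result`) and the
#' results of execution detection (`simulate_detection`).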
#' @export
#' @details
#' In addition to simulating datasets with default parameters, users can set two
#' extra parameters before executing the simulation step with the Kersplat method:
#' 1. nCells. Simply type `other_prior = list(nCells = n)` when you want to
#' simulate a dataset with n cells.
#' 2. nGenes. Like `nCells`, simply type `other_prior = list(nGenes = m)`
#' to get a dataset with m genes.
#' @references
#' Zappia L, Phipson B, Oshlack A. Splatter: simulation of single-cell RNA sequencing data. Genome biology, 2017, 18(1): 1-15. <https://doi.org/10.1186/s13059-017-1305-0>
#'
#' Bioconductor URL: <https://bioconductor.org/packages/release/bioc/html/splatter.html>
#'
#' Github URL: <https://github.com/Oshlack/splatter>
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#' # Estimate parameters
#' estimate_result <- Kersplat_estimation(ref_data = ref_data,
#' verbose = TRUE,
#' seed = 111)
#' # (1) Simulate a dataset with default parameters
#' simulate_result <- Kersplat_simulation(parameters = estimate_result[["estimate_result"]],
#' return_format = "list",
#' verbose = TRUE,
#' seed = 111)
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#'
#' # (2) Simulate a dataset with customized number of cells and genes
#' simulate_result <- Kersplat_simulation(parameters = estimate_result[["estimate_result"]],
#' return_format = "list",
#' other_prior = list(nCells = 500,
#' nGenes = 3000),
#' verbose = TRUE,
#' seed = 111)
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#' }
#'
Kersplat_simulation <- function(parameters,
                                other_prior = NULL,
                                return_format,
                                verbose = FALSE,
                                seed
){
  # Simulate a dataset with splatter::kersplatSimulate() from `parameters`,
  # rebuild a minimal SingleCellExperiment (counts, cell names, gene names),
  # convert it to `return_format` via simutils::data_conversion(), and return
  # it together with the peakRAM::peakRAM() report for the simulation call.
  ##############################################################################
  ####                               Check                                   ###
  ##############################################################################
  # inherits() always yields a single TRUE/FALSE, unlike `class(x) == "..."`,
  # which returns a longer vector for objects carrying more than one class.
  assertthat::assert_that(
    inherits(parameters, "KersplatParams"),
    msg = "`parameters` must be a KersplatParams object."
  )
  # Apply user-supplied overrides (e.g. nCells, nGenes) before simulating.
  if (!is.null(other_prior)) {
    parameters <- simutils::set_parameters(parameters = parameters,
                                           other_prior = other_prior,
                                           method = "Kersplat")
  }
  # Get params to check
  params_check <- splatter::getParams(parameters, c("nCells",
                                                    "nGenes"))
  # Return to users (reported regardless of `verbose`)
  message(paste0("nCells: ", params_check[['nCells']]))
  message(paste0("nGenes: ", params_check[['nGenes']]))
  ##############################################################################
  ####                            Simulation                                 ###
  ##############################################################################
  if (verbose) {
    message("Simulating datasets using Kersplat")
  }
  # Seed: stored in the parameter object rather than set with set.seed()
  parameters <- splatter::setParam(parameters, name = "seed", value = seed)
  # Simulation. The assignment is written inside the peakRAM() call so that the
  # simulation itself is what gets profiled; `simulate_result` is read again
  # below, so this relies on peakRAM evaluating the expression in this frame.
  simulate_detection <- peakRAM::peakRAM(
    simulate_result <- splatter::kersplatSimulate(parameters,
                                                  verbose = verbose)
  )
  ##############################################################################
  ####                        Format Conversion                              ###
  ##############################################################################
  ## counts
  counts <- as.matrix(SingleCellExperiment::counts(simulate_result))
  ## col_data
  col_data <- data.frame("cell_name" = colnames(counts))
  rownames(col_data) <- col_data$cell_name
  ## row_data: keep only the first rowData column and use it as the gene name
  row_data <- BiocGenerics::as.data.frame(SingleCellExperiment::rowData(simulate_result)[, 1])
  rownames(row_data) <- row_data[, 1]
  colnames(row_data) <- "gene_name"
  # Establish SingleCellExperiment
  simulate_result <- SingleCellExperiment::SingleCellExperiment(list(counts = counts),
                                                                colData = col_data,
                                                                rowData = row_data)
  simulate_result <- simutils::data_conversion(SCE_object = simulate_result,
                                               return_format = return_format)
  ##############################################################################
  ####                              Output                                   ###
  ##############################################################################
  simulate_output <- list(simulate_result = simulate_result,
                          simulate_detection = simulate_detection)
  return(simulate_output)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.