#' Estimate Parameters From Real Datasets by zinbwaveZinger
#'
#' This function is used to estimate useful parameters from a real dataset by
#' using `getDatasetMoMPositive` function in zinbwaveZingercollected package.
#'
#' @param ref_data A count matrix. Each row represents a gene and each column
#' represents a cell.
#' @param verbose Logical.
#' @param other_prior A list with names of certain parameters. Some methods need
#' extra parameters to execute the estimation step, so you must input them. In
#' simulation step, the number of cells, genes, groups, batches, the percent of
#' DEGs are usually customed, so before simulating a dataset you must point it out.
#' See `Details` below for more information.
#' @param seed An integer of a random seed.
#' @return A list contains the estimated parameters and the results of execution
#' detection.
#' @export
#' @details
#' When you use zingeR to estimate parameters from a real dataset, you must input
#' a numeric vector to specify the groups that each cell belongs to, like
#' `other_prior = list(group.condition = the numeric vector)`. See `Examples`
#' and learn from it.
#' @references
#' Github URL: <https://github.com/statOmics/zinbwaveZinger>
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#' group_condition <- as.numeric(simmethods::group_condition)
#'
#' estimate_result <- simmethods::zinbwaveZinger_estimation(
#' ref_data = ref_data,
#' other_prior = list(group.condition = group_condition),
#' verbose = TRUE,
#' seed = 111
#' )
#' }
#'
zinbwaveZinger_estimation <- function(ref_data,
verbose = FALSE,
other_prior = NULL,
seed
){
##############################################################################
#### Environment ###
##############################################################################
if(!requireNamespace("zinbwaveZingercollected", quietly = TRUE)){
message("zinbwaveZinger is not installed on your device...")
message("Installing zinbwaveZinger...")
devtools::install_github("duohongrui/zinbwaveZingercollected")
}
##############################################################################
#### Check ###
##############################################################################
if(!is.matrix(ref_data)){
ref_data <- as.matrix(ref_data)
}
## Filter
other_prior[["counts"]] <- ref_data[apply(ref_data, 1, function(x) length(x[x>0])) > 1, ]
estimate_formals <- simutils::change_parameters(function_expr = "zinbwaveZingercollected::getDatasetMoMPositive",
other_prior = other_prior,
step = "estimation")
##############################################################################
#### Estimation ###
##############################################################################
if(verbose){
message("Estimating parameters using zinbwaveZinger")
}
# Seed
set.seed(seed)
# Estimation
estimate_detection <- peakRAM::peakRAM(
estimate_result <- BiocGenerics::do.call(zinbwaveZingercollected::getDatasetMoMPositive, estimate_formals)
)
##############################################################################
#### Ouput ###
##############################################################################
estimate_output <- list(estimate_result = estimate_result,
estimate_detection = estimate_detection)
return(estimate_output)
}
#' Simulate Datasets by zinbwaveZinger
#'
#' This function is used to simulate datasets from learned parameters by `NBsimSingleCell_zinbwaveZinger`
#' function in zinbwaveZingercollected package.
#'
#' @param ref_data A matrix for one dataset or a list of datasets with their own
#' names. This is usually unused except for some methods, e.g. SCRIP, scDesign,
#' zingeR, zinbwaveZinger.
#' @param parameters A object generated by [zinbwaveZingercollected::NBsimSingleCell_zinbwaveZinger()]
#' @param other_prior A list with names of certain parameters. Some methods need
#' extra parameters to execute the estimation step, so you must input them. In
#' simulation step, the number of cells, genes, groups, batches, the percent of
#' DEGs are usually customed, so before simulating a dataset you must point it out.
#' See `Details` below for more information.
#' @param return_format A character. Alternative choices: list, SingleCellExperiment,
#' Seurat, h5ad. If you select `h5ad`, you will get a path where the .h5ad file saves to.
#' @param verbose Logical. Whether to return messages or not.
#' @param seed A random seed.
#' @export
#' @details
#' In addtion to simulate datasets with default parameters, users want to simulate
#' other kinds of datasets, e.g. a counts matrix with 2 or more cell groups. In
#' zinbwaveZinger, you can set extra parameters to simulate datasets.
#'
#' The customed parameters you can set are below:
#' 1. nCells. In zinbwaveZinger, You can directly set `other_prior = list(nCells = 5000)` to simulate 5000 cells. nCells must be larger than the reference data.
#' 2. nGenes. You can directly set `other_prior = list(nGenes = 5000)` to simulate 5000 genes. nGenes must be larger than the reference data.
#' 3. de.prob. You can directly set `other_prior = list(de.prob = 0.2)` to simulate DEGs that account for 20 percent of all genes.
#' 4. fc.group. You can directly set `other_prior = list(fc.group = 2)` to specify the fold change of DEGs.
#'
#' For more customed parameters in zinbwaveZinger, please check [zingeR::NBsimSingleCell()].
#' @references
#' Github URL: <https://github.com/statOmics/zinbwaveZinger>
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#' group_condition <- as.numeric(simmethods::group_condition)
#'
#' estimate_result <- simmethods::zinbwaveZinger_estimation(
#' ref_data = ref_data,
#' other_prior = list(group.condition = group_condition),
#' verbose = TRUE,
#' seed = 111
#' )
#'
#' ## Default parameters
#' simulate_result <- simmethods::zinbwaveZinger_simulation(
#' ref_data = ref_data,
#' other_prior = list(group.condition = group_condition),
#' parameters = estimate_result[["estimate_result"]],
#' return_format = "list",
#' verbose = TRUE,
#' seed = 111
#' )
#' ## counts
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#' ## cell information
#' col_data <- simulate_result[["simulate_result"]][["col_meta"]]
#' ## gene information
#' row_data <- simulate_result[["simulate_result"]][["row_meta"]]
#' head(row_data)
#'
#'
#' ## In zinbwaveZinger, users can only set the number of cells and genes which is higher
#' ## than the reference data. In addtion, the proportion of DEGs and fold change are
#' ## able to be customed. Not that zinbwaveZinger dose not return cell group information.
#' ## Default parameters
#' simulate_result <- simmethods::zinbwaveZinger_simulation(
#' ref_data = ref_data,
#' other_prior = list(group.condition = group_condition,
#' nCells = 1000,
#' nGenes = 5000,
#' de.prob = 0.2,
#' fc.group = 4),
#' parameters = estimate_result[["estimate_result"]],
#' return_format = "list",
#' verbose = TRUE,
#' seed = 111
#' )
#' ## counts
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#' ## cell information
#' col_data <- simulate_result[["simulate_result"]][["col_meta"]]
#' ## gene information
#' row_data <- simulate_result[["simulate_result"]][["row_meta"]]
#' head(row_data)
#' }
#'
zinbwaveZinger_simulation <- function(ref_data,
parameters,
other_prior = NULL,
return_format,
verbose = FALSE,
seed
){
##############################################################################
#### Environment ###
##############################################################################
if(!requireNamespace("zinbwaveZingercollected", quietly = TRUE)){
message("zinbwaveZinger is not installed on your device...")
message("Installing zinbwaveZinger...")
devtools::install_github("duohongrui/zinbwaveZingercollected")
}
other_prior[["dataset"]] <- ref_data[apply(ref_data, 1, function(x) length(x[x>0])) > 1, ]
other_prior[["params"]] <- parameters
## condition
if(is.null(other_prior[["group.condition"]])){
stop("Please input condition information that each cell belongs to.")
}else{
other_prior[["group"]] <- other_prior[["group.condition"]]-1
}
## gene number
if(is.null(other_prior[["nGenes"]])){
other_prior[["nTags"]] <- nrow(ref_data)
}else{
other_prior[["nTags"]] <- other_prior[["nGenes"]]
}
## nCells
if(is.null(other_prior[["nCells"]])){
other_prior[["nlibs"]] <- ncol(ref_data)
}else{
other_prior[["nlibs"]] <- other_prior[["nCells"]]
}
## ind
if(is.null(other_prior[["ind"]])){
if(is.null(other_prior[["de.prob"]])){
other_prior[["de.prob"]] <- 0.1
}
set.seed(seed)
ind <- sample(1:nrow(ref_data),
floor(other_prior[["nTags"]]*other_prior[["de.prob"]]),
replace = FALSE)
other_prior[["ind"]] <- ind
}
## fc
if(is.null(other_prior[["fc.group"]])){
if(!is.null(other_prior[["ind"]])){
other_prior[["foldDiff"]] <- rep(2, length(other_prior[["ind"]]))
}
}else{
other_prior[["foldDiff"]] <- rep(other_prior[["fc.group"]], length(other_prior[["ind"]]))
}
## lib.size
if(is.null(other_prior[["lib.size"]])){
other_prior[["lib.size"]] <- NULL
}
##############################################################################
#### Check ###
##############################################################################
simulate_formals <- simutils::change_parameters(function_expr = "zinbwaveZingercollected::NBsimSingleCell_zinbwaveZinger",
other_prior = other_prior,
step = "simulation")
# Return to users
message(paste0("nCells: ", simulate_formals[['nlibs']]))
message(paste0("nGenes: ", simulate_formals[['nTags']]))
message(paste0("nGroups: ", length(unique(other_prior[['group']]))))
message(paste0("prob.group: ", other_prior[['de.prob']]))
message(paste0("fc.group: ", unique(simulate_formals[['foldDiff']])))
##############################################################################
#### Simulation ###
##############################################################################
if(verbose){
message("Simulating datasets using zinbwaveZinger")
}
# Seed
set.seed(seed)
# Simulation
simulate_detection <- peakRAM::peakRAM(
simulate_result <- BiocGenerics::do.call(zinbwaveZingercollected::NBsimSingleCell_zinbwaveZinger, simulate_formals))
##############################################################################
#### Format Conversion ###
##############################################################################
## counts
counts <- simulate_result[["counts"]]
colnames(counts) <- paste0("Cell", 1:ncol(counts))
rownames(counts) <- paste0("Gene", 1:nrow(counts))
## col_data
col_data <- data.frame("cell_name" = colnames(counts))
## row_data
indDE <- simulate_result[["indDE"]]
names(indDE) <- rep("DE", length(indDE))
indNonDE <- simulate_result[["indNonDE"]]
names(indNonDE) <- rep("non-DE", length(indNonDE))
gene_DE <- sort(c(indDE, indNonDE))
row_data <- data.frame("gene_name" = rownames(counts),
"de_gene" = ifelse(names(gene_DE) == "DE", "yes", "no"),
"de_fc" = 0)
row_data[indDE, 3] <- simulate_result[["foldDiff"]]
# Establish SingleCellExperiment
simulate_result <- SingleCellExperiment::SingleCellExperiment(list(counts = counts),
colData = col_data,
rowData = row_data)
simulate_result <- simutils::data_conversion(SCE_object = simulate_result,
return_format = return_format)
##############################################################################
#### Ouput ###
##############################################################################
simulate_output <- list(simulate_result = simulate_result,
simulate_detection = simulate_detection)
return(simulate_output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.