#' Estimate Parameters From Real Datasets by SymSim
#'
#' This function estimates the parameters SymSim needs from a real dataset by
#' calling the \code{make_trees} function in the simutils package.
#'
#' @param ref_data A count matrix. Each row represents a gene and each column
#' represents a cell. Input that is not a matrix is coerced with
#' \code{as.matrix}.
#' @param verbose Logical. Whether to print a progress message before the
#' estimation step.
#' @param seed An integer random seed. When supplied, it is passed to
#' \code{set.seed} right before the estimation step so that random steps (for
#' example the kmeans grouping described in `Details`) start from the same RNG
#' state on every call. When omitted, the RNG state is left untouched.
#' @param other_prior A list with names of certain parameters. Some methods need
#' extra parameters to execute the estimation step, so you must input them. In
#' simulation step, the number of cells, genes, groups, batches, the percent of
#' DEGs are usually customed, so before simulating a dataset you must point it out.
#' For the estimation step the only entry that is read is `group.condition`.
#' See `Details` below for more information.
#' @importFrom dynwrap infer_trajectory wrap_expression add_grouping
#' @return A list with two elements: `estimate_result`, itself a list holding
#' the output of \code{simutils::make_trees} (`estimate_result`) and the
#' dimensions of `ref_data` (`data_dim`); and `estimate_detection`, the output
#' of \code{peakRAM::peakRAM} for the estimation call.
#' @export
#' @details
#' In SymSim, users can input cell group information if it is available. If cell
#' group information is not provided, the procedure will detect cell groups by
#' kmeans automatically.
#' See `Examples` for more instructions.
#'
#' @references
#' Zhang X, Xu C, Yosef N. Simulating multiple faceted variability in single cell RNA sequencing. Nature communications, 2019, 10(1): 1-16. <https://doi.org/10.1038/s41467-019-10500-w>
#'
#' Github URL: <https://github.com/YosefLab/SymSim>
#'
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#'
#' estimate_result <- simmethods::SymSim_estimation(
#'   ref_data = ref_data,
#'   other_prior = NULL,
#'   verbose = TRUE,
#'   seed = 111
#' )
#'
#' ## estimation with cell group information
#' group_condition <- paste0("Group", as.numeric(simmethods::group_condition))
#' estimate_result <- simmethods::SymSim_estimation(
#'   ref_data = ref_data,
#'   other_prior = list(group.condition = group_condition),
#'   verbose = TRUE,
#'   seed = 111
#' )
#' }
#'
SymSim_estimation <- function(ref_data,
                              verbose = FALSE,
                              other_prior = NULL,
                              seed){
  ##############################################################################
  ####                            Environment                                ###
  ##############################################################################
  # Try to install SymSim from GitHub when it is not available.
  if(!requireNamespace("SymSim", quietly = TRUE)){
    message("SymSim is not installed on your device")
    message("Installing SymSim...")
    devtools::install_github("YosefLab/SymSim")
  }
  ##############################################################################
  ####                               Check                                   ###
  ##############################################################################
  if(!is.matrix(ref_data)){
    ref_data <- as.matrix(ref_data)
  }
  # `[[` yields NULL both when other_prior is NULL and when the entry is
  # absent, so no explicit branch is needed to default the grouping to NULL.
  group <- other_prior[["group.condition"]]
  # Honour the documented seed. `seed` has no default, so only touch the RNG
  # when the caller actually supplied one; calls without it keep working.
  if(!missing(seed) && !is.null(seed)){
    set.seed(seed)
  }
  ##############################################################################
  ####                            Estimation                                 ###
  ##############################################################################
  if(verbose){
    message("Estimating parameters using SymSim")
  }
  # Estimation
  # The assignment is made inside the peakRAM() call: peakRAM records the run
  # (presumably elapsed time and peak memory) while the assignment makes
  # `estimate_result` available to the code below.
  estimate_detection <- peakRAM::peakRAM(
    estimate_result <- simutils::make_trees(ref_data = ref_data,
                                            group = group,
                                            is_Newick = FALSE,
                                            is_parenthetic = FALSE)
  )
  # Keep the reference dimensions next to the tree; the simulation step reads
  # them as the default number of genes and cells.
  estimate_result <- list(estimate_result = estimate_result,
                          data_dim = dim(ref_data))
  ##############################################################################
  ####                              Output                                   ###
  ##############################################################################
  estimate_output <- list(estimate_result = estimate_result,
                          estimate_detection = estimate_detection)
  return(estimate_output)
}
#' Simulate Datasets by SymSim
#'
#' Simulates a count matrix with \code{SymSim::SimulateTrueCounts} using the
#' tree estimated from a reference dataset, then converts the result to the
#' requested output format.
#'
#' @param parameters The `estimate_result` element returned by
#' \code{SymSim_estimation}: a list holding the object generated by
#' [simutils::make_trees()] (`estimate_result`) and the dimensions of the
#' reference data (`data_dim`).
#' @param other_prior A list with names of certain parameters. Some methods need
#' extra parameters to execute the estimation step, so you must input them. In
#' simulation step, the number of cells, genes, groups, batches, the percent of
#' DEGs are usually customed, so before simulating a dataset you must point it out.
#' The entries `phyla`, `min_popsize`, `evf_type`, `randseed`, `ncells_total`
#' and `ngenes` are always set by this function and overwrite any value
#' supplied under those names.
#' See `Details` below for more information.
#' @param return_format A character. Alternative choices: list, SingleCellExperiment,
#' Seurat, h5ad. If you select `h5ad`, you will get a path where the .h5ad file saves to.
#' @param verbose Logical. Whether to return messages or not.
#' @param seed A random seed. It is forwarded to the simulation as `randseed`.
#' @importFrom plyr alply
#' @importFrom SymSim SimulateTrueCounts
#' @importFrom stats quantile
#' @return A list with two elements: `simulate_result`, the simulated data
#' converted by \code{simutils::data_conversion} to `return_format`; and
#' `simulate_detection`, the output of \code{peakRAM::peakRAM} for the
#' simulation call.
#' @export
#' @details
#' In SymSim, users can only set `nCells` and `nGenes` to specify the number of cells and genes in the
#' simulated dataset. When they are not given, the dimensions of the reference
#' data are used. See `Examples` for instructions.
#'
#' @references
#' Zhang X, Xu C, Yosef N. Simulating multiple faceted variability in single cell RNA sequencing. Nature communications, 2019, 10(1): 1-16. <https://doi.org/10.1038/s41467-019-10500-w>
#'
#' Github URL: <https://github.com/YosefLab/SymSim>
#'
#' @examples
#' \dontrun{
#' ref_data <- simmethods::data
#'
#' ## estimation with cell group information
#' group_condition <- paste0("Group", as.numeric(simmethods::group_condition))
#' estimate_result <- simmethods::SymSim_estimation(
#'   ref_data = ref_data,
#'   other_prior = list(group.condition = group_condition),
#'   verbose = TRUE,
#'   seed = 111
#' )
#'
#' # 1) Simulate with default parameters
#' simulate_result <- simmethods::SymSim_simulation(
#'   parameters = estimate_result[["estimate_result"]],
#'   other_prior = NULL,
#'   return_format = "list",
#'   verbose = TRUE,
#'   seed = 111
#' )
#' ## counts
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#'
#' # 2) 2000 cells and 5000 genes
#' simulate_result <- simmethods::SymSim_simulation(
#'   parameters = estimate_result[["estimate_result"]],
#'   other_prior = list(nCells = 2000,
#'                      nGenes = 5000),
#'   return_format = "list",
#'   verbose = TRUE,
#'   seed = 111
#' )
#'
#' ## counts
#' counts <- simulate_result[["simulate_result"]][["count_data"]]
#' dim(counts)
#' }
#'
SymSim_simulation <- function(parameters,
                              other_prior = NULL,
                              return_format,
                              verbose = FALSE,
                              seed
){
  ##############################################################################
  ####                            Environment                                ###
  ##############################################################################
  # Try to install SymSim from GitHub when it is not available.
  if(!requireNamespace("SymSim", quietly = TRUE)){
    message("SymSim is not installed on your device")
    message("Installing SymSim...")
    devtools::install_github("YosefLab/SymSim")
  }
  # NOTE(review): the result of this call was never used; it looks like a
  # placeholder that exercises the plyr/stats imports. The call is kept in
  # case loading plyr here matters, but the unused binding is dropped.
  plyr::alply(matrix(c(1,1,1,1), 2), 2, stats::quantile)
  ##############################################################################
  ####                               Check                                   ###
  ##############################################################################
  # Fixed simulation settings; these overwrite same-named user entries.
  phyla <- parameters[["estimate_result"]]
  other_prior[["phyla"]] <- phyla
  other_prior[["min_popsize"]] <- 1
  other_prior[["evf_type"]] <- "continuous"
  other_prior[["randseed"]] <- seed
  # nCells: user value if given, otherwise the number of reference columns
  if(!is.null(other_prior[["nCells"]])){
    other_prior[["ncells_total"]] <- other_prior[["nCells"]]
  }else{
    other_prior[["ncells_total"]] <- parameters[["data_dim"]][2]
  }
  # nGenes: user value if given, otherwise the number of reference rows
  if(!is.null(other_prior[["nGenes"]])){
    other_prior[["ngenes"]] <- other_prior[["nGenes"]]
  }else{
    other_prior[["ngenes"]] <- parameters[["data_dim"]][1]
  }
  # Return to users
  message(paste0("nCells: ", other_prior[['ncells_total']]))
  message(paste0("nGenes: ", other_prior[['ngenes']]))
  # Build the argument list for SymSim::SimulateTrueCounts from other_prior.
  simulate_formals <- simutils::change_parameters(function_expr = "SymSim::SimulateTrueCounts",
                                                  other_prior = other_prior,
                                                  step = "simulation")
  ##############################################################################
  ####                            Simulation                                 ###
  ##############################################################################
  if(verbose){
    message("Simulating datasets using SymSim")
  }
  # Simulation
  # The assignment is made inside the peakRAM() call: peakRAM records the run
  # while the assignment makes `simulate_result` available to the code below.
  simulate_detection <- peakRAM::peakRAM(
    simulate_result <- do.call(SymSim::SimulateTrueCounts, simulate_formals)
  )
  ##############################################################################
  ####                        Format Conversion                              ###
  ##############################################################################
  counts <- simulate_result[["counts"]]
  # seq_len() instead of 1:n so the index never counts down (1:0) on an
  # empty dimension.
  colnames(counts) <- paste0("Cell", seq_len(ncol(counts)))
  rownames(counts) <- paste0("Gene", seq_len(nrow(counts)))
  ## col_data
  # Recode the simulated population labels as consecutive integers.
  group <- as.numeric(as.factor(simulate_result[["cell_meta"]][["pop"]]))
  col_data <- data.frame("cell_name" = colnames(counts),
                         "group" = paste0("Group", group))
  ## row_data
  row_data <- data.frame("gene_name" = rownames(counts))
  # Establish SingleCellExperiment
  simulate_result <- SingleCellExperiment::SingleCellExperiment(list(counts = counts),
                                                                colData = col_data,
                                                                rowData = row_data)
  simulate_result <- simutils::data_conversion(SCE_object = simulate_result,
                                               return_format = return_format)
  ##############################################################################
  ####                              Output                                   ###
  ##############################################################################
  simulate_output <- list(simulate_result = simulate_result,
                          simulate_detection = simulate_detection)
  return(simulate_output)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.