#' Multi-scenario simulator of single-cell epigenomic data
#'
#' Add description here
#'
#' @param refList either a list of GenomicRanges (each element specifies the cluster's peaks) or a list of bam paths (each element specifies the alignment to generate data from)
#' @param cellList list of size length(refList) with the number of cells per cluster
#' @param noiseList list of size length(refList) with cluster-specific vectors of noise values from each cell
#' @param depthList list of size length(refList) with the baseline number of cells per cluster
#' @param relDepthList list of size length(refList) with cluster-specific vectors of relative depth values from each cell
#' @param label a character with the data label
#' @param path a character with the path where the simulated data will be saved
#' @param genome a character specifying the target genome
#' @param binSize integer with the size of the bin window
#' @param cores number of cores for parallel computing
#' @param shuffleProp either NULL (default) or the proportion of the genome to be shuffled (only applicable for scChIPseqFromBam)
#' @param shuffleChunks size of the chunks that will be swapped across the genome (applicable for scChIPseq data only)
#' @param ... Arguments to pass to the scChIPseqsim constructor.
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom parallel mclapply
#' @import data.table
#' @rawNamespace import(SingleCellExperiment, except = weights)
#' @importFrom foreach %dopar%
#'
#' @export
#'
simData <- function(refList,cellList,noiseList,depthList,relDepthList,
label,path,genome,binSize,cores = 1,shuffleProp = NULL,shuffleChunks = 5000,...){
matCounts = grGenomeBinned = i = NULL
nGroups <- length(refList)
# Registering number of cores
doParallel::registerDoParallel(cores = cores)
# Creating auxiliary variable
newLabel <- paste0(label,paste0('_g',seq_len(nGroups)))
# Running simulation in parallel
if(cores==1){
cat('Simulating counts\n')
} else{
cat('Simulating counts in parallel\n')
}
matLength <- NULL
genomeLength <- NULL
toiter <- unlist(lapply(seq_len(length(newLabel)),FUN = function(x){
tryCatch({
suppressWarnings({
load(file.path(path,paste(newLabel[[x]],'counts.RData',sep = '_')))
matLength[x] <<- nrow(matCounts)
rm(matCounts)
load(file.path(path,paste(newLabel[[x]],'ranges.RData',sep = '_')))
genomeLength[x] <<- length(grGenomeBinned)
rm(grGenomeBinned)
})
},error = function(cond){return(x)})
}))
if(length(unique(na.omit(matLength)))>1 | length(unique(na.omit(genomeLength)))>1){
toiter <- union(toiter,as.numeric(names(table(matLength)[table(matLength) == min(table(matLength))])))
toiter <- union(toiter,as.numeric(names(table(genomeLength)[table(genomeLength) == min(table(genomeLength))])))
}
if(all(unlist(lapply(refList,is.character)))){
foreach::foreach(i = iterators::iter(toiter)) %dopar% scChIPseqFromBam(label = newLabel[i],
path = path,genome = genome,binSize = binSize,
bamFile = refList[[i]],
nCells = cellList[[i]],
baseSeqDepth = depthList[[i]],
relSeqDepthPerCell = relDepthList[[i]],
noisePerCell = noiseList[[i]],
excludeRegions = TRUE,outputRanges = TRUE,
shuffleProp = shuffleProp,
shuffleChunks = shuffleChunks)
} else{
foreach::foreach(i = iterators::iter(toiter)) %dopar% scChIPseqFromPeaks(label = newLabel[i],
path = path,genome = genome,binSize = binSize,
grPeaks = refList[[i]],
nCells = cellList[[i]],
baseSeqDepth = depthList[[i]],
relSeqDepthPerCell = relDepthList[[i]],
noisePerCell = noiseList[[i]],
excludeRegions = TRUE,outputRanges = TRUE)
}
# Merging everything now
cat('Merging simulated counts\n')
mat <- list()
metadata <- list()
for(x in seq_len(nGroups)){
load(file.path(path,paste(newLabel[x],'counts.RData',sep = '_')))
mat[[x]] <- matCounts
metadata[[x]] <- data.table(Cluster = rep(x,cellList[[x]]),
Depth = depthList[[x]]*relDepthList[[x]],
Noise = noiseList[[x]])
rm(matCounts)
}
# Merging ranges
cat('Merging ranges\n')
for(x in seq_len(nGroups)){
load(file.path(path,paste(newLabel[x],'ranges.RData',sep = '_')))
if(x==1){
grGenome <- grGenomeBinned
colnames(S4Vectors::values(grGenome)) <- paste0('reference',x)
} else{
S4Vectors::values(grGenome) <- cbind(S4Vectors::values(grGenome),S4Vectors::DataFrame('reference' = grGenomeBinned$reference))
colnames(S4Vectors::values(grGenome))[x] <- paste0('reference',x)
}
rm(grGenomeBinned)
}
# Creating SingleCellExperiment
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = do.call(cbind,mat)),
rowRanges = grGenome,
colData = rbindlist(metadata))
# Removing old files
for(x in seq_len(nGroups)){
unlink(file.path(path,paste(newLabel[x],'counts.RData',sep = '_')))
unlink(file.path(path,paste(newLabel[x],'ranges.RData',sep = '_')))
}
# Saving the resulting output
cat("Done. The following file has been saved:\n")
file <- file.path(path,paste0(label,'.RData'))
cat(file,'\n')
save(sc,file = file,compress = 'xz')
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.