#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#' @param pathdata path to the data
#' @param resvec resolution of the data to analyze
#' @param maxCluster maximum number of clusters
#' @param algCluster clustering algorithm
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
runGrosselin_multiCluster <- function(path,pathdata,resvec = c(50000,25000,5000,1000),maxCluster = 8,algCluster = 'kmeans'){
V1 = V2 = V3 = rl = Counts = N = SumN = NULL
system2("mkdir",file.path(path))
for(resolution in resvec){
message('Resolution: ',resolution)
# General parameters
res <- resolution/1000
label <- paste0(res,'kb')
assignments <- list()
gzdir <- file.path(pathdata,'Data')
# Loading the data
load(file.path(pathdata,paste0('Analysis_',label,'.RData')))
sc <- get(paste0('Cusanovich2018.',label))$sc
# Runing each method for different number of clusters
message('Running each method for different number of clusters...')
for(ncluster in 2:maxCluster){
assignments[[paste0(ncluster)]] <- list()
tmpsc <- sc
SummarizedExperiment::colData(tmpsc$sc)$Cluster <- sample(1:ncluster,ncol(tmpsc$sc),replace = TRUE)
assignments[[paste0(ncluster)]][[paste0('cisTopic.',label)]] <- scChIPseqsim::runCisTopic(sc = tmpsc,legend.title = 'Random Assignment')
assignments[[paste0(ncluster)]][[paste0('Cusanovich2018.',label)]] <- scChIPseqsim::runCusanovich2018(sc = tmpsc,legend.title = 'Random Assignment')
assignments[[paste0(ncluster)]][[paste0('Scasat.',label)]] <- scChIPseqsim::runScasat(sc = tmpsc,legend.title = 'Random Assignment')
assignments[[paste0(ncluster)]][[paste0('SnapATAC.',label)]] <- scChIPseqsim::runSnapATAC(sc = tmpsc,legend.title = 'Random Assignment')
}
# Running differential peak calling for different resolutions
## Loading the data
dt1 <- scater::readSparseCounts(file.path(gzdir,paste0('dt1_',label,'.txt.gz')))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(gzdir,paste0('dt2_',label,'.txt.gz')))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(data.table::as.data.table(do.call(rbind,strsplit(rn,":|-"))), GenomicRanges::GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- data.table::rbindlist(list(data.table::data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table::data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
## Setting up the sc object
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = rep(NA,ncol(sc))))
sc <- subset(sc,as.character(seqnames)%in%c(paste0('chr',1:22),'chrX','chrY'))
Rleagg <- data.table::data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := data.table::rleid(Counts)][,N := 1:.N]
Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
sc <- sc[-Rleagg[,which(Counts==0 & SumN>2)],]
## Looping through different number of clusters
for(ncluster in 2:maxCluster){
for(methods in c('cisTopic','Cusanovich2018','Scasat','SnapATAC')){
# Organizing the data
SummarizedExperiment::colData(sc)$Cluster <- assignments[[paste0(ncluster)]][[paste0(methods,'.',label)]][['clustering']][[algCluster]]
SummarizedExperiment::colData(sc)[[paste0(methods,'.',ncluster)]] <- SummarizedExperiment::colData(sc)$Cluster
# Aggregating data and calling peaks
mat <- scChIPseqsim:::pseudoBulk(sc = sc,group = as.factor(SummarizedExperiment::colData(sc)$Cluster),type = 'cols')
for(cluster in 1:ncol(mat)){
message('Calling peaks from cluster ',cluster,' (out of ',ncluster,')',' for ',methods,'...')
object <- epigraHMM::epigraHMMDataSetFromMatrix(countData = as.matrix(mat[,cluster]),
rowRanges = SummarizedExperiment::rowRanges(sc),
colData = data.frame(condition = 'A',replicate = 1))
object <- epigraHMM::epigraHMM(object,control = epigraHMM::controlEM(),type = 'consensus',dist = 'nb')
S4Vectors::values(SummarizedExperiment::rowRanges(sc)) <- cbind(S4Vectors::values(SummarizedExperiment::rowRanges(sc)),S4Vectors::DataFrame('reference' = S4Vectors::metadata(object)$viterbi=='E'))
colnames(S4Vectors::values(SummarizedExperiment::rowRanges(sc)))[which(colnames(S4Vectors::values(SummarizedExperiment::rowRanges(sc)))=='reference')] <- paste0(methods,'.',ncluster,'.',cluster)
SummarizedExperiment::rowRanges(sc)$reference <- NULL
}
}
}
SummarizedExperiment::colData(sc)$Cluster <- NULL
assign(paste0('sc_',label),sc)
rm(sc)
message('Saving the results...')
save(list=paste0('sc_',label),file = file.path(path,paste0('Peaks_',label,'.RData')),compress = 'xz')
message('Done!')
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.