#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#' @param cores number of cores
#' @param minReads minimum number of reads per cell to consider
#' @param maxPct upper percentile of reads per cell to consider
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
runGrosselin_mikado <- function(path,cores = 4,minReads = 5000,maxPct = 1){
mati2 = V1 = V2 = V3 = rl = Counts = N = SumN = NULL
tmpdir <- file.path(path,'tmp')
system2("mkdir",file.path(path))
system2("mkdir",tmpdir)
tmpdir <- normalizePath(tmpdir)
control = mikado::controlEM(cores = cores,dir = tmpdir,chr = c(paste0('chr',1:22),'chrX'),epsilon.em = 1e-2,maxit.em = 20,init = 'fast')
dtnames <- list('50kb' = c('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_50000bp/counts/SRR7536862_hg38_50000bp_flagged_rmPCR_RT_rmDup_counts_50000_filt_1000.tsv',
'/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_50000bp/counts/SRR7536863_hg38_50000bp_flagged_rmPCR_RT_rmDup_counts_50000_filt_1000.tsv'),
'25kb' = c('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_25000bp/counts/SRR7536862_hg38_25000bp_flagged_rmPCR_RT_rmDup_counts_25000_filt_1000.tsv',
'/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_25000bp/counts/SRR7536863_hg38_25000bp_flagged_rmPCR_RT_rmDup_counts_25000_filt_1000.tsv'),
'5kb' = c('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_5000bp/counts/SRR7536862_hg38_5000bp_flagged_rmPCR_RT_rmDup_counts_5000_filt_1000.tsv',
'/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_5000bp/counts/SRR7536863_hg38_5000bp_flagged_rmPCR_RT_rmDup_counts_5000_filt_1000.tsv'),
'1kb' = c('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_1000bp/counts/SRR7536862_hg38_1000bp_flagged_rmPCR_RT_rmDup_counts_1000_filt_1000.tsv',
'/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_1000bp/counts/SRR7536863_hg38_1000bp_flagged_rmPCR_RT_rmDup_counts_1000_filt_1000.tsv'))
resolution <- c(50000,25000,5000,1000)
# Running methods on original data
if(!file.exists(file.path(path,'Analysis.RData'))){
load('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/scChIPseq/datasets/HBCx-22-human-paper/scChIPseq.RData')
load('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/scChIPseq/datasets/HBCx-22-human-paper/cor_filtered_data/HBCx-22-human-paper_99_1.RData')
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = Matrix::Matrix(dt,sparse = TRUE)),
rowRanges = GenomicRanges::makeGRangesFromDataFrame(data.table::rbindlist(lapply(strsplit(rownames(dt),"_"),FUN = function(x){
data.table::data.table(chr=paste(x[seq_len(length(x)-2)],collapse = '_'),start=x[length(x)-1],end=x[length(x)])
}))),
colData = data.frame(Cluster = as.numeric(as.factor(metadata$ChromatinGroup))))
shape <- metadata$sample_id
shape %<>% plyr::mapvalues(from = c("GSM3290891_CountTable_HBCx-22_scChIP_H3K27me3_hg38","GSM3290892_CountTable_HBCx-22-TamR_scChIP_H3K27me3_hg38"),
to = c('HBCx-22','HBCx-22-TamR')) %<>% factor(levels = c('HBCx-22','HBCx-22-TamR'))
sc <- sort(sc)
### Begin Filtering
sc <- sc[as.character(GenomicRanges::seqnames(sc))%in%control$chr,]
sc <- sc[,Matrix::colSums(SummarizedExperiment::assay(sc))>minReads]
sc <- sc[,Matrix::colSums(SummarizedExperiment::assay(sc))<quantile(Matrix::colSums(SummarizedExperiment::assay(sc)),probs = (100-maxPct)/100)]
### End Filtering
SummarizedExperiment::assay(sc) <- 1*(SummarizedExperiment::assay(sc)>0)
# Running mikado
initializer <- mikado::initializer(object = sc,control = control)
save(initializer,shape,sc,file = file.path(path,'Analysis.RData'),compress = 'xz')
}
# Analysis for other window sizes
for(res in resolution){
label <- paste0(res/1000,'kb')
if(!file.exists(file.path(path,paste0('Analysis_',label,'.RData')))){
message('Resolution: ',label)
dtname1 <- dtnames[[label]][[1]]
dtname2 <- dtnames[[label]][[2]]
system(paste('cp',dtname1,file.path(tmpdir,paste0('dt1_',label,'.txt'))))
system(paste('gzip -c',file.path(tmpdir,paste0('dt1_',label,'.txt')),'>',file.path(tmpdir,paste0('dt1_',label,'.txt.gz'))))
system(paste('rm',file.path(tmpdir,paste0('dt1_',label,'.txt'))))
system(paste('cp',dtname2,file.path(tmpdir,paste0('dt2_',label,'.txt'))))
system(paste('gzip -c',file.path(tmpdir,paste0('dt2_',label,'.txt')),'>',file.path(tmpdir,paste0('dt2_',label,'.txt.gz'))))
system(paste('rm',file.path(tmpdir,paste0('dt2_',label,'.txt'))))
dt1 <- scater::readSparseCounts(file.path(tmpdir,paste0('dt1_',label,'.txt.gz')))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(tmpdir,paste0('dt2_',label,'.txt.gz')))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(data.table::as.data.table(do.call(rbind,strsplit(rn,":|-"))),GenomicRanges::GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- data.table::rbindlist(list(data.table::data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table::data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = annot_raw$sample_id))
sc <- sort(sc)
### Begin Filtering
sc <- sc[as.character(GenomicRanges::seqnames(sc))%in%control$chr,]
sc <- sc[,Matrix::colSums(SummarizedExperiment::assay(sc))>minReads]
sc <- sc[,Matrix::colSums(SummarizedExperiment::assay(sc))<quantile(Matrix::colSums(SummarizedExperiment::assay(sc)),probs = (100-maxPct)/100)]
### End Filtering
SummarizedExperiment::assay(sc) <- 1*(SummarizedExperiment::assay(sc)>0)
# Running mikado
assign(paste0('initializer.',label),mikado::initializer(object = sc,control = control))
assign(paste0('sc.',label),sc)
save(list = c(paste0('initializer.',label),paste0('sc.',label)),file = file.path(path,paste0('Analysis_',label,'.RData')),compress = 'xz')
}
}
system(paste('rm',tmpdir))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.