#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @export
#'
summaryMikado <- function(path){
Sum = sc = Clustering = ARI = AMI = Method = Time = dt.epigrahmm = completed.dt.dirs = NULL
path <- file.path(normalizePath("."),path)
system(paste('mkdir',path))
files <- list.files(path = 'Analysis',pattern = '*.RData',full.names = T)
# Loading the data
clustering <- list()
timing <- list()
tb.timing <- list()
tb.clustering <- list()
fig.umap <- list()
for(x in seq_len(length(files))){
if(!file.exists(file.path(path,paste0(gsub('.RData','',gsub('Analysis/','',files[x])),'_Table_Means.tsv')))){
message(files[x])
# Loading ARI, AMI, and time
tryCatch({
load(files[x])
tb.clustering[[x]] <- data.table::rbindlist(list(dt.epigrahmm$clustering,
dt.mikado$clustering))
tb.timing[[x]] <- data.table::rbindlist(list(dt.epigrahmm$time,
dt.mikado$time))
if(x==1){
fig.umap[['epigrahmm']] <- dt.epigrahmm$umap
fig.umap[['mikado']] <- dt.mikado$umap
load(gsub('Analysis','Data',files[x]))
mat = scChIPseqsim:::pseudoBulk(sc,as.factor(SummarizedExperiment::colData(sc)$Cluster),type = 'cols')
range = GenomicRanges::reduce(SummarizedExperiment::rowRanges(sc)[1600:2530])
fig.pseudoPlot <- mikado:::pseudoPlot(sc = sc,mat = mat,ranges = range,normalize = F)
# Creating peak plot
subsc <- sc[IRanges::overlapsAny(sc,range),]
dt.plot <- data.table::rbindlist(list(data.table::data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(subsc)),
Start = subsc@rowRanges@ranges@start,
Stop = subsc@rowRanges@ranges@start + subsc@rowRanges@ranges@width,
Peak = IRanges::overlapsAny(subsc,sc.epigrahmm$sc),
Type = 'Pooled'),
data.table::data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(subsc)),
Start = subsc@rowRanges@ranges@start,
Stop = subsc@rowRanges@ranges@start + subsc@rowRanges@ranges@width,
Peak = IRanges::overlapsAny(subsc,sc.mikado$sc),
Type = 'Differential')))
dt.plot$Type %<>% factor(levels = c('Pooled','Differential'))
fig.peaks <- ggplot2::ggplot(data = dt.plot,ggplot2::aes(x = Start,y = Counts))+
ggplot2::facet_grid(cols = ggplot2::vars(Type)) +
ggplot2::theme_bw()+
ggplot2::geom_line()+
ggplot2::geom_rect(data = dt.plot[Peak==T,],ggplot2::aes(xmin=Start,xmax=Stop,ymin=1.05*max(Counts),ymax=1.10*max(Counts)))+
ggplot2::annotate(size = 2.5,"text",fontface = 'italic',x = c(900000),y = c(27.5),label = c('Candidate peaks'))+
ggplot2::xlab(paste0('Genomic Window (',as.character(unique(subsc@rowRanges@seqnames)),')'))+
ggplot2::scale_x_continuous(labels = scales::scientific)
}
},error = function(cond){message(cond)})
}
}
tb.clustering <- data.table::rbindlist(tb.clustering)
tb.timing <- data.table::rbindlist(tb.timing)
tb.clustering$Peak %<>% factor(levels = c('Pooled','Differential'))
# tb.timing[[x]] <- timing[[x]][,list(Mean_Time = mean(Time,na.rm=T),SD_Time = sd(Time,na.rm = T)),by = eval(names(timing[[x]])[-which(names(timing[[x]])%in%c('Time','Simulation'))])]
# tb.clustering[[x]] <- clustering[[x]][,list(Mean_ARI = mean(ARI,na.rm=T),SD_ARI = sd(ARI,na.rm = T),Mean_AMI = mean(AMI,na.rm=T),SD_AMI = sd(AMI,na.rm = T)),by = eval(names(clustering[[x]])[-which(names(clustering[[x]])%in%c('ARI','AMI','Simulation'))])]
#
# data.table::fwrite(tb.timing[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Time.tsv')))
# data.table::fwrite(tb.clustering[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Means.tsv')))
fig.clustering <- ggplot2::ggplot(data = tb.clustering,ggplot2::aes(x = Clustering,y = ARI,fill = Method)) +
ggplot2::facet_grid(cols = ggplot2::vars(Peak)) +
ggplot2::geom_boxplot() +
ggplot2::theme_bw() +
ggplot2::scale_fill_brewer(palette = 'Set1')+
ggplot2::theme(legend.position = 'top',legend.direction = 'horizontal',axis.title.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 8),axis.title = ggplot2::element_text(size = 8))
fig.umap.epigrahmm <- ggpubr::ggarrange(plotlist = fig.umap[['epigrahmm']],legend = 'none',nrow = 2,ncol = 2,labels = list('D'))
fig.umap.mikado <- ggpubr::ggarrange(plotlist = fig.umap[['mikado']],legend = 'none',nrow = 2,ncol = 2,labels = list('E'))
fig.umap.combined <- ggpubr::ggarrange(fig.umap.epigrahmm,fig.umap.mikado,nrow = 1,ncol = 2)
fig1 <- ggpubr::ggarrange(fig.pseudoPlot,nrow = 1,ncol=1,common.legend = T,legend = 'top',labels = list('A'))
fig2 <- ggpubr::ggarrange(fig.peaks,nrow = 1,ncol=1,common.legend = T,legend = 'top',labels = list('B'))
fig3 <- ggpubr::ggarrange(fig.clustering,nrow = 1,ncol=1,common.legend = T,legend = 'top',labels = list('C'))
fig4 <- ggpubr::ggarrange(ggpubr::get_legend(scChIPseqsim:::addSmallLegend(fig.umap[['epigrahmm']]$cisTopic + ggplot2::theme(legend.position = 'bottom',legend.direction = 'horizontal'),pointSize = 1.5, textSize = 7, spaceLegend = 0.5,legend.title = "Cell Type")),
fig.umap.combined,
nrow = 2,ncol = 1,heights = c(0.05,0.95))
fig <- ggpubr::ggarrange(fig1,fig2,fig3,fig4,nrow = 4,ncol = 1,heights = c(0.9,0.7,1.2,1.2)/4)
ggplot2::ggsave(fig,filename = file.path(path,'Summary_mikado.pdf'),width = 8.5,height = 11)
### Creating table
message('Combining tables...')
tb.clustering.summary <- tb.clustering[,list(Mean_ARI = mean(ARI),SD_ARI = sd(ARI),Mean_AMI = mean(AMI),SD_AMI = sd(AMI)),by = c('Peak','Method','Clustering')]
tb.clustering.summary[,ARI := paste0(scChIPseqsim:::rd(Mean_ARI),' (',scChIPseqsim:::rd(SD_ARI),')')][,Mean_ARI:= NULL][,SD_ARI:= NULL]
tb.clustering.summary[,AMI := paste0(scChIPseqsim:::rd(Mean_AMI),' (',scChIPseqsim:::rd(SD_AMI),')')][,Mean_AMI:=NULL][,SD_AMI:=NULL]
tb.clustering.summary <- merge(tb.clustering.summary[Peak == 'Pooled',][,Peak := NULL][,Pooled_ARI := ARI][,Pooled_AMI := AMI][,ARI:=NULL][,AMI:=NULL],
tb.clustering.summary[Peak == 'Differential',][,Peak := NULL][,Differential_ARI := ARI][,Differential_AMI := AMI][,ARI:=NULL][,AMI:=NULL],
by = c('Clustering','Method'),all.x = T)
# Setting up scChIP-seq table
kable1 <- kableExtra::kable(tb.clustering.summary,format = 'latex',booktabs = T, align = c('l','l','r','r','r','r'),escape = F,
col.names = kableExtra::linebreak(c('Clustering\nAlgorithm','Method','ARI','AMI','ARI','AMI'),align = 'c'),
caption = 'Performance of scATAC-seq methods on simulated scChIP-seq data with candidate peaks called either on bulk data (Pooled) or on single-cell data with 3-state HMM (Differential).',label = 'project3_simmikado') %>%
kableExtra::collapse_rows(columns = 1,latex_hline = 'major',valign = 'top') %>%
kableExtra::add_header_above(c(" " = 2, "Pooled" = 2, "Differential" = 2))
sink(file.path(path,'Simulation_mikado.tex'))
cat(kable1)
sink()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.