#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#' @param type which type of results to summarize
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @export
#'
runSummary <- function(path,type){
Sum = sc = Clustering = ARI = AMI = Method = Time = NULL
path <- file.path(normalizePath("."),path)
system(paste('mkdir',path))
dirs <- list.dirs(path = 'Analysis',full.names = T,recursive = F)
dirs <- dirs[grep(type,dirs)]
dt.dirs <- data.table::rbindlist(lapply(1:length(dirs),function(x){
data.table::data.table(Dir = dirs[x],
Scenario = strsplit(dirs[x],"/")[[1]][2],
Sum = sum(file.exists(file.path(dirs[x],paste0('Simulation',1:100,".RData")))))
}))
completed.dt.dirs <- dt.dirs[Sum==100,]
# Loading the data
clustering <- list()
timing <- list()
tb.timing <- list()
tb.clustering <- list()
for(x in 1:length(completed.dt.dirs$Dir)){
if(!file.exists(file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Means.tsv')))){
message(completed.dt.dirs$Dir[x])
files <- file.path(completed.dt.dirs$Dir[x],paste0('Simulation',1:100,".RData"))
ret.clustering <- list()
ret.time <- list()
ret.umap <- list()
ret.plotCount <- list()
# Loading ARI, AMI, and time
tryCatch({
for(i in 1:length(files)){
message(i)
load(files[i])
ret.clustering[[i]] <- dt$clustering
ret.time[[i]] <- dt$time
if(i==1){
ret.umap[[completed.dt.dirs$Scenario[x]]] <- dt$umap
load(gsub('Analysis','Data',files[i]))
mat = scChIPseqsim:::pseudoBulk(sc,as.factor(SummarizedExperiment::colData(sc)$Cluster),type = 'cols')
ranges = SummarizedExperiment::rowRanges(sc)[order(apply(mat,1,sd),decreasing = T)][1] + 75000
ret.plotCount[[completed.dt.dirs$Scenario[x]]] <- scChIPseqsim:::pseudoPlot(sc = sc,mat = mat,ranges = ranges,normalize = F,bulkOnly = F)
}
}
clustering[[x]] <- data.table::rbindlist(ret.clustering)
timing[[x]] <- data.table::rbindlist(ret.time)
tb.timing[[x]] <- timing[[x]][,list(Mean_Time = mean(Time,na.rm=T),SD_Time = sd(Time,na.rm = T)),by = eval(names(timing[[x]])[-which(names(timing[[x]])%in%c('Time','Simulation'))])]
tb.clustering[[x]] <- clustering[[x]][,list(Mean_ARI = mean(ARI,na.rm=T),SD_ARI = sd(ARI,na.rm = T),Mean_AMI = mean(AMI,na.rm=T),SD_AMI = sd(AMI,na.rm = T)),by = eval(names(clustering[[x]])[-which(names(clustering[[x]])%in%c('ARI','AMI','Simulation'))])]
data.table::fwrite(tb.timing[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Time.tsv')))
data.table::fwrite(tb.clustering[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Means.tsv')))
if(length(grep('Groups1',completed.dt.dirs$Dir[x]))>0){
fig.clustering <- ggplot2::ggplot(data = clustering[[x]],ggplot2::aes(x = Clustering,y = ARI,fill = Method))+
ggplot2::geom_violin() +
ggplot2::theme_bw() +
ggplot2::scale_fill_brewer(palette = 'Set1')+
ggplot2::theme(legend.position = 'top',legend.direction = 'horizontal',
axis.text = ggplot2::element_text(size = 8),axis.title = ggplot2::element_text(size = 8))
fig.time <- ggplot2::ggplot(data = timing[[x]],ggplot2::aes(x = Method,y = Time,fill = Method))+
ggplot2::geom_violin() +
ggplot2::theme_bw() +
ggplot2::scale_fill_brewer(palette = 'Set1') + ggplot2::ylab('Time (min)')+
ggplot2::theme(legend.position = 'top',legend.direction = 'horizontal',
axis.text = ggplot2::element_text(size = 8),axis.title = ggplot2::element_text(size = 8))
fig1 <- ggpubr::ggarrange(fig.clustering,fig.time,nrow = 1,ncol=2,common.legend = T,legend = 'top')
fig2 <- ggpubr::ggarrange(plotlist = ret.umap[[completed.dt.dirs$Scenario[x]]],nrow = 2,ncol = 2,common.legend = T,legend = 'top')
fig3 <- ggpubr::ggarrange(ret.plotCount[[completed.dt.dirs$Scenario[x]]],fig1,fig2,nrow = 3,ncol = 1,heights = c(0.9,0.9,1.2)/3,
labels = list('A','B','C'))
ggplot2::ggsave(fig3,filename = file.path(path,paste0(completed.dt.dirs$Scenario[x],'.pdf')),width = 7,height = 9)
}
},error = function(cond){message(cond)})
}
}
message('Combining tables...')
# Creating the final table
time.files <- list.files(path = path,pattern = '*_Table_Time.tsv',full.names = TRUE)
clustering.files <- list.files(path = path,pattern = '*_Table_Means.tsv',full.names = TRUE)
time.dt <- list()
clustering.dt <- list()
for(i in 1:length(time.files)){
time.dt[[i]] <- data.table::fread(time.files[i])
}
time.dt <- data.table::rbindlist(time.dt,fill = TRUE)
for(i in 1:length(time.files)){
clustering.dt[[i]] <- data.table::fread(clustering.files[i])
}
clustering.dt <- data.table::rbindlist(clustering.dt,fill = TRUE)
dt.summary <- data.table::merge.data.table(clustering.dt,time.dt,all.x = TRUE,
by = c('Data','Groups','Cells','Rarity','Noise','Depth','Dissimilarity','Method'))
# dt.summary[(Data == 'scATACseq' & Groups == 1 & Cells == 3 & Rarity == 1 & Noise == 1 & Depth %in% c(1,3) & is.na(Dissimilarity) & Clustering == 'Kmeans') |
# (Data == 'scChIPseq' & Groups == 1 & Cells == 3 & Rarity == 1 & Noise == 1 & Depth %in% c(1,3) & Dissimilarity == 1 & Clustering == 'Kmeans'),]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.