R/runSummary.R

Defines functions runSummary

Documented in runSummary

#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#' @param type which type of results to summarize
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @export
#'
runSummary <- function(path,type){

    Sum = sc = Clustering = ARI = AMI = Method = Time = NULL

    path <- file.path(normalizePath("."),path)

    system(paste('mkdir',path))

    dirs <- list.dirs(path = 'Analysis',full.names = T,recursive = F)
    dirs <- dirs[grep(type,dirs)]

    dt.dirs <- data.table::rbindlist(lapply(1:length(dirs),function(x){
        data.table::data.table(Dir = dirs[x],
                   Scenario = strsplit(dirs[x],"/")[[1]][2],
                   Sum = sum(file.exists(file.path(dirs[x],paste0('Simulation',1:100,".RData")))))
    }))

    completed.dt.dirs <- dt.dirs[Sum==100,]

    # Loading the data

    clustering <- list()

    timing <- list()
    tb.timing <- list()
    tb.clustering <- list()

    for(x in 1:length(completed.dt.dirs$Dir)){
        if(!file.exists(file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Means.tsv')))){
            message(completed.dt.dirs$Dir[x])
            files <- file.path(completed.dt.dirs$Dir[x],paste0('Simulation',1:100,".RData"))

            ret.clustering <- list()
            ret.time <- list()
            ret.umap <- list()
            ret.plotCount <- list()

            # Loading ARI, AMI, and time
            tryCatch({
                for(i in 1:length(files)){
                    message(i)
                    load(files[i])
                    ret.clustering[[i]] <- dt$clustering
                    ret.time[[i]] <- dt$time
                    if(i==1){
                        ret.umap[[completed.dt.dirs$Scenario[x]]] <- dt$umap

                        load(gsub('Analysis','Data',files[i]))
                        mat = scChIPseqsim:::pseudoBulk(sc,as.factor(SummarizedExperiment::colData(sc)$Cluster),type = 'cols')
                        ranges = SummarizedExperiment::rowRanges(sc)[order(apply(mat,1,sd),decreasing = T)][1] + 75000
                        ret.plotCount[[completed.dt.dirs$Scenario[x]]] <- scChIPseqsim:::pseudoPlot(sc = sc,mat = mat,ranges = ranges,normalize = F,bulkOnly = F)
                    }
                }

                clustering[[x]] <- data.table::rbindlist(ret.clustering)
                timing[[x]] <- data.table::rbindlist(ret.time)

                tb.timing[[x]] <- timing[[x]][,list(Mean_Time = mean(Time,na.rm=T),SD_Time = sd(Time,na.rm = T)),by = eval(names(timing[[x]])[-which(names(timing[[x]])%in%c('Time','Simulation'))])]
                tb.clustering[[x]] <- clustering[[x]][,list(Mean_ARI = mean(ARI,na.rm=T),SD_ARI = sd(ARI,na.rm = T),Mean_AMI = mean(AMI,na.rm=T),SD_AMI = sd(AMI,na.rm = T)),by = eval(names(clustering[[x]])[-which(names(clustering[[x]])%in%c('ARI','AMI','Simulation'))])]

                data.table::fwrite(tb.timing[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Time.tsv')))
                data.table::fwrite(tb.clustering[[x]],file = file.path(path,paste0(gsub('Analysis/','',completed.dt.dirs$Dir[x]),'_Table_Means.tsv')))

                if(length(grep('Groups1',completed.dt.dirs$Dir[x]))>0){
                    fig.clustering <- ggplot2::ggplot(data = clustering[[x]],ggplot2::aes(x = Clustering,y = ARI,fill = Method))+
                        ggplot2::geom_violin() +
                        ggplot2::theme_bw() +
                        ggplot2::scale_fill_brewer(palette = 'Set1')+
                        ggplot2::theme(legend.position = 'top',legend.direction = 'horizontal',
                                       axis.text = ggplot2::element_text(size = 8),axis.title = ggplot2::element_text(size = 8))
                    fig.time <- ggplot2::ggplot(data = timing[[x]],ggplot2::aes(x = Method,y = Time,fill = Method))+
                        ggplot2::geom_violin() +
                        ggplot2::theme_bw() +
                        ggplot2::scale_fill_brewer(palette = 'Set1') + ggplot2::ylab('Time (min)')+
                        ggplot2::theme(legend.position = 'top',legend.direction = 'horizontal',
                                       axis.text = ggplot2::element_text(size = 8),axis.title = ggplot2::element_text(size = 8))

                    fig1 <- ggpubr::ggarrange(fig.clustering,fig.time,nrow = 1,ncol=2,common.legend = T,legend = 'top')
                    fig2 <- ggpubr::ggarrange(plotlist = ret.umap[[completed.dt.dirs$Scenario[x]]],nrow = 2,ncol = 2,common.legend = T,legend = 'top')
                    fig3 <- ggpubr::ggarrange(ret.plotCount[[completed.dt.dirs$Scenario[x]]],fig1,fig2,nrow = 3,ncol = 1,heights = c(0.9,0.9,1.2)/3,
                                              labels = list('A','B','C'))

                    ggplot2::ggsave(fig3,filename = file.path(path,paste0(completed.dt.dirs$Scenario[x],'.pdf')),width = 7,height = 9)
                }

            },error = function(cond){message(cond)})
        }
    }

    message('Combining tables...')

    # Creating the final table
    time.files <- list.files(path = path,pattern = '*_Table_Time.tsv',full.names = TRUE)
    clustering.files <- list.files(path = path,pattern = '*_Table_Means.tsv',full.names = TRUE)

    time.dt <- list()
    clustering.dt <- list()

    for(i in 1:length(time.files)){
        time.dt[[i]] <- data.table::fread(time.files[i])
    }
    time.dt <- data.table::rbindlist(time.dt,fill = TRUE)

    for(i in 1:length(time.files)){
        clustering.dt[[i]] <- data.table::fread(clustering.files[i])
    }
    clustering.dt <- data.table::rbindlist(clustering.dt,fill = TRUE)

    dt.summary <- data.table::merge.data.table(clustering.dt,time.dt,all.x = TRUE,
                                               by = c('Data','Groups','Cells','Rarity','Noise','Depth','Dissimilarity','Method'))

    # dt.summary[(Data == 'scATACseq' & Groups == 1 & Cells == 3 & Rarity == 1 & Noise == 1 & Depth %in% c(1,3) & is.na(Dissimilarity) & Clustering == 'Kmeans') |
    # (Data == 'scChIPseq' & Groups == 1 & Cells == 3 & Rarity == 1 & Noise == 1 & Depth %in% c(1,3) & Dissimilarity == 1 & Clustering == 'Kmeans'),]

}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.