R/runSummaryGrosselin_Figure2.R

Defines functions runSummaryGrosselin_Figure2

Documented in runSummaryGrosselin_Figure2

#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path path where results are located
#' @param pathout path where figure will be saved
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
runSummaryGrosselin_Figure2 <- function(path,pathout){

    Resolution = Counts = Gene = Window = Start = End = NULL

    path <- normalizePath(path)
    system(paste('mkdir',pathout))

    res <- c(50,25,5,1)
    sc <- list()

    # Loading the data

    for(i in res){
        label <- paste0(i,'kb')
        message(label)
        sc[[label]] <- mergeSC(tmpdir = file.path(path,'Data'),name1 = paste0('dt1_',label,'.txt.gz'),name2 = paste0('dt2_',label,'.txt.gz'))
    }

    # Loading genes

    session <- rtracklayer::browserSession()
    GenomeInfoDb::genome(session) <- 'hg38'
    grGene <- GenomicRanges::makeGRangesFromDataFrame(
        rtracklayer::getTable(rtracklayer::ucscTableQuery(session,table="ncbiRefSeq")),
        starts.in.df.are.0based = TRUE,seqnames.field = 'chrom',start.field = 'txStart',end.field = 'txEnd',strand.field = 'strand',keep.extra.columns = TRUE)
    grGene <- unlist(reduce(split(grGene, grGene$name2)))

    # Ranges

    ranges <- GenomicRanges::GRanges('chr2',IRanges::IRanges(2.22e08,2.25e08))

    subsc <- list()

    for(i in res){
        label <- paste0(i,'kb')
        subsc[[label]] <- sc[[label]][overlapsAny(sc[[label]],ranges),]
    }

    # Data for plots

    dt.plot <- list()

    for(i in res){
        label <- paste0(i,'kb')
        message(label)
        dt.plot[[label]] <- data.table(Counts = rowSums(SummarizedExperiment::assay(subsc[[label]])),Resolution = label,Window = seq(GenomicRanges::start(ranges),GenomicRanges::end(ranges),length.out = nrow(subsc[[label]])))
    }

    dt.plot <- rbindlist(dt.plot)
    dt.plot$Resolution %<>% factor(levels = paste0(res[length(res):1],'kb'))


    dt.anno <- data.table(Resolution = '1kb', Start = GenomicRanges::start(grGene), End = GenomicRanges::end(grGene),
                          Gene = ifelse(overlapsAny(grGene,subsc[['1kb']]),1.2*dt.plot[Resolution=='1kb',max(Counts)],NA))[!is.na(Gene),]
    dt.anno$Resolution %<>% factor(levels = paste0(res[length(res):1],'kb'))

    # Count plot

    fig.Counts <- ggplot2::ggplot(data = dt.plot,ggplot2::aes(x = Window,y = Counts))+
        ggplot2::facet_grid(rows = ggplot2::vars(Resolution),scales = 'free_y')+
        ggplot2::geom_line() +
        ggplot2::geom_segment(inherit.aes=FALSE,data = dt.anno,ggplot2::aes(x=Start,xend=End,y=Gene,yend=Gene,color = 'Gene'),size=2)+
        ggplot2::theme_bw() + ggplot2::xlab(paste0('Genomic Window (',seqnames(ranges),')')) + ggplot2::ylab('Bulk counts')+
        ggplot2::scale_color_manual(values = 'black')+
        ggplot2::theme(panel.grid = ggplot2::element_blank(),legend.title = ggplot2::element_blank(),legend.position = c(0.8,0.95),legend.direction = 'horizontal')+
        ggplot2::scale_x_continuous(breaks = c(2.22e08,2.24e08))

    # Loading results

    for(i in res){
        label <- paste0(i,'kb')
        message(label)
        load(file.path(path,paste0('Analysis_',label,'.RData')))
    }

    # Creating plots

    plots <- list()

    aux <- 1
    coord <- c('NW','NW','SW','NW', #50
               'NW','SE','NE','SW', #25
               'SE','SW','NW','SE', #5
               'NW','SW','NW','NE') #1

    for(i in res){
        label <- paste0(i,'kb')
        message(label)

        plots[[label]] <- list()

        for(j in c('cisTopic','Cusanovich2018','Scasat','SnapATAC')){
            methodlabel = paste0(j,'.',label)
            message(methodlabel)


            if(label == '50kb'){
                plots[[label]][[j]] <- plot_umap(x = run_umap(get(methodlabel)$feature),
                                                 labels = get(methodlabel)$clustering$kmeans,
                                                 title = paste(j),legend.title = 'Cluster Assignment')+
                    ggplot2::labs(title = ggplot2::element_blank())
            } else{
                plots[[label]][[j]] <- plot_umap(x = run_umap(get(methodlabel)$feature),
                                                 labels = get(methodlabel)$clustering$kmeans,
                                                 title = paste(j),legend.title = 'Cluster Assignment') +
                    ggplot2::theme(plot.title = ggplot2::element_text(size = 12)) +
                    annotation_compass(paste0('ARI = ',sprintf('%.2f',mclust::adjustedRandIndex(get(methodlabel)$clustering$kmeans,get(paste0(j,'.',paste0(res[which(res==i)-1],'kb')))$clustering$kmeans))),coord[aux],fontsize = 10)

                if(!label == '1kb'){plots[[label]][[j]] <- plots[[label]][[j]] + ggplot2::labs(title = ggplot2::element_blank())}

                message('ARI: ',methodlabel,' and ',paste0(j,'.',paste0(res[which(res==i)-1],'kb')))
            }
            aux <- aux + 1
        }
    }

    fig <- list()

    for(i in res[length(res):1]){
        label <- paste0(i,'kb')
        message(label)

        fig[[label]] <- ggpubr::ggarrange(plotlist = plots[[label]],ncol = length(res),nrow = 1,legend = 'none')
    }

    fig.umap <- ggpubr::ggarrange(plotlist = fig,nrow = 4,ncol = 1,legend = 'none')

    # Combine with counts

    fig0 <- ggpubr::ggarrange(fig.umap,fig.Counts,nrow = 1,ncol = 2,common.legend = F,widths = c(0.7,0.3))

    ggplot2::ggsave(fig0,filename = file.path(pathout,'Fig2.pdf'),width = 13,height = 8)
}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.