R/runSummaryGrosselin_Figure1.R

Defines functions runSummaryGrosselin_Figure1

Documented in runSummaryGrosselin_Figure1

#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path path where results are located
#' @param datadir path where dataset is locates
#' @param pathout path where figure will be saved
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
runSummaryGrosselin_Figure1 <- function(path,datadir,pathout){
    # path = '~/Downloads/Analysis/'
    # datadir <- '~/Downloads/scChIPseq.RData'
    # pathout = '~/Downloads/Summary/'
    # devtools::load_all()

    AMI = CisTopic = shape = FDR = Mbp_Diff = Method = NULL
    Clustering = Cusanovich2018 = Scasat = SnapATAC = Grosselin2019 = auxx = NULL

    load(file.path(path,'Analysis.RData'))
    load(datadir)

    system(paste('mkdir',pathout))

    # UMAP plots

    fig.cisTopic <- plot_umap(x = run_umap(CisTopic$feature), labels = CisTopic$clustering$kmeans, title = 'cisTopic (LDA)',legend.title = 'Clustering assignment',shape = shape) +
        annotation_compass(paste0('ARI = ',sprintf('%.2f',CisTopic$clustering$metrics[Clustering == 'Kmeans',]$ARI)),'NE')
    fig.Cusanovich2018 <- plot_umap(x = run_umap(Cusanovich2018$feature), labels = Cusanovich2018$clustering$kmeans, title = 'Cusanovich2018 (LSI/SVD)',legend.title = 'Clustering assignment',shape = shape) +
        annotation_compass(paste0('ARI = ',sprintf('%.2f',Cusanovich2018$clustering$metrics[Clustering == 'Kmeans',]$ARI)),'NW')
    fig.Scasat <- plot_umap(x = run_umap(Scasat$feature), labels = Scasat$clustering$kmeans, title = 'Scasat (Jaccard/MDS)',legend.title = 'Clustering assignment',shape = shape) +
        annotation_compass(paste0('ARI = ',sprintf('%.2f',Scasat$clustering$metrics[Clustering == 'Kmeans',]$ARI)),'NW')
    fig.SnapATAC <- plot_umap(x = run_umap(SnapATAC$feature), labels = SnapATAC$clustering$kmeans, title = 'SnapATAC (Jaccard/PCA)',legend.title = 'Clustering assignment',shape = shape) +
        annotation_compass(paste0('ARI = ',sprintf('%.2f',SnapATAC$clustering$metrics[Clustering == 'Kmeans',]$ARI)),'SW')

    figB <- Grosselin2019 +
        ggplot2::labs(title = 'Grosselin2019 (PCA)') +
        ggplot2::guides(color = ggplot2::guide_legend(title = 'Cluster Assignment')) +
        ggplot2::theme(legend.position = 'none')

    figC <- ggpubr::ggarrange(
        fig.cisTopic,
        fig.Cusanovich2018,
        fig.Scasat,
        fig.SnapATAC,
        ncol = 2,nrow = 2,legend = 'none',labels = 'C')

    # Peak plot

    sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = Matrix::Matrix(dt,sparse = TRUE)),
                                                     rowRanges = GenomicRanges::makeGRangesFromDataFrame(data.table::rbindlist(lapply(strsplit(rownames(dt),"_"),FUN = function(x){
                                                         data.table::data.table(chr=paste(x[seq_len(length(x)-2)],collapse = '_'),start=x[length(x)-1],end=x[length(x)])
                                                     }))),
                                                     colData = data.frame(Cluster = as.numeric(as.factor(metadata$ChromatinGroup))))
    sc <- sort(sc)

    npeaks <- list()

    for(i in c('Grosselin2019','CisTopic','Cusanovich2018','Scasat','SnapATAC')){
        if(i == 'Grosselin2019'){
            pseudosc <- pseudoBulk(sc = sc,group = as.factor(SummarizedExperiment::colData(sc)$Cluster),type = 'cols')
        } else{
            pseudosc <- pseudoBulk(sc = sc,group = as.factor(get(i)$clustering$kmeans),type = 'cols')
        }

        pseudosc.peaks <- epigraHMM::epigraHMMDataSetFromMatrix(countData = as.matrix(pseudosc),
                                                                rowRanges = SummarizedExperiment::rowRanges(sc),
                                                                colData = data.frame(condition = c('A','B'),replicate = c(1,1),Method = i))

        pseudosc.peaks <- epigraHMM::createOffset(pseudosc.peaks,type = 'loess',span = 1)

        tryCatch({
            pseudosc.peaks <- epigraHMM::epigraHMM(pseudosc.peaks,control = epigraHMM::controlEM(),type = 'differential',dist = 'nb')

            for(fdr in c(0.01,0.05,0.10,0.20)){

                tryCatch({
                    assign('auxx',data.table(Method = i, FDR = as.character(fdr), Mbp_Diff = 5e4*sum(epigraHMM:::fdrControl(metadata(pseudosc.peaks)$prob$Differential,fdr = fdr))/1e6),inherits = TRUE)
                },error = function(cond){
                    assign('auxx',data.table(Method = i, FDR = 'Viterbi', Mbp_Diff = 5e4*sum(metadata(pseudosc.peaks)$viterbi=='D')/1e6),inherits = TRUE)
                })

                npeaks[[i]][[paste0(fdr)]] <- auxx
            }
            npeaks[[i]] <- data.table::rbindlist(npeaks[[i]])
        },
        error = function(cond){
            npeaks[[i]] <<- data.table(Method = i, FDR = as.character(c(0.01,0.05,0.10,0.20)), Mbp_Diff = 0)
        })
    }

    npeaks <- rbindlist(npeaks)
    npeaks$Method %<>% factor(levels = c('CisTopic','Cusanovich2018','Scasat','SnapATAC','Grosselin2019'))

    figD <- ggplot2::ggplot(npeaks, ggplot2::aes(x = FDR,y = Mbp_Diff,color = Method,group = Method)) +
        ggplot2::geom_point() +
        ggplot2::geom_line() +
        ggplot2::theme_bw() +
        ggplot2::scale_color_brewer(palette = 'Set1') +
        ggplot2::theme(panel.grid = ggplot2::element_blank(),legend.position = c(0.325,0.86))+
        ggplot2::labs(x = 'False Discovery Rate',y = 'Differential Regions (Mbp)')

    figBD <- ggpubr::ggarrange(figB,addSmallLegend(figD,textSize = 8),nrow = 2,ncol = 1,labels = list('B','D'))

    figBCD <- ggpubr::ggarrange(ggpubr::ggarrange(figBD,figC,nrow = 1,ncol = 2,widths = c(0.3,0.7),legend = 'none'),
                                ggpubr::get_legend(fig.Cusanovich2018+ggplot2::theme(legend.position = 'bottom',
                                                                                     legend.direction = 'horizontal')),
                                nrow = 2,ncol = 1,heights = c(0.95,0.05))

    figA <- ggpubr::ggarrange(pseudoPlot(sc,pseudoBulk(sc,group = metadata$ChromatinGroup,type = 'cols'),ptssize = 0.1,shuffle = T,
                                         normalize = TRUE,bulkOnly = FALSE,ranges = GRanges('chr1',IRanges::IRanges(200000000,230000000))),
                              labels = list('A'),nrow = 1,ncol = 1)

    fig <- ggpubr::ggarrange(figA,figBCD,nrow = 2,ncol = 1,heights = c(0.3,0.7),legend = 'none')

    ggplot2::ggsave(fig,filename = file.path(pathout,'Fig1.pdf'),width = 8,height = 9)
}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.