R/runSummaryGrosselin_Figure3.R

Defines functions runSummaryGrosselin_Figure3

Documented in runSummaryGrosselin_Figure3

#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#' @param pathdata path to the data
#' @param resvec resolution of the data to analyze
#' @param maxCluster maximum number of clusters
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#'
runSummaryGrosselin_Figure3 <- function(path = 'Peaks',pathdata = 'Analysis',resvec = c(50000,25000,5000,1000),maxCluster = 8){

    Gene = Method = Cluster = Resolution = NULL

    ################################################################################

    methods <- c('cisTopic','Cusanovich2018','Scasat','SnapATAC')

    dt <- list()

    # Selecting genes

    # session <- rtracklayer::browserSession()
    # GenomeInfoDb::genome(session) <- 'hg38'
    # grGene <- GenomicRanges::makeGRangesFromDataFrame(
    #     rtracklayer::getTable(rtracklayer::ucscTableQuery(session,table="ncbiRefSeq")),
    #     starts.in.df.are.0based = TRUE,seqnames.field = 'chrom',start.field = 'txStart',end.field = 'txEnd',strand.field = 'strand',keep.extra.columns = TRUE)
    # grGene <- unlist(GenomicRanges::reduce(split(grGene, grGene$name2)))
    # grGene <- grGene[width(grGene)>1000]
    # grGene <- grGene[as.character(seqnames(grGene))%in%c(paste0('chr',1:22),'chrX','chrY')]

    for(resolution in resvec){

        message('Resolution: ',resolution)

        # General parameters
        res <- resolution/1000
        label <- paste0(res,'kb')

        # Loading the candidate peaks
        # load(file.path(pathdata,paste0('Analysis_',label,'.RData')))
        # sc <- get(paste0('Cusanovich2018.',label))$sc

        # Loading the peaks themselves
        load(file.path(path,paste0('Peaks_',label,'.RData')))

        # Checking genes in out
        # grGene.in <- grGene[overlapsAny(grGene,sc$sc)]
        # grGene.out <- grGene[!overlapsAny(grGene,sc$sc)]

        # Full genome
        sc_tmp <- get(paste0('sc_',label))

        # Finding overlaps
        # overlaps.in <- data.table::as.data.table(findOverlaps(grGene.in,sc_tmp))
        # overlaps.out <- data.table::as.data.table(findOverlaps(grGene.out,sc_tmp))

        for(ncluster in 2:maxCluster){
            for(meth in methods){
                label2 <- paste0(meth,'.',ncluster)
                message(label2)

                # Comparing clusters
                vec <- apply(S4Vectors::values(SummarizedExperiment::rowRanges(sc_tmp))[,paste0(label2,'.',1:ncluster)],1,sum)

                dt[[paste0(label,'.',label2)]] =  data.table::data.table(Resolution = label, Clusters = ncluster, Method = meth, Diff_Windows_Mbp = sum(vec==1)*resolution/1e6)

                # # Calculating pseudobulk
                # mat <- scChIPseqsim:::pseudoBulk(sc_tmp,group = as.factor(SummarizedExperiment::colData(sc_tmp)[,label2]),type = 'cols')
                #
                # # Normalizing mat
                # mat <- mat/exp(epigraHMM::createOffset(as.matrix(mat),type = 'loess',span = 1))
                #
                # # Caculating overlaps
                # overlaps.in <- cbind(overlaps.in,data.table::as.data.table(as.matrix(mat[overlaps.in$subjectHits,])))
                # overlaps.out <- cbind(overlaps.out,data.table::as.data.table(as.matrix(mat[overlaps.out$subjectHits,])))
                #
                # # Summing reads per gene
                # overlaps.in <- overlaps.in[,lapply(.SD,function(x){sum(x)}),by = 'queryHits',.SDcols = paste0('group',1:ncluster)]
                # overlaps.out <- overlaps.out[,lapply(.SD,function(x){sum(x)}),by = 'queryHits',.SDcols = paste0('group',1:ncluster)]
                #
                # # Putting it all together
                # overlaps.all <- data.table::rbindlist(list(overlaps.in[,Gene := 'In'],
                #                                            overlaps.out[,Gene := 'Out']))
                #
                # overlaps.all <- overlaps.all[,list(Sd = matrixStats::rowSds(as.matrix(.SD))),.SDcols = paste0('group',1:ncluster),by = c('queryHits','Gene')]
                # overlaps.all[,queryHits := NULL][,Method := meth][,Cluster := ncluster][,Resolution := resolution]
                #
                # dt[[paste0(label,'_',label2)]] <- overlaps.all
            }
        }
    }

    dt <- data.table::rbindlist(dt)

    # figD <- ggplot2::ggplot(dt, ggplot2::aes(x = Method,y = Diff_Windows_Mbp,color = Method,fill = Method,grou)) +
    #     ggplot2::facet_wrap(~Clusters,ncol = 3) +
    #     ggplot2::geom_col() +
    #     ggplot2::theme_bw() +
    #     ggplot2::scale_color_brewer(palette = 'Set1') +
    #     ggplot2::theme(panel.grid = ggplot2::element_blank(),legend.position = 'top')+
    #     ggplot2::labs(x = 'Method',y = 'Differential Regions (Mbp)')

    save(dt,file = 'tmp_diffRegions.RData',compress = 'xz')
}
plbaldoni/scChIPseqsim documentation built on June 11, 2020, 7:41 p.m.