mixNBHMM: Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

Documented in plotPeaks

#' Create a plot of ChIP read counts with called peaks and posterior probabilities
#'
#' `plotPeaks()` imports the output from `mixNBHMM()` and plots the peak
#' calls of a given genomic region. If the mixNBHMMDataSet contains the assay 'offset',
#' reads will be normalized prior to plotting (e.g. counts/exp(offset)).
#' Reads from replicates pertaining to the same condition are aggregated prior to plotting.
#'
#' @param object a mixNBHMMDataSet
#' @param ranges a GRanges object with the genomic corrdinates to be plotted  (e.g. \code{GRanges('chr19',IRanges(50630740,50946896))})
#' @param type either 'viterbi' (default) or an FDR control threshold (e.g. 0.05)
#' @param palette the color brewer palette to be used (default is 'Set1')
#'
#' @return A ggplot
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references \url{https://github.com/plbaldoni/mixNBHMM}
#'
#' @importFrom tidyr gather
#' @importFrom dplyr as.tbl
#' @importFrom plyr mapvalues
#' @importFrom ggpubr ggarrange
#' @importFrom scales comma
#' @rawNamespace import(SummarizedExperiment, except = shift)
#' @rawNamespace import(GenomicRanges, except = shift)
#'
#' @examples
#' data(ENCODE)
#' ENCODE <- createOffset(object = ENCODE,type = 'loess',span = 1)
#' \dontrun{ENCODE <- mixNBHMM(object = ENCODE,control = controlEM())}
#' \dontrun{plotPeaks(object = ENCODE,ranges = GRanges('chr2',IRanges(168750000,169200000)))}
#'
#' @export
#'
plotPeaks = function(object,ranges,type='viterbi',palette='Set1'){
    Patterns = ChIP = Window = Start = Peaks = Counts = Conditions = Component = PP = Background = Enrichment = Label = SDcols = SD = Window.min = Window.max = NULL

    # Checking input
    if(!class(ranges)=="GRanges"){
        stop('The argument ranges must be a GRanges object')
    } else{
        if(!overlapsAny(ranges,rowRanges(object))){
            stop('The argument ranges does not intersect the genomic coordinates of the mixNBHMMDataSet')
        } else{
            if(length(ranges)>1){
                stop('The argument ranges must specify a single genomic coordinate.')
            }
        }
    }
    if(!all(names(S4Vectors::metadata(object))==c("pi","gamma","psi","prob","mixProb","viterbi","logF","logB","logLik","parHist"))){
        stop('The argument object is not valid')
    }

    # Calculating the plot range
    plotindex <- IRanges::overlapsAny(SummarizedExperiment::rowRanges(object),ranges)

    # Normalizing reads
    if('offset'%in%SummarizedExperiment::assayNames(object)){
        offset <- SummarizedExperiment::assay(object,'offset')
    } else{
        offset <- matrix(0,nrow = nrow(ChIP),ncol = ncol(ChIP))
    }

    DT <- as.data.table(SummarizedExperiment::assay(object,'counts')/exp(offset))

    for(i in unique(SummarizedExperiment::colData(object)$Condition)){
        DT[,paste0(i) := rowSums(.SD),.SDcols = which(SummarizedExperiment::colData(object)$Condition==i)]
    }
    DT <- DT[,paste0(unique(SummarizedExperiment::colData(object)$Condition)),with=F]

    datamelt <- data.table::melt(DT,measure.vars=seq_len(ncol(DT)),value.name = 'Counts',variable.name = 'Conditions')
    datamelt[,Window := rep(seq_len(nrow(DT)),ncol(DT))]
    datamelt[,plotindex := rep(plotindex,ncol(DT))]
    datamelt[,Start := rep(IRanges::start(SummarizedExperiment::rowRanges(object)),ncol(DT))]


    datapeak = data.table::data.table(Window = seq_len(nrow(DT)),Conditions = unique(datamelt$Conditions)[1],plotindex = plotindex,Start = IRanges::start(SummarizedExperiment::rowRanges(object)))
    if(type=='viterbi'){
        datapeak[,Peaks := (S4Vectors::metadata(object)$viterbi=='D')]
    } else{
        if(is.numeric(type) & type>0 & type<1){
            datapeak[,Peaks := fdrControl(S4Vectors::metadata(object)$prob$Differential,fdr = type)]
        } else{
            stop('The argument type is not valid')
        }
    }
    datapeak$Peaks = plyr::mapvalues(datapeak$Peaks,from=c(F,T),to=c(NA,'Differential'))
    datapeak[,Patterns := S4Vectors::metadata(object)$mixProb[,names(S4Vectors::metadata(object)$mixProb)[max.col(.SD)],.SDcols=names(S4Vectors::metadata(object)$mixProb)]]
    datapeak$Patterns <- factor(x = datapeak$Patterns,levels = names(S4Vectors::metadata(object)$mixProb))
    datapeak[is.na(Peaks),Patterns := NA]

    # Selecting the most frequent pattern for each peak
    gr.datapeak <- SummarizedExperiment::rowRanges(object)
    gr.datapeak$Patterns <- datapeak$Patterns
    gr.datapeak$Window <- datapeak$Window
    gr.datapeak <- gr.datapeak[!is.na(datapeak$Peaks) & plotindex==T]
    gr.datapeak.reduced <- GenomicRanges::reduce(gr.datapeak)
    gr.datapeak.reduced$Patterns <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){Mode(gr.datapeak$Patterns[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
    gr.datapeak.reduced$Window.min <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){min(gr.datapeak$Window[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
    gr.datapeak.reduced$Window.max <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){max(gr.datapeak$Window[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
    dt.datapeak.reduced <- data.table::as.data.table(gr.datapeak.reduced)
    dt.datapeak.reduced$Patterns <- factor(x = dt.datapeak.reduced$Patterns,levels = names(S4Vectors::metadata(object)$mixProb))
    dt.datapeak.reduced$Conditions <- unique(datamelt$Conditions)[1]

    ### Figure 1: Read Counts and Peak Calls ###
    peaky = max(datamelt[(plotindex==T),Counts])*1.2
    patterny = max(datamelt[(plotindex==T),Counts])*1.1

    fig.ChIP = ggplot2::ggplot(data=datamelt[(plotindex==T),],ggplot2::aes(x=Window,y=Counts))+
        ggplot2::geom_line()+
        ggplot2::facet_grid(rows=ggplot2::vars(Conditions))+
        ggplot2::geom_segment(inherit.aes=FALSE,data = datapeak[(plotindex==T) & !is.na(Peaks),],ggplot2::aes(x=Window,xend=Window+1,y=peaky,yend=peaky),size=2,color='#000000')+
        ggplot2::geom_segment(inherit.aes=FALSE,data = dt.datapeak.reduced,ggplot2::aes(x = Window.min, xend = Window.max,y = patterny,yend = patterny,color = Patterns),size = 2)+
        ggplot2::theme_bw()+ggplot2::labs(x = 'Genomic Window',y ='Normalized Read Counts')+
        ggplot2::scale_color_brewer(palette = 'Set1',drop = FALSE)+
        ggplot2::guides(color = ggplot2::guide_legend(nrow = 1))+
        ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                       axis.line.x = ggplot2::element_blank(),
                       axis.ticks.x = ggplot2::element_blank(),
                       axis.text.x = ggplot2::element_blank(),
                       legend.position = 'top',legend.direction = 'horizontal')

    ### Figure 2: Post. Probabilities ###
    PostProb = data.table::data.table(Window=1:nrow(DT),S4Vectors::metadata(object)$prob,Label='PP',plotindex = plotindex,Start = IRanges::start(SummarizedExperiment::rowRanges(object)))
    PostProb <- tidyr::gather(dplyr::as.tbl(PostProb),Component,PP,Background:Enrichment)
    PostProb$Component = plyr::mapvalues(PostProb$Component,from=c('Background','Differential','Enrichment'),to=c(NA,'Differential',NA))

    fig.Prob = ggplot2::ggplot(data=PostProb[plotindex==T & !is.na(PostProb$Component),],ggplot2::aes(x=Start,y=PP))+
        ggplot2::facet_grid(rows=ggplot2::vars(Label))+
        ggplot2::geom_area(position='identity',alpha=0.5,color='#000000',fill='#000000')+
        ggplot2::scale_y_continuous(limits = c(0,1),breaks=c(0,0.5,1))+
        ggplot2::scale_x_continuous(labels = scales::comma)+
        ggplot2::labs(x = paste0('Genomic Window (',as.character(seqnames(ranges)),')'),y = 'Post. Prob.')+
        ggplot2::theme_bw()+
        ggplot2::theme(legend.position = 'bottom',legend.direction = 'horizontal',strip.text.y = ggplot2::element_text(colour = ggplot2::alpha('grey',0.0)))+
        ggplot2::theme(legend.position="none")

    fig = ggpubr::ggarrange(fig.ChIP,fig.Prob,ncol=1,nrow=2,heights = c(0.8,0.2),common.legend = T,legend='top')

    return(fig)
}

plbaldoni/mixNBHMM documentation built on Dec. 24, 2019, 1:31 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

plbaldoni/mixNBHMM
Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

R/plotPeaks.R
In plbaldoni/mixNBHMM: Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

Defines functions plotPeaks

Documented in plotPeaks

R Package Documentation

Browse R Packages

We want your feedback!

plbaldoni/mixNBHMM Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

R/plotPeaks.R In plbaldoni/mixNBHMM: Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

Defines functions plotPeaks

Documented in plotPeaks

R Package Documentation

Browse R Packages

We want your feedback!

plbaldoni/mixNBHMM
Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions

R/plotPeaks.R
In plbaldoni/mixNBHMM: Differential Peak Calling via Hidden Markov Model With Mixture of Negative Binomial Distributions