#' Create a plot of ChIP read counts with called peaks and posterior probabilities
#'
#' `plotPeaks()` imports the output from `mixNBHMM()` and plots the peak
#' calls of a given genomic region. If the mixNBHMMDataSet contains the assay 'offset',
#' reads will be normalized prior to plotting (e.g. counts/exp(offset)).
#' Reads from replicates pertaining to the same condition are aggregated prior to plotting.
#'
#' @param object a mixNBHMMDataSet
#' @param ranges a GRanges object with the genomic corrdinates to be plotted (e.g. \code{GRanges('chr19',IRanges(50630740,50946896))})
#' @param type either 'viterbi' (default) or an FDR control threshold (e.g. 0.05)
#' @param palette the color brewer palette to be used (default is 'Set1')
#'
#' @return A ggplot
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references \url{https://github.com/plbaldoni/mixNBHMM}
#'
#' @importFrom tidyr gather
#' @importFrom dplyr as.tbl
#' @importFrom plyr mapvalues
#' @importFrom ggpubr ggarrange
#' @importFrom scales comma
#' @rawNamespace import(SummarizedExperiment, except = shift)
#' @rawNamespace import(GenomicRanges, except = shift)
#'
#' @examples
#' data(ENCODE)
#' ENCODE <- createOffset(object = ENCODE,type = 'loess',span = 1)
#' \dontrun{ENCODE <- mixNBHMM(object = ENCODE,control = controlEM())}
#' \dontrun{plotPeaks(object = ENCODE,ranges = GRanges('chr2',IRanges(168750000,169200000)))}
#'
#' @export
#'
plotPeaks = function(object,ranges,type='viterbi',palette='Set1'){
Patterns = ChIP = Window = Start = Peaks = Counts = Conditions = Component = PP = Background = Enrichment = Label = SDcols = SD = Window.min = Window.max = NULL
# Checking input
if(!class(ranges)=="GRanges"){
stop('The argument ranges must be a GRanges object')
} else{
if(!overlapsAny(ranges,rowRanges(object))){
stop('The argument ranges does not intersect the genomic coordinates of the mixNBHMMDataSet')
} else{
if(length(ranges)>1){
stop('The argument ranges must specify a single genomic coordinate.')
}
}
}
if(!all(names(S4Vectors::metadata(object))==c("pi","gamma","psi","prob","mixProb","viterbi","logF","logB","logLik","parHist"))){
stop('The argument object is not valid')
}
# Calculating the plot range
plotindex <- IRanges::overlapsAny(SummarizedExperiment::rowRanges(object),ranges)
# Normalizing reads
if('offset'%in%SummarizedExperiment::assayNames(object)){
offset <- SummarizedExperiment::assay(object,'offset')
} else{
offset <- matrix(0,nrow = nrow(ChIP),ncol = ncol(ChIP))
}
DT <- as.data.table(SummarizedExperiment::assay(object,'counts')/exp(offset))
for(i in unique(SummarizedExperiment::colData(object)$Condition)){
DT[,paste0(i) := rowSums(.SD),.SDcols = which(SummarizedExperiment::colData(object)$Condition==i)]
}
DT <- DT[,paste0(unique(SummarizedExperiment::colData(object)$Condition)),with=F]
datamelt <- data.table::melt(DT,measure.vars=seq_len(ncol(DT)),value.name = 'Counts',variable.name = 'Conditions')
datamelt[,Window := rep(seq_len(nrow(DT)),ncol(DT))]
datamelt[,plotindex := rep(plotindex,ncol(DT))]
datamelt[,Start := rep(IRanges::start(SummarizedExperiment::rowRanges(object)),ncol(DT))]
datapeak = data.table::data.table(Window = seq_len(nrow(DT)),Conditions = unique(datamelt$Conditions)[1],plotindex = plotindex,Start = IRanges::start(SummarizedExperiment::rowRanges(object)))
if(type=='viterbi'){
datapeak[,Peaks := (S4Vectors::metadata(object)$viterbi=='D')]
} else{
if(is.numeric(type) & type>0 & type<1){
datapeak[,Peaks := fdrControl(S4Vectors::metadata(object)$prob$Differential,fdr = type)]
} else{
stop('The argument type is not valid')
}
}
datapeak$Peaks = plyr::mapvalues(datapeak$Peaks,from=c(F,T),to=c(NA,'Differential'))
datapeak[,Patterns := S4Vectors::metadata(object)$mixProb[,names(S4Vectors::metadata(object)$mixProb)[max.col(.SD)],.SDcols=names(S4Vectors::metadata(object)$mixProb)]]
datapeak$Patterns <- factor(x = datapeak$Patterns,levels = names(S4Vectors::metadata(object)$mixProb))
datapeak[is.na(Peaks),Patterns := NA]
# Selecting the most frequent pattern for each peak
gr.datapeak <- SummarizedExperiment::rowRanges(object)
gr.datapeak$Patterns <- datapeak$Patterns
gr.datapeak$Window <- datapeak$Window
gr.datapeak <- gr.datapeak[!is.na(datapeak$Peaks) & plotindex==T]
gr.datapeak.reduced <- GenomicRanges::reduce(gr.datapeak)
gr.datapeak.reduced$Patterns <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){Mode(gr.datapeak$Patterns[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
gr.datapeak.reduced$Window.min <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){min(gr.datapeak$Window[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
gr.datapeak.reduced$Window.max <- base::unlist(base::lapply(base::seq_along(gr.datapeak.reduced),function(i){max(gr.datapeak$Window[S4Vectors::subjectHits(GenomicRanges::findOverlaps(gr.datapeak.reduced[i],gr.datapeak))])}))
dt.datapeak.reduced <- data.table::as.data.table(gr.datapeak.reduced)
dt.datapeak.reduced$Patterns <- factor(x = dt.datapeak.reduced$Patterns,levels = names(S4Vectors::metadata(object)$mixProb))
dt.datapeak.reduced$Conditions <- unique(datamelt$Conditions)[1]
### Figure 1: Read Counts and Peak Calls ###
peaky = max(datamelt[(plotindex==T),Counts])*1.2
patterny = max(datamelt[(plotindex==T),Counts])*1.1
fig.ChIP = ggplot2::ggplot(data=datamelt[(plotindex==T),],ggplot2::aes(x=Window,y=Counts))+
ggplot2::geom_line()+
ggplot2::facet_grid(rows=ggplot2::vars(Conditions))+
ggplot2::geom_segment(inherit.aes=FALSE,data = datapeak[(plotindex==T) & !is.na(Peaks),],ggplot2::aes(x=Window,xend=Window+1,y=peaky,yend=peaky),size=2,color='#000000')+
ggplot2::geom_segment(inherit.aes=FALSE,data = dt.datapeak.reduced,ggplot2::aes(x = Window.min, xend = Window.max,y = patterny,yend = patterny,color = Patterns),size = 2)+
ggplot2::theme_bw()+ggplot2::labs(x = 'Genomic Window',y ='Normalized Read Counts')+
ggplot2::scale_color_brewer(palette = 'Set1',drop = FALSE)+
ggplot2::guides(color = ggplot2::guide_legend(nrow = 1))+
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.line.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
legend.position = 'top',legend.direction = 'horizontal')
### Figure 2: Post. Probabilities ###
PostProb = data.table::data.table(Window=1:nrow(DT),S4Vectors::metadata(object)$prob,Label='PP',plotindex = plotindex,Start = IRanges::start(SummarizedExperiment::rowRanges(object)))
PostProb <- tidyr::gather(dplyr::as.tbl(PostProb),Component,PP,Background:Enrichment)
PostProb$Component = plyr::mapvalues(PostProb$Component,from=c('Background','Differential','Enrichment'),to=c(NA,'Differential',NA))
fig.Prob = ggplot2::ggplot(data=PostProb[plotindex==T & !is.na(PostProb$Component),],ggplot2::aes(x=Start,y=PP))+
ggplot2::facet_grid(rows=ggplot2::vars(Label))+
ggplot2::geom_area(position='identity',alpha=0.5,color='#000000',fill='#000000')+
ggplot2::scale_y_continuous(limits = c(0,1),breaks=c(0,0.5,1))+
ggplot2::scale_x_continuous(labels = scales::comma)+
ggplot2::labs(x = paste0('Genomic Window (',as.character(seqnames(ranges)),')'),y = 'Post. Prob.')+
ggplot2::theme_bw()+
ggplot2::theme(legend.position = 'bottom',legend.direction = 'horizontal',strip.text.y = ggplot2::element_text(colour = ggplot2::alpha('grey',0.0)))+
ggplot2::theme(legend.position="none")
fig = ggpubr::ggarrange(fig.ChIP,fig.Prob,ncol=1,nrow=2,heights = c(0.8,0.2),common.legend = T,legend='top')
return(fig)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.