#' Summarize peak calls and optionally create a BED 6+3 file in broadPeak format for visualization
#'
#' `summarizePeaks()` imports the output from `mixNBHMM()` and outputs a set of differential
#' peaks for a given FDR control threshold (or from the Viterbi sequence of HMM states).
#'
#' @param object a mixNBHMMDataSet
#' @param type either 'viterbi' (default) or an FDR control threshold (e.g. 0.05)
#' @param name either \code{NULL} (default) or the name (without extension) for the outputfiles (e.g. 'mixNBHMM')
#' @param path either \code{NULL} (default) or a character string with the path where the output files will be saved (e.g. '~/home/')
#' @param description either \code{NULL} (default) or a character string to be used as header description in the output files (e.g. 'Viterbi peaks')
#'
#' @return A GRanges object with differential peak calls in BED 6+3 format
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references \url{https://github.com/plbaldoni/mixNBHMM}
#'
#' @examples
#' data(ENCODE)
#' ENCODE <- createOffset(object = ENCODE,type = 'loess',span = 1)
#' \dontrun{ENCODE <- mixNBHMM(object = ENCODE,control = controlEM())}
#' \dontrun{summarizePeaks(object = ENCODE, type = 'viterbi', name = 'mypeaks',
#' path = '~/mypeaks/', description = 'Viterbi peaks')}
#'
#' @rawNamespace import(SummarizedExperiment, except = shift)
#' @rawNamespace import(GenomicRanges, except = shift)
#'
#' @export
#'
summarizePeaks = function(object,type='viterbi',
name=NULL,path=NULL,description=NULL)
{
subjectHits = i = NULL
if((!is.null(name) & !is.character(name)) | (!is.null(path) & !is.character(path)) | (!is.null(description) & !is.character(description))){stop('The arguments name, path, and description must be characters.')}
if(!sum(is.null(name)+is.null(path)+is.null(description))%in%c(0,3)){stop('Either name, path, or description is missing')}
if(!all(names(S4Vectors::metadata(object))==c("pi","gamma","psi","prob","mixProb","viterbi","logF","logB","logLik","parHist"))){stop('The argument object is not valid')}
# Reduce peaks
if(type=='viterbi'){
peakindex <- (S4Vectors::metadata(object)$viterbi=='D')
gr.bed <- GenomicRanges::reduce(SummarizedExperiment::rowRanges(object)[peakindex])
gr.graph <-SummarizedExperiment::rowRanges(object)[peakindex]
} else{
if(is.numeric(type) & type>0 & type<1){
peakindex <- fdrControl(S4Vectors::metadata(object)$prob$Differential,fdr = type)
gr.bed <- GenomicRanges::reduce(SummarizedExperiment::rowRanges(object)[peakindex])
gr.graph <- SummarizedExperiment::rowRanges(object)[peakindex]
} else{
stop('The argument type is not valid')
}
}
# Summarize the output
gr.bed$name <- paste0(paste0('Peak.',1:length(gr.bed)))
gr.bed$score <- 1000*data.table::as.data.table(IRanges::findOverlaps(gr.bed,SummarizedExperiment::rowRanges(object)))[,mean(S4Vectors::metadata(object)$prob$Differential[subjectHits]),by='queryHits']$V1
gr.bed$thickStart <- GenomicRanges::start(gr.bed)
gr.bed$thickEnd <- GenomicRanges::end(gr.bed)
# File names
if(!is.null(name)){
filenames <- paste0(path,paste0(name,c('.bed',paste0('_',names(S4Vectors::metadata(object)$mixProb),'.bedGraph'))))
names(filenames) <- c('bed',names(S4Vectors::metadata(object)$mixProb))
ask <- 'placeholder'
if(any(file.exists(filenames))){
ask <- readline(paste0('Do you want to overwrite the exhisting files? (Yes/No, case sensitive) '))
if(!ask%in%c('Yes','No')){
stop('Unrecognized response')
}
}
if(all(file.exists(filenames)==FALSE) | ask=='Yes'){
if(!dir.exists(path)){system2('mkdir',path)}
# Writing bed file
dt.bed = data.frame(chrom=SummarizedExperiment::seqnames(gr.bed),chromStart=SummarizedExperiment::start(gr.bed),
chromEnd=SummarizedExperiment::end(gr.bed),name=gr.bed$name,score=gr.bed$score,strand='.',
thickStart=gr.bed$thickStart,thickEnd=gr.bed$thickEnd)
utils::write.table(dt.bed,file="./temp.bed",row.names=F,col.names=F,quote=F,sep="\t")
header1 = paste0('track name="mixNBHMM" description="',description,'" visibility=1 useScore=1')
header2 = paste0('browser position ',dt.bed[1,'chrom'],':',dt.bed[1,'chromStart'],'-',dt.bed[1,'chromEnd'])
system2('echo',paste0(header2,' | cat - ./temp.bed > ./temp1.bed'))
system2('echo',paste0(header1,' | cat - ./temp1.bed > ',as.character(filenames['bed'])))
system2('rm','./temp.bed ./temp1.bed')
# Writing bedGraph files
for(i in seq_len(ncol(S4Vectors::metadata(object)$mixProb))){
dt.bedgraph = data.frame(chrom=SummarizedExperiment::seqnames(gr.graph),chromStart=SummarizedExperiment::start(gr.graph),
chromEnd=SummarizedExperiment::end(gr.graph),mixProb=S4Vectors::metadata(object)$mixProb[peakindex,][[i]])
utils::write.table(dt.bedgraph,file="./temp.bed",row.names=F,col.names=F,quote=F,sep="\t")
header1 = paste0('track type=bedGraph name="mixNBHMM(',names(S4Vectors::metadata(object)$mixProb)[i],')" description="',description,'" visibility=dense autoScale=off alwaysZero=on viewLimits=0.0:1.0')
header2 = paste0('browser position ',dt.bed[1,'chrom'],':',dt.bed[1,'chromStart'],'-',dt.bed[1,'chromEnd'])
system2('echo',paste0(header2,' | cat - ./temp.bed > ./temp1.bed'))
system2('echo',paste0(header1,' | cat - ./temp1.bed > ',as.character(filenames[names(S4Vectors::metadata(object)$mixPr)[i]])))
system2('rm','./temp.bed ./temp1.bed')
}
cat(paste0('The following files have been saved:\n'))
for(i in seq_len(length(filenames))){cat(filenames[i],'\n')}
}
}
return(gr.bed)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.