R/summarizePeaks.R

Defines functions summarizePeaks

Documented in summarizePeaks

#' Summarize peak calls and optionally create a BED 6+3 file in broadPeak format for visualization
#'
#' `summarizePeaks()` imports the output from `mixNBHMM()` and outputs a set of differential
#' peaks for a given FDR control threshold (or from the Viterbi sequence of HMM states).
#'
#' @param object a mixNBHMMDataSet
#' @param type either 'viterbi' (default) or an FDR control threshold (e.g. 0.05)
#' @param name either \code{NULL} (default) or the name (without extension) for the outputfiles (e.g. 'mixNBHMM')
#' @param path either \code{NULL} (default) or a character string with the path where the output files will be saved (e.g. '~/home/')
#' @param description either \code{NULL} (default) or a character string to be used as header description in the output files (e.g. 'Viterbi peaks')
#'
#' @return A GRanges object with differential peak calls in BED 6+3 format
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references \url{https://github.com/plbaldoni/mixNBHMM}
#'
#' @examples
#' data(ENCODE)
#' ENCODE <- createOffset(object = ENCODE,type = 'loess',span = 1)
#' \dontrun{ENCODE <- mixNBHMM(object = ENCODE,control = controlEM())}
#' \dontrun{summarizePeaks(object = ENCODE, type = 'viterbi', name = 'mypeaks',
#' path = '~/mypeaks/', description = 'Viterbi peaks')}
#'
#' @rawNamespace import(SummarizedExperiment, except = shift)
#' @rawNamespace import(GenomicRanges, except = shift)
#'
#' @export
#'
summarizePeaks = function(object,type='viterbi',
                          name=NULL,path=NULL,description=NULL)
{
  subjectHits = i = NULL

  if((!is.null(name) & !is.character(name)) | (!is.null(path) & !is.character(path)) | (!is.null(description) & !is.character(description))){stop('The arguments name, path, and description must be characters.')}
  if(!sum(is.null(name)+is.null(path)+is.null(description))%in%c(0,3)){stop('Either name, path, or description is missing')}
  if(!all(names(S4Vectors::metadata(object))==c("pi","gamma","psi","prob","mixProb","viterbi","logF","logB","logLik","parHist"))){stop('The argument object is not valid')}

  # Reduce peaks
  if(type=='viterbi'){
    peakindex <- (S4Vectors::metadata(object)$viterbi=='D')
    gr.bed <- GenomicRanges::reduce(SummarizedExperiment::rowRanges(object)[peakindex])
    gr.graph <-SummarizedExperiment::rowRanges(object)[peakindex]
  } else{
    if(is.numeric(type) & type>0 & type<1){
      peakindex <- fdrControl(S4Vectors::metadata(object)$prob$Differential,fdr = type)
      gr.bed <- GenomicRanges::reduce(SummarizedExperiment::rowRanges(object)[peakindex])
      gr.graph <- SummarizedExperiment::rowRanges(object)[peakindex]
    } else{
      stop('The argument type is not valid')
    }
  }

  # Summarize the output
  gr.bed$name <- paste0(paste0('Peak.',1:length(gr.bed)))
  gr.bed$score <- 1000*data.table::as.data.table(IRanges::findOverlaps(gr.bed,SummarizedExperiment::rowRanges(object)))[,mean(S4Vectors::metadata(object)$prob$Differential[subjectHits]),by='queryHits']$V1
  gr.bed$thickStart <- GenomicRanges::start(gr.bed)
  gr.bed$thickEnd <- GenomicRanges::end(gr.bed)

  # File names
  if(!is.null(name)){
    filenames <- paste0(path,paste0(name,c('.bed',paste0('_',names(S4Vectors::metadata(object)$mixProb),'.bedGraph'))))
    names(filenames) <- c('bed',names(S4Vectors::metadata(object)$mixProb))

    ask <- 'placeholder'
    if(any(file.exists(filenames))){
      ask <- readline(paste0('Do you want to overwrite the exhisting files? (Yes/No, case sensitive) '))
      if(!ask%in%c('Yes','No')){
        stop('Unrecognized response')
      }
    }

    if(all(file.exists(filenames)==FALSE) | ask=='Yes'){
      if(!dir.exists(path)){system2('mkdir',path)}

      # Writing bed file
      dt.bed = data.frame(chrom=SummarizedExperiment::seqnames(gr.bed),chromStart=SummarizedExperiment::start(gr.bed),
                          chromEnd=SummarizedExperiment::end(gr.bed),name=gr.bed$name,score=gr.bed$score,strand='.',
                          thickStart=gr.bed$thickStart,thickEnd=gr.bed$thickEnd)

      utils::write.table(dt.bed,file="./temp.bed",row.names=F,col.names=F,quote=F,sep="\t")
      header1 = paste0('track name="mixNBHMM" description="',description,'" visibility=1 useScore=1')
      header2 = paste0('browser position ',dt.bed[1,'chrom'],':',dt.bed[1,'chromStart'],'-',dt.bed[1,'chromEnd'])

      system2('echo',paste0(header2,' | cat - ./temp.bed > ./temp1.bed'))
      system2('echo',paste0(header1,' | cat - ./temp1.bed > ',as.character(filenames['bed'])))
      system2('rm','./temp.bed ./temp1.bed')

      # Writing bedGraph files
      for(i in seq_len(ncol(S4Vectors::metadata(object)$mixProb))){

        dt.bedgraph = data.frame(chrom=SummarizedExperiment::seqnames(gr.graph),chromStart=SummarizedExperiment::start(gr.graph),
                                 chromEnd=SummarizedExperiment::end(gr.graph),mixProb=S4Vectors::metadata(object)$mixProb[peakindex,][[i]])

        utils::write.table(dt.bedgraph,file="./temp.bed",row.names=F,col.names=F,quote=F,sep="\t")
        header1 = paste0('track type=bedGraph name="mixNBHMM(',names(S4Vectors::metadata(object)$mixProb)[i],')" description="',description,'" visibility=dense autoScale=off alwaysZero=on viewLimits=0.0:1.0')
        header2 = paste0('browser position ',dt.bed[1,'chrom'],':',dt.bed[1,'chromStart'],'-',dt.bed[1,'chromEnd'])

        system2('echo',paste0(header2,' | cat - ./temp.bed > ./temp1.bed'))
        system2('echo',paste0(header1,' | cat - ./temp1.bed > ',as.character(filenames[names(S4Vectors::metadata(object)$mixPr)[i]])))
        system2('rm','./temp.bed ./temp1.bed')
      }
      cat(paste0('The following files have been saved:\n'))
      for(i in seq_len(length(filenames))){cat(filenames[i],'\n')}
    }
  }

  return(gr.bed)
}
plbaldoni/mixNBHMM documentation built on Dec. 24, 2019, 1:31 p.m.