R/makeBed.R

Defines functions makeBed

Documented in makeBed

#' Create a BED file from an output of mixHMM
#'
#' `makeBed()` imports the output from `mixHMM()` and a RangedSummarizedExperiment object dataset to
#' create a BED file to be used in downstream analyses.
#'
#' @param output An output from `mixHMM()`
#' @param data A RangedSummarizedExperiment object with the same assay used in `mixHMM()`
#' @param file The full path and name of the output file (e.g. './example.bed')
#' @param tracklabel Optional label for the track (default is 'User Track')
#'
#' @return A GRanges object with differential peak calls
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references \url{https://github.com/plbaldoni/mixHMM}
#'
#' @examples
#' data(H3K36me3)
#' ChIP = SummarizedExperiment::assay(H3K36me3)
#' offset = ZIMHMM::createOffset(ChIP,method = 'loess',span = 1)
#' group = c(1,1,1,2,2,2,3,3,3)
#' B = 2^(length(unique(group)))-2
#' \dontrun{output = mixHMMConstr(ChIP,Control=NULL,offset,group,B,control = controlPeaks())}
#' \dontrun{makeBed(output,data = H3K36me3,file = './H3K36me3.bed')}
#'
#' @importFrom GenomicRanges reduce findOverlaps
#' @importFrom S4Vectors queryHits subjectHits
#' @importFrom SummarizedExperiment rowRanges width seqnames
#'
#' @export
#'
makeBed = function(output,data,file,tracklabel='User Track')
{
  if(!is.character(file)){stop('The argument "file" must be character.')}
  # Reduce peaks
  out = GenomicRanges::reduce(SummarizedExperiment::rowRanges(data)[output$Viterbi==1])

  #Summarize PostProb
  overlaps = GenomicRanges::findOverlaps(SummarizedExperiment::rowRanges(data),out)
  PostProb = output$Prob$PostProb2[S4Vectors::queryHits(overlaps)]
  averagedSignal <- aggregate(PostProb, list(S4Vectors::subjectHits(overlaps)),'mean')
  out$signalValue = -log10(averagedSignal$x)

  #Organizing output
  out$score = ((1000-500)/(max(out$signalValue)-min(out$signalValue)))*(out$signalValue-max(out$signalValue))+1000
  out$pValue = -1
  out$qValue = -1

  #Sorting output
  out = out[order(SummarizedExperiment::width(out),-out$signalValue)]
  out$name = paste0('Peak',1:length(out))

  #Exporting
  bed = data.frame(chrom=SummarizedExperiment::seqnames(out),chromStart=SummarizedExperiment::start(out),chromEnd=SummarizedExperiment::end(out),name=out$name,score=out$score,strand='.',
                   signalValue=out$signalValue,pValue=out$pValue,qValue=out$qValue)
  write.table(bed,file="./temp.bed",row.names=F,col.names=F,quote=F,sep="\t")

  header1 = paste0('track name="mixHMM Peaks - ',tracklabel,'" description="" type=broadPeak useScore=1')
  header2 = paste0('browser position ',bed[1,'chrom'],':',bed[1,'chromStart'],'-',bed[1,'chromEnd'])

  system(paste0("echo '",header2,"' | cat - ./temp.bed > ./temp1.bed"))
  system(paste0("echo '",header1,"' | cat - ./temp1.bed > ",file))
  system('rm ./temp.bed ./temp1.bed')

  cat(paste0('A .bed file with peak calls was saved under name: ',file,'.bed'))
  return(out)
}
plbaldoni/mixHMM documentation built on Nov. 8, 2019, 8:05 p.m.