R/intersectBedgraph.R

#' intersectBedgraph
#'
#' GRbed vs GRbg
#'
#' @param bed A Granges with regions to subset with.
#' @param bg A Granges with scores (AKA bedgraph/bigwig).
#' @param padding Increase search-space around bed.
#' @param alignment When multiple hits, What to do? [upstream, downstream, middle, mean, median]
#' @param vectorOut Instead of a Grangesm output just a vector of scores per TAD
#' @return A genomicRanges-object
#' @export
intersectBedgraph <- function(bed, bg, padding = 0, alignment = 'mean', vectorOut = F){
  require(GenomicRanges)
  require(dplyr)
  colnames(elementMetadata(bg)) <- 'score'

  hitjes <- as.data.frame(GenomicRanges::findOverlaps(bg,bed, maxgap = padding))

  borderIns <- dplyr::group_by(hitjes,subjectHits)

  outGR <- NULL
  if(alignment == "upstream"){
    borderIns <- dplyr::summarise(borderIns, hit = min(queryHits))
    scores <- bg[borderIns$hit,]
    regions <- bed[borderIns$subjectHits,]
    outGR <- GenomicRanges::makeGRangesFromDataFrame(cbind(as.data.frame(regions), mcols(scores)), keep.extra.columns = T)

  } else if(alignment == 'downstream'){
    borderIns <- dplyr::summarise(borderIns, hit = max(queryHits))
    scores <- bg[borderIns$hit,]
    regions <- bed[borderIns$subjectHits,]
    outGR <- GenomicRanges::makeGRangesFromDataFrame(cbind(as.data.frame(regions), mcols(scores)), keep.extra.columns = T)


  } else if(alignment == "mean"){

    rawScores <- bg[borderIns$queryHits,]
    df <- as.data.frame(cbind(borderIns,mcols(rawScores)))
    df <- dplyr::group_by(df, subjectHits)
    borderScores <- dplyr::summarise(df, score = mean(score))
    regions <- bed[borderScores$subjectHits,]
    outGR <- GenomicRanges::makeGRangesFromDataFrame(cbind(as.data.frame(regions), borderScores$score), keep.extra.columns = T)

  } else if(alignment == "median"){
    rawScores <- bg[borderIns$queryHits,]
    df <- as.data.frame(cbind(borderIns,mcols(rawScores)))
    df <- dplyr::group_by(df, subjectHits)
    borderScores <- dplyr::summarise(df, score = median(score))
    regions <- bed[borderScores$subjectHits,]
    outGR <- GenomicRanges::makeGRangesFromDataFrame(cbind(as.data.frame(regions), borderScores$score), keep.extra.columns = T)
  } else {
    stop('Alignment-argument must be one of [upstream, downstream, middle, mean or median]!')
  }

  colnames(elementMetadata(outGR)) <- 'score'
  # GR of vector?
  returnables <- outGR
  if(vectorOut == T){
    returnables <- unname(unlist(mcols(returnables)))
  }
  return(returnables)
}
robinweide/RHWlib documentation built on May 7, 2019, 8:03 a.m.