R/createOffset.R

Defines functions createOffset

Documented in createOffset

#' Create Offsets for Peak Calling
#'
#' Given a mixNBHMMDataSet, this function creates offsets for peak calling. The current implementation can output three different sets of offsets:
#' the sample-specific log sum of read counts (type="sum"), smoothed MA trend (type="loess"), or median ratio (type="ratio").
#'
#' @param object a mixNBHMMDataSet
#' @param type either 'sum' (sample-specific log sum of read counts), 'loess' (smoothed MA trend), or 'ratio' (median ratio).
#' @param span proportion of data to be used in the smoothing (see details)
#'
#' @details
#' If \code{type}='sum', the offsets are calculated as the log column-wise sum of read counts + 1. If \code{type}='loess',
#' the offsets will be the smoothed curve fitted on the MA plot that compares each sample's read counts + 1 over the geometric mean across all samples.
#' The smoothing approach has been similarly implemented in \code{csaw}. If \code{type}='ratio', the offsets will be the median ratio of the sample's
#' read counts + 1 over the geometric mean across all samples.
#'
#' @return
#' The mixNBHMMDataSet with an 'offset' assay filled in.
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#' @references
#' \url{https://github.com/plbaldoni/mixNBHMM}
#' Nucleic Acids Research 44, e45 (2015).
#'
#' @examples
#' data(ENCODE)
#' ENCODE <- createOffset(object = ENCODE,type = 'loess',span = 1)
#'
#' @importFrom limma loessFit
#'
#' @export

createOffset = function(object,type,span=1){
    ChIP = assay(object,'counts')
    N = ncol(ChIP)
    M = nrow(ChIP)
    offset = matrix(NA,nrow=M,ncol=N)
    if(type=='sum'){
        offset = matrix(log(colSums(ChIP+1)),nrow=M,ncol=N,byrow=T)
    }
    if(type=='loess'){
        log.ChIP = log(ChIP+1)
        avg.ChIP = exp(rowMeans(log.ChIP))
        log.avg.ChIP = log(avg.ChIP)

        log.ChIP.M = log.ChIP - matrix(log.avg.ChIP,nrow=M,ncol=N,byrow = F) #M, from MA plot
        log.ChIP.A = 0.5*(log.ChIP + matrix(log.avg.ChIP,nrow=M,ncol=N,byrow = F)) #A, from MA plot

        # Calculating Loess
        offset = sapply(1:N,function(i){limma::loessFit(y=log.ChIP.M[,i],x=log.ChIP.A[,i],span = span)$fitted})
    }
    if(type=='ratio'){
        log.ChIP = log(ChIP+1)
        avg.ChIP = exp(rowMeans(log.ChIP))
        log.avg.ChIP = log(avg.ChIP)

        log.ChIP.M = log.ChIP - matrix(log.avg.ChIP,nrow=M,ncol=N,byrow = F) #M, from MA plot
        log.ChIP.A = 0.5*(log.ChIP + matrix(log.avg.ChIP,nrow=M,ncol=N,byrow = F)) #A, from MA plot

        # Calculating offset
        medianRatio = apply(log.ChIP.M,2,median)
        offset = matrix(medianRatio,nrow = M,ncol = N,byrow = T)
    }
    assay(object,'offset') <- offset
    return(object)
}
plbaldoni/mixNBHMM documentation built on Dec. 24, 2019, 1:31 p.m.