R/FilteringMethods.R

Defines functions .filterWithPoisson

#' Filter raw TSS counts or normalized TSS
#'
#' @description Filters transcriptional or sequencing noise.
#'
#' @usage filterTSS(object, method = "poisson", normalization = TRUE,
#' pVal =0.01, tpmLow = 0.1)
#'
#' @param object A TSSr object.
#' @param method Method to be used for TSS filtering: "poisson" or "TPM". "poisson" can be used
#' only if the input TSS data in raw number of counts.
#' @param normalization Define whether normalization data to TPM.
#' Used only if method = “poisson”. Default is TRUE.
#' @param pVal Used only if method = "poisson". Default value is 0.01.
#' @param tpmLow Used only if method = "TPM". Default value is 0.1.
#' @return Large List of elements - one element for each sample
#'
#'
#' @export
#'
#' @examples
#' data(exampleTSSr)
#' filterTSS(exampleTSSr, method = "TPM", tpmLow=0.1)
setGeneric("filterTSS",function(object, method = "poisson", normalization = TRUE
                                , pVal =0.01, tpmLow = 0.1)standardGeneric("filterTSS"))
#' @rdname filterTSS
#' @export
setMethod("filterTSS",signature(object = "TSSr"), function(object, method, normalization, pVal, tpmLow)
{
  ##initialize values
  Genome <- .getGenome(object@genomeName)
  sampleLabelsMerged <- object@sampleLabelsMerged
  objName <- deparse(substitute(object))
  library.size <- object@librarySizes

  tss.dt <- object@TSSprocessedMatrix
  ##define variable as a NULL value
  tags = NULL

  ##filter tss data
  if(method  == "poisson")
  {
      #calculate size of genome
      genomeSize <- 0
      for (chrom in seq_len(length(Genome))) 
      {
        genomeSize <- genomeSize + length(Genome[[chrom]])
      }
      message("\nFiltering data with ", method," method...")
      if(any(tss.dt[,4] > 0 & tss.dt[,4] < 1))
      {
        stop("Raw count data required for poisson method.")
      }
      tss.new <- lapply(as.list(seq(sampleLabelsMerged)), function(i)
      {
        temp <- tss.dt[,.SD, .SDcols = sampleLabelsMerged[i]]
        setnames(temp, colnames(temp)[[1]], "tags")
        temp <- .filterWithPoisson(temp, library.size[i], genomeSize, pVal)
        if(normalization == "TRUE")
          {
          sizePerMillion <- library.size[i] / 1e6
          temp[, tags := round(tags / sizePerMillion, 6)]
          }
        setnames(temp, colnames(temp)[[1]], sampleLabelsMerged[i])
        return(temp)
      })
    re <- NULL
    for(i in seq(sampleLabelsMerged)){re <- cbind(re, tss.new[[i]])}
    re <- cbind(tss.dt[,c(1,2,3)],re)
    ##removes filtered rows
    re <- re[rowSums(re[,4:ncol(re)]) >0,]
    setorder(re, "strand","chr","pos")
    object@TSSprocessedMatrix <- re
    assign(objName, object, envir = parent.frame())
  }
  else if(method  == "TPM")
  {
    message("\nFiltering data with ", method," method...")
    if(any(tss.dt[,4] > 0 & tss.dt[,4] < 1) == FALSE)
    {
      stop("Data must be normalized.")
    }
    tss.new <- lapply(as.list(seq(sampleLabelsMerged)), function(i){
      temp <- tss.dt[,.SD, .SDcols = sampleLabelsMerged[i]]
      setnames(temp, colnames(temp)[[1]], "tags")
      temp[tags < tpmLow,]=0
      setnames(temp, colnames(temp)[[1]], sampleLabelsMerged[i])
      return(temp)
    })
    re <- NULL
    for(i in seq(sampleLabelsMerged)){re <- cbind(re, tss.new[[i]])}
    re <- cbind(tss.dt[,c(1,2,3)],re)
    ##removes filtered rows
    re <- re[rowSums(re[,4:ncol(re)]) >0,]
    setorder(re, "strand","chr","pos")
    object@TSSprocessedMatrix <- re
    assign(objName, object, envir = parent.frame())
  }
    else
    {
      message("\tNo filtering method is defined...")
    }
})

################################################################################################
.filterWithPoisson <- function(data, coverageDepth, genomeSize, pVal) 
{
  # calculate lambda value (average number of reads per site on each strand of DNA)
  lambda <- coverageDepth / (genomeSize * 2)
  # get cutoff value
  cutoff <- qpois(pVal, lambda, lower.tail=FALSE, log.p=FALSE)
  # fiter tss table
  data[data<cutoff,] =0
  return(data)
}
Linlab-slu/TSSr documentation built on Oct. 24, 2023, 8:32 p.m.