# R/filterZeros.R
#
# Defines functions: filterZeros
#
# Documented in: filterZeros

#' @include class_RegspliceData.R class_RegspliceResults.R
NULL




#' Filter zero-count exons.
#' 
#' Filter exons with zero RNA-seq read counts in all biological samples.
#' 
#' Removes exon bins with zero RNA-seq read counts in all biological samples. Any 
#' remaining single-exon genes (after filtering) are also removed (since differential
#' splicing requires multiple exon bins).
#' 
#' Input data is assumed to be in the form of a \code{RegspliceData} object. See 
#' \code{\link{RegspliceData}} for details.
#' 
#' After filtering zero-count exon bins, any remaining genes containing only a single
#' exon bin are also removed (since differential splicing requires multiple exon bins).
#' 
#' Filtering should be skipped when using exon microarray data. (When using the 
#' \code{regsplice} wrapper function, filtering can be disabled with the argument 
#' \code{filter = FALSE}).
#' 
#' Previous step: Create \code{RegspliceData} object with \code{\link{RegspliceData}}
#' constructor function.
#' Next step: Filter low-count exon bins with \code{\link{filterLowCounts}}.
#' 
#' 
#' @param rs_data \code{\linkS4class{RegspliceData}} object.
#' 
#' 
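#' @return Returns a \code{\linkS4class{RegspliceData}} object in which exon bins
#'   with zero counts in all samples, and any genes left with only a single exon
#'   bin after that filtering, have been removed.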
#' 
#' @seealso \code{\linkS4class{RegspliceData}} \code{\link{filterLowCounts}}
#' 
#' @importFrom methods is
#' 
#' @export
#' 
#' @examples
#' file_counts <- system.file("extdata/vignette_counts.txt", package = "regsplice")
#' data <- read.table(file_counts, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#' head(data)
#' 
#' counts <- data[, 2:7]
#' tbl_exons <- table(sapply(strsplit(data$exon, ":"), function(s) s[[1]]))
#' gene_IDs <- names(tbl_exons)
#' n_exons <- unname(tbl_exons)
#' condition <- rep(c("untreated", "treated"), each = 3)
#' 
#' rs_data <- RegspliceData(counts, gene_IDs, n_exons, condition)
#' 
#' rs_data <- filterZeros(rs_data)
#' 
filterZeros <- function(rs_data) {

  # Guard clause: only 'RegspliceData' input is accepted.
  if (!is(rs_data, "RegspliceData")) {
    stop("'rs_data' must be a 'RegspliceData' object")
  }

  # Flag exon bins (rows) that have at least one non-zero count across the
  # samples (columns); the unflagged rows are zero in every sample.
  has_counts <- apply(
    countsData(rs_data),
    MARGIN = 1,
    FUN = function(bin) any(bin != 0)
  )

  # Report how many all-zero exon bins are about to be dropped.
  n_removed <- sum(!has_counts)
  message(paste("removed", n_removed, "exon(s) with zero counts"))

  # Keep only the flagged rows; messages emitted while subsetting are silenced.
  filtered <- suppressMessages(rs_data[has_counts, ])

  # Drop genes left with a single exon bin after the filtering above.
  .removeSingleExonGenes(filtered)
}
# lmweber/regsplice documentation built on March 19, 2024, 1:45 p.m.