#' Remove genes based on different criteria
#'
#' Remove genes with no variability (method = "constant"), missing values
#' (method = "missing") or low expression (method = "detection.pvalue",
#' method = "zero" or method = "edgeR").
#'
#' @param se
#' \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}}
#' object
#' @param assay Character or integer. Name or number of assay to be used for
#' filtering.
#' @param method Method to determine genes to be removed: "constant", "missing",
#' "detection.pvalue", "zero", "edgeR" (using
#' \code{\link[edgeR]{filterByExpr}}).
#' @param freq Numeric. If more than freq*100 \% of the samples fulfill
#' criterion, the gene is removed (not used if method = "constant").
#' @param min.count Numeric. Minimum count required for at least some samples (
#' used only if method = "edgeR").
#' @param min.total.count Numeric. Minimum total count required (
#' used only if method = "edgeR").
#' @param verbose Logical. Should number of removed genes be reported?
#'
#' @return \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}}
#' object with genes removed
#'
#' @importFrom edgeR filterByExpr
#' @export
#'
#' @examples
#' data("se.probeset")
#' print(se.probeset)
#'
#' ## remove probesets with missing values
#' se.probeset.red = remove_genes(se = se.probeset,
#' assay = "exprs.log",
#' method = "missing",
#' freq = 0)
#' print(se.probeset.red)
#'
#' ## remove probesets with low expression
#' se.probeset.red = remove_genes(se = se.probeset,
#' assay = "detection.pvalue",
#' method = "detection.pvalue",
#' freq = 0.75)
#' print(se.probeset.red)
remove_genes <- function(se,
assay = 1,
method,
freq = 0.25,
min.count = 10,
min.total.count = 15,
verbose = FALSE) {
if (is.character(assay) && !(assay %in% names(assays(se)))) {
stop(paste("assay", assay, "not found!"))
}
if (freq < 0 | freq > 1) {
stop("freq needs to be between 0 and 1 (inclusive)!")
}
expr = assays(se)[[assay]]
if (method == "constant") {
sd = apply(expr, 1, sd)
ind.rm = which(sd == 0)
} else if (method == "edgeR") {
info.keep = filterByExpr(y = expr,
min.count = min.count,
min.total.count = min.total.count,
min.prop = freq)
ind.rm = which(!info.keep)
} else {
if (method == "missing") {
crit = apply(expr, 1, function(x) {sum(is.na(x))}) / ncol(expr)
} else if (method == "zero") {
crit = apply(expr, 1, function(x) {sum(x == 0)}) / ncol(expr)
} else if (method == "detection.pvalue") {
range = range(as.numeric(expr), na.rm = TRUE)
if (min(range) < 0 | max(range) > 1) {
stop(paste("assay", assay, "does not contain P values!"))
}
crit = apply(expr, 1, function(x) {sum(x >= 0.05)}) / ncol(expr)
} else {
stop(paste("method", method, "not known!"))
}
if (freq == 1) {
ind.rm = which(crit == freq)
} else {
ind.rm = which(crit > freq)
}
}
if (length(ind.rm) > 0) {
if (verbose) {
print(paste(length(ind.rm), "genes removed"))
}
se = se[-ind.rm, ]
}
return(se)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.