# Suppress loading messages when building the HTML suppressPackageStartupMessages({ library(SCENIC) library(AUCell) library(RcisTarget) library(SingleCellExperiment) })
This tutorial provides the detailed explanation of geneFiltering()
: Filter-out genes (previous to GRN-inference) based on the counts and number of cells in which they are detected.
All the code below is the content of the function geneFiltering()
. This tutorial is meant for advanced users, who want know the details about what this function does internally, or to modify the workflow. There is no need to follow this tutorial for a regular run of SCENIC (see vignette("SCENIC_Running")
).
See SCENIC_running.Rmd
vignette for the description/motivation of this function.
setwd("SCENIC_MouseBrain") scenicOptions <- readRDS("int/scenicOptions.Rds") load("data/sceMouseBrain.RData") exprMat <- counts(sceMouseBrain)
# Default values: minCountsPerGene <- 3*.01*ncol(exprMat) minSamples <- ncol(exprMat)*.01 outFile_exprMatTxt <- NULL # Filtered expression matrix is not saved dbFilePath <- getDatabases(scenicOptions)[[1]] outFile_genesKept <- getIntName(scenicOptions, "genesKept")
# Calculate stats nCountsPerGene <- rowSums(exprMat, na.rm = T) nCellsPerGene <- rowSums(exprMat>0, na.rm = T) ## Show info message("Maximum value in the expression matrix: ", max(exprMat, na.rm=T)) message("Ratio of detected vs non-detected: ", signif(sum(exprMat>0, na.rm=T) / sum(exprMat==0, na.rm=T), 2)) message("Number of counts (in the dataset units) per gene:") print(summary(nCountsPerGene)) message("Number of cells in which each gene is detected:") print(summary(nCellsPerGene)) ## Filter message("\nNumber of genes left after applying the following filters (sequential):") # First filter # minCountsPerGene <- 3*.01*ncol(exprMat) genesLeft_minReads <- names(nCountsPerGene)[which(nCountsPerGene > minCountsPerGene)] message("\t", length(genesLeft_minReads), "\tgenes with counts per gene > ", minCountsPerGene) # Second filter # minSamples <- ncol(exprMat)*.01 nCellsPerGene2 <- nCellsPerGene[genesLeft_minReads] genesLeft_minCells <- names(nCellsPerGene2)[which(nCellsPerGene2 > minSamples)] message("\t", length(genesLeft_minCells), "\tgenes detected in more than ",minSamples," cells") # Exclude genes missing from database: library(RcisTarget) motifRankings <- importRankings(dbFilePath) # either one, they should have the same genes genesInDatabase <- colnames(getRanking(motifRankings)) genesLeft_minCells_inDatabases <- genesLeft_minCells[which(genesLeft_minCells %in% genesInDatabase)] message("\t", length(genesLeft_minCells_inDatabases), "\tgenes available in RcisTarget database") genesKept <- genesLeft_minCells_inDatabases if(is.null(outFile_genesKept)) saveRDS(genesKept, file=outFile_genesKept)
Return value: genesKept
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.