# Suppress loading messages when building the HTML suppressPackageStartupMessages({ library(SCENIC) library(AUCell) library(RcisTarget) library(SingleCellExperiment) })
This tutorial provides the detailed explanation of runSCENIC_3_scoreCells()
: Using AUCell to score the network activity in each individual cell.
All the code below is the content of the function runSCENIC_3_scoreCells()
. This tutorial is meant for advanced users, who want know the details about what this function does internally, or to modify the workflow. There is no need to follow this tutorial for a regular run of SCENIC (see vignette("SCENIC_Running")
).
setwd("SCENIC_MouseBrain") # Expression matrix: library(SingleCellExperiment) load("data/sceMouseBrain.RData") exprMat <- counts(sceMouseBrain) # SCENIC options: scenicOptions <- readRDS("int/scenicOptions.Rds") skipBinaryThresholds=FALSE # Whether to skip the automatic binarization step skipHeatmap=FALSE # hether to plot the AUC heatmap skipTsne=FALSE # Whether to plot the t-SNE
Once the regulons (direct TF targets) that comprise the gene regulatory network are known, it is possible to evaluate the activity this network in the individual cells. This is achieved using AUCell: providing each regulon as input gene-set, and evaluating its expression in each cell.
A detailed tutorial on how to use AUCell is included in the package. See
vignette("AUCell")
.
Load the regulons from the previous step, and keep those with at least 10 genes. Add the TF to the regulon (temporarily, to take its expression into account with AUCell) and rename the regulon to include the number of genes:
regulons <- loadInt(scenicOptions, "regulons") regulons <- regulons[order(lengths(regulons), decreasing=TRUE)] regulons <- regulons[lengths(regulons)>=10] if(length(regulons) <2) stop("Not enough regulons with at least 10 genes.") # Add the TF to the regulon (keeping it only once) & rename regulon regulons <- setNames(lapply(names(regulons), function(tf) sort(unique(c(gsub("_extended", "", tf), regulons[[tf]])))), names(regulons)) names(regulons) <- paste(names(regulons), " (",lengths(regulons), "g)", sep="") saveRDS(regulons, file=getIntName(scenicOptions, "aucell_regulons")) msg <- paste0(format(Sys.time(), "%H:%M"), "\tStep 3. Analyzing the network activity in each individual cell") if(getSettings(scenicOptions, "verbose")) message(msg) msg <- paste0("\nNumber of regulons to evaluate on cells: ", length(regulons), "\nBiggest (non-extended) regulons: \n", paste("\t", grep("_extended",names(regulons),invert = T, value = T)[1:10], collapse="\n")) # TODO maxlen? if(getSettings(scenicOptions, "verbose")) message(msg)
AUCell is run in the standard way (see AUCell vignette for more info), providing the expression matrix and the regulons with at least 10 genes as input.
1. Create gene rankings for each cell
The first step to calculate the activity of a gene-set is to create the 'rankings'. For each cell, the genes are ranked from highest to lowest value. The genes with same expression value are shuffled. Therefore, genes with expression '0' are randomly sorted at the end of the ranking.
2. Regulon activity (AUC)
To calculate whether the regulon is enriched at the top of the gene-ranking for each cell, AUCell uses a statistical method based on the "Area Under the Curve" (AUC). This AUC value will be higher when many of the genes in the regulon are within the genes expressed in the cell. Therefore, it represents the activity of the regulon in each cell.
To increase speed, instead of calculating the AUC on the whole ranking, AUCell can use only the top genes in the ranking (i.e. aucMaxRank
). If this option is used, it is important to check that most cells have at least the number of expressed/detected genes that are going to be used to calculate the AUC. A histogram showing the number of genes detected by cell is returned if 'plotStats=TRUE'.
library(AUCell) nCores <- getSettings(scenicOptions, "nCores")
if(is.data.frame(exprMat)) { supportedClasses <- paste(gsub("AUCell_buildRankings,", "", methods("AUCell_buildRankings")), collapse=", ") supportedClasses <- gsub("-method", "", supportedClasses) stop("'exprMat' should be one of the following classes: ", supportedClasses, "\n(data.frames are not supported. Please, convert the expression matrix to one of these classes.)") } set.seed(getSettings(scenicOptions,"seed")) tryCatch({ .openDev(fileName=getIntName(scenicOptions, "aucell_genesStatsPlot"), devType=getSettings(scenicOptions, "devType")) aucellRankings <- AUCell_buildRankings(exprMat, nCores=nCores, plotStats=TRUE, verbose=getSettings(scenicOptions, "verbose")) abline(v=aucellRankings@nGenesDetected["1%"], col="skyblue3", lwd=5, lty=3) dev.off() },error = function(e) { message("Catched error in AUCell_buildRankings() or in the histogram plot: ", e$message) }) saveRDS(aucellRankings, file=getIntName(scenicOptions, "aucell_rankings"))
regulonAUC <- AUCell_calcAUC(regulons, aucellRankings, aucMaxRank=aucellRankings@nGenesDetected["1%"], nCores=nCores) # Order the modules by similarity, for easier exploration in the upcoming steps & save regulonOrder <- orderAUC(regulonAUC) # added to AUCell 1.5.1 regulonAUC <- regulonAUC[regulonOrder,] saveRDS(regulonAUC, file=getIntName(scenicOptions, "aucell_regulonAUC"))
The distribuion of the AUC of a regulon across all the cells can provide important information about its activity. Regulons that are differentialy active across the cells will often show bimodal or skewed distributions. A way to explore these distributions is to plot the AUC as histograms and exploring the association of the regulon activity with the current clustering, we can project the AUC scores on the t-SNE (plotted in the next sections).
cells_AUCellThresholds <- NULL if(!skipBinaryThresholds) { cells_AUCellThresholds <- AUCell_exploreThresholds(regulonAUC, smallestPopPercent=getSettings(scenicOptions,"aucell/smallestPopPercent"), assignCells=TRUE, plotHist=FALSE, verbose=FALSE, nCores=nCores) saveRDS(cells_AUCellThresholds, file=getIntName(scenicOptions, "aucell_thresholds")) # Get cells assigned to each regulon regulonsCells <- getAssignments(cells_AUCellThresholds) ### Save threshold info as text (e.g. to edit/modify...) trhAssignment <- getThresholdSelected(cells_AUCellThresholds) trhAssignment <- signif(trhAssignment, 3) # TODO why is it sometimes a list? https://github.com/aertslab/AUCell/issues/3 commentsThresholds <- sapply(cells_AUCellThresholds, function(x) unname(x$aucThr$comment)) table2edit <- cbind(regulon=names(cells_AUCellThresholds), threshold=trhAssignment[names(cells_AUCellThresholds)], nCellsAssigned=lengths(regulonsCells)[names(cells_AUCellThresholds)], AUCellComment=commentsThresholds[names(cells_AUCellThresholds)], nGenes=gsub("[\\(g\\)]", "", regmatches(names(cells_AUCellThresholds), gregexpr("\\(.*?\\)", names(cells_AUCellThresholds)))), clusteringOrder=1:length(cells_AUCellThresholds), clusterGroup=regulonClusters[names(cells_AUCellThresholds)], onlyNonDuplicatedExtended=(names(cells_AUCellThresholds) %in% onlyNonDuplicatedExtended(names(cells_AUCellThresholds))), personalNotes="") write.table(table2edit, file=getIntName(scenicOptions, "aucell_thresholdsTxt"), row.names=F, quote=F, sep="\t") rm(trhAssignment) }
if(!skipHeatmap){ nCellsHeatmap <- min(500, ncol(regulonAUC)) cells2plot <- sample(colnames(regulonAUC), nCellsHeatmap) cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null") #TODO check if exists, if not... create/ignore? if(!is.null(cellInfo)) cellInfo <- data.frame(cellInfo)[cells2plot,,drop=F] colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null") fileName <- getOutName(scenicOptions, "s3_AUCheatmap") fileName <- .openDevHeatmap(fileName=fileName, devType=getSettings(scenicOptions, "devType")) NMF::aheatmap(getAUC(regulonAUC)[,cells2plot], annCol=cellInfo, annColor=colVars, main="AUC", sub=paste("Subset of",nCellsHeatmap," random cells"), filename=fileName) .closeDevHeatmap(devType=getSettings(scenicOptions, "devType")) }
if(!skipTsne){ tSNE_fileName <- tsneAUC(scenicOptions, aucType="AUC", onlyHighConf=FALSE) # default: nPcs, perpl, seed, tsne prefix tSNE <- readRDS(tSNE_fileName) # AUCell (activity) plots with the default tsne, as html: fileName <- getOutName(scenicOptions, "s3_AUCtSNE_colAct") plotTsne_regulonActivityHTML(scenicOptions, exprMat, fileName, tSNE) #open the resulting html locally # Plot cell properties: sub <- ""; if("type" %in% names(tSNE)) sub <- paste0("t-SNE on ", tSNE$type) cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null") colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null") pdf(paste0(getOutName(scenicOptions, "s3_AUCtSNE_colProps"),".pdf")) plotTsne_cellProps(tSNE$Y, cellInfo=cellInfo, colVars=colVars, cex=1, sub=sub) dev.off() }
# Finished. Update status. object@status$current <- 3
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.