# Suppress loading messages when building the HTML
suppressPackageStartupMessages({
  library(SCENIC)
  library(AUCell)
  library(RcisTarget)
  library(SingleCellExperiment)
})

This tutorial provides the detailed explanation of runSCENIC_3_scoreCells(): Using AUCell to score the network activity in each individual cell.

All the code below is the content of the function runSCENIC_3_scoreCells(). This tutorial is meant for advanced users, who want know the details about what this function does internally, or to modify the workflow. There is no need to follow this tutorial for a regular run of SCENIC (see vignette("SCENIC_Running")).

Input

setwd("SCENIC_MouseBrain")

# Expression matrix:
library(SingleCellExperiment)
load("data/sceMouseBrain.RData")
exprMat <- counts(sceMouseBrain)

# SCENIC options:
scenicOptions <- readRDS("int/scenicOptions.Rds")

skipBinaryThresholds=FALSE # Whether to skip the automatic binarization step
skipHeatmap=FALSE # hether to plot the AUC heatmap
skipTsne=FALSE # Whether to plot the t-SNE

runSCENIC_3_scoreCells() code:

Once the regulons (direct TF targets) that comprise the gene regulatory network are known, it is possible to evaluate the activity this network in the individual cells. This is achieved using AUCell: providing each regulon as input gene-set, and evaluating its expression in each cell.

A detailed tutorial on how to use AUCell is included in the package. See vignette("AUCell").

Prepare regulons

Load the regulons from the previous step, and keep those with at least 10 genes. Add the TF to the regulon (temporarily, to take its expression into account with AUCell) and rename the regulon to include the number of genes:

regulons <- loadInt(scenicOptions, "regulons")
regulons <- regulons[order(lengths(regulons), decreasing=TRUE)]
regulons <- regulons[lengths(regulons)>=10]
if(length(regulons) <2)  stop("Not enough regulons with at least 10 genes.")

# Add the TF to the regulon (keeping it only once) & rename regulon
regulons <- setNames(lapply(names(regulons), function(tf) sort(unique(c(gsub("_extended", "", tf), regulons[[tf]])))), names(regulons))
names(regulons) <- paste(names(regulons), " (",lengths(regulons), "g)", sep="")
saveRDS(regulons, file=getIntName(scenicOptions, "aucell_regulons"))

msg <- paste0(format(Sys.time(), "%H:%M"), "\tStep 3. Analyzing the network activity in each individual cell")
if(getSettings(scenicOptions, "verbose")) message(msg)

msg <- paste0("\nNumber of regulons to evaluate on cells: ", length(regulons),
              "\nBiggest (non-extended) regulons: \n",
              paste("\t", grep("_extended",names(regulons),invert = T, value = T)[1:10], collapse="\n")) # TODO maxlen?
if(getSettings(scenicOptions, "verbose")) message(msg)

AUCell

AUCell is run in the standard way (see AUCell vignette for more info), providing the expression matrix and the regulons with at least 10 genes as input.

1. Create gene rankings for each cell

The first step to calculate the activity of a gene-set is to create the 'rankings'. For each cell, the genes are ranked from highest to lowest value. The genes with same expression value are shuffled. Therefore, genes with expression '0' are randomly sorted at the end of the ranking.

2. Regulon activity (AUC)

To calculate whether the regulon is enriched at the top of the gene-ranking for each cell, AUCell uses a statistical method based on the "Area Under the Curve" (AUC). This AUC value will be higher when many of the genes in the regulon are within the genes expressed in the cell. Therefore, it represents the activity of the regulon in each cell.

To increase speed, instead of calculating the AUC on the whole ranking, AUCell can use only the top genes in the ranking (i.e. aucMaxRank). If this option is used, it is important to check that most cells have at least the number of expressed/detected genes that are going to be used to calculate the AUC. A histogram showing the number of genes detected by cell is returned if 'plotStats=TRUE'.

library(AUCell)
nCores <- getSettings(scenicOptions, "nCores")

1. Create rankings

if(is.data.frame(exprMat)) 
{
  supportedClasses <- paste(gsub("AUCell_buildRankings,", "", methods("AUCell_buildRankings")), collapse=", ")
  supportedClasses <- gsub("-method", "", supportedClasses)

  stop("'exprMat' should be one of the following classes: ", supportedClasses, 
       "\n(data.frames are not supported. Please, convert the expression matrix to one of these classes.)")
}

set.seed(getSettings(scenicOptions,"seed"))
tryCatch({
    .openDev(fileName=getIntName(scenicOptions, "aucell_genesStatsPlot"),
            devType=getSettings(scenicOptions, "devType"))
      aucellRankings <- AUCell_buildRankings(exprMat, nCores=nCores, 
                            plotStats=TRUE, verbose=getSettings(scenicOptions, "verbose"))
      abline(v=aucellRankings@nGenesDetected["1%"], col="skyblue3", lwd=5, lty=3)
    dev.off()
  },error = function(e) {
    message("Catched error in AUCell_buildRankings() or in the histogram plot: ", e$message)
  })
saveRDS(aucellRankings, file=getIntName(scenicOptions, "aucell_rankings"))

2. Calculate AUC

regulonAUC <- AUCell_calcAUC(regulons, aucellRankings, 
            aucMaxRank=aucellRankings@nGenesDetected["1%"], nCores=nCores)

# Order the modules by similarity, for easier exploration in the upcoming steps & save
regulonOrder <- orderAUC(regulonAUC) # added to AUCell 1.5.1
regulonAUC <- regulonAUC[regulonOrder,]
saveRDS(regulonAUC, file=getIntName(scenicOptions, "aucell_regulonAUC"))

3. Default thresholds (optional)

The distribuion of the AUC of a regulon across all the cells can provide important information about its activity. Regulons that are differentialy active across the cells will often show bimodal or skewed distributions. A way to explore these distributions is to plot the AUC as histograms and exploring the association of the regulon activity with the current clustering, we can project the AUC scores on the t-SNE (plotted in the next sections).

cells_AUCellThresholds <- NULL
if(!skipBinaryThresholds)
{
  cells_AUCellThresholds <- AUCell_exploreThresholds(regulonAUC, 
                        smallestPopPercent=getSettings(scenicOptions,"aucell/smallestPopPercent"),
                        assignCells=TRUE, plotHist=FALSE, 
                        verbose=FALSE, nCores=nCores)
  saveRDS(cells_AUCellThresholds, file=getIntName(scenicOptions, "aucell_thresholds"))

  # Get cells assigned to each regulon
  regulonsCells <- getAssignments(cells_AUCellThresholds)

  ### Save threshold info as text (e.g. to edit/modify...)
  trhAssignment <- getThresholdSelected(cells_AUCellThresholds)
  trhAssignment <- signif(trhAssignment, 3) # TODO why is it sometimes a list? https://github.com/aertslab/AUCell/issues/3
  commentsThresholds <- sapply(cells_AUCellThresholds, function(x) unname(x$aucThr$comment))

  table2edit <- cbind(regulon=names(cells_AUCellThresholds),
                      threshold=trhAssignment[names(cells_AUCellThresholds)],
                      nCellsAssigned=lengths(regulonsCells)[names(cells_AUCellThresholds)],
                      AUCellComment=commentsThresholds[names(cells_AUCellThresholds)],
                      nGenes=gsub("[\\(g\\)]", "", regmatches(names(cells_AUCellThresholds), gregexpr("\\(.*?\\)", names(cells_AUCellThresholds)))),
                      clusteringOrder=1:length(cells_AUCellThresholds),
                      clusterGroup=regulonClusters[names(cells_AUCellThresholds)],
                      onlyNonDuplicatedExtended=(names(cells_AUCellThresholds) %in% onlyNonDuplicatedExtended(names(cells_AUCellThresholds))),
                      personalNotes="")
  write.table(table2edit, file=getIntName(scenicOptions, "aucell_thresholdsTxt"), row.names=F, quote=F, sep="\t")
  rm(trhAssignment)
}

Heatmap plot (optional)

if(!skipHeatmap){
  nCellsHeatmap <- min(500, ncol(regulonAUC))
  cells2plot <- sample(colnames(regulonAUC), nCellsHeatmap)

  cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null")   #TODO check if exists, if not... create/ignore?
  if(!is.null(cellInfo)) cellInfo <- data.frame(cellInfo)[cells2plot,,drop=F]
  colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null")

  fileName <- getOutName(scenicOptions, "s3_AUCheatmap")

  fileName <- .openDevHeatmap(fileName=fileName, devType=getSettings(scenicOptions, "devType"))
  NMF::aheatmap(getAUC(regulonAUC)[,cells2plot],
                annCol=cellInfo,
                annColor=colVars,
                main="AUC",
                sub=paste("Subset of",nCellsHeatmap," random cells"),
                filename=fileName)
  .closeDevHeatmap(devType=getSettings(scenicOptions, "devType"))
}

t-SNE plot (optional)

if(!skipTsne){
  tSNE_fileName <- tsneAUC(scenicOptions, aucType="AUC", onlyHighConf=FALSE) # default: nPcs, perpl, seed, tsne prefix
  tSNE <- readRDS(tSNE_fileName)

  # AUCell (activity) plots with the default tsne, as html: 
  fileName <- getOutName(scenicOptions, "s3_AUCtSNE_colAct")
  plotTsne_regulonActivityHTML(scenicOptions, exprMat, fileName, tSNE) #open the resulting html locally

  # Plot cell properties:
  sub <- ""; if("type" %in% names(tSNE)) sub <- paste0("t-SNE on ", tSNE$type)
  cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null") 
  colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null")
  pdf(paste0(getOutName(scenicOptions, "s3_AUCtSNE_colProps"),".pdf"))
  plotTsne_cellProps(tSNE$Y, cellInfo=cellInfo, colVars=colVars, cex=1, sub=sub)
  dev.off()
}
# Finished. Update status.
object@status$current <- 3


aertslab/SCENIC documentation built on April 7, 2024, 10 a.m.