#' @title Run escape ssGSEA
#'
#' @description This will run escape to calcualte ssGSEA on Hallmark gene sets
#' @param seuratObj A Seurat object.
#' @param outputAssayName The name of the assay to store results
#' @param doPlot If true, a FeaturePlot will be printed for each pathway
#' @param doPca boolean determining if the PCA plot should be computed and printed.
#' @param performDimRedux If true, the standard seurat PCA/FindClusters/UMAP process will be run on the escape data. This may be most useful when using a customGeneSet or a smaller set of features/pathways
#' @param msigdbGeneSets A vector containing gene set codes specifying which gene sets should be fetched from MSigDB and calculated. Some recommendations in increasing computation time: H (hallmark, 50 gene sets), C8 (scRNASeq cell type markers, 830 gene sets), C2 (curated pathways, 6366 gene sets), GO:BP (GO biological processes, 7658).
#' @param customGeneSets A (preferably named) list containing gene sets to be scored by escape.
#' @param assayName The name of the source assay
#' @return The seurat object with results stored in an assay
#' @export
RunEscape <- function(seuratObj, outputAssayName = "escape.ssGSEA", doPlot = FALSE, doPca = TRUE, performDimRedux = FALSE, msigdbGeneSets = c("H"), customGeneSets = NULL, assayName = 'RNA') {
#gene set vector to be populated by msigdb or custom gene sets
GS <- c()
#retrieve msigdb gene sets
if (any(!is.null(msigdbGeneSets), !is.na(msigdbGeneSets))) {
#GetMsigdbGeneSet sanity checks the input, so we don't need to here.
GS <- GetMsigdbGeneSet(msigdbGeneSets)
}
#parse customGeneSets
if (all(!is.null(customGeneSets), !(length(customGeneSets) == 0))) {
#check typing (msigdbGeneSets is a vector, so this could be a point of confusion).
if (!is.list(customGeneSets)){
stop("customGeneSets is not a list. Please coerce it into a named list.")
}
#check for feature existence.
#I think it's reasonable to expect someone might grab a new/non-msigdb human gene set and supply it to customGeneSets, so a warning rather than stop for feature existence is sufficient.
if (!(all(unlist(customGeneSets) %in% rownames(seuratObj)))) {
warning(paste0("Some genes in customGeneSets are not features in the Seurat object. These features are missing and will not contribute to scoring: ",
paste0(unlist(customGeneSets)[!(unlist(customGeneSets) %in% rownames(seuratObj))], ", ")))
}
#if the names are not provided in the custom gene sets, we should warn the user but we can label them here.
if (is.null(names(customGeneSets))){
names(customGeneSets) <- paste0("CustomGeneSet", seq_along(customGeneSets))
warning("The customGeneSets list is unnamed. Naming the genesets with generic names such as: CustomGeneSet1, CustomGeneSet2 ...")
}
if (any(grepl(names(customGeneSets), pattern = '_'))) {
print('Converting underscore to hyphen in set names')
toFix <- grepl(names(customGeneSets), pattern = '_')
names(customGeneSets)[toFix] <- gsub(names(customGeneSets)[toFix], pattern = '_', replacement = '-')
}
#concatenate the customGeneSets to the running list of genes (empty if msigdb is empty)
GS <- c(GS, customGeneSets)
}
seuratObj <- escape::runEscape(seuratObj,
method = "ssGSEA",
gene.sets = GS,
groups = 5000,
min.size = 0,
assay = assayName,
new.assay.name = outputAssayName)
seuratObj <- SeuratObject::SetAssayData(seuratObj, assay = outputAssayName, layer = 'counts', new.data = SeuratObject::GetAssayData(seuratObj, assay = outputAssayName, layer = 'data'))
seuratObj <- .NormalizeEscape(seuratObj, assayToNormalize = outputAssayName, assayForLibrarySize = assayName)
#you may want to only score a couple of gene sets (e.g. msigdbGeneSets = NULL), in which case you need to determine the pca parameters to ensure it will run.
if (doPca) {
seuratObj <- escape::performPCA(seuratObj,
assay = outputAssayName,
n.dim = 1:10)
print(escape::pcaEnrichment(seuratObj,
dimRed = "escape.PCA",
x.axis = "PC1",
y.axis = "PC2",
add.percent.contribution = TRUE,
display.factors = TRUE,
number.of.factors = 10
))
}
if (doPlot) {
pathways <- rownames(seuratObj@assays[[outputAssayName]])
key <- seuratObj@assays[[outputAssayName]]@key
for (fn in pathways) {
print(suppressWarnings(Seurat::FeaturePlot(seuratObj, features = paste0(key, fn), min.cutoff = 'q02', max.cutoff = 'q98')))
}
}
if (performDimRedux) {
seuratObj <- .RunEscapePca(seuratObj, assayName = outputAssayName)
}
return(seuratObj)
}
.RunEscapePca <- function(seuratObj, assayName, dimsToUse = NULL, resolutionsToUse = 0.2) {
Seurat::VariableFeatures(seuratObj, assay = assayName) <- rownames(seuratObj@assays[[assayName]])
seuratObj <- Seurat::ScaleData(seuratObj, assay = assayName)
assayNameForKeys <- gsub(assayName, pattern = '\\.', replacement = '')
pca.reduction.key <- paste0(assayNameForKeys, 'pca_')
pca.reduction.name <- paste0('pca.', assayName)
seuratObj <- Seurat::RunPCA(seuratObj, assay = assayName, npcs = length(Seurat::VariableFeatures(seuratObj, assay = assayName)), reduction.key = pca.reduction.key, reduction.name = pca.reduction.name)
print(Seurat::ProjectDim(seuratObj, reduction = pca.reduction.name, assay = assayName))
print(Seurat::VizDimLoadings(object = seuratObj, dims = 1:4, nfeatures = nrow(seuratObj@assays[[assayName]]), reduction = pca.reduction.name))
if (all(is.null(dimsToUse))) {
npc <- dim(seuratObj@reductions[[pca.reduction.name]]@cell.embeddings)[2]
dimsToUse <- 1:min(npc, nrow(seuratObj@assays[[assayName]]))
}
graphName <- paste0(assayName, '.nn')
seuratObj <- Seurat::FindNeighbors(seuratObj, dims = dimsToUse, reduction = pca.reduction.name, assay = assayName, graph.name = graphName)
origIdents <- Idents(seuratObj)
for (resolutionToUse in resolutionsToUse) {
seuratObj <- Seurat::FindClusters(object = seuratObj, resolution = resolutionToUse, verbose = FALSE, graph.name = graphName, cluster.name = paste0('ClusterNames.', assayName, '_', resolutionToUse))
}
Idents(seuratObj) <- origIdents
umap.reduction.name <- paste0(assayName, '.umap')
umap.reduction.key <- paste0(assayNameForKeys, 'umap_')
seuratObj <- Seurat::RunUMAP(seuratObj, dims = dimsToUse, assay = assayName, reduction = pca.reduction.name, reduction.name = umap.reduction.name, reduction.key = umap.reduction.key, verbose = FALSE)
print(DimPlot(seuratObj, reduction = umap.reduction.name))
return(seuratObj)
}
.NormalizeEscape <- function(seuratObj, assayToNormalize, assayForLibrarySize = 'RNA') {
toNormalize <- Seurat::GetAssayData(seuratObj, assayToNormalize, layer = 'counts')
assayForLibrarySizeData <- Seurat::GetAssayData(seuratObj, assay = assayForLibrarySize, layer = 'counts')
if (any(colnames(toNormalize) != colnames(assayForLibrarySize))) {
stop(paste0('The assayToNormalize and assayForLibrarySize do not have the same cell names!'))
}
margin <- 2
ncells <- dim(x = toNormalize)[margin]
for (i in seq_len(length.out = ncells)) {
x <- toNormalize[, i]
if (any(is.na(x))) {
warning('NAs were found in the escape data!')
x[is.na(x)] <- 0
}
librarySize <- sum(assayForLibrarySizeData[, i], na.rm = TRUE)
if (librarySize == 0) {
stop(paste0('librarySize was zero for: ', colnames(seuratObj)[i]))
}
toNormalize[, i] <- x / librarySize
}
seuratObj <- Seurat::SetAssayData(seuratObj, assay = assayToNormalize, layer = 'data', new.data = toNormalize)
seuratObj <- Seurat::ScaleData(seuratObj, assay = assayToNormalize)
return(seuratObj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.