#' getAllGOlists
#'
#' Returned as list of genes for each GO term
#'
#' @param species Two-letter species code (corresponding to the 'org.Xx.eg' package)
#' @param translate.cat.ids Logical; whether to translate GO category IDs to terms
#' @param gene.id.type The type of gene identifier to return;
#' either 'symbol' (default) or 'ensembl'.
#' @param genes The subset of genes to use.
#' @param minSize The minimum set size (default 10).
#' @param maxSize The maximum set size (default 500).
#' @param returnGeneSets Logical; whether to return an object of class
#' `GeneSetCollection` rather than a normal list (default FALSE).
#' @param categories The GO categories to use (default all)
#' @param ontologies The GO tree to use; any combination of "BP", "CC", and "MF"
#' (default all)
#'
#' @return A named list of genes, or a `GeneSetCollection` if `returnGeneSets=T`
#' @export
getAllGOlists <- function( species = "Mm",
translate.cat.ids = TRUE,
gene.id.type = "symbol",
genes = NULL,
minSize = 10,
maxSize = 500,
returnGeneSets = FALSE,
categories = NULL,
ontologies = c("BP", "CC", "MF")){
library(AnnotationDbi)
library(GO.db)
gene.id.type <- match.arg(tolower(gene.id.type), c("symbol","ensembl"))
db <- paste0('org.',species,'.eg')
library(package=paste0(db,'.db'), character.only = T)
ontologies <- match.arg(ontologies, c("BP","MF","CC"), several.ok=T)
allcats <- unique(unlist(lapply(paste0("GO",ontologies,"CHILDREN"), FUN=function(x){
unique(unlist(as.list(get(x))))
})))
allcats <- unique(allcats[!is.na(allcats)])
if(!is.null(categories)) allcats <- intersect(categories, allcats)
x <- AnnotationDbi::mget(allcats, get(paste0(db,"GO2ALLEGS")), ifnotfound=NA)
x <- x[which(sapply(x,FUN=function(x){ !all(is.na(x)) }))]
db2id <- paste0(db, switch(gene.id.type,
symbol="SYMBOL",
ensembl="ENSEMBL",
stop("unknown gene id type")))
x <- lapply(x, db=db2id, FUN=function(x,db){ unique(as.character(unlist(AnnotationDbi::mget(as.character(x), get(db))))) })
if(!is.null(genes)){
x <- lapply(x, y=genes, FUN=intersect)
}
x <- x[which(sapply(x, min=minSize, max=maxSize, FUN=function(x, min, max){ length(x) > min & length(x) < max }))]
if(translate.cat.ids) names(x) <- as.character(Term(names(x)))
if(returnGeneSets){
library(GSEABase)
x <- GeneSetCollection(lapply(1:length(x), FUN=function(i){
GeneSet(x[[i]], setName=names(x)[i])
}))
}
x
}
#' ORA
#'
#' A simple hypergeometric over-enrichment analysis
#'
#' @param set1 Character vector; the set to test
#' @param sets Named list containing the sets for which to test enrichment
#' @param universe A character vector containing the universe/background,
#' or a numeric or length 1 containing the size of the universe.
#' @param topN The number of top enrichments to return (default 10)
#'
#' @return A data.frame
#' @export
ORA <- function(set1, sets, universe, topN=10){
set1 <- unique(as.character(set1))
sets <- lapply(sets, FUN=as.character)
sets <- lapply(sets, FUN=unique)
if (class(universe) == "character"){
set1 <- intersect(set1, universe)
sets <- lapply(sets, y=universe, FUN=intersect)
universe <- length(unique(universe))
}
res <- t(sapply(sets, set1=set1, u=universe, FUN=function(set2, set1, u){
ov <- sum(set1 %in% set2)
p <- phyper(max(0, ov - 1), length(set1), u - length(set1),
length(set2), lower.tail=F)
expected <- length(set1)*length(set2)/u
c(n=ov, enrichment=round(ov/expected,2), pval=p)
}))
res[order(as.numeric(res[,"pval"]))[1:min(topN,nrow(res))],]
}
.getEnsemblFromConcat <- function(genes){
sapply(as.character(genes),FUN=function(x){ strsplit(x,".",fixed=T)[[1]][[1]] })
}
.annotateMarkers <- function(markers, go=NULL, topN=10, ...){
markers <- split(.getEnsemblFromConcat(markers$gene), markers$cluster)
if(is.null(go)) go <- getAllGOlists(..., genes = unique(unlist(markers)))
lapply(markers, sets=go, universe=unlist(markers), topN=topN, FUN=ORA)
}
#' annotateAll
#'
#' @param markerslist A list of markers table (one table per resolution),
#' as produced by `Seurat::FindAllMarkers`
#' @param go An optional named list of genesets. If NULL, will be fetched
#' using `getAllGOlists`.
#' @param topN The top number of terms to report (default 10)
#' @param ... Passed to `getAllGOlists` if `go=NULL`
#'
#' @return A nested list of data.frames.
#' @export
annotateAll <- function(markerslist, go=NULL, topN=10, ...){
gall <- as.character(unlist(lapply(markerslist, FUN=function(x) x$gene)))
gall <- unique(.getEnsemblFromConcat(gall))
if(is.null(go)) go <- getAllGOlists(..., genes=gall)
lapply(markerslist, go=go, topN=topN, FUN=.annotateMarkers)
}
#' prepSeuratForClustView
#'
#' @param seurat An object of class `Seurat`
#' @param markerslist A nested list of markers' tables (as produced by
#' `Seurat::FindAllMarkers()`) for each method and resolution.
#' @param res_prefixes The list of clustering prefixes to use from
#' `seurat@meta.data` (default all detected)
#' @param species Two-letter species code (corresponding to the
#' 'org.Xx.eg' package), default 'Mm'.
#' @param ontologies The GO tree to use; any combination of
#' "BP", "CC", and "MF" (default `c("BP", "CC")`)
#'
#' @return The `seurat` object, with cluster annotation in the slot
#' `seurat@misc$clusterAnnotation`
#' @export
prepSeuratForClustView <- function( seurat,
markerslist=NULL,
res_prefixes=NULL,
species="Mm",
ontologies=c("BP", "CC")
){
# we extract the different clusterings from the seurat object
cn <- colnames(seurat@meta.data)
cn <- cn[grep("res\\.[0-9\\.]+$",cn)]
cn2 <- t(sapply(cn, FUN=function(x){
x <- strsplit(x,"res.",fixed=T)[[1]]
c(paste(c(x[1:(length(x)-1)],""),collapse="res."),x[length(x)])
}))
if(is.null(res_prefixes)){
res_prefixes <- unique(cn2[,1])
message( paste("Using prefixes:",
paste(res_prefixes, collapse=", ")
)
)
}else{
if(!all(res_prefixes %in% cn2[,1])){
stop(paste("The following prefix(es) could not be found:",
paste(setdiff(res_prefixes, cn2[,1]),collapse=", ")))
}
cn2 <- cn2[which(cn2[,1] %in% res_prefixes),,drop=F]
}
# overview of the different clustering and resolutions
print(aggregate(cn2[,2], by=list(prefix=cn2[,1]), FUN=length))
if(length(res_prefixes)==1){
if(!(res_prefixes %in% names(markerslist))){
m <- list()
m[[res_prefixes]] <- markerslist
markerslist <- m
}
}
# go enrichment analysis of the markers
allg <- .getEnsemblFromConcat(as.character(unique(unlist(markerslist))))
go <- list()
for(o in ontologies){
go[[o]] <- getAllGOlists(species=species, gene.id.type='ensembl', ontologies=o, genes=allg)
}
gores <- lapply(markerslist, go=go, FUN=function(m, go){
lapply(go, m=m, FUN=function(go, m){
annotateAll(m, go=go)
})
})
seurat@misc$clusterAnnotation <- list()
for(i in names(markerslist)){
seurat@misc$clusterAnnotation[[i]] <- list(markers=markerslist[[i]], go=gores[[i]])
}
seurat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.