#' Annotate individual cells with SingleR
#'
#' Resolves the requested reference dataset and runs `SingleR::SingleR()` on
#' the supplied expression matrix, using one label column of the reference.
#'
#' @param data matrix of normalized expression values
#' @param reference Either a single character string naming one of the
#'   built-in celldex references ("MonacoImmuneData" (default, covers most
#'   populations in PBMC), "NovershternHematopoieticData",
#'   "DatabaseImmuneCellExpressionData", "BlueprintEncodeData" or
#'   "HumanPrimaryCellAtlasData"), or a custom reference object (e.g. a
#'   SummarizedExperiment), which is passed on to SingleR unchanged.
#' @param labels_column name of the labels column in the reference dataset to
#'   be used ("label.main" or "label.fine" if using a celldex reference)
#' @param ... Passed on to SingleR::SingleR
#'
#' @return The annotation object returned by `SingleR::SingleR()`, holding the
#'   labels for each cell
#' @export
annotate_SingleR <- function(data, reference = "MonacoImmuneData", labels_column = "label.fine", ...) {
  # BiocManager::install(c("celldex", "SingleR", "scRNAseq"))
  ## SingleR manual: https://bioconductor.org/packages/devel/bioc/vignettes/SingleR/inst/doc/SingleR.html
  #### Bulk RNA-seq references:
  ## CHOOSING REFERENCE: https://bioconductor.org/packages/3.12/data/experiment/vignettes/celldex/inst/doc/userguide.html
  if (is.character(reference)) {
    # switch() needs exactly one non-missing string to dispatch on.
    if (length(reference) != 1L || is.na(reference)) {
      stop("`reference` must be a single reference name or a reference object.",
           call. = FALSE)
    }
    # Only the matching branch is evaluated, so just one dataset is fetched.
    ref <- switch(reference,
      # The Monaco reference consists of bulk RNA-seq samples of sorted immune
      # cell populations from GSE107011 (Monaco et al. 2019). This is the human
      # immune reference that best covers all of the bases for a typical PBMC
      # sample. It provides expansive B and T cell subsets, differentiates
      # between classical and non-classical monocytes, includes basic dendritic
      # cell subsets, and also includes neutrophil and basophil samples to help
      # identify small contaminating populations that may have slipped into a
      # PBMC preparation.
      "MonacoImmuneData" = celldex::MonacoImmuneData(),
      # The Novershtern reference (previously known as Differentiation Map)
      # consists of microarray datasets for sorted hematopoietic cell
      # populations from GSE24759 (Novershtern et al. 2011). This reference
      # provides the greatest resolution for myeloid and progenitor cells among
      # the human immune references. It has fewer T cell subsets than the other
      # immune references but contains many more NK, erythroid, and
      # granulocytic subsets. It is likely the best option for bone marrow
      # samples.
      "NovershternHematopoieticData" = celldex::NovershternHematopoieticData(),
      # The DICE reference consists of bulk RNA-seq samples of sorted cell
      # populations from the project of the same name (Schmiedel et al. 2018).
      # This reference is particularly useful to those interested in CD4+ T
      # cell subsets, though the lack of CD4+ central memory and effector
      # memory samples may decrease accuracy in some cases. In addition, the
      # lack of dendritic cells and a single B cell subset may result in those
      # populations being improperly labeled or having their label pruned in a
      # typical PBMC sample.
      "DatabaseImmuneCellExpressionData" = celldex::DatabaseImmuneCellExpressionData(),
      # The Blueprint/ENCODE reference consists of bulk RNA-seq data for pure
      # stroma and immune cells generated by Blueprint (Martens and Stunnenberg
      # 2013) and ENCODE projects (The ENCODE Project Consortium 2012). This
      # reference is best suited to mixed samples that do not require fine
      # resolution, and is particularly suited for situations where easily
      # interpretable labels are required quickly. It provides decent immune
      # cell granularity, though it does not contain finer monocyte and
      # dendritic cell subtypes.
      "BlueprintEncodeData" = celldex::BlueprintEncodeData(),
      # The HPCA reference consists of publicly available microarray datasets
      # derived from human primary cells (Mabbott et al. 2013). Most of the
      # labels refer to blood subpopulations but cell types from other tissues
      # are also available. This reference also contains many cells and cell
      # lines that have been treated or collected from pathogenic conditions.
      "HumanPrimaryCellAtlasData" = celldex::HumanPrimaryCellAtlasData(),
      # Default branch: fail loudly instead of silently continuing with NULL.
      stop(paste0(reference, " is not a supported reference."), call. = FALSE)
    )
  } else {
    # Anything that is not a name is treated as a ready-made reference object.
    ref <- reference
  }
  #### scRNA-seq references - NOT IMPLEMENTED YET
  # library(scRNAseq)
  ## CHOOSING REFERENCE: https://bioconductor.org/packages/3.12/data/experiment/vignettes/scRNAseq/inst/doc/scRNAseq.html#references
  # ref <- scRNAseq::StoeckiusHashingData(mode='human')
  # ref <- scRNAseq::KotliarovPBMCData()
  # ref <- scRNAseq::MairPBMCData()
  SingleR::SingleR(test = data, ref = ref, assay.type.test = 1,
                   labels = ref[[labels_column]], ...)
}
#' Run annotate_SingleR on a Seurat object
#'
#' Pulls an expression matrix out of the object with `Seurat::GetAssayData()`,
#' annotates it with `annotate_SingleR()` and stores one column of the result
#' as a metadata column of the object.
#'
#' @param object Seurat object
#' @param assay name of the assay passed to `Seurat::GetAssayData()`
#' @param slot name of the slot passed to `Seurat::GetAssayData()`
#' @param metadata_column name of the metadata column the labels are written to
#' @param annotation_column column of the SingleR result used as the label
#' @param ... Passed on to `annotate_SingleR()` (e.g. `reference`,
#'   `labels_column`, and further arguments for SingleR::SingleR)
#'
#' @return Seurat object with the labels added as metadata; cells whose label
#'   is `NA` are recorded as "Unknown"
#' @importFrom Seurat AddMetaData GetAssayData
#' @export
seurat_annotate_SingleR <- function(object, assay = "RNA", slot = "data", metadata_column = "SingleR", annotation_column = "pruned.labels", ...) {
  data <- Seurat::GetAssayData(object, assay = assay, slot = slot)
  annotation <- annotate_SingleR(data = data, ...)

  # Give a readable error when the requested column is absent from the
  # annotation result, instead of an opaque subscript failure. Non-character
  # selectors (e.g. a column index) are left to the subsetting below.
  if (is.character(annotation_column) &&
      !all(annotation_column %in% colnames(annotation))) {
    stop(
      paste0(
        "Column '", paste(annotation_column, collapse = "', '"),
        "' not found in the SingleR annotation. Available columns: ",
        paste(colnames(annotation), collapse = ", ")
      ),
      call. = FALSE
    )
  }

  ann <- annotation[, annotation_column]
  # Cells without a label are reported as "Unknown" rather than NA.
  ann[is.na(ann)] <- "Unknown"
  Seurat::AddMetaData(object, metadata = ann, col.name = metadata_column)
}
#' Make hierarchical named groups from SingleR annotation
#'
#' Only "MonacoImmuneData" is supported at the moment; any other reference
#' raises an error.
#'
#' @param group label.fine annotation from SingleR
#' @param reference single character string naming the reference dataset the
#'   annotation was made with (default "MonacoImmuneData")
#'
#' @return list of hierarchical cell type assignments, as produced by
#'   `annotate_hierarchy_MonacoImmuneData()`
#' @export
annotate_hierarchy <- function(group, reference = "MonacoImmuneData") {
  # Dispatch is by name only, so anything other than one string (for example a
  # custom reference object) cannot be mapped to a hierarchy.
  if (!is.character(reference) || length(reference) != 1L || is.na(reference)) {
    stop("`reference` must be a single reference name (character string).",
         call. = FALSE)
  }
  if (reference != "MonacoImmuneData") {
    stop(paste0(reference, " is not a supported reference."), call. = FALSE)
  }
  annotate_hierarchy_MonacoImmuneData(group)
}
#' Make hierarchical named groups from MonacoImmuneData annotation
#'
#' A somewhat hacky, pattern-based approach, primarily for internal use. Each
#' fine label is mapped to three nested levels: a lineage code (e.g. "T", "B",
#' "MO"), the lineage plus a subset (e.g. "T.CD4"), and the lineage plus
#' subset plus subtype (e.g. "T.CD4.Treg"). Levels that do not apply to a
#' label are left out of the joined name. Labels that match none of the
#' patterns (e.g. "Unknown") are passed through unchanged.
#'
#' @param group MonacoImmuneData$label.fine annotation (character vector or
#'   factor), one entry per cell
#'
#' @return unnamed list of three character vectors, each the same length as
#'   `group`: the level-1, level-2 and level-3 assignments. Granulocytes
#'   (neutrophils/basophils) are "Gr" at level 1 and "Unknown" at levels 2
#'   and 3. `NA` labels stay `NA` at every level.
#' @export
annotate_hierarchy_MonacoImmuneData <- function(group) {
  # Work on plain strings: assigning new labels into a factor would turn every
  # value that is not an existing level into NA.
  if (is.factor(group)) {
    group_names <- names(group)
    group <- as.character(group)
    names(group) <- group_names
  }
  n <- length(group)

  ## Level 1: lineage.
  # Patterns are applied in sequence to the already-relabelled vector, so the
  # first matching lineage wins.
  celltype <- group
  celltype[grepl("(T cells)|(Th[0-9]+)|(T regulatory)|(MAIT)", celltype)] <- "T"
  celltype[grepl("(B cells)|(Plasmablasts)", celltype)] <- "B"
  celltype[grepl("(monocytes)", celltype)] <- "MO"
  celltype[grepl("(neutrophils)|(basophils)", celltype)] <- "Gr"
  celltype[grepl("(Natural killer cells)", celltype)] <- "NK"
  celltype[grepl("([dD]endritic cells)", celltype)] <- "DC"
  celltype[grepl("(Progenitor)", celltype)] <- "SC"

  ## Level 2: T cell subset.
  # Preallocate to full length. Starting from a length-1 NA and growing by
  # index leaves the vector too short whenever the trailing cells do not
  # match, and paste() then silently recycles it into wrong labels.
  celltype.2 <- rep(NA_character_, n)
  celltype.2[grepl("(CD8)", group)] <- "CD8"
  celltype.2[grepl("(CD4)|(Th[0-9]+)|(Follicular)|(regulatory)", group)] <- "CD4"
  celltype.2[grepl("(gd T)", group)] <- "gdT"
  celltype.2[grepl("(MAIT)", group)] <- "MAIT"

  ## Level 3: subtype within the lineage.
  celltype.3 <- rep(NA_character_, n)

  # T cells: strip the parts already captured by levels 1 and 2, then
  # abbreviate what is left.
  is_t <- celltype %in% "T"
  t_sub <- gsub("(.*) (CD[48])| (gd) T cells", "\\1", group[is_t])
  t_sub <- gsub("(.*) T cells", "\\1", t_sub)
  t_sub <- gsub(" cells", "", t_sub)
  t_sub <- gsub("T regulatory", "Treg", t_sub)
  t_sub <- gsub("Central memory", "CM", t_sub)
  t_sub <- gsub("Effector memory", "EM", t_sub)
  t_sub <- gsub("Follicular helper", "Tfh", t_sub)
  t_sub <- gsub("Terminal effector", "Eff", t_sub)
  # MAIT is already fully described at level 2, so it has no level-3 name.
  t_sub[grepl("MAIT", t_sub)] <- NA_character_
  celltype.3[is_t] <- t_sub

  # B cells: keep the state keyword, then abbreviate the two memory subsets.
  is_b <- celltype %in% "B"
  celltype.3[is_b] <- gsub("(.*)(memory|Naive|Exhausted) B cells", "\\2", group[is_b])
  celltype.3[grepl("Non-switched memory B cells", group)] <- "NSM"
  celltype.3[grepl("Switched memory B cells", group)] <- "SM"

  celltype.3[grepl("(neutrophils)", group)] <- "Neutrophil"
  celltype.3[grepl("(basophils)", group)] <- "Basophil"

  is_mo <- celltype %in% "MO"
  celltype.3[is_mo] <- gsub("(.*) monocytes", "\\1", group[is_mo])
  is_dc <- celltype %in% "DC"
  celltype.3[is_dc] <- gsub("(.*) dendritic cells", "\\1", group[is_dc])

  ## Join the levels.
  # paste() renders a missing level as the text "NA"; those components are
  # removed again. fixed = TRUE makes the dot literal rather than a wildcard.
  is_gr <- celltype %in% "Gr"
  is_missing <- is.na(celltype)

  cell.3 <- paste(celltype, celltype.2, celltype.3, sep = ".")
  cell.3 <- gsub(".NA", "", cell.3, fixed = TRUE)
  cell.3[is_gr] <- "Unknown"
  cell.3[is_missing] <- NA_character_

  cell.2 <- paste(celltype, celltype.2, sep = ".")
  cell.2 <- gsub(".NA", "", cell.2, fixed = TRUE)
  cell.2[is_gr] <- "Unknown"
  cell.2[is_missing] <- NA_character_

  list(celltype, cell.2, cell.3)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.