In satijalab/seurat: Tools for Single Cell Genomics

knitr::opts_chunk$set(
  echo = TRUE,
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 120),
  message = FALSE,
  warning = FALSE,
  results = 'hold',
  eval = FALSE,
  error = TRUE
)

library(Seurat)
library(ggplot2)
library(SeuratData)

# install datasets 
InstallData('pbmc3k')
InstallData('cbmc')
InstallData('ifnb')

#load in pbmc example
pbmc <- LoadData('pbmc3k')

# load in multimodal example
# type ?cbmc for more info
cbmc <- LoadData('cbmc')

#load in case/control example
# type ?ifnb for more info
ifnb <- LoadData('ifnb')

Standard `Seurat` workflow

pbmc <- NormalizeData(object = pbmc)
pbmc <- FindVariableFeatures(object = pbmc)
pbmc <- ScaleData(object = pbmc)
pbmc <- RunPCA(object = pbmc)
pbmc <- FindNeighbors(object = pbmc, dims = 1:30)
pbmc <- FindClusters(object = pbmc)
pbmc <- RunUMAP(object = pbmc, dims = 1:30)
DimPlot(object = pbmc, reduction = 'umap')

SCtransform version

pbmc <- SCTransform(object = pbmc)
pbmc <- RunPCA(object = pbmc)
pbmc <- FindNeighbors(object = pbmc, dims = 1:30)
pbmc <- FindClusters(object = pbmc)
pbmc <- RunUMAP(object = pbmc, dims = 1:30)
DimPlot(object = pbmc, reduction = 'umap')

# note that you can chain multiple commands together with %>%
pbmc <- SCTransform(pbmc) %>% RunPCA() %>% FindNeighbors(dims=1:30) %>% 
  FindClusters() %>% RunUMAP(dims=1:30)

`Seurat` Object Data Access

Cell, feature, and layer names

# Get cell and feature names, and total numbers
# We show multiple ways to get the same output
# cell names
colnames(pbmc)
Cells(pbmc)

# feature names
Features(pbmc)
rownames(pbmc)

# number of cells/features
num_cells <- ncol(pbmc)
num_features <- nrow(pbmc)

# List of object  layers
Layers(pbmc)

# working with multimodal objects
# list assays
Assays(cbmc)

# Assay-specific features (genes/ADT)
Features(cbmc[["RNA"]])
Features(cbmc[["ADT"]])

# Variable feature names
VariableFeatures(pbmc)

# Set variable features
VariableFeatures(cbmc) <- var.gene.names

# set for a specific assay
VariableFeatures(cbmc[["ADT"]]) <- var.gene.names

Identity class labels

# Setting and retrieving cell identities

# Set identity classes to an existing column in meta data
Idents(object = pbmc) <- 'seurat_annotations'

# View cell identities, get summary table 
Idents(pbmc)
table(Idents(pbmc))

# Set identity to CD4 T cells for all cells
Idents(pbmc) <- 'CD4 T cells'

# Set for a selected group of cells
pbmc.cells <- Cells(pbmc)
Idents(object = pbmc, cells = pbmc.cells[1:10]) <- 'CD4 T cells'

# Get cell identity classes
Idents(object = pbmc)
levels(x = pbmc)

# Stash cell identity classes in metadata
pbmc[['old.ident']] <- Idents(object = pbmc)
pbmc <- StashIdent(object = pbmc, save.name = 'old.ident')

# Rename identity classes
pbmc <- RenameIdents(object = pbmc, 'CD4 T cells' = 'T Helper cells')

Cell metadata

# View metadata data frame, stored in object@meta.data
pbmc[[]]

# Retrieve specific values from the metadata
pbmc$nCount_RNA
pbmc[[c('percent.mito', 'nFeature_RNA')]]

# Add metadata, see ?AddMetaData
random_group_labels <- sample(x = c('g1', 'g2'), size = ncol(x = pbmc), replace = TRUE)
pbmc$groups <- random_group_labels

Expression data (stored as layers in Seurat v5)

# Retrieve data in an expression matrix 
# RNA counts matrix 
pbmc[["RNA"]]$counts

# Alternate accessor function with the same result
LayerData(pbmc,assay = 'RNA',layer = 'counts')

# GetAssayData from Seurat v4 is still supported
GetAssayData(object = pbmc, assay = 'RNA', slot = 'counts')

# ADT counts matrix (multimodal object)
cbmc[["ADT"]]$counts

# Set expression data  
# assume new.data is a new expression matrix
pbmc[["RNA"]]$counts <- new.data

# Alternate setter function with the same result
LayerData(pbmc,assay = 'RNA',layer = 'counts') <- new.data

# SetAssayData from Seurat v4 is still supported
pbmc <- SetAssayData(object = pbmc, slot = 'counts',new.data = new.data)

Dimensional reductions

# Get cell embeddings and feature loadings
# stored on pbmc[["pca"]]@cell.embeddings
Embeddings(pbmc, reduction = 'pca')

# stored in pbmc[["pca]]@feature.loadings
Loadings(pbmc, reduction = 'pca')

# Create custom dimensional reduction
# loadings matrix is optional
new_reduction <- CreateDimReducObject(embeddings = new.embeddings,loadings = new.loadings, key = "custom_pca")
pbmc[["custom_pca"]] <- new_reduction

FetchData

# FetchData can access anything from expression matrices, cell embeddings, or metadata
# Use the previously listed commands to access entire matrices
# Use FetchData to access individual/small groups of variables
FetchData(object = pbmc, vars = c('PC_1', 'nFeature_RNA', 'MS4A1'),layer = 'counts')

Subsetting and merging

Subset Seurat objects

# Subset Seurat object based on identity class, also see ?SubsetData
subset(x = pbmc, idents = 'B')
subset(x = pbmc, idents = c('Naive CD4 T', 'CD8 T'), invert = TRUE)

# Subset on the expression level of a gene/feature
subset(x = pbmc, subset = MS4A1 > 2.5)

# Subset on a combination of criteria
subset(x = pbmc, subset = MS4A1 > 2.5 & PC_1 > 5)
subset(x = pbmc, subset = MS4A1 > 2.5, idents = 'B')

# Subset on a value in the object meta data
subset(x = pbmc, subset = groups == "g1")

# Downsample the number of cells per identity class
subset(x = pbmc, downsample = 100)

Split layers

# In Seurat v5, users can now split in object directly into different layers
# keeps expression data in one object, but splits multiple samples into layers
# can proceed directly to integration workflow after splitting layers
ifnb[["RNA"]] <- split(ifnb[["RNA"]],f = ifnb$stim)
Layers(ifnb)

# If desired, for example after intergation, the layers can be joined together again
ifnb <- JoinLayers(ifnb)

Split objects

# In line with prior workflows, you can also into split your object into a list of multiple objects
# based on a metadata column 
# creates a list of two objects 
ifnb_list <- SplitObject(ifnb,split.by = 'stim')
ifnb_list$CTRL
ifnb_list$STIM

Merge objects (without integration)

In Seurat v5, merging creates a single object, but keeps the expression information split into different layers for integration. If not proceeding with integration, rejoin the layers after merging.

# Merge two Seurat objects
merged_obj <- merge(x = ifnb_list$CTRL, y = ifnb_list$STIM)
merged_obj[["RNA"]] <- JoinLayers(merged_obj)

# Example to merge more than two Seurat objects
merge(x = pbmc1, y = list(pbmc2, pbmc3))

Merge objects (with integration)

See introduction to integration for more information.

merged_obj <- merge(x = ifnb_list$CTRL, y = ifnb_list$STIM)
merged_obj <- NormalizeData(merged_obj)
merged_obj <- FindVariableFeatures(merged_obj)
merged_obj <- ScaleData(merged_obj)
merged_obj <- RunPCA(merged_obj)
merged_obj <- IntegrateLayers(
  object = obj, method = RPCAIntegration,
  orig.reduction = "pca", new.reduction = 'integrated.rpca',
  verbose = FALSE)

# now that integration is complete, rejoin layers
merged_obj[["RNA"]] <- JoinLayers(merged_obj)

Pseudobulk analysis

Group cells together, based on multiple categories

See DE vignette for information on how to add the donor_id column to meta data.

# pseudobulk cells only by cell type
bulk <- AggregateExpression(ifnb,group.by ='seurat_annotations', return.seurat = TRUE)
Cells(bulk)

# pseudobulk cells by stimulation condition AND cell type
bulk <- AggregateExpression(ifnb,group.by = c('stim','seurat_annotations'), return.seurat = TRUE)
Cells(bulk)

# pseudobulk cells by stimulation condition AND cell type AND donor
bulk <- AggregateExpression(ifnb,group.by = c('stim','seurat_annotations',"donor_id"), return.seurat = TRUE)
Cells(bulk)

Visualization in `Seurat`

Seurat has a vast, ggplot2-based plotting library. All plotting functions will return a ggplot2 plot by default, allowing easy customization with ggplot2.

# Dimensional reduction plot 
DimPlot(object = pbmc, reduction = 'pca')

# Dimensional reduction plot, with cells colored by a quantitative feature
# Defaults to UMAP if available
FeaturePlot(object = pbmc, features = "MS4A1")

# Scatter plot across single cells
FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "PC_1")
FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "CD3D")

# Scatter plot across individual features, repleaces CellPlot
CellScatter(object = pbmc, cell1 = "AGTCTACTAGGGTG", cell2 = "CACAGATGGTTTCT")

VariableFeaturePlot(object = pbmc)

#Violin and Ridge plots
VlnPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"))
RidgePlot(object = pbmc, feature = c("LYZ", "CCL5", "IL32"))

# Heatmaps (visualize scale.data slot)
DimHeatmap(object = pbmc,reduction = 'pca', cells = 200)

# standard workflow
pbmc <- ScaleData(pbmc,features = heatmap_markers)
DoHeatmap(object = pbmc,features = heatmap_markers)

# sctransform workflow
pbmc <- GetResidual(pbmc,features = heatmap_markers)
DoHeatmap(object = pbmc,features = heatmap_markers)

# heatmap with maximum of 100 cells per group
DoHeatmap(pbmc,heatmap_markers,cells = subset(pbmc,downsample = 100))

# New things to try! 
# Note that plotting functions now return ggplot2 objects, so you can add themes, titles, and options onto them
VlnPlot(object = pbmc, features = "MS4A1", split.by = "groups")
DotPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"), split.by = "groups")
FeaturePlot(object = pbmc, features = c("MS4A1", "CD79A"), blend = TRUE)
DimPlot(object = pbmc) + DarkTheme()
DimPlot(object = pbmc) + labs(title = '2,700 PBMCs clustered using Seurat and viewed\non a two-dimensional UMAP')

Seurat provides many prebuilt themes that can be added to ggplot2 plots for quick customization

| Theme | Function | | ----- | -------- | | DarkTheme | Set a black background with white text | | FontSize | Set font sizes for various elements of a plot | | NoAxes | Remove axes and axis text | | NoLegend | Remove all legend elements | | RestoreLegend | Restores a legend after removal | | RotatedAxis | Rotates x-axis labels |

# Plotting helper functions work with ggplot2-based scatter plots, such as DimPlot, FeaturePlot, CellScatter, and FeatureScatter
plot <- DimPlot(object = pbmc) + NoLegend()

# HoverLocator replaces the former `do.hover` argument
# It can also show extra data throught the `information` argument, designed to work smoothly with FetchData
HoverLocator(plot = plot, information = FetchData(object = pbmc, vars = c("ident", "PC_1", "nFeature_RNA")))

# FeatureLocator replaces the former `do.identify`
select.cells <- FeatureLocator(plot = plot)

# Label points on a ggplot object
LabelPoints(plot = plot, points = TopCells(object = pbmc[["pca"]]), repel = TRUE)

Multi-Assay Features

With Seurat, you can easily switch between different assays at the single cell level (such as ADT counts from CITE-seq, or integrated/batch-corrected data). Most functions now take an assay parameter, but you can set a Default Assay to avoid repetitive statements.

cbmc <- CreateSeuratObject(counts = cbmc.rna)
# Add ADT data
cbmc[['ADT']] <- CreateAssayObject(counts = cbmc.adt)
# Run analyses by specifying the assay to use
NormalizeData(object = cbmc, assay = 'RNA')
NormalizeData(object = cbmc, assay = 'ADT', method = 'CLR')

# Retrieve and set the default assay
DefaultAssay(object = cbmc)
DefaultAssay(object = cbmc) <- 'ADT'
DefaultAssay(object = cbmc)

# Pull feature expression from both assays by using keys
FetchData(object = cbmc, vars = c('rna_CD3E', 'adt_CD3'))

# Plot data from multiple assays using keys
FeatureScatter(object = cbmc, feature1 = "rna_CD3E", feature2 = "adt_CD3")

Additional resources

Users who are particularly interested in some of the technical changes to data storage in Seurat v5 can explore the following resources:

Session Info

sessionInfo()

satijalab/seurat documentation built on April 11, 2025, 4:32 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

satijalab/seurat
Tools for Single Cell Genomics

In satijalab/seurat: Tools for Single Cell Genomics

Standard `Seurat` workflow

SCtransform version

`Seurat` Object Data Access

Cell, feature, and layer names

Identity class labels

Cell metadata

Expression data (stored as layers in Seurat v5)

Dimensional reductions

FetchData

Subsetting and merging

Subset Seurat objects

Split layers

Split objects

Merge objects (without integration)

Merge objects (with integration)

Pseudobulk analysis

Group cells together, based on multiple categories

Visualization in `Seurat`

Multi-Assay Features

Additional resources

R Package Documentation

Browse R Packages

We want your feedback!

satijalab/seurat Tools for Single Cell Genomics

In satijalab/seurat: Tools for Single Cell Genomics

Standard Seurat workflow

SCtransform version

Seurat Object Data Access

Cell, feature, and layer names

Identity class labels

Cell metadata

Expression data (stored as layers in Seurat v5)

Dimensional reductions

FetchData

Subsetting and merging

Subset Seurat objects

Split layers

Split objects

Merge objects (without integration)

Merge objects (with integration)

Pseudobulk analysis

Group cells together, based on multiple categories

Visualization in Seurat

Multi-Assay Features

Additional resources

R Package Documentation

Browse R Packages

We want your feedback!

satijalab/seurat
Tools for Single Cell Genomics

Standard `Seurat` workflow

`Seurat` Object Data Access

Visualization in `Seurat`