library('Matrix')
library('ggplot2')
library('reshape2')
library('sctransform')
library('knitr')
knit_hooks$set(optipng = hook_optipng)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  digits = 2,
  tidy = TRUE,
  tidy.opts = list(width.cutoff=80),
  optipng = '-o 5 -strip all -quiet',
  fig.width=13, fig.height=6.5, dpi=100, out.width = '95%'
)
library('Seurat')
library('patchwork')
#old_theme <- theme_set(theme_classic(base_size=8))
# some of the vst steps can use multiple cores
# We use the Future API for parallel processing; set parameters here
future::plan(strategy = 'multicore', workers = 3)
options(future.globals.maxSize = 8 * 1024 ^ 3)
options(future.fork.enable = TRUE)

This vignette shows how to use the sctransform wrapper in Seurat.

Make sure the Seurat package is loaded

library(Seurat)

Load data and create Seurat object

pbmc_data <- Read10X(data.dir = "~/Downloads/pbmc3k_filtered_gene_bc_matrices/hg19/")
pbmc <- CreateSeuratObject(counts = pbmc_data)

For reference, we first apply the standard Seurat workflow, with log-normalization

pbmc_logtransform <- pbmc
pbmc_logtransform <- NormalizeData(pbmc_logtransform, verbose = FALSE)
pbmc_logtransform <- FindVariableFeatures(pbmc_logtransform, verbose = FALSE) 
pbmc_logtransform <- ScaleData(pbmc_logtransform, verbose = FALSE) 
pbmc_logtransform <- RunPCA(pbmc_logtransform, verbose = FALSE) 
pbmc_logtransform <- RunUMAP(pbmc_logtransform,dims = 1:20, verbose = FALSE)

For comparison, we now apply sctransform normalization

# Note that this single command replaces NormalizeData, ScaleData, and FindVariableFeatures.
# Transformed data will be available in the SCT assay, which is set as the default after running sctransform
pbmc <- SCTransform(object = pbmc, verbose = FALSE)

Perform dimensionality reduction by PCA and UMAP embedding

# These are now standard steps in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(object = pbmc, verbose = FALSE)
pbmc <- RunUMAP(object = pbmc, dims = 1:20, verbose = FALSE)
pbmc <- FindNeighbors(object = pbmc, dims = 1:20, verbose = FALSE)
pbmc <- FindClusters(object = pbmc, verbose = FALSE)

Visualize the clustering results on the sctransform and log-normalized embeddings.

pbmc_logtransform$clusterID <- Idents(pbmc)
Idents(pbmc_logtransform) <- 'clusterID'
plot1 <- DimPlot(object = pbmc, label = TRUE) + NoLegend() + ggtitle('sctransform') 
plot2 <- DimPlot(object = pbmc_logtransform, label = TRUE)
plot2 <- plot2 + NoLegend() + ggtitle('Log-normalization') 
plot1 + plot2

Users can individually annotate clusters based on canonical markers. However, the sctransform normalization reveals sharper biological distinctions compared to the log-normalized analysis. For example, note how clusters 0, 1, 9, and 11 (all distinct clusters of T cells), are blended together in log-normalized analyses. The sctransform analysis reveals:

Visualize canonical marker genes as violin plots.

VlnPlot(object = pbmc, features = c("CD8A","GZMK","CCL5","S100A4","S100A4","CCR7","ISG15","CD3D"), pt.size = 0.2,ncol = 4)

Visualize canonical marker genes on the sctransform embedding.

FeaturePlot(object = pbmc, features = c("CD8A","GZMK","CCL5","S100A4","S100A4","CCR7"), pt.size = 0.2,ncol = 3)
FeaturePlot(object = pbmc, features = c("CD3D","ISG15","TCL1A","FCER2","XCL1","FCGR3A"), pt.size = 0.2,ncol = 3)


ChristophH/sctransform documentation built on Oct. 22, 2023, 3:28 p.m.