library('Matrix') library('ggplot2') library('reshape2') library('sctransform') library('knitr') knit_hooks$set(optipng = hook_optipng) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", digits = 2, tidy = TRUE, tidy.opts = list(width.cutoff=80), optipng = '-o 5 -strip all -quiet', fig.width=13, fig.height=6.5, dpi=100, out.width = '95%' ) library('Seurat') library('patchwork') #old_theme <- theme_set(theme_classic(base_size=8)) # some of the vst steps can use multiple cores # We use the Future API for parallel processing; set parameters here future::plan(strategy = 'multicore', workers = 3) options(future.globals.maxSize = 8 * 1024 ^ 3) options(future.fork.enable = TRUE)
This vignette shows how to use the sctransform wrapper in Seurat.
Make sure the Seurat package is loaded
library(Seurat)
Load data and create Seurat object
pbmc_data <- Read10X(data.dir = "~/Downloads/pbmc3k_filtered_gene_bc_matrices/hg19/") pbmc <- CreateSeuratObject(counts = pbmc_data)
For reference, we first apply the standard Seurat workflow, with log-normalization
pbmc_logtransform <- pbmc pbmc_logtransform <- NormalizeData(pbmc_logtransform, verbose = FALSE) pbmc_logtransform <- FindVariableFeatures(pbmc_logtransform, verbose = FALSE) pbmc_logtransform <- ScaleData(pbmc_logtransform, verbose = FALSE) pbmc_logtransform <- RunPCA(pbmc_logtransform, verbose = FALSE) pbmc_logtransform <- RunUMAP(pbmc_logtransform,dims = 1:20, verbose = FALSE)
For comparison, we now apply sctransform normalization
# Note that this single command replaces NormalizeData, ScaleData, and FindVariableFeatures. # Transformed data will be available in the SCT assay, which is set as the default after running sctransform pbmc <- SCTransform(object = pbmc, verbose = FALSE)
Perform dimensionality reduction by PCA and UMAP embedding
# These are now standard steps in the Seurat workflow for visualization and clustering pbmc <- RunPCA(object = pbmc, verbose = FALSE) pbmc <- RunUMAP(object = pbmc, dims = 1:20, verbose = FALSE) pbmc <- FindNeighbors(object = pbmc, dims = 1:20, verbose = FALSE) pbmc <- FindClusters(object = pbmc, verbose = FALSE)
Visualize the clustering results on the sctransform and log-normalized embeddings.
pbmc_logtransform$clusterID <- Idents(pbmc) Idents(pbmc_logtransform) <- 'clusterID' plot1 <- DimPlot(object = pbmc, label = TRUE) + NoLegend() + ggtitle('sctransform') plot2 <- DimPlot(object = pbmc_logtransform, label = TRUE) plot2 <- plot2 + NoLegend() + ggtitle('Log-normalization') plot1 + plot2
Users can individually annotate clusters based on canonical markers. However, the sctransform normalization reveals sharper biological distinctions compared to the log-normalized analysis. For example, note how clusters 0, 1, 9, and 11 (all distinct clusters of T cells), are blended together in log-normalized analyses. The sctransform analysis reveals:
Visualize canonical marker genes as violin plots.
VlnPlot(object = pbmc, features = c("CD8A","GZMK","CCL5","S100A4","S100A4","CCR7","ISG15","CD3D"), pt.size = 0.2,ncol = 4)
Visualize canonical marker genes on the sctransform embedding.
FeaturePlot(object = pbmc, features = c("CD8A","GZMK","CCL5","S100A4","S100A4","CCR7"), pt.size = 0.2,ncol = 3)
FeaturePlot(object = pbmc, features = c("CD3D","ISG15","TCL1A","FCER2","XCL1","FCGR3A"), pt.size = 0.2,ncol = 3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.