all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { now <<- Sys.time() } else { res <- difftime(Sys.time(), now, units = "secs") all_times[[options$label]] <<- res } } })) knitr::opts_chunk$set( tidy = TRUE, tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, time_it = TRUE ) celltypes_fast = readRDS("./fls/celltypes_fast_citeseq.rds") celltypes = readRDS("./fls/celltypes_citeseq.rds") # pbmc = readRDS("fls/pbmcs_signac_citeseq.rds")
This vignette shows how to use Signac with Seurat. There are three parts: Seurat, Signac and then visualization. We use an example PBMCs CITE-seq data set from 10X Genomics.
Start with the standard pre-processing steps for a Seurat object.
library(Seurat)
Download data from 10X Genomics.
dir.create("fls") download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5", destfile = "fls/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
Create a Seurat object, and then perform SCTransform normalization. Note:
# load dataset E = Read10X_h5(filename = "fls/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5") pbmc <- CreateSeuratObject(counts = E$`Gene Expression`, project = "pbmc") # run sctransform pbmc <- SCTransform(pbmc) # optionally just normalize data # pbmc <- NormalizeData(pbmc) # pbmc <- FindVariableFeatures(pbmc) # pbmc <- ScaleData(pbmc)
Perform dimensionality reduction by PCA and UMAP embedding. Note:
# These are now standard steps in the Seurat workflow for visualization and clustering pbmc <- RunPCA(pbmc, verbose = FALSE) pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE) pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
Load the package
require(SignacX)
Generate SignacX labels for the Seurat object. Note:
# Run Signac labels <- Signac(pbmc, num.cores = 4) celltypes = GenerateLabels(labels, E = pbmc)
Can we make Signac faster?
Sometimes, training the neural networks takes a lot of time. To make Signac faster, we implemented SignacFast which uses an ensemble of pre-trained neural network models. Note:
# Run Signac labels_fast <- SignacFast(pbmc, num.cores = 12) celltypes_fast = GenerateLabels(labels_fast, E = pbmc)
SignacFast took only ~30 seconds. Relative to Signac, the main difference is that SignacFast tends to leave a few more cells "Unclassified."
How does SignacFast compare to Signac?
knitr::kable(table(Signac = celltypes$CellTypes, SignacFast = celltypes_fast$CellTypes), format = "html")
Now we can visualize the cell type classifications at many different levels: Immune and nonimmune
pbmc <- AddMetaData(pbmc, metadata=celltypes_fast$Immune, col.name = "immmune") pbmc <- SetIdent(pbmc, value='immmune') png(filename="fls/plot1_citeseq.png") DimPlot(pbmc) dev.off()
pbmc <- AddMetaData(pbmc, metadata=celltypes$L2, col.name = "celltypes") pbmc <- SetIdent(pbmc, value='celltypes') png(filename="fls/plot2_citeseq.png") DimPlot(pbmc) dev.off()
pbmc <- AddMetaData(pbmc, metadata=celltypes$CellTypes, col.name = "celltypes") pbmc <- SetIdent(pbmc, value='celltypes') png(filename="./fls/plot3_citeseq.png") DimPlot(pbmc) dev.off()
pbmc <- AddMetaData(pbmc, metadata=celltypes$CellTypes_novel, col.name = "celltypes_novel") pbmc <- SetIdent(pbmc, value='celltypes_novel') png(filename="./fls/plot4_citeseq.png") DimPlot(pbmc) dev.off()
pbmc <- AddMetaData(pbmc, metadata=celltypes$CellStates, col.name = "cellstates") pbmc <- SetIdent(pbmc, value='cellstates') png(filename="./fls/plot5_citeseq.png") DimPlot(pbmc) dev.off()
Identify differentially expressed genes between cell types.
pbmc <- SetIdent(pbmc, value='celltypes') # Find markers for all clusters, and draw a heatmap markers <- FindAllMarkers(pbmc, only.pos = TRUE, verbose = F, logfc.threshold = 1) library(dplyr) top5 <- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC) png(filename="./fls/plot9_citeseq.png", width = 640, height = 720) DoHeatmap(pbmc, features = unique(top5$gene), angle = 90) dev.off()
pbmc <- SetIdent(pbmc, value='cellstates') # Find markers for all clusters, and draw a heatmap markers <- FindAllMarkers(pbmc, only.pos = TRUE, verbose = F, logfc.threshold = 1) top5 <- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC) png(filename="./fls/plot6_citeseq.png", width = 640, height = 720) DoHeatmap(pbmc, features = unique(top5$gene), angle = 90) dev.off()
Add protein expression information
pbmc[["ADT"]] <- CreateAssayObject(counts = E$`Antibody Capture`[,colnames(E$`Antibody Capture`) %in% colnames(pbmc)]) pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR") pbmc <- ScaleData(pbmc, assay = "ADT")
Identify differentially expressed proteins between clusters
DefaultAssay(pbmc) <- "ADT" # Find protein markers for all clusters, and draw a heatmap adt.markers <- FindAllMarkers(pbmc, assay = "ADT", only.pos = TRUE, verbose = F) png(filename="./fls/plot7_citeseq.png", width = 640, height = 720) DoHeatmap(pbmc, features = unique(adt.markers$gene), angle = 90) dev.off()
Save results
saveRDS(pbmc, file = "fls/pbmcs_signac_citeseq.rds") saveRDS(celltypes, file = "fls/celltypes_citeseq.rds") saveRDS(celltypes_fast, file = "fls/celltypes_fast_citeseq.rds")
write.csv(x = t(as.data.frame(all_times)), file = "fls/tutorial_times_signac-Seurat_citeseq.csv")
Session Info
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.