knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.align = "center") set.seed(629) reticulate::use_virtualenv("~/Desktop/Python/science/venv/", required = TRUE)
library(dplyr) # tidy data library(Seurat) # single cell infrastructure library(ggplot2) # plots library(cluster) # clustering tools library(magrittr) # piping library(SCISSORS) # reclustering library(ggsignif) # significance bars library(paletteer) # advanced colors library(reticulate) # Python interface library(SeuratData) # PBMC3k dataset library(SAFEclustering) # the SAFE algorithm
import anndata # annotated single cell data import numpy as np # matrix algebra import pandas as pd # DataFrames import scanpy as sc # ScanPy import giniclust3 as gc # GiniClust3
First we'll read in both our SCISSORS
-processed PBMC3k dataset as well as the same data as processed by the Satija Lab. We also load in a blank version of the object and pre-process it exactly as we did the SCISSORS
-processed object.
pbmc_SCISSORS <- readRDS("~/Desktop/Data/pbmc3k.Rds") sils_SCISSORS <- ComputeSilhouetteScores(pbmc_SCISSORS, avg = FALSE) pbmc_Seurat <- LoadData("pbmc3k") %>% subset(subset = nFeature_RNA < 2500) %>% NormalizeData(verbose = FALSE) %>% FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = FALSE) %>% ScaleData(verbose = FALSE) %>% RunPCA(verbose = FALSE) %>% FindNeighbors(dims = 1:10, verbose = FALSE) %>% FindClusters(resolution = 0.5, verbose = FALSE) %>% RunTSNE(reduction = "pca", dims = 1:10, dim.embed = 2, verbose = FALSE) pbmc_Seurat@reductions$fitsne <- pbmc_SCISSORS@reductions$tsne pbmc <- LoadData("pbmc3k") %>% subset(subset = nFeature_RNA < 2500) %>% PrepareData(n.HVG = 4000, regress.cc = FALSE, regress.mt = FALSE, n.PC = 15, which.dim.reduc = "tsne", initial.resolution = .4, random.seed = 629) pbmc@reductions$fitsne <- pbmc_SCISSORS@reductions$tsne sils_satija <- ComputeSilhouetteScores(pbmc_Seurat, avg = FALSE)
pbmc_raw <- LoadData("pbmc3k") %>% subset(subset = nFeature_RNA < 2500) system.time( mk <- MultiK(seu = pbmc_raw, seed = 312) ) pbmc_raw@reductions$fitsne <- pbmc_SCISSORS@reductions$tsne
DiagMultiKPlot(mk$k, mk$consensus)
clusters_k3 <- getClusters(pbmc_raw, optK = 3) clusters_k7 <- getClusters(pbmc_raw, optK = 7) clusters_k9 <- getClusters(pbmc_raw, optK = 9) clusters_k10 <- getClusters(pbmc_raw, optK = 10) pbmc_raw <- AddMetaData(pbmc_raw, metadata = clusters_k3$clusters, col.name = "MultiK_k3") pbmc_raw <- AddMetaData(pbmc_raw, metadata = clusters_k7$clusters, col.name = "MultiK_k7") pbmc_raw <- AddMetaData(pbmc_raw, metadata = clusters_k9$clusters, col.name = "MultiK_k9") pbmc_raw <- AddMetaData(pbmc_raw, metadata = clusters_k10$clusters, col.name = "MultiK_k10") pbmc_raw@reductions$fitsne <- pbmc_SCISSORS@reductions$tsne
DimPlot(pbmc_raw, reduction = "fitsne", group.by = "MultiK_k3", cols = paletteer_d("ggsci::category20_d3")) DimPlot(pbmc_raw, reduction = "fitsne", group.by = "MultiK_k7", cols = paletteer_d("ggsci::category20_d3")) DimPlot(pbmc_raw, reduction = "fitsne", group.by = "MultiK_k9", cols = paletteer_d("ggsci::category20_d3")) DimPlot(pbmc_raw, reduction = "fitsne", group.by = "MultiK_k10", cols = paletteer_d("ggsci::category20_d3"))
pbmc_raw <- pbmc_raw %>% NormalizeData(verbose = FALSE) %>% FindVariableFeatures(selection.method = "vst", nfeatures = 2000, verbose = FALSE) %>% ScaleData(verbose = FALSE) %>% RunPCA(verbose = FALSE) Idents(pbmc_raw) <- "MultiK_k10" multik_k10_markers <- FindAllMarkers(pbmc_raw, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 629) multik_k10_top5 <- multik_k10_markers %>% group_by(cluster) %>% slice_head(n = 5) DotPlot(pbmc_raw, features = unique(multik_k10_top5$gene)) + theme(axis.text.x = element_text(angle = 90, size = 9))
Here's what the default Satija Lab clustering looks like:
p0a <- DimPlot(pbmc_Seurat, reduction = "fitsne", group.by = "seurat_clusters", pt.size = 1.5, cols = paletteer_d("ggthemes::Classic_10")[c(1, 2, 7, 4, 3, 5, 6, 8, 9)]) + labs(x = NULL, y = NULL, title = NULL) + theme_yehlab() + theme(panel.border = element_blank(), legend.text = element_text(size = 20)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 8))) p0a
And our results:
p0b <- DimPlot(pbmc_SCISSORS, pt.size = 1.5, group.by = "seurat_clusters", reduction = "tsne", cols = paletteer_d("ggthemes::Classic_10")[c(4, 6, 2, 10, 5, 8, 9, 1, 7, 3)]) + labs(x = NULL, y = NULL, title = NULL) + theme_yehlab() + theme(panel.border = element_blank(), legend.text = element_text(size = 20)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 8))) p0b
SAFE_res <- individual_clustering(inputTags = data.frame(GetAssayData(pbmc, slot = "counts")), mt_filter = FALSE, nGene_filter = FALSE, SC3 = TRUE, gene_filter = FALSE, CIDR = FALSE, nPC.seurat = 15, Seurat = TRUE, tSNE = TRUE, low.genes = 200, SEED = 629) clust_ensembl <- SAFE(cluster_results = SAFE_res, program.dir = "~/Downloads/SAFE", HGPA = TRUE, MCLA = TRUE, CSPA = TRUE, SEED = 629) pbmc@meta.data$SAFE_clust <- as.factor(as.integer(clust_ensembl$optimal_clustering) - 1) cos_dist <- CosineDist(input = Embeddings(pbmc, "pca")) sils_safe <- silhouette(dist = cos_dist, x = as.integer(pbmc@meta.data$SAFE_clust) - 1)
Here's what the results from SAFE
look like on our t-SNE embedding:
p0c <- DimPlot(pbmc, pt.size = 1.5, group.by = "SAFE_clust", reduction = "fitsne", cols = paletteer_d("ggthemes::Classic_10")[c(4, 8, 2, 1, 5, 7, 6, 3, 9)]) + labs(x = NULL, y = NULL, title = NULL) + theme_yehlab() + theme(panel.border = element_blank(), legend.text = element_text(size = 20)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 8))) p0c
Here we'll use the GiniClust3
Python library to identify rare cell clusters.
counts <- GetAssayData(pbmc, assay = "SCT", slot = "counts") %>% as.data.frame()
We bring the cells into Python.
counts = r.counts adata = anndata.AnnData(X=counts.T) sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
Next we process them using GiniClust
and FanoClust
, and the consensus between the two.
# GiniIndexClust gc.gini.calGini(adata) adataGini = gc.gini.clusterGini(adata, neighbors=7) # FanoClust gc.fano.calFano(adata) adataFano = gc.fano.clusterFano(adata) # consensus cluster consensusCluster = {} consensusCluster['giniCluster'] = np.array(adata.obs['rare'].values.tolist()) consensusCluster['fanoCluster'] = np.array(adata.obs['fano'].values.tolist()) gc.consensus.generateMtilde(consensusCluster) gc.consensus.clusterMtilde(consensusCluster)
We bring the results back into R.
gini_clusts <- as.integer(py$consensusCluster$finalCluster) pbmc@meta.data$gini_clust <- gini_clusts sils_gini <- cluster::silhouette(dist = cos_dist, x = gini_clusts)
Here's what the GiniClust3
results look like on our t-SNE embedding:
p0d <- DimPlot(pbmc, pt.size = 1.5, group.by = "gini_clust", reduction = "fitsne", cols = c(paletteer_d("ggthemes::Classic_10")[c(1, 2, 4)], paletteer_d("ggsci::category20_d3")[-c(1:4)])) + labs(x = NULL, y = NULL, title = NULL) + theme_yehlab() + theme(panel.border = element_blank(), legend.text = element_text(size = 20)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 8))) p0d
We build a dataframe of the results that we'll pass into ggplot2
.
clust_sils <- data.frame(Method = rep(c("SCISSORS", "Seurat", "SAFE", "GiniClust3"), each = ncol(pbmc)), Silhouette = c(sils_SCISSORS$Score, sils_satija$Score, sils_safe[, 3], sils_gini[, 3])) clust_sils %<>% mutate(Method = factor(Method, levels = c("GiniClust3", "Seurat", "SAFE", "SCISSORS")))
We see that our method gives the highest median silhouette score over the Seurat
vignette's results, GiniClust3
, and SAFE
; the differences between SCISSORS
and the other methods are statistically significant
p1 <- ggplot(clust_sils, aes(x = Method, y = Silhouette, fill = Method)) + geom_violin(draw_quantiles = 0.5, color = "black", scale = "width", size = 1.2) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .12, size = 1.2, textsize = 8, comparisons = list(c("SCISSORS", "GiniClust3"), c("SCISSORS", "Seurat"), c("SCISSORS", "SAFE"))) + scale_fill_paletteer_d("ggsci::default_locuszoom") + scale_y_continuous(breaks = c(-1, -0.5, 0, 0.5, 1), limits = c(-1, 1.6)) + labs(y = "Silhouette Score", x = "Method", fill = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 24), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20)) p1
ggsave(p0a, filename = "Seurat_Clusters.pdf", device = "pdf", units = "in", path = "~/Desktop/R/SCISSORS/vignettes/figures_pub/PBMC/Method Comparison", height = 8, width = 8) ggsave(p0b, filename = "SCISSORS_Clusters.pdf", device = "pdf", units = "in", path = "~/Desktop/R/SCISSORS/vignettes/figures_pub/PBMC/Method Comparison", height = 8, width = 8) ggsave(p0c, filename = "SAFE_Clusters.pdf", device = "pdf", units = "in", path = "~/Desktop/R/SCISSORS/vignettes/figures_pub/PBMC/Method Comparison", height = 8, width = 8) ggsave(p0d, filename = "GiniClust3_Clusters.pdf", device = "pdf", units = "in", path = "~/Desktop/R/SCISSORS/vignettes/figures_pub/PBMC/Method Comparison", height = 9.5, width = 9.5) ggsave(p1, filename = "Sil_Scores.pdf", device = "pdf", units = "in", path = "~/Desktop/R/SCISSORS/vignettes/figures_pub/PBMC/Method Comparison", height = 8, width = 8)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.