options(width = 150) knitr::opts_chunk$set( collapse = TRUE, tidy = FALSE, message = FALSE, warning = FALSE )
For this tutorial, we will be analyzing the Visium Sagittal-Posterior Mouse Brain data produced by 10x Genomics.
A fraction of this data containing 3353 spatial barcodes (spots) and 2000 highly variable genes can be loaded using the LoadData
function.
We will be using Seurat for this analysis.
# install Seurat v4.0.0 if (!requireNamespace("Seurat", quietly = TRUE) | utils::packageVersion("Seurat") < "4.0.0") remotes::install_version("Seurat", version = "4.0.0") # load data library(spots) library(Seurat) library(ggplot2) posterior1 <- LoadData("~/Downloads/","Visium.Brain") SpatialDimPlot(posterior1, pt.size.factor = 1.2, group.by = "region")
To run Spatial Component Analysis (SCA), we will need to define a weight matrix between each pair of datapoints (spots).
To achieve this, we first calculate the distance between each spots using hexagonal nearest neighbor distance (VisiumHnn
and HnnNeighbor
functions) as it resembles the way Visium slides were generated.
Typically, datapoints not located on the edges should have 6 1st-degree, 12 2nd-degree, 18 3rd-degree neighbors and so on. These specific parameters can be controlled using HnnNeighbor
function.
To turn this distance matrix to a weight matrix, we call HnnWeight
function to apply a Gaussian filter.
I'm using parameter dist.k = 2
to consider only the 2nd-degree neighbors and sigma = 2
for filter width.
# HNN distance posterior1.hnn.dist <- VisiumHnn("~/Downloads/", Cells(posterior1)) # HNN weights posterior1.data.hnn <- HnnNeighbor(posterior1.hnn.dist, k = 19, include.self = FALSE) posterior1.data.weight <- HnnWeight(posterior1.data.hnn$dist.mat, dist.k = 2, sigma = 2)
After this, we are ready to run SCA using the 2000 spatial transcriptomic measurements and the weight matrix we just calculated.
Here I set the n.eigen = 30
to return only the first 30 Spatial Components (SCs), and plot four different visualizations:
# Run SCA posterior1 <- ScaleData(posterior1) posterior1.sca <- SCA(X = Matrix::t(posterior1@assays$Spatial@data), W = posterior1.data.weight, scaled.data = t(posterior1@assays$Spatial@scale.data), n.eigen = 30) # Store in the Seurat object posterior1@reductions[["sca"]] <- CreateDimReducObject(embeddings = posterior1.sca$X, loadings = posterior1.sca$rotation, stdev = posterior1.sca$eigenvalues, key = "SC_", assay = "Spatial") # Visualization p1 <- DimPlot(posterior1, reduction = "sca", group.by = "region") p2 <- ElbowPlot(posterior1, reduction = "sca", ndims = 30) p3 <- SpatialFeaturePlot(posterior1, features = 'SC_1', pt.size.factor = 1.2) p4 <- DimHeatmap(posterior1, reduction = "sca", fast = FALSE) (p1+p2)/(p3+p4)
Once finished, we can use the Spatial Components (SCs) for downstream analysis such as clustering.
Here we utilized Seurat's FindClusters
function to identify clusters of spots using the first 20 SCs we just calculated.
posterior1 <- FindNeighbors(posterior1, reduction = "sca", dims = 1:20,graph.name = c("sca.nn", "sca.snn")) posterior1 <- FindClusters(posterior1, resolution = 1, verbose = FALSE, graph.name = "sca.snn") SpatialDimPlot(posterior1, label = TRUE, label.size = 7, repel = TRUE, pt.size.factor = 1.2)
We can compare the clustering result based on SCA to PCA. Here I used the same clustering parameters but with the first 20 PCs instead of SCs.
posterior1 <- RunPCA(posterior1, npcs = 30, verbose = FALSE) posterior1 <- FindNeighbors(posterior1, reduction = "pca", dims = 1:20, graph.name = c("pca.nn", "pca.snn")) posterior1 <- FindClusters(posterior1, resolution = 1, verbose = FALSE, graph.name = "pca.snn") p1 <- SpatialDimPlot(posterior1, label = TRUE, label.size = 5, group.by = "sca.snn_res.1", repel = TRUE, pt.size.factor = 1.2) p2 <- SpatialDimPlot(posterior1, label = TRUE, label.size = 5, group.by = "pca.snn_res.1", repel = TRUE, pt.size.factor = 1.2) p1+p2
There are 19 clusters (0-18) and 17 clusters (0-16) that were identified by SCA and PCA respectively.
We can visualize the differences between the two clustering results using Chi-square test and interpret the result with Chi-square residuals.
# Chi-square test tab <- xtabs(data = posterior1@meta.data[,c("sca.snn_res.1","pca.snn_res.1")]) chi.result <- chisq.test(tab) chi.resid <- unclass(chi.result$residuals) # Data for visualization data.plot <- Seurat:::Melt(chi.resid) colnames(data.plot)[1:3] <- c("SCA clustering", "PCA clustering", "Chi-square Residual") # Reorder the columns data.plot[,1] <- factor(data.plot[,1], levels = order(apply(chi.resid, 1, which.max))[c(1:10,12:11,13:19)]-1, ordered = TRUE) data.plot[,2] <- factor(data.plot[,2], levels = unique(as.numeric(data.plot[,2])), ordered = TRUE) # Heatmap ggplot(data = data.plot, aes(x=`PCA clustering`, y=`SCA clustering`, fill=`Chi-square Residual`)) + geom_tile() + cowplot::theme_cowplot(font_size = 20) + labs(fill="Chi-square\nResidual")
For this data, the clustering results based on SCA and PCA in general agree with each other. However, as we noted PCA cluster 4, 9 and 10 were split by the SCA each into two distinct groups.
Here, we will go through one of them in detail to check whether SCA made a false discovery or PCA underestimated certain distinct clusters.
PCA Cluster 4 vs SCA Cluster 10 and 14
cluster4 <- subset(posterior1, pca.snn_res.1 == 4 & sca.snn_res.1 %in% c(10, 14)) cluster4$sca.snn_res.1 <- factor(cluster4$sca.snn_res.1) cluster4$pca.snn_res.1 <- factor(cluster4$pca.snn_res.1) p1 <- SpatialDimPlot(cluster4, group.by = "pca.snn_res.1", pt.size.factor = 3, label = TRUE, label.size = 5) + ggtitle("PCA Clustering") + NoLegend() + theme(plot.title = element_text(size = 15, face = 2, hjust = 0.5)) p2 <- SpatialDimPlot(cluster4, group.by = "sca.snn_res.1", pt.size.factor = 3, label = TRUE, label.size = 5) + ggtitle("SCA Clustering") + NoLegend() + theme(plot.title = element_text(size = 15, face = 2, hjust = 0.5)) p1 + p2
We can examine the differentially expressed genes between cluster 10 and 14 from SCA clustering. We see that these are heterogeneous populations marked by different gene expressions.
de.genes <- FindMarkers(cluster4, 14, 10, group.by = "sca.snn_res.1", verbose = FALSE) de.genes <- de.genes[de.genes$p_val_adj < 0.05, ] de.genes <- de.genes[order(de.genes$avg_log2FC),] cluster4 <- ScaleData(cluster4, features = rownames(de.genes)) p1 <- DoHeatmap(cluster4, rownames(de.genes), group.by = "sca.snn_res.1") p2 <- SpatialFeaturePlot(cluster4, c(head(rownames(de.genes),2), tail(rownames(de.genes),2)), pt.size.factor = 3) p1 + p2
print(sessionInfo())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.