knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "figures/", fig.width = 6, fig.height = 6, eval = TRUE )
Installing SuperCell package from gitHub
if (!requireNamespace("remotes")) install.packages("remotes") remotes::install_github("GfellerLab/SuperCell")
library(SuperCell)
Data available at authors' GitHub under file name sincell_with_class_5cl.Rdata.
data(cell_lines) # list with GE - gene expression matrix (logcounts), meta - cell meta data GE <- cell_lines$GE dim(GE) # genes as rows and cells as columns cell.meta <- cell_lines$meta
(i.e., 20
times less metacells (called 'supercells' in the package functions) than single cells) by first building a kNN ($k=5$) network using top $n.var.genes=1000$ most variable genes for dimentionality reduction. Function SCimplify()
computes the partition into metacells, this information is available with the field membership
.
gamma <- 20 # graining level k.knn <- 5 # Building metacells from gene expressio (GE) SC <- SCimplify(GE, # gene expression matrix k.knn = k.knn, # number of nearest neighbors to build kNN network gamma = gamma, # graining level n.var.genes = 1000 # number of the top variable genes to use for dimentionality reduction ) # Alternative, metacells can be build from low-dimensional embedding. # For this, first compute low-dim embedding and pass it into `SCimplify_from_embedding()` if(0){ SC_alt <- SCimplify_from_embedding( X = stats::prcomp(Matrix::t(GE[SC$genes.use,]), rank. = 10)$x, # PCA embedding k.knn = k.knn, # number of nearest neighbors to build kNN network gamma = gamma # graining level) ) } # plot network of metacells supercell_plot(SC$graph.supercells, # network color.use = "gray", # color of the nodes main = paste("Metacell network, gamma =", gamma), seed = 1) # plot single-cell network supercell_plot(SC$graph.singlecell, # network group = cell.meta, # colored by cell line assignment do.frames = F, # not drawing frames around each node main = paste("Single-cell network, N =", dim(GE)[2]), lay.method = "components") # method to compute the network 2D embedding
To get a gene expression of metacells, we need to average gene expressions within each metacell with function supercell_GE()
SC.GE <- supercell_GE(GE, SC$membership) dim(SC.GE)
We now assign each metcell to a particular cell line based on the cell line data, for this, we use function supercell_assign()
.
By default, this function assign each metacell to a cluster with the largest Jaccard coefficient to avoid biases towards very rare or very abundant clusters. Alternatively, assigmnent can be performed using relative (may cause biase towards very small populations) or absolute (may cause bias towards large populations) abundance with method = "relative"
or method = "absolute"
, respectively.
SC$cell_line <- supercell_assign(clusters = cell.meta, # single-cell assigment to cell lines (clusters) supercell_membership = SC$membership, # single-cell assignment to metacells method = "jaccard") seed <- 1 # seed for network plotting # plot network of metacells colored by cell line assignment supercell_plot(SC$graph.supercells, group = SC$cell_line, seed = seed, main = "Metacells colored by cell line assignment")
The quality of assignment can be evaluated with metacell purity (function supercell_purity()
) that returns the proportion of the most abundant cell type (in this case, cell line) in each metacell.
# compute purity of metacells in terms of cell line composition purity <- supercell_purity(clusters = cell.meta, supercell_membership = SC$membership, method = 'max_proportion') hist(purity, main = "Purity of metacells \nin terms of cell line composition") SC$purity <- purity
Some options to plot networks of metacells
## rotate network to be more consistent with the single-cell one supercell_plot(SC$graph.supercells, group = SC$cell_line, seed = seed, alpha = -pi/2, main = "Metacells colored by cell line assignment (rotated)") ## alternatively, any layout can be provided as 2xN numerical matrix, where N is number of nodes (cells) ## Let's plot metacell network using the layout of the single-cell network: ## 1) get single-cell network layout my.lay.sc <- igraph::layout_components(SC$graph.singlecell) ## 2) compute metacell network layout averaging coordinates withing metacells my.lay.SC <- Matrix::t(supercell_GE(ge = t(my.lay.sc), groups = SC$membership)) ## 3) provide layout with the parameter $lay$ supercell_plot(SC$graph.supercells, group = SC$cell_line, lay = my.lay.SC, main = "Metacells colored by cell line assignment (averaged coordinates)")
#dimensionality reduction SC.PCA <- supercell_prcomp(Matrix::t(SC.GE), # metacell gene expression matrix genes.use = SC$genes.use, # genes used for the coarse-graining, but any set can be provided supercell_size = SC$supercell_size, # sample-weighted pca k = 20) ## compute distance D <- dist(SC.PCA$x) ## cluster metacells SC.clusters <- supercell_cluster(D = D, k = 5, supercell_size = SC$supercell_size) SC$clustering <- SC.clusters$clustering
## mapping metacell cluster to cell line map.cluster.to.cell.line <- supercell_assign(supercell_membership = SC$clustering, clusters = SC$cell_line) ## clustering as cell line SC$clustering_reordered <- map.cluster.to.cell.line[SC$clustering] supercell_plot(SC$graph.supercells, group = SC$clustering_reordered, seed = seed, alpha = -pi/2, main = "Metacells colored by cluster")
markers.all.positive <- supercell_FindAllMarkers(ge = SC.GE, # metacell gene expression matrix supercell_size = SC$supercell_size, # size of metacell for sample-weighted method clusters = SC$clustering_reordered, # clustering logfc.threshold = 1, # mininum log fold-change only.pos = T) # keep only upregulated genes markers.all.positive$H2228[1:20,]
genes.to.plot <- c("DHRS2", "MT1P1", "TFF1", "G6PD", "CD74", "CXCL8") supercell_VlnPlot(ge = SC.GE, supercell_size = SC$supercell_size, clusters = SC$clustering_reordered, features = genes.to.plot, idents = c("H1975", "H2228", "A549"), ncol = 3) supercell_GeneGenePlot(ge = SC.GE, gene_x = genes.to.plot[1:3], gene_y = genes.to.plot[4:6], supercell_size = SC$supercell_size, clusters = SC$clustering_reordered,)
supercell_rescale()
functionSC10 <- supercell_rescale(SC, gamma = 10) SC10$cell_line <- supercell_assign(clusters = cell.meta, # single-cell assigment to cell lines (clusters) supercell_membership = SC10$membership, # single-cell assignment to metacells method = "jaccard") supercell_plot(SC10$graph.supercells, group = SC10$cell_line, seed = 1, main = "Metacells at gamma = 10 colored by cell line assignment") ### don't forget to recompute metacell gene expression matrix for a new grainig level with # GE10 <- supercell_GE(GE, SC10$membership) ### if you are going to perform downstream analyses at the new graining level
In case you want to perform other analyses available with Seurat package, we can convert SuperCell to Seurat object with function supercell_2_Seurat()
or to SingleCellExperiment object with function 'supercell_2_sce()'. Let consider a Seurat example.
#install.packages("Seurat") library(Seurat) m.seurat <- supercell_2_Seurat(SC.GE = as.matrix(SC.GE), SC = SC, fields = c("cell_line", "clustering","clustering_reordered"))
Note: since metacells have different size (consist of different number of cells), we apply sample-weighted algorithms at most af the steps of the downstream analyses. Thus, when coercing SuperCell to Seurat, we replaced PCA, saling and kNN graph of Seurat object with those obtained applying sample-weighted version of PCA, scaling or SuperCell graph (i.e., metacell network), respectively. If you then again apply RunPCA
, ScaleData
, or FindNeighbors
, the result will be rewritten, but you will be able to access them with Embeddings(m.seurat, reduction = "pca_weigted")
, m.seurat@assays$RNA@misc[["scale.data.weighted"]]
, or m.seurat@graphs$RNA_super_cells
, respectively.
PCAPlot(m.seurat) ### cluster SuperCell network (unweighted clustering) m.seurat <- FindClusters(m.seurat, graph.name = "RNA_nn") # now RNA_nn is metacell network m.seurat <- FindNeighbors(m.seurat, verbose = FALSE) # RNA_nn has been replaced with kNN graph of metacell (unweigted) m.seurat <- FindClusters(m.seurat, graph.name = "RNA_nn")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.