inst/doc/a_SuperCell.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "figures/",
  fig.width = 6, fig.height = 6,
  eval = TRUE
)

## ----library, warning=FALSE, eval=FALSE---------------------------------------
#  if (!requireNamespace("remotes")) install.packages("remotes")
#  remotes::install_github("GfellerLab/SuperCell")

## ----load library, warning=FALSE----------------------------------------------
library(SuperCell)

## ----load data----------------------------------------------------------------
data(cell_lines) # list with GE - gene expression matrix (logcounts), meta - cell meta data
GE <- cell_lines$GE
dim(GE) # genes as rows and cells as columns
cell.meta <- cell_lines$meta

## ----Simplification, warning=FALSE, paged.print=FALSE-------------------------
gamma <- 20 # graining level
k.knn <- 5

# Building metacells from gene expressio (GE)
SC <- SCimplify(GE,  # gene expression matrix 
                k.knn = k.knn, # number of nearest neighbors to build kNN network
                gamma = gamma, # graining level
                n.var.genes = 1000 # number of the top variable genes to use for dimentionality reduction 
)


# Alternative, metacells can be build from low-dimensional embedding.
# For this, first compute low-dim embedding and pass it into `SCimplify_from_embedding()`
if(0){
  SC_alt <- SCimplify_from_embedding(
    X = stats::prcomp(Matrix::t(GE[SC$genes.use,]), rank. = 10)$x, # PCA embedding
    k.knn = k.knn, # number of nearest neighbors to build kNN network
    gamma = gamma # graining level)
  )
}

# plot network of metacells

supercell_plot(SC$graph.supercells, # network
               color.use = "gray", # color of the nodes
               main = paste("Metacell network, gamma =", gamma), 
               seed = 1) 

# plot single-cell network
supercell_plot(SC$graph.singlecell, # network
               group = cell.meta, # colored by cell line assignment
               do.frames = F, # not drawing frames around each node 
               main = paste("Single-cell network, N =", dim(GE)[2]), 
               lay.method = "components") # method to compute the network 2D embedding 

## ----average gene expression--------------------------------------------------
SC.GE <- supercell_GE(GE, SC$membership)
dim(SC.GE) 

## ----assign metacells to cell line infromation--------------------------------
SC$cell_line <- supercell_assign(clusters = cell.meta, # single-cell assigment to cell lines (clusters)
                                 supercell_membership = SC$membership, # single-cell assignment to metacells
                                 method = "jaccard")


seed <- 1 # seed for network plotting 

# plot network of metacells colored by cell line assignment 
supercell_plot(SC$graph.supercells, 
               group = SC$cell_line, 
               seed = seed, 
               main = "Metacells colored by cell line assignment")

## ----purity of supercell in terms of cell line composition--------------------
# compute purity of metacells in terms of cell line composition
purity <- supercell_purity(clusters = cell.meta, 
                           supercell_membership = SC$membership, method = 'max_proportion')
hist(purity, main = "Purity of metacells \nin terms of cell line composition")
SC$purity <- purity

## ----plotting options---------------------------------------------------------
## rotate network to be more consistent with the single-cell one
supercell_plot(SC$graph.supercells, 
               group = SC$cell_line, 
               seed = seed, 
               alpha = -pi/2,
               main  = "Metacells colored by cell line assignment (rotated)")

## alternatively, any layout can be provided as 2xN numerical matrix, where N is number of nodes (cells)

## Let's plot metacell network using the layout of the single-cell network:
## 1) get single-cell network layout 
my.lay.sc <- igraph::layout_components(SC$graph.singlecell) 

## 2) compute metacell network layout averaging coordinates withing metacells
my.lay.SC <- Matrix::t(supercell_GE(ge = t(my.lay.sc), groups = SC$membership))

## 3) provide layout with the parameter $lay$
supercell_plot(SC$graph.supercells, 
               group = SC$cell_line, 
               lay = my.lay.SC,
               main  = "Metacells colored by cell line assignment (averaged coordinates)")

## ----clustering---------------------------------------------------------------
#dimensionality reduction 
SC.PCA         <- supercell_prcomp(Matrix::t(SC.GE), # metacell gene expression matrix
                                   genes.use = SC$genes.use, # genes used for the coarse-graining, but any set can be provided
                                   supercell_size = SC$supercell_size, # sample-weighted pca
                                   k = 20) 
## compute distance
D              <- dist(SC.PCA$x)

## cluster metacells
SC.clusters    <- supercell_cluster(D = D, k = 5, supercell_size = SC$supercell_size) 
SC$clustering  <- SC.clusters$clustering

## ----assign metacell clustering results to cell line information--------------
## mapping metacell cluster to cell line 
map.cluster.to.cell.line    <- supercell_assign(supercell_membership = SC$clustering, clusters  = SC$cell_line)
## clustering as cell line
SC$clustering_reordered     <- map.cluster.to.cell.line[SC$clustering]

supercell_plot(SC$graph.supercells, 
               group = SC$clustering_reordered, 
               seed = seed,
               alpha = -pi/2,
               main = "Metacells colored by cluster")

## ----differential expression analysis-----------------------------------------
markers.all.positive <- supercell_FindAllMarkers(ge = SC.GE, # metacell gene expression matrix
                                                 supercell_size = SC$supercell_size, # size of metacell for sample-weighted method
                                                 clusters = SC$clustering_reordered, # clustering
                                                 logfc.threshold = 1, # mininum log fold-change
                                                 only.pos = T) # keep only upregulated genes
markers.all.positive$H2228[1:20,]

## ----Violin plots-------------------------------------------------------------
genes.to.plot <- c("DHRS2", "MT1P1", "TFF1", "G6PD", "CD74", "CXCL8")

supercell_VlnPlot(ge = SC.GE, 
                  supercell_size = SC$supercell_size, 
                  clusters = SC$clustering_reordered,
                  features = genes.to.plot,
                  idents = c("H1975", "H2228", "A549"), 
                  ncol = 3)

supercell_GeneGenePlot(ge = SC.GE, 
                       gene_x = genes.to.plot[1:3],
                       gene_y = genes.to.plot[4:6],
                       supercell_size = SC$supercell_size, 
                       clusters = SC$clustering_reordered,)

## -----------------------------------------------------------------------------
SC10 <- supercell_rescale(SC, gamma = 10)

SC10$cell_line <- supercell_assign(clusters = cell.meta, # single-cell assigment to cell lines (clusters)
                                 supercell_membership = SC10$membership, # single-cell assignment to metacells
                                 method = "jaccard")

supercell_plot(SC10$graph.supercells, 
               group = SC10$cell_line, 
               seed = 1,
               main  = "Metacells at gamma = 10 colored by cell line assignment")

### don't forget to recompute metacell gene expression matrix for a new grainig level with 
# GE10 <- supercell_GE(GE, SC10$membership)
### if you are going to perform downstream analyses at the new graining level

## ----Seurat-------------------------------------------------------------------
#install.packages("Seurat")
library(Seurat)

m.seurat <- supercell_2_Seurat(SC.GE = as.matrix(SC.GE), SC = SC, fields = c("cell_line", "clustering","clustering_reordered"))

## ----PCAplot------------------------------------------------------------------
PCAPlot(m.seurat)

### cluster SuperCell network (unweighted clustering)
m.seurat <- FindClusters(m.seurat, graph.name = "RNA_nn") # now RNA_nn is metacell network

m.seurat <- FindNeighbors(m.seurat, verbose = FALSE)  # RNA_nn has been replaced with kNN graph of metacell (unweigted)
m.seurat <- FindClusters(m.seurat, graph.name = "RNA_nn") 

Try the SuperCell package in your browser

Any scripts or data that you put into this service are public.

SuperCell documentation built on Oct. 25, 2024, 5:07 p.m.