In zhiyhu/CIDER: Meta-Clustering for Single-Cell Data Integration and Evaluation

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

This vignette performs dnCIDER on a cross-species pancreas dataset. It is aimed to show the underneath structure of dnCIDER compared to the other high level vignette.

Set up

In addition to CIDER, we will load the following packages:

library(CIDER)
library(Seurat)
library(parallel)
library(cowplot)

Load example data

The example data can be downloaded from https://figshare.com/s/d5474749ca8c711cc205.

Pancreatic cell data$^1$ contain cells from human (8241 cells) and mouse (1886 cells).

load("../data/pancreas_counts.RData") # count matrix
load("../data/pancreas_meta.RData") # meta data/cell information
seu <- CreateSeuratObject(counts = pancreas_counts, meta.data = pancreas_meta)
table(seu$Batch)

Perform initial clustering

seu_list <- Seurat::SplitObject(seu, split.by = "Batch")
seu_list <- mclapply(seu_list, function(x) {
  x <- NormalizeData(x, normalization.method = "LogNormalize", 
                     scale.factor = 10000, verbose = FALSE)
  x <- FindVariableFeatures(x, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
  x <- ScaleData(x, verbose = FALSE, vars.to.regress = "Sample")
  x <- RunPCA(x, features = VariableFeatures(object = x), verbose = FALSE)  
  x <- FindNeighbors(x, dims = 1:15, verbose = FALSE)
  x <- FindClusters(x, resolution = 0.6, verbose = FALSE)
  return(x)
})

knitr::kable(table(seu_list[[2]]$Group, seu_list[[2]]$seurat_clusters))

seu_list <- mclapply(seu_list, RunTSNE, dims = 1:15)
p1 <- scatterPlot(seu_list[[1]], "tsne", colour.by = "seurat_clusters")
p2 <- scatterPlot(seu_list[[2]], "tsne", colour.by = "seurat_clusters")
cowplot::plot_grid(p1,p2)

dist_coef <- getDistMat(seu_list, downsampling.size = 50)

par(mfrow = c(length(seu_list),1))
for(i in which(sapply(dist_coef, function(x) return(!is.null(x))))){
  tmp <- dist_coef[[i]] + t(dist_coef[[i]])
  diag(tmp) <- 1
  pheatmap::pheatmap(tmp, display_numbers = TRUE)
}

for(seu_itor in 1:2){
  tmp <- dist_coef[[seu_itor]] + t(dist_coef[[seu_itor]])
  diag(tmp) <- 1
  tmp <- 1 - tmp
  hc <- hclust(as.dist(tmp), method = "average")
  hres <- cutree(hc, h = 0.4)
  df_hres <- data.frame(hres)
  df_hres$hres <- paste0(df_hres$hres, "_", unique(seu_list[[seu_itor]]$Batch))
  seu_list[[seu_itor]]$inicluster_tmp <- paste0(seu_list[[seu_itor]]$seurat_clusters, "_", seu_list[[seu_itor]]$Batch)
  seu_list[[seu_itor]]$inicluster <- df_hres$hres[match(seu_list[[seu_itor]]$inicluster_tmp,rownames(df_hres))]
}

# plot(as.dendrogram(hc), horiz = T)

p1 <- scatterPlot(seu_list[[1]], "tsne", "inicluster")
p2 <- scatterPlot(seu_list[[2]], "tsne", "inicluster")
plot_grid(p1,p2)

scatterPlot(seu_list[[2]], "tsne", "Group")

Calculate of IDER similarity matrix

res <- unlist(lapply(seu_list, function(x) return(x$inicluster)))
res_names <- unlist(lapply(seu_list, function(x) return(colnames(x))))
seu$initial_cluster <- res[match(colnames(seu), res_names)]

ider <- getIDEr(seu, 
                group.by.var = "initial_cluster",
                batch.by.var = "Batch",
                downsampling.size = 35, 
                use.parallel = FALSE, verbose = FALSE)

net <- plotNetwork(seu, ider, colour.by = "Group" , vertex.size = 0.6, weight.factor = 5)

hc <- hclust(as.dist(1-(ider[[1]] + t(ider[[1]])))/2)
plot(as.dendrogram(hc), horiz = TRUE)

Perform final Clustering

seu <- finalClustering(seu, ider, cutree.h = 0.35) # final clustering

seu <- NormalizeData(seu, verbose = FALSE)
seu <- FindVariableFeatures(seu, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
seu <- ScaleData(seu, verbose = FALSE)
seu <- RunPCA(seu, npcs = 20, verbose = FALSE)
seu <- RunTSNE(seu, reduction = "pca", dims = 1:12)

plot_list <- list()
plot_list[[1]] <- scatterPlot(seu, "tsne", colour.by = "CIDER_cluster", title = "asCIDER clustering results") 
plot_list[[2]] <- scatterPlot(seu, "tsne", colour.by = "Group", title = "Ground truth of cell populations") 
plot_grid(plotlist = plot_list, ncol = 2)

Technical

sessionInfo()

References

Baron, M. et al. A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure. Cell Syst 3, 346–360.e4 (2016).
Satija R, et al. Spatial reconstruction of single-cell gene expression data. Nature Biotechnology 33, 495-502 (2015).

zhiyhu/CIDER documentation built on Feb. 4, 2025, 1:09 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com