CIARA

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width=7, 
  fig.height=5
)

The vignette depends on Seurat package.

library(CIARA)

required <- c("Seurat")
if (!all(unlist(lapply(required, function(pkg) requireNamespace(pkg, quietly = TRUE)))))
  knitr::opts_chunk$set(eval = FALSE)

In this vignette it is shown the analysis performed on single cell RNA seq human gastrula data from Tyser, R.C.V. et al., 2021.

Load human gastrula data and create norm counts and knn matrices using Seurat

We load the raw count matrix and umap coordinate defined in the original paper. Raw count matrix can be downloaded here

umap_elmir <- readRDS(system.file("extdata", "annot_umap.rds", package = "CIARA"))

coordinate_umap_human <- umap_elmir[, 2:3]

Obtain normalized count matrix and knn matrix ( euclidean distance on highly variable genes) using Seurat.

  human_data_seurat <- cluster_analysis_integrate_rare(raw_counts_human_data, "Human_data", 0.1, 5, 30)
norm_human_data <- as.matrix(Seurat::GetAssayData(human_data_seurat, slot = "data", assay = "RNA"))

knn_human_data <- as.matrix(human_data_seurat@graphs$RNA_nn)

Cluster annotation provided in the original paper

original_cluster_human <- as.vector(umap_elmir$cluster_id)
names(original_cluster_human) <- names(umap_elmir$cell_name)
plot_umap(coordinate_umap_human, original_cluster_human)

Run CIARA

CIARA (Cluster Independent Algorithm for the identification of RAre cell types) is a cluster independent approach that selects genes localized in a small number of neighboring cells from high dimensional PCA space. We don't execute the CIARA algorithm and we directly load the result

background <- get_background_full(norm_human_data, threshold = 1, n_cells_low = 3, n_cells_high = 20)
result <- CIARA(norm_human_data, knn_human_data, background, cores_number = 1, p_value = 0.001, odds_ratio = 2, local_region = 1, approximation = FALSE)
load(system.file("extdata", "result.Rda", package = "CIARA"))

Identifying highly localized genes

ciara_genes <- row.names(result)[result[, 1]<1]
ciara_genes_top <- row.names(result)[order(as.numeric(result[, 1]))]
norm_human_data_ciara <- norm_human_data[ciara_genes, ]
load(system.file("extdata", "norm_human_data_ciara.Rda", package = "CIARA"))
p <- list()
for(i in (ciara_genes_top)[1:5]) {
  q <- plot_gene(norm_human_data_ciara, coordinate_umap_human, i, i)
  p <- list(p, q)
}
p
background <- row.names(result)
plot_genes_sum(coordinate_umap_human, norm_human_data_ciara, (ciara_genes), "Sum from top CIARA genes")

It is also possible to explore wich genes are highly localized in which cells in an interactive way

norm_counts_small <- apply(norm_human_data_ciara, 1, function(x) {
  y <- x/sum(x)
  return(y)
  })
gene_sum <- apply(norm_counts_small, 1, sum)

genes_name_text <- selection_localized_genes(norm_human_data_ciara, ciara_genes, min_number_cells = 4, max_number_genes = 4)
colnames(coordinate_umap_human) <- c("UMAP_1", "UMAP_2")

if ((requireNamespace("plotly", quietly = TRUE))) {
plot_interactive(coordinate_umap_human, gene_sum, genes_name_text, min_x = NULL, max_x = NULL, min_y = NULL, max_y = NULL)
  }

Cluster analysis using CIARA highly localized genes

We run louvain cluster analysis implemented in Seurat using as features the highly localized genes provided by CIARA

human_data_ciara <- cluster_analysis_integrate_rare(raw_counts_human_data, "Elmir data", 0.01, 5, 30, (ciara_genes))

We can explore how much the number of clusters changes with the resolution. The original cluster 1 and 2 for resolution 0.01 are constantly present for higher values of resolution.

if ((requireNamespace("clustree", quietly = TRUE))) {
  find_resolution(human_data_ciara, seq(0.01, 1, 0.1))
  }

Cluster 2 (7 cells) is made up by primordial germ cells (PGCs). These cells expressed typical PGCs markers ad NANOS3, NANOG and DPPA5

ciara_cluster_human <- human_data_ciara$RNA_snn_res.0.01
load(system.file("extdata", "ciara_cluster_human.Rda", package = "CIARA"))
plot_umap(coordinate_umap_human, ciara_cluster_human)
plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOS3", "NANOS3")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOG", "NANOG")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "DPPA5", "DPPA5")

` Merge the original cluster annotation with the one found using CIARA highly localized genes

final_cluster_human <- merge_cluster(original_cluster_human, ciara_cluster_human, 10)
final_cluster_human[final_cluster_human == "2-step_2"] <- "PGC"
plot_umap(coordinate_umap_human, final_cluster_human)

Sub-cluster analysis using CIARA

Detect the clusters enriched in highly localized genes and then performs on these a sub-cluster analysis (using louvain algorithm implemented in Seurat)

result_test <- test_hvg(raw_counts_human_data, final_cluster_human, (ciara_genes), background, 100, 0.05)
result_test[[2]]
raw_endoderm <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Endoderm"]
raw_hemo <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "Hemogenic Endothelial Progenitors"]
raw_exe_meso <- raw_counts_human_data[, as.vector(umap_elmir$cluster_id) == "ExE Mesoderm"]



combined_endoderm <- cluster_analysis_sub(raw_endoderm, 0.2, 5, 30, "Endoderm")

combined_hemo <- cluster_analysis_sub(raw_hemo, 0.6, 5, 30, "Hemogenic Endothelial Progenitors")

combined_exe_meso <- cluster_analysis_sub(raw_exe_meso, 0.5, 5, 30, "ExE Mesoderm")
all_sub_cluster <- c(combined_endoderm$seurat_clusters, combined_hemo$seurat_clusters, combined_exe_meso$seurat_clusters)
final_cluster_human_version_sub <- merge_cluster(final_cluster_human, all_sub_cluster)
load(system.file("extdata", "final_cluster_human_version_sub.Rda", package = "CIARA"))
plot_umap(coordinate_umap_human, final_cluster_human_version_sub)
table(as.vector(final_cluster_human_version_sub))

CIARA identifies three rare population of cells with highly distinctive transcriptional profile

Seurat::DefaultAssay(human_data_seurat) <- "RNA"
markers_human_final <- markers_cluster_seurat(human_data_seurat, final_cluster_human_version_sub, names(human_data_seurat$RNA_snn_res.0.1), 5)

markers_human_top_final <- markers_human_final[[1]]
markers_human_all_final <- markers_human_final[[3]]
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
white_black_markers <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_0", norm_human_data, markers_human_all_final, 0)
sum(white_black_markers)
top_endo <- white_black_markers(final_cluster_human_version_sub, "Endoderm_2", norm_human_data, markers_human_all_final, 0)
top_endo <- names(top_endo)[top_endo]


mean_top_endo <- apply(norm_human_data[top_endo, final_cluster_human_version_sub == "Endoderm_2"], 1, mean)
mean_top_endo <- sort(mean_top_endo, decreasing = T)

top_endo <- names(mean_top_endo)
names(top_endo) <- rep("Endoderm_2", length(top_endo))
top_hemo <- white_black_markers(final_cluster_human_version_sub, "Hemogenic Endothelial Progenitors_4", norm_human_data, markers_human_all_final, 0)
top_hemo <- names(top_hemo)[top_hemo]


mean_top_hemo <- apply(norm_human_data[top_hemo, final_cluster_human_version_sub == "Hemogenic Endothelial Progenitors_4"], 1, mean)
mean_top_hemo <- sort(mean_top_hemo, decreasing = T)

top_hemo <- names(mean_top_hemo)
names(top_hemo) <- rep("Hemogenic Endothelial Progenitors_4", length(top_hemo))
top_meso <- white_black_markers(final_cluster_human_version_sub, "ExE Mesoderm_1", norm_human_data, markers_human_all_final, 0)
top_meso <- names(top_meso)[top_meso]


mean_top_meso <- apply(norm_human_data[top_meso, final_cluster_human_version_sub == "ExE Mesoderm_1"], 1, mean)
mean_top_meso <- sort(mean_top_meso, decreasing = T)

top_meso <- names(mean_top_meso)
names(top_meso) <- rep("ExE Mesoderm_1", length(top_meso))
norm_human_data_plot <- norm_human_data[c(top_endo, top_hemo, top_meso), ]
load(system.file("extdata", "norm_human_data_plot.Rda", package = "CIARA"))
load(system.file("extdata", "top_meso.Rda", package = "CIARA"))
load(system.file("extdata", "top_endo.Rda", package = "CIARA"))
load(system.file("extdata", "top_hemo.Rda", package = "CIARA"))
toMatch <- c("Endoderm")

plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse="|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse="|"), final_cluster_human)], top_endo, 20, max_size=5, text_size=10)


toMatch <- c("Hemogenic Endothelial Progenitors")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_hemo, 20, max_size = 5, text_size = 8)


toMatch <- c("ExE Mesoderm")
plot_balloon_marker(norm_human_data_plot[, grep(paste(toMatch, collapse = "|"), final_cluster_human)], final_cluster_human_version_sub[grep(paste(toMatch, collapse = "|"), final_cluster_human)], top_meso, length(top_meso), max_size = 5, text_size = 8)

Expression of some of the highly localized genes detected by CIARA that are markers of the three rare populations of cells.

plot_gene(norm_human_data_ciara, coordinate_umap_human, "C1R", "C1R")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOS3", "NANOS3")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "NANOG", "NANOG")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "SOX17", "SOX17")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "DPPA5", "DPPA5")
plot_gene(norm_human_data_ciara, coordinate_umap_human, "CSF1", "CSF1")
utils::sessionInfo()


Try the CIARA package in your browser

Any scripts or data that you put into this service are public.

CIARA documentation built on March 18, 2022, 6:48 p.m.