| percell_workflow | R Documentation |
Example workflow for using SlimR's per-cell annotation functions
The per-cell annotation workflow in SlimR provides an alternative to cluster-based annotation by scoring and labeling individual cells based on marker expression. This is useful when:
- Clusters contain mixed cell types
- You want finer-grained annotations
- Cell states exist on a continuum
- UMAP spatial context can improve annotation quality
# Basic per-cell workflow: build a marker list, score every cell, write the
# labels back into the Seurat object, then inspect them with a dot plot.
# NOTE(review): `sce` is never created in this example. It is assumed to be an
# existing Seurat object with normalized data (and presumably a UMAP reduction,
# since plot_UMAP = TRUE is requested in step 4) -- confirm before running.
# 1. Prepare your Seurat object (must have normalized data)
library(SlimR)
library(Seurat)
# 2. Create or load marker list
# `Cellmarker2` is not defined in this example; presumably it is a marker
# database object provided by SlimR -- verify.
Markers_list <- Markers_filter_Cellmarker2(
Cellmarker2,
species = "Human",
tissue_class = "Intestine"
)
# 3. Run per-cell annotation
# The returned object is kept in `result`; later sections of this page read
# its Cell_annotations, Summary and Probability_matrix elements.
result <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
method = "weighted", # "weighted", "mean", or "AUCell"
min_expression = 0.1,
min_score = 0.1,
verbose = TRUE
)
# 4. Annotate Seurat object
# The return value replaces `sce`; labels are requested under the metadata
# column name given in annotation_col.
sce <- Celltype_Annotation_PerCell(
seurat_obj = sce,
SlimR_percell_result = result,
plot_UMAP = TRUE,
plot_confidence = TRUE,
annotation_col = "Cell_type_PerCell"
)
# 5. Verify annotations
# Uses the same annotation_col as step 4 so the plot reflects those labels.
dotplot <- Celltype_Verification_PerCell(
seurat_obj = sce,
SlimR_percell_result = result,
gene_number = 5,
annotation_col = "Cell_type_PerCell"
)
# Explicit print() so the plot is drawn even when the script is source()d.
print(dotplot)
UMAP Spatial Smoothing:
# Use UMAP coordinates to smooth predictions via k-NN
# This reduces noise and improves consistency in spatial regions
# NOTE(review): the original trailing comment on smoothing_weight was cut off
# after "30" (the "%" was probably swallowed when the page was rendered).
# Presumably 0.3 is the share contributed by the neighbors -- confirm against
# the Celltype_Calculate_PerCell() documentation.
result_smooth <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
use_umap_smoothing = TRUE,
k_neighbors = 20, # Number of neighbors to consider
smoothing_weight = 0.3, # 0.3, i.e. a 30% smoothing weight
verbose = TRUE
)
# Compare smoothed vs unsmoothed
# `result` is the output of the earlier call that did not set
# use_umap_smoothing; both label vectors are stored as metadata columns so
# DimPlot() can color by them.
sce$Cell_type_Smooth <- result_smooth$Cell_annotations$Predicted_cell_type
sce$Cell_type_Raw <- result$Cell_annotations$Predicted_cell_type
# NOTE(review): `|` between two plots is presumably the patchwork side-by-side
# operator; patchwork is only attached further down this page -- verify it is
# available at this point.
DimPlot(sce, group.by = "Cell_type_Raw") |
DimPlot(sce, group.by = "Cell_type_Smooth")
# The three calls below differ only in `method`; each result is kept in its
# own variable so the scoring strategies can be compared on the same object.
# Method 1: Weighted (recommended for most cases)
# Combines expression with marker specificity and detection rate
result_weighted <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
method = "weighted"
)
# Method 2: Mean (simple, fast)
# Just averages normalized marker expression
result_mean <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
method = "mean"
)
# Method 3: AUCell (rank-based, robust to batch effects)
# Scores based on proportion of markers in top 5%
# NOTE(review): the original comment ended at "top 5"; the "%" and whatever
# followed it (presumably "of each cell's ranked genes") appear to have been
# dropped when the page was rendered -- confirm against the SlimR docs.
result_aucell <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
method = "AUCell"
)
# Runs both annotation strategies on the same Seurat object, stores each set
# of labels under its own annotation_col, and compares them visually and in a
# cross-tabulation.
# Cluster-based annotation (original SlimR approach)
cluster_result <- Celltype_Calculate(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
cluster_col = "seurat_clusters"
)
# The same cluster_col is passed again so the labels are mapped onto the
# clusters that were scored above.
sce <- Celltype_Annotation(
seurat_obj = sce,
cluster_col = "seurat_clusters",
SlimR_anno_result = cluster_result,
annotation_col = "Cell_type_Cluster"
)
# Per-cell annotation
percell_result <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human"
)
sce <- Celltype_Annotation_PerCell(
seurat_obj = sce,
SlimR_percell_result = percell_result,
annotation_col = "Cell_type_PerCell"
)
# Compare
# ggplot2 provides ggtitle(); patchwork provides the `|` operator used below
# to place the two plots side by side.
library(ggplot2)
library(patchwork)
p1 <- DimPlot(sce, group.by = "Cell_type_Cluster") +
ggtitle("Cluster-based")
p2 <- DimPlot(sce, group.by = "Cell_type_PerCell") +
ggtitle("Per-cell")
p1 | p2
# Check agreement
# Contingency table: rows are cluster-based labels, columns are per-cell labels.
table(sce$Cell_type_Cluster, sce$Cell_type_PerCell)
# Performance-oriented variants of the scoring call.
# NOTE: both assignments below overwrite objects created earlier on this page
# (`result` and `result_smooth`).
# For large datasets, adjust chunk_size to manage memory
result <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
chunk_size = 10000, # Process 10k cells at a time
verbose = TRUE
)
# For UMAP smoothing, install RANN for 10-100x speedup
# install.packages("RANN")
# (The install line is left commented out so running the example does not
# install packages as a side effect.)
result_smooth <- Celltype_Calculate_PerCell(
seurat_obj = sce,
gene_list = Markers_list,
species = "Human",
use_umap_smoothing = TRUE,
k_neighbors = 15
# RANN will be used automatically if installed
)
# Tour of the elements of the object returned by Celltype_Calculate_PerCell().
# The commented tables under each call are sample output from the package
# documentation; the values are presumably illustrative, not from a real run.
# Cell-level annotations
head(result$Cell_annotations)
# Cell_barcode Predicted_cell_type Max_score Confidence
# 1 AAACCTGAG... Enterocyte 0.85 0.62
# 2 AAACCTGCA... Goblet cell 0.72 0.45
# Summary statistics
result$Summary
# Cell_type Count Percentage
# 1 Enterocyte 5432 45.2
# 2 Goblet cell 2156 17.9
# Full probability matrix (if return_scores = TRUE)
# Shows the first five rows and first three columns; judging by the sample
# output, rows are cell barcodes and columns are cell types.
result$Probability_matrix[1:5, 1:3]
# Enterocyte Goblet_cell Stem_cell
# AAACCTGAG... 0.85 0.10 0.05
# Split cells by annotation confidence. with() evaluates each subsetting
# expression inside the Cell_annotations table, so its columns can be
# referenced by name without repeating the full path.
# Barcodes of cells called with confidence above 0.5
high_conf <- with(result$Cell_annotations, Cell_barcode[Confidence > 0.5])
# Barcodes of cells with confidence below 0.2, set aside for manual review
uncertain <- with(result$Cell_annotations, Cell_barcode[Confidence < 0.2])
Other Section_3_Automated_Annotation:
Celltype_Annotation(),
Celltype_Annotation_PerCell(),
Celltype_Calculate(),
Celltype_Calculate_PerCell(),
Celltype_Verification(),
Celltype_Verification_PerCell(),
Parameter_Calculate()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.