knitr::opts_chunk$set(
  cache = FALSE,
  collapse = TRUE,
  tidy = FALSE,
  comment = "#>",
  results = "hide",
  message = FALSE,
  warning = FALSE,
  fig.path = "man/figures/",
  fig.height = 5,
  fig.width = 10,
  fig.align = "center",
  dpi = 300,
  out.width = "100%"
)

SCP: Single-Cell Pipeline

version codesize license

SCP provides a comprehensive set of tools for single-cell data processing and downstream analysis.

The package includes the following facilities:

The functions in the SCP package are all developed around the Seurat object and are compatible with other Seurat functions.

R version requirement

Installation in the global R environment

You can install the latest version of SCP from GitHub with:

if (!require("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("zhanghao-njmu/SCP")

Create a python environment for SCP

To run functions such as RunPAGA or RunSCVELO, SCP requires conda to create a separate python environment. The default environment name is "SCP_env". You can specify the environment name for SCP by setting options(SCP_env_name="new_name")

Now, you can run PrepareEnv() to create the python environment for SCP. If the conda binary is not found, it will automatically download and install miniconda.

SCP::PrepareEnv()

To force SCP to use a specific conda binary, it is recommended to set reticulate.conda_binary R option:

options(reticulate.conda_binary = "/path/to/conda")
SCP::PrepareEnv()

If the download of miniconda or pip packages is slow, you can specify the miniconda repo and PyPI mirror according to your network region.

SCP::PrepareEnv(
  miniconda_repo = "https://mirrors.bfsu.edu.cn/anaconda/miniconda",
  pip_options = "-i https://pypi.tuna.tsinghua.edu.cn/simple"
)

Available miniconda repositories:

Available PyPI mirrors:

Installation in an isolated R environment using renv

If you do not want to change your current R environment or require reproducibility, you can use the renv package to install SCP into an isolated R environment.

Create an isolated R environment

if (!require("renv", quietly = TRUE)) {
  install.packages("renv")
}
dir.create("~/SCP_env", recursive = TRUE) # It cannot be the home directory "~" !
renv::init(project = "~/SCP_env", bare = TRUE, restart = TRUE)

Option 1: Install SCP from GitHub and create SCP python environment

renv::activate(project = "~/SCP_env")
renv::install("BiocManager")
renv::install("zhanghao-njmu/SCP", repos = BiocManager::repositories())
SCP::PrepareEnv()

Option 2: If SCP is already installed in the global environment, copy SCP from the local library

renv::activate(project = "~/SCP_env")
renv::hydrate("SCP")
SCP::PrepareEnv()

Activate SCP environment first before use

renv::activate(project = "~/SCP_env")

library(SCP)
data("pancreas_sub")
pancreas_sub <- RunPAGA(srt = pancreas_sub, group_by = "SubCellType", linear_reduction = "PCA", nonlinear_reduction = "UMAP")
CellDimPlot(pancreas_sub, group.by = "SubCellType", reduction = "draw_graph_fr")

Save and restore the state of SCP environment

renv::snapshot(project = "~/SCP_env")
renv::restore(project = "~/SCP_env")

Quick Start

Data exploration

The analysis is based on a subsetted version of mouse pancreas data.

library(SCP)
library(BiocParallel)
register(MulticoreParam(workers = 8, progressbar = TRUE))

data("pancreas_sub")
print(pancreas_sub)
CellDimPlot(
  srt = pancreas_sub, group.by = c("CellType", "SubCellType"),
  reduction = "UMAP", theme_use = "theme_blank"
)
CellDimPlot(
  srt = pancreas_sub, group.by = "SubCellType", stat.by = "Phase",
  reduction = "UMAP", theme_use = "theme_blank"
)
FeatureDimPlot(
  srt = pancreas_sub, features = c("Sox9", "Neurog3", "Fev", "Rbp4"),
  reduction = "UMAP", theme_use = "theme_blank"
)
FeatureDimPlot(
  srt = pancreas_sub, features = c("Ins1", "Gcg", "Sst", "Ghrl"),
  compare_features = TRUE, label = TRUE, label_insitu = TRUE,
  reduction = "UMAP", theme_use = "theme_blank"
)
ht <- GroupHeatmap(
  srt = pancreas_sub,
  features = c(
    "Sox9", "Anxa2", # Ductal
    "Neurog3", "Hes6", # EPs
    "Fev", "Neurod1", # Pre-endocrine
    "Rbp4", "Pyy", # Endocrine
    "Ins1", "Gcg", "Sst", "Ghrl" # Beta, Alpha, Delta, Epsilon
  ),
  group.by = c("CellType", "SubCellType"),
  heatmap_palette = "YlOrRd",
  cell_annotation = c("Phase", "G2M_score", "Cdh2"),
  cell_annotation_palette = c("Dark2", "Paired", "Paired"),
  show_row_names = TRUE, row_names_side = "left",
  add_dot = TRUE, add_reticle = TRUE
)
print(ht$plot)

CellQC

pancreas_sub <- RunCellQC(srt = pancreas_sub)
CellDimPlot(srt = pancreas_sub, group.by = "CellQC", reduction = "UMAP")
CellStatPlot(srt = pancreas_sub, stat.by = "CellQC", group.by = "CellType", label = TRUE)
CellStatPlot(
  srt = pancreas_sub,
  stat.by = c(
    "db_qc", "outlier_qc", "umi_qc", "gene_qc",
    "mito_qc", "ribo_qc", "ribo_mito_ratio_qc", "species_qc"
  ),
  plot_type = "upset", stat_level = "Fail"
)

Standard pipeline

pancreas_sub <- Standard_SCP(srt = pancreas_sub)
CellDimPlot(
  srt = pancreas_sub, group.by = c("CellType", "SubCellType"),
  reduction = "StandardUMAP2D", theme_use = "theme_blank"
)
CellDimPlot3D(srt = pancreas_sub, group.by = "SubCellType")

CellDimPlot3D

FeatureDimPlot3D(srt = pancreas_sub, features = c("Sox9", "Neurog3", "Fev", "Rbp4"))

FeatureDimPlot3D

Integration pipeline

Example data for integration is a subsetted version of panc8(eight human pancreas datasets)

data("panc8_sub")
panc8_sub <- Integration_SCP(srtMerge = panc8_sub, batch = "tech", integration_method = "Seurat")
CellDimPlot(
  srt = panc8_sub, group.by = c("celltype", "tech"), reduction = "SeuratUMAP2D",
  title = "Seurat", theme_use = "theme_blank"
)

UMAP embeddings based on different integration methods in SCP:

library(ggplot2)
library(cowplot)
library(gtable)
integration_methods <- c("Uncorrected", "Seurat", "scVI", "MNN", "fastMNN", "Harmony", "Scanorama", "BBKNN", "CSS", "LIGER", "Conos", "ComBat")
plist <- list()

for (method in integration_methods) {
  panc8_sub <- Integration_SCP(
    srtMerge = panc8_sub, batch = "tech", linear_reduction_dims_use = 1:50,
    integration_method = method, nonlinear_reduction = "umap"
  )
  plist[[method]] <- CellDimPlot(panc8_sub,
    title = method, group.by = c("celltype"), reduction = paste0(method, "UMAP2D"), theme_use = "theme_blank",
    xlab = "UMAP_1", ylab = "UMAP_2", legend.position = "none"
  )
}
p <- plot_grid(plotlist = plist, ncol = 3)
grob <- ggplotGrob(p)
legend <- get_legend(CellDimPlot(panc8_sub, group.by = c("celltype"), reduction = paste0(method, "UMAP2D"), theme_use = "theme_blank", legend.position = "bottom", legend.direction = "horizontal"))
grob <- gtable_add_rows(grob, sum(legend$heights) + unit(1, "cm"), 0)
grob <- gtable_add_grob(grob, legend, t = 1, l = min(grob$layout[grepl(pattern = "panel", x = grob$layout$name), "l"]))
panel_fix(grob, height = 2)

Integration-all

Cell projection between single-cell datasets

panc8_rename <- RenameFeatures(
  srt = panc8_sub,
  newnames = make.unique(capitalize(rownames(panc8_sub[["RNA"]]), force_tolower = TRUE)),
  assays = "RNA"
)
srt_query <- RunKNNMap(srt_query = pancreas_sub, srt_ref = panc8_rename, ref_umap = "SeuratUMAP2D")
ProjectionPlot(
  srt_query = srt_query, srt_ref = panc8_rename,
  query_group = "SubCellType", ref_group = "celltype"
)

Cell annotation using bulk RNA-seq datasets

data("ref_scMCA")
pancreas_sub <- RunKNNPredict(srt_query = pancreas_sub, bulk_ref = ref_scMCA, filter_lowfreq = 20)
CellDimPlot(srt = pancreas_sub, group.by = "KNNPredict_classification", reduction = "UMAP", label = TRUE)

Cell annotation using single-cell datasets

pancreas_sub <- RunKNNPredict(
  srt_query = pancreas_sub, srt_ref = panc8_rename,
  ref_group = "celltype", filter_lowfreq = 20
)
CellDimPlot(srt = pancreas_sub, group.by = "KNNPredict_classification", reduction = "UMAP", label = TRUE)

pancreas_sub <- RunKNNPredict(
  srt_query = pancreas_sub, srt_ref = panc8_rename,
  query_group = "SubCellType", ref_group = "celltype",
  return_full_distance_matrix = TRUE
)
CellDimPlot(srt = pancreas_sub, group.by = "KNNPredict_classification", reduction = "UMAP", label = TRUE)

ht <- CellCorHeatmap(
  srt_query = pancreas_sub, srt_ref = panc8_rename,
  query_group = "SubCellType", ref_group = "celltype",
  nlabel = 3, label_by = "row",
  show_row_names = TRUE, show_column_names = TRUE
)
print(ht$plot)

PAGA analysis

pancreas_sub <- RunPAGA(
  srt = pancreas_sub, group_by = "SubCellType",
  linear_reduction = "PCA", nonlinear_reduction = "UMAP"
)
PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TRUE, label_repel = TRUE)

Velocity analysis

To estimate RNA velocity, you need to have both "spliced" and "unspliced" assays in your Seurat object. You can generate these matrices using velocyto, bustools, or alevin.

pancreas_sub <- RunSCVELO(
  srt = pancreas_sub, group_by = "SubCellType",
  linear_reduction = "PCA", nonlinear_reduction = "UMAP"
)
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", group_by = "SubCellType")
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", plot_type = "stream")

Differential expression analysis

pancreas_sub <- RunDEtest(srt = pancreas_sub, group_by = "CellType", fc.threshold = 1, only.pos = FALSE)
VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Annotate features with transcription factors and surface proteins
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
ht <- FeatureHeatmap(
  srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
  species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
  height = 5, width = 4
)
print(ht$plot)

Enrichment analysis(over-representation) {#enrichment-analysisover-representation}

pancreas_sub <- RunEnrichment(
  srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
  DE_threshold = "avg_log2FC > log2(1.5) & p_val_adj < 0.05"
)
EnrichmentPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
  plot_type = "bar"
)
EnrichmentPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
  plot_type = "wordcloud"
)
EnrichmentPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
  plot_type = "wordcloud", word_type = "feature"
)
EnrichmentPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
  plot_type = "network"
)

To ensure that labels are visible, you can adjust the size of the viewer panel on Rstudio IDE.

EnrichmentPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
  plot_type = "enrichmap"
)
EnrichmentPlot(srt = pancreas_sub, group_by = "CellType", plot_type = "comparison")

Enrichment analysis(GSEA) {#enrichment-analysisgsea}

pancreas_sub <- RunGSEA(
  srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
  DE_threshold = "p_val_adj < 0.05"
)
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
GSEAPlot(
  srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", plot_type = "bar",
  direction = "both", topTerm = 20
)
GSEAPlot(srt = pancreas_sub, group_by = "CellType", plot_type = "comparison")

Trajectory inference

pancreas_sub <- RunSlingshot(srt = pancreas_sub, group.by = "SubCellType", reduction = "UMAP")
FeatureDimPlot(pancreas_sub, features = paste0("Lineage", 1:3), reduction = "UMAP", theme_use = "theme_blank")
CellDimPlot(pancreas_sub, group.by = "SubCellType", reduction = "UMAP", lineages = paste0("Lineage", 1:3), lineages_span = 0.1)

Dynamic features

pancreas_sub <- RunDynamicFeatures(srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), n_candidates = 200)
ht <- DynamicHeatmap(
  srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"),
  use_fitted = TRUE, n_split = 6, reverse_ht = "Lineage1",
  species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
  heatmap_palette = "viridis", cell_annotation = "SubCellType",
  separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
  pseudotime_label = 25, pseudotime_label_color = "red",
  height = 5, width = 2
)
print(ht$plot)
DynamicPlot(
  srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), group.by = "SubCellType",
  features = c("Plk1", "Hes1", "Neurod2", "Ghrl", "Gcg", "Ins2"),
  compare_lineages = TRUE, compare_features = FALSE
)
FeatureStatPlot(
  srt = pancreas_sub, group.by = "SubCellType", bg.by = "CellType",
  stat.by = c("Sox9", "Neurod2", "Isl1", "Rbp4"), add_box = TRUE,
  comparisons = list(
    c("Ductal", "Ngn3 low EP"),
    c("Ngn3 high EP", "Pre-endocrine"),
    c("Alpha", "Beta")
  )
)

Interactive data visualization with SCExplorer

PrepareSCExplorer(list(mouse_pancreas = pancreas_sub, human_pancreas = panc8_sub), base_dir = "./SCExplorer")
app <- RunSCExplorer(base_dir = "./SCExplorer")
list.files("./SCExplorer") # This directory can be used as site directory for Shiny Server.

if (interactive()) {
  shiny::runApp(app)
}

SCExplorer1 SCExplorer2

Other visualization examples

CellDimPlotExample1 CellStatPlotExample2 FeatureStatPlotExample3 GroupHeatmapExample3

You can also find more examples in the documentation of the function: Integration_SCP, RunKNNMap, RunMonocle3, RunPalantir, etc.



zh542370159/SCP documentation built on Nov. 22, 2023, 2:34 a.m.