In timoast/signac: Analysis of Single-Cell Chromatin Data

In this vignette we will demonstrate how to construct cell trajectories with Monocle 3 using single-cell ATAC-seq data. Please see the Monocle 3 website for information about installing Monocle 3.

To facilitate conversion between the Seurat (used by Signac) and CellDataSet (used by Monocle 3) formats, we will use a conversion function in the SeuratWrappers package available on GitHub.

Data loading

We will use a single-cell ATAC-seq dataset containing human CD34+ hematopoietic stem and progenitor cells published by Satpathy and Granja et al. (2019, Nature Biotechnology).

The processed dataset is available on NCBI GEO here: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE129785

Note that the fragment file is present inside the GSE129785_RAW.tar archive, and the index for the fragment file is not supplied. You can index the file yourself using tabix, for example: tabix -p bed <fragment_file>.

First we will load the dataset and perform some standard preprocessing using Signac.

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = "../vignette_data/bmmc")

setRepositories(ind=1:3)

if (!requireNamespace("remotes", quietly = TRUE))
    install.packages("remotes")

if (!requireNamespace("SeuratWrappers", quietly = TRUE))
    remotes::install_github('satijalab/seurat-wrappers')

if (!requireNamespace("leidenbase", quietly = TRUE))
    remotes::install_github("cole-trapnell-lab/leidenbase")

if (!requireNamespace("monocle3", quietly = TRUE))
    remotes::install_github("cole-trapnell-lab/monocle3")

if (!requireNamespace("EnsDb.Hsapiens.v75", quietly = TRUE))
    BiocManager::install("EnsDb.Hsapiens.v75")

library(Signac)
library(Seurat)
library(SeuratWrappers)
library(monocle3)
library(Matrix)
library(ggplot2)
library(patchwork)

filepath <- "GSE129785_scATAC-Hematopoiesis-CD34"

peaks <- read.table(paste0(filepath, ".peaks.txt.gz"), header = TRUE)
cells <- read.table(paste0(filepath, ".cell_barcodes.txt.gz"), header = TRUE, stringsAsFactors = FALSE)
rownames(cells) <- make.unique(cells$Barcodes)

mtx <- readMM(file = paste0(filepath, ".mtx"))
mtx <- as(object = mtx, Class = "CsparseMatrix")
colnames(mtx) <- rownames(cells)
rownames(mtx) <- peaks$Feature

bone_assay <- CreateChromatinAssay(
  counts = mtx,
  min.cells = 5,
  fragments = "GSM3722029_CD34_Progenitors_Rep1_fragments.tsv.gz",
  sep = c("_", "_"),
  genome = "hg19"
)
bone <- CreateSeuratObject(
  counts = bone_assay,
  meta.data = cells,
  assay = "ATAC"
)

# The dataset contains multiple cell types
# We can subset to include just one replicate of CD34+ progenitor cells
bone <- bone[, bone$Group_Barcode == "CD34_Progenitors_Rep1"]

# add cell type annotations from the original paper
cluster_names <- c("HSC",   "MEP",  "CMP-BMP",  "LMPP", "CLP",  "Pro-B",    "Pre-B",    "GMP",
                  "MDP",    "pDC",  "cDC",  "Monocyte-1",   "Monocyte-2",   "Naive-B",  "Memory-B",
                  "Plasma-cell",    "Basophil", "Immature-NK",  "Mature-NK1",   "Mature-NK2",   "Naive-CD4-T1",
                  "Naive-CD4-T2",   "Naive-Treg",   "Memory-CD4-T", "Treg", "Naive-CD8-T1", "Naive-CD8-T2",
                  "Naive-CD8-T3",   "Central-memory-CD8-T", "Effector-memory-CD8-T",    "Gamma delta T")
num.labels <- length(cluster_names)
names(cluster_names) <- paste0( rep("Cluster", num.labels), seq(num.labels) )
new.md <- cluster_names[as.character(bone$Clusters)]
names(new.md) <- Cells(bone)
bone$celltype <- new.md

bone[["ATAC"]]

Next we can add gene annotations for the hg19 genome to the object. This will be useful for computing quality control metrics (TSS enrichment score) and plotting.

library(EnsDb.Hsapiens.v75)

# extract gene annotations from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)

# change to UCSC style since the data was mapped to hg19
seqlevels(annotations) <- paste0('chr', seqlevels(annotations))
genome(annotations) <- "hg19"

# add the gene information to the object
Annotation(bone) <- annotations

Quality control

We'll compute TSS enrichment, nucleosome signal score, and the percentage of counts in genomic blacklist regions for each cell, and use these metrics to help remove low quality cells from the datasets.

bone <- TSSEnrichment(bone)
bone <- NucleosomeSignal(bone)
bone$blacklist_fraction <- FractionCountsInRegion(bone, regions = blacklist_hg19)

VlnPlot(
  object = bone,
  features = c("nCount_ATAC", "TSS.enrichment", "nucleosome_signal", "blacklist_fraction"),
  pt.size = 0.1,
  ncol = 4
)

bone <- bone[, (bone$nCount_ATAC < 50000) &
               (bone$TSS.enrichment > 2) & 
               (bone$nucleosome_signal < 5)]

Dataset preprocessing

Next we can run a standard scATAC-seq analysis pipeline using Signac to perform dimension reduction, clustering, and cell type annotation.

bone <- FindTopFeatures(bone, min.cells = 10)
bone <- RunTFIDF(bone)
bone <- RunSVD(bone, n = 100)
DepthCor(bone)

bone <- RunUMAP(
  bone,
  reduction = "lsi",
  dims = 2:50,
  reduction.name = "UMAP"
)

bone <- FindNeighbors(bone, dims = 2:50, reduction = "lsi")
bone <- FindClusters(bone, resolution = 0.8, algorithm = 3)

DimPlot(bone, label = TRUE) + NoLegend()

Assign each cluster to the most common cell type based on the original annotations from the paper.

for(i in levels(bone)) {
  cells_to_reid <- WhichCells(bone, idents = i)
  newid <- names(sort(table(bone$celltype[cells_to_reid]),decreasing=TRUE))[1]
  Idents(bone, cells = cells_to_reid) <- newid
}
bone$assigned_celltype <- Idents(bone)

DimPlot(bone, label = TRUE)

Next we can subset the different lineages and create a trajectory for each lineage. Another way to build the trajectories is to use the whole dataset and build separate pseudotime trajectories for the different cell partitions found by Monocle 3.

DefaultAssay(bone) <- "ATAC"

erythroid <- bone[,  bone$assigned_celltype %in% c("HSC", "MEP", "CMP-BMP")]
lymphoid <- bone[, bone$assigned_celltype %in% c("HSC", "LMPP", "GMP", "CLP", "Pro-B", "pDC", "MDP", "GMP")]

Building trajectories with Monocle 3

We can convert the Seurat object to a CellDataSet object using the as.cell_data_set() function from SeuratWrappers and build the trajectories using Monocle 3. We'll do this separately for erythroid and lymphoid lineages, but you could explore other strategies building a trajectory for all lineages together.

erythroid.cds <- as.cell_data_set(erythroid)
erythroid.cds <- cluster_cells(cds = erythroid.cds, reduction_method = "UMAP")
erythroid.cds <- learn_graph(erythroid.cds, use_partition = TRUE)

lymphoid.cds <- as.cell_data_set(lymphoid)
lymphoid.cds <- cluster_cells(cds = lymphoid.cds, reduction_method = "UMAP")
lymphoid.cds <- learn_graph(lymphoid.cds, use_partition = TRUE)

To compute pseudotime estimates for each trajectory we need to decide what the start of each trajectory is. In our case, we know that the hematopoietic stem cells are the progenitors of other cell types in the trajectory, so we can set these cells as the root of the trajectory. Monocle 3 includes an interactive function to select cells as the root nodes in the graph. This function will be launched if calling order_cells() without specifying the root_cells parameter. Here we've pre-selected some cells as the root, and saved these to a file for reproducibility. This file can be downloaded here.

# load the pre-selected HSCs
hsc <- readLines("hsc_cells.txt")

# order cells
erythroid.cds <- order_cells(erythroid.cds, reduction_method = "UMAP", root_cells = hsc)
lymphoid.cds <- order_cells(lymphoid.cds, reduction_method = "UMAP", root_cells = hsc)

# plot trajectories colored by pseudotime
plot_cells(
  cds = erythroid.cds,
  color_cells_by = "pseudotime",
  show_trajectory_graph = TRUE
)

plot_cells(
  cds = lymphoid.cds,
  color_cells_by = "pseudotime",
  show_trajectory_graph = TRUE
)

Extract the pseudotime values and add to the Seurat object

bone <- AddMetaData(
  object = bone,
  metadata = erythroid.cds@principal_graph_aux@listData$UMAP$pseudotime,
  col.name = "Erythroid"
)

bone <- AddMetaData(
  object = bone,
  metadata = lymphoid.cds@principal_graph_aux@listData$UMAP$pseudotime,
  col.name = "Lymphoid"
)

FeaturePlot(bone, c("Erythroid", "Lymphoid"), pt.size = 0.1) & scale_color_viridis_c()

saveRDS(object = bone, file = "cd34.rds")

Acknowledgements

Thanks to the developers of Monocle 3, especially Cole Trapnell, Hannah Pliner, and members of the Trapnell lab. If you use Monocle please cite the Monocle papers.

Session Info