In this vignette we will demonstrate how to construct cell trajectories with Monocle 3 using single-cell ATAC-seq data. Please see the Monocle 3 website for information about installing Monocle 3.
To facilitate conversion between the Seurat (used by Signac) and CellDataSet (used by Monocle 3) formats, we will use a conversion function in the SeuratWrappers package available on GitHub.
We will use a single-cell ATAC-seq dataset containing human CD34+ hematopoietic stem and progenitor cells published by Satpathy and Granja et al. (2019, Nature Biotechnology).
The processed dataset is available on NCBI GEO here: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE129785
Note that the fragment file is present inside the GSE129785_RAW.tar
archive,
and the index for the fragment file is not supplied. You can index the file
yourself using tabix, for example:
tabix -p bed <fragment_file>
.
First we will load the dataset and perform some standard preprocessing using Signac.
knitr::opts_chunk$set(echo = TRUE) knitr::opts_knit$set(root.dir = "../vignette_data/bmmc")
setRepositories(ind=1:3) if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") if (!requireNamespace("SeuratWrappers", quietly = TRUE)) remotes::install_github('satijalab/seurat-wrappers') if (!requireNamespace("leidenbase", quietly = TRUE)) remotes::install_github("cole-trapnell-lab/leidenbase") if (!requireNamespace("monocle3", quietly = TRUE)) remotes::install_github("cole-trapnell-lab/monocle3") if (!requireNamespace("EnsDb.Hsapiens.v75", quietly = TRUE)) BiocManager::install("EnsDb.Hsapiens.v75")
library(Signac) library(Seurat) library(SeuratWrappers) library(monocle3) library(Matrix) library(ggplot2) library(patchwork)
filepath <- "GSE129785_scATAC-Hematopoiesis-CD34" peaks <- read.table(paste0(filepath, ".peaks.txt.gz"), header = TRUE) cells <- read.table(paste0(filepath, ".cell_barcodes.txt.gz"), header = TRUE, stringsAsFactors = FALSE) rownames(cells) <- make.unique(cells$Barcodes) mtx <- readMM(file = paste0(filepath, ".mtx")) mtx <- as(object = mtx, Class = "CsparseMatrix") colnames(mtx) <- rownames(cells) rownames(mtx) <- peaks$Feature
bone_assay <- CreateChromatinAssay( counts = mtx, min.cells = 5, fragments = "GSM3722029_CD34_Progenitors_Rep1_fragments.tsv.gz", sep = c("_", "_"), genome = "hg19" ) bone <- CreateSeuratObject( counts = bone_assay, meta.data = cells, assay = "ATAC" ) # The dataset contains multiple cell types # We can subset to include just one replicate of CD34+ progenitor cells bone <- bone[, bone$Group_Barcode == "CD34_Progenitors_Rep1"] # add cell type annotations from the original paper cluster_names <- c("HSC", "MEP", "CMP-BMP", "LMPP", "CLP", "Pro-B", "Pre-B", "GMP", "MDP", "pDC", "cDC", "Monocyte-1", "Monocyte-2", "Naive-B", "Memory-B", "Plasma-cell", "Basophil", "Immature-NK", "Mature-NK1", "Mature-NK2", "Naive-CD4-T1", "Naive-CD4-T2", "Naive-Treg", "Memory-CD4-T", "Treg", "Naive-CD8-T1", "Naive-CD8-T2", "Naive-CD8-T3", "Central-memory-CD8-T", "Effector-memory-CD8-T", "Gamma delta T") num.labels <- length(cluster_names) names(cluster_names) <- paste0( rep("Cluster", num.labels), seq(num.labels) ) new.md <- cluster_names[as.character(bone$Clusters)] names(new.md) <- Cells(bone) bone$celltype <- new.md bone[["ATAC"]]
Next we can add gene annotations for the hg19 genome to the object. This will be useful for computing quality control metrics (TSS enrichment score) and plotting.
library(EnsDb.Hsapiens.v75) # extract gene annotations from EnsDb annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75) # change to UCSC style since the data was mapped to hg19 seqlevels(annotations) <- paste0('chr', seqlevels(annotations)) genome(annotations) <- "hg19" # add the gene information to the object Annotation(bone) <- annotations
We'll compute TSS enrichment, nucleosome signal score, and the percentage of counts in genomic blacklist regions for each cell, and use these metrics to help remove low quality cells from the datasets.
bone <- TSSEnrichment(bone) bone <- NucleosomeSignal(bone) bone$blacklist_fraction <- FractionCountsInRegion(bone, regions = blacklist_hg19)
VlnPlot( object = bone, features = c("nCount_ATAC", "TSS.enrichment", "nucleosome_signal", "blacklist_fraction"), pt.size = 0.1, ncol = 4 )
bone <- bone[, (bone$nCount_ATAC < 50000) & (bone$TSS.enrichment > 2) & (bone$nucleosome_signal < 5)]
Next we can run a standard scATAC-seq analysis pipeline using Signac to perform dimension reduction, clustering, and cell type annotation.
bone <- FindTopFeatures(bone, min.cells = 10) bone <- RunTFIDF(bone) bone <- RunSVD(bone, n = 100) DepthCor(bone)
bone <- RunUMAP( bone, reduction = "lsi", dims = 2:50, reduction.name = "UMAP" )
bone <- FindNeighbors(bone, dims = 2:50, reduction = "lsi") bone <- FindClusters(bone, resolution = 0.8, algorithm = 3)
DimPlot(bone, label = TRUE) + NoLegend()
Assign each cluster to the most common cell type based on the original annotations from the paper.
for(i in levels(bone)) { cells_to_reid <- WhichCells(bone, idents = i) newid <- names(sort(table(bone$celltype[cells_to_reid]),decreasing=TRUE))[1] Idents(bone, cells = cells_to_reid) <- newid } bone$assigned_celltype <- Idents(bone)
DimPlot(bone, label = TRUE)
Next we can subset the different lineages and create a trajectory for each lineage. Another way to build the trajectories is to use the whole dataset and build separate pseudotime trajectories for the different cell partitions found by Monocle 3.
DefaultAssay(bone) <- "ATAC" erythroid <- bone[, bone$assigned_celltype %in% c("HSC", "MEP", "CMP-BMP")] lymphoid <- bone[, bone$assigned_celltype %in% c("HSC", "LMPP", "GMP", "CLP", "Pro-B", "pDC", "MDP", "GMP")]
We can convert the Seurat object to a CellDataSet object using the
as.cell_data_set()
function from SeuratWrappers
and build the trajectories using Monocle 3. We'll do this separately for
erythroid and lymphoid lineages, but you could explore other strategies building
a trajectory for all lineages together.
erythroid.cds <- as.cell_data_set(erythroid) erythroid.cds <- cluster_cells(cds = erythroid.cds, reduction_method = "UMAP") erythroid.cds <- learn_graph(erythroid.cds, use_partition = TRUE) lymphoid.cds <- as.cell_data_set(lymphoid) lymphoid.cds <- cluster_cells(cds = lymphoid.cds, reduction_method = "UMAP") lymphoid.cds <- learn_graph(lymphoid.cds, use_partition = TRUE)
To compute pseudotime estimates for each trajectory we need to decide what the
start of each trajectory is. In our case, we know that the hematopoietic stem
cells are the progenitors of other cell types in the trajectory, so we can set
these cells as the root of the trajectory. Monocle 3 includes an interactive
function to select cells as the root nodes in the graph. This function will be
launched if calling order_cells()
without specifying the root_cells
parameter.
Here we've pre-selected some cells as the root, and saved these to a file for
reproducibility. This file can be downloaded here.
# load the pre-selected HSCs hsc <- readLines("hsc_cells.txt")
# order cells erythroid.cds <- order_cells(erythroid.cds, reduction_method = "UMAP", root_cells = hsc) lymphoid.cds <- order_cells(lymphoid.cds, reduction_method = "UMAP", root_cells = hsc) # plot trajectories colored by pseudotime plot_cells( cds = erythroid.cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE ) plot_cells( cds = lymphoid.cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE )
Extract the pseudotime values and add to the Seurat object
bone <- AddMetaData( object = bone, metadata = erythroid.cds@principal_graph_aux@listData$UMAP$pseudotime, col.name = "Erythroid" ) bone <- AddMetaData( object = bone, metadata = lymphoid.cds@principal_graph_aux@listData$UMAP$pseudotime, col.name = "Lymphoid" )
FeaturePlot(bone, c("Erythroid", "Lymphoid"), pt.size = 0.1) & scale_color_viridis_c()
saveRDS(object = bone, file = "cd34.rds")
Thanks to the developers of Monocle 3, especially Cole Trapnell, Hannah Pliner, and members of the Trapnell lab. If you use Monocle please cite the Monocle papers.
Session Info
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.