This tutorial covers co-expression network analysis in a dataset with pseudotime information. We will use a dataset of human hematopietic stem cells to identify co-expression modules, perform pseudotime trajectory analysis with Monocle3, and study module dynamics throughout the cellular transitions from stem cells to mature cell types. Refer to the Monocle3 documentation for a more thorough explanation of pseudotime analysis, and see the hdWGCNA single-cell tutorial for an explanation of the co-expression network analysis steps.
First, download the .rds file from this Google Drive link containing the processed hematopoetic stem cell scRNA-seq Seurat object.
Next, load the dataset into R and the necessary packages for hdWGCNA and Monocle3.
# single-cell analysis package library(Seurat) # plotting and data science packages library(tidyverse) library(cowplot) library(patchwork) # co-expression network analysis packages: library(WGCNA) library(hdWGCNA) # using the cowplot theme for ggplot theme_set(theme_cowplot()) # set random seed for reproducibility set.seed(12345) # optionally enable multithreading enableWGCNAThreads(nThreads = 8) # load the dataset seurat_obj <- readRDS('hematopoetic_stem.rds') # plot the UMAP colored by cluster p <- DimPlot(seurat_obj, group.by='celltype', label=TRUE) + umap_theme() + coord_equal() + NoLegend() + theme(plot.title=element_blank()) p
library(Seurat) library(tidyverse) library(cowplot) library(patchwork) library(WGCNA) library(Matrix) library(viridis) library(harmony) library(RColorBrewer) library(ggpubr) library(tictoc) library(RColorBrewer) library(Hmisc) library(corrplot) library(enrichR) library(GeneOverlap) library(grid) library(gridExtra) library(igraph) library(ggrepel) #library(hdWGCNA) enableWGCNAThreads(nThreads = 8) theme_set(theme_cowplot()) set.seed(12345) library(hdWGCNA) setwd("/dfs7/swaruplab/smorabit/analysis/scWGCNA/pseudotime_tutorial/") data_dir <- 'data/' fig_dir <- 'figures/' seurat_obj <- readRDS(file='/dfs7/swaruplab/smorabit/analysis/scWGCNA/compare_seacells/data/seacells_hdWGCNA.rds')
In this section, we use Monocle3 to perform pseudotime trajectory analysis in this dataset. Follow these instructions to install Monocle3.
library(monocle3) library(SeuratWrappers) # convert the seurat object to CDS cds <- as.cell_data_set(seurat_obj) # run the monocle clustering cds <- cluster_cells(cds, reduction_method='UMAP') # learn graph for pseudotime cds <- learn_graph(cds) # plot the pseudotime graph: p1 <- plot_cells( cds = cds, color_cells_by = "celltype", show_trajectory_graph = TRUE, label_principal_points = TRUE ) # plot the UMAP partitions from the clustering algorithm p2 <- plot_cells( cds = cds, color_cells_by = "partition", show_trajectory_graph = FALSE ) p1 + p2
png(paste0(fig_dir, 'umap_monocle.png'), width=8, height=4, res=500, units='in') p1 + p2 dev.off()
The Monocle3 learn_graph
function builds a principal graph in a dimensionally reduced space of the dataset.
Here we can see the principal graph overlaid on the UMAP. The principal graph will serve as the basis of the
pseudotime trajectories. Next, we have to select the starting node, or principal node, of the graph as the
origin of the pseudotime. We select node Y_26
as the prinicipal node since that overlaps with the stem
cell population.
# get principal node & order cells principal_node <- 'Y_26' cds <- order_cells(cds,root_pr_nodes = principal_node) # add pseudotime to seurat object: seurat_obj$pseudotime <- pseudotime(cds) # separate pseudotime trajectories by the different mature cells seurat_obj$ery_pseudotime <- ifelse(seurat_obj$celltype %in% c("HSC", "MEP", 'Ery'), seurat_obj$pseudotime, NA) seurat_obj$mono_pseudotime <- ifelse(seurat_obj$celltype %in% c("HSC", "HMP", 'Mono'), seurat_obj$pseudotime, NA) seurat_obj$dc_pseudotime <- ifelse(seurat_obj$celltype %in% c("HSC", "HMP", 'DCPre', 'cDC', 'pDC'), seurat_obj$pseudotime, NA) seurat_obj$clp_pseudotime <- ifelse(seurat_obj$celltype %in% c("HSC", "HMP", 'CLP'), seurat_obj$pseudotime, NA)
p <- plot_cells( cds = cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE, label_roots = TRUE, label_leaves = FALSE, label_branch_points = FALSE, ) + umap_theme() pdf(paste0(fig_dir, 'umap_pseudotime.pdf'), width=5, height=4, useDingbats=FALSE) p dev.off()
See pseudotime plotting code
seurat_obj$UMAP1 <- seurat_obj@reductions$umap@cell.embeddings[,1] seurat_obj$UMAP2 <- seurat_obj@reductions$umap@cell.embeddings[,2] p1 <- seurat_obj@meta.data %>% ggplot(aes(x=UMAP1, y=UMAP2, color=ery_pseudotime)) + ggrastr::rasterise(geom_point(size=1), dpi=500, scale=0.75) + coord_equal() + scale_color_gradientn(colors=plasma(256), na.value='grey') + umap_theme() p2 <- seurat_obj@meta.data %>% ggplot(aes(x=UMAP1, y=UMAP2, color=mono_pseudotime)) + ggrastr::rasterise(geom_point(size=1), dpi=500, scale=0.75) + coord_equal() + scale_color_gradientn(colors=viridis(256), na.value='grey') + umap_theme() p3 <- seurat_obj@meta.data %>% ggplot(aes(x=UMAP1, y=UMAP2, color=dc_pseudotime)) + ggrastr::rasterise(geom_point(size=1), dpi=500, scale=0.75) + coord_equal() + scale_color_gradientn(colors=inferno(256), na.value='grey') + umap_theme() p4 <- seurat_obj@meta.data %>% ggplot(aes(x=UMAP1, y=UMAP2, color=clp_pseudotime)) + ggrastr::rasterise(geom_point(size=1), dpi=500, scale=0.75) + coord_equal() + scale_color_gradientn(colors=mako(256), na.value='grey') + umap_theme() # assemble with patchwork (p1 | p2) / (p3 + p4) + plot_layout(ncol=1, guides='collect')
png(paste0(fig_dir, 'umap_trajectories.png'), width=8, height=8, res=500, units='in') (p1 | p2) / (p3 + p4) + plot_layout(ncol=1, guides='collect') dev.off()
In this section, we perform the essential steps of co-expression network analysis on this dataset. See this tutorial for a more detailed explaination of these steps.
# set up the WGCNA experiment in the Seurat object seurat_obj <- SetupForWGCNA( seurat_obj, gene_select = "fraction", fraction = 0.05, wgcna_name = 'trajectory' ) # construct metacells seurat_obj <- MetacellsByGroups( seurat_obj = seurat_obj, group.by = "all_cells", k = 50, target_metacells=250, ident.group = 'all_cells', min_cells=0, max_shared=5, ) seurat_obj <- NormalizeMetacells(seurat_obj) # setup expression matrix seurat_obj <- SetDatExpr( seurat_obj, group.by='all_cells', group_name = 'all' ) # test soft power parameter seurat_obj <- TestSoftPowers(seurat_obj) # construct the co-expression network seurat_obj <- ConstructNetwork( seurat_obj, tom_name='trajectory', overwrite_tom=TRUE ) # compute module eigengenes & connectivity seurat_obj <- ModuleEigengenes(seurat_obj) seurat_obj <- ModuleConnectivity(seurat_obj) # plot dendro PlotDendrogram(seurat_obj, main='hdWGCNA Dendrogram')
# plot dendro png(paste0(fig_dir, 'dendro.png'), width=6, height=3, res=500, units='in') PlotDendrogram(seurat_obj, main='hdWGCNA Dendrogram') dev.off()
See DotPlot code
####################################################################### # DotPlot of MEs by clusters ####################################################################### MEs <- GetMEs(seurat_obj) modules <- GetModules(seurat_obj) mods <- levels(modules$module) mods <- mods[mods!='grey'] meta <- seurat_obj@meta.data seurat_obj@meta.data <- cbind(meta, MEs) # make dotplot p <- DotPlot( seurat_obj, group.by='celltype', features = rev(mods) ) + RotatedAxis() + scale_color_gradient2(high='red', mid='grey95', low='blue') + xlab('') + ylab('') + theme( plot.title = element_text(hjust = 0.5), axis.line.x = element_blank(), axis.line.y = element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1) ) seurat_obj@meta.data <- meta p
png(paste0(fig_dir, 'dotplot_MEs.png'), width=8, height=4, res=500, unit='in') p dev.off() ####################################################################### # DotPlot of MEs by clusters ####################################################################### seurat_obj <- RunModuleUMAP( seurat_obj, n_hubs =5, n_neighbors=15, min_dist=0.2, spread=1 #supervised=TRUE, #target_weight=0.5 ) # get the hub gene UMAP table from the seurat object umap_df <- GetModuleUMAP( seurat_obj ) # plot with ggplot p <- ggplot(umap_df, aes(x=UMAP1, y=UMAP2)) + geom_point( color=umap_df$color, size=umap_df$kME*2 ) + umap_theme() pdf(paste0(fig_dir, 'test_hubgene_umap_ggplot_uns.pdf'), width=5, height=5) p dev.off() library(igraph) png(paste0(fig_dir, 'marker_coex_umap.png'), width=8, height=8, units='in', res=500) ModuleUMAPPlot( seurat_obj, edge.alpha=0.5, sample_edges=TRUE, keep_grey_edges=FALSE, edge_prop=0.075, # taking the top 20% strongest edges in each module # label_genes = umap_df$isoform_name[umap_df$gene_name %in% plot_genes], #label_genes = c(''), label_hubs=0 # how many hub genes to plot per module? ) dev.off()
In this section we use the hdWGCNA function PlotModuleTrajectory
to visualize how
the module eigengenes change throughout the pseudotime trajectories for each
co-expression module. This function requires you to specify the name of the column
in the Seurat object's meta data where the pseudotime information is stored. Since
we split our pseudotime into four different trajectories, first we will plot the ME
trajectory dynamics for the erythrocytes.
Importantly, we note that module dynamics can be studied using different pseudotime inference approaches, we simply chose to run Monocle3 as an example.
seurat_obj$ery_pseudotime <- NA p <- PlotModuleTrajectory( seurat_obj, pseudotime_col = 'ery_pseudotime' ) p
png(paste0(fig_dir, 'MEs_pseudotime_ery.png'), width=10, height=6, res=500, units='in') p dev.off()
Based on these dynamics, we can see which co-expression modules are turning their expression programs on or off throughout the transition from stem cells to mature erythrocytes.
We can also compare module dynamics for multiple trajectories simultaneously by passing more than one
pseudotime_col
parameters to PlotModuleTrajectory
.
# loading this package for color schemes, purely optional library(MetBrewer) p <- PlotModuleTrajectory( seurat_obj, pseudotime_col = c('ery_pseudotime', 'dc_pseudotime', 'mono_pseudotime', 'clp_pseudotime'), group_colors = paste0(met.brewer("Lakota", n=4, type='discrete')) ) p
png(paste0(fig_dir, 'MEs_pseudotime_test_multi.png'), width=10, height=6, units='in', res=500) p dev.off()
# just get the ery lineage seurat_ery <- subset(seurat_obj, celltype %in% c("HSC", "MEP", 'Ery')) seurat_ery <- BinPseudotime(seurat_ery, pseudotime_col='ery_pseudotime', n_bins=c(20)) levels(seurat_ery$ery_pseudotime_bins_20) %>% length seurat_ery <- BinPseudotime(seurat_ery, pseudotime_col='ery_pseudotime', n_bins=c(5)) bins <- levels(seurat_ery$ery_pseudotime_bins_5) for(i in 1:length(bins)){ cur_bin <- bins[i] print(i) print(cur_bin) if(cur_bin == bins[1]){ seurat_ery <- SetupForWGCNA( seurat_ery, gene_select = "fraction", group.by = 'ery_pseudotime_bins_5', fraction = 0.05, wgcna_name = paste0('pb', i) ) net_genes <- GetWGCNAGenes(seurat_ery) seurat_ery <- MetacellsByGroups( seurat_obj = seurat_ery, group.by = "ery_pseudotime_bins_5", k = 30, target_metacells=250, ident.group = 'ery_pseudotime_bins_5', min_cells=0, max_shared=10 ) seurat_ery <- NormalizeMetacells(seurat_ery) } else { seurat_ery <- SetupForWGCNA( seurat_ery, gene_select = "custom", features = net_genes, wgcna_name = paste0('pb', i), metacell_location = 'pb1' ) } # setup expression matrix seurat_ery <- SetDatExpr( seurat_ery, group.by='ery_pseudotime_bins_5', group_name = bins[i], use_metacells=TRUE, ) seurat_ery <- TestSoftPowers(seurat_ery) seurat_ery <- ConstructNetwork( seurat_ery, tom_name=paste0('ery_pb', i), overwrite_tom=TRUE ) seurat_ery <- ModuleEigengenes(seurat_ery, verbose=FALSE) seurat_ery <- ModuleConnectivity(seurat_ery) # run RenameModules # seurat_ery <- ResetModuleNames( # seurat_ery, # new_name = paste0("T", i, '-') # ) } genes <- unique(unlist(lapply(1:length(bins), function(x){GetWGCNAGenes(seurat_ery, paste0('pb', x))}))) test <- lapply(1:length(bins), function(x){ cur_mods <- GetModules(seurat_ery, wgcna_name = paste0('pb', x))[,1:3] cur_mods <- cur_mods[genes,] cur_mods$gene_name <- genes cur_mods$module <- ifelse(is.na(cur_mods$module), 'grey' , as.character(cur_mods$module)) cur_mods$color <- ifelse(is.na(cur_mods$color), 'grey' , as.character(cur_mods$color)) cur_mods }) test <- lapply(1:length(bins), function(x){ cur_mods <- GetModules(seurat_ery, wgcna_name = paste0('pb', x))[,1:3] cur_mods <- cur_mods[genes,] cur_mods$gene_name <- genes cur_mods$module <- ifelse(is.na(cur_mods$module), 'grey' , as.character(cur_mods$module)) cur_mods$color <- ifelse(is.na(cur_mods$color), 'grey' , as.character(cur_mods$color)) cur_mods <- cur_mods %>% select(c(module, color)) %>% distinct() cur_mods }) df <- test[[1]] df$module1 <- df$module df$color1 <- df$color for(i in 2:length(bins)){ df[,paste0('module', i)] <- test[[i]]$module df[,paste0('color', i)] <- test[[i]]$color } # plot the results: # assemble with patchwork pdf(paste0(fig_dir, 'test_softpower_ery_bins.pdf'), width=12, height=9) for(i in 1:length(bins)){ plot_list <- PlotSoftPowers(seurat_ery, wgcna_name = paste0('pb', i)) print(wrap_plots(plot_list, ncol=2) + plot_annotation(title=paste0('pb', i))) } dev.off() # assemble with patchwork pdf(paste0(fig_dir, 'ery_bins_bins.pdf'), width=6, height=3) for(i in 1:length(bins)){ PlotDendrogram(seurat_ery, main='hdWGCNA Dendrogram', wgcna_name=paste0('pb',i)) } dev.off() library(ggsankey) df <- df%>% ggsankey::make_long(module1, module2, module3, module4, module5) p <- ggplot(df, aes(x = x, next_x = next_x, node = node, next_node = next_node, fill = factor(node), label = node)) + geom_sankey() + geom_sankey_label(size=2) + theme_sankey(base_size = 16) + NoLegend() + scale_fill_manual(values=mod_colors) # scale_x_discrete(labels=c( # 'seurat_clusters_RNA' = 'Genes', # 'seurat_clusters_wnn' = 'WNN', # 'seurat_clusters_iso' = 'Isoforms' # )) png(paste0(fig_dir, 'module_sankey.png'), width=12, height=6, units='in', res=500) p dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.