We reproduce a lot of code from the Bioc 2020 trajectory workshop.
The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to two treatments. McFaline-Figueroa et al. [@McFaline-Figueroa2019] studied the epithelial-to-mesenchymal transition (EMT), where cells spatially migrate from the epithelium to the mesenchyme during development.
libs <- c("dplyr", "tradeSeq", "SingleCellExperiment", "slingshot", "tidyr", "Seurat", "ggplot2", "condiments", "pheatmap", "fgsea", "msigdbr", "openxlsx", "monocle3", "miloR", "DAseq", "magrittr") suppressMessages( suppressWarnings(sapply(libs, require, character.only = TRUE)) ) rm(libs) knitr::opts_chunk$set(fig.width=7) theme_set(theme_classic())
The data is imported using a function from the package. We then normalized using Seurat[@Stuart2019] and compute reduced dimension coordinates with UMAP [@Becht2019, @McInnes2018].
tgfb <- condimentsPaper::import_TGFB() library(Seurat) ######################## ### Split by condition and convert to Seurat ######################## assays(tgfb)$logcounts <- log1p(assays(tgfb)$counts) tgfbMock <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='Mock'] tgfbTGFB <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='TGFB'] soMock <- as.Seurat(tgfbMock) soTGFB <- as.Seurat(tgfbTGFB) ######################## ### Normalize ######################## soMock <- SCTransform(soMock, verbose = FALSE) soTGFB <- SCTransform(soTGFB, verbose = FALSE) ######################## ### Integrate ######################## dtlist <- list(Mock = soMock, TGFB = soTGFB) intfts <- SelectIntegrationFeatures(object.list = dtlist, nfeatures = nrow(tgfb)) dtlist <- PrepSCTIntegration(object.list = dtlist, anchor.features = intfts) anchors <- FindIntegrationAnchors(object.list = dtlist, normalization.method = "SCT", anchor.features = intfts) integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT") integrated <- RunPCA(integrated) integrated <- RunUMAP(integrated, dims = 1:50) ## convert back to singleCellExperiment tgfb <- as.SingleCellExperiment(integrated, assay = "RNA")
data("tgfb", package = "condimentsPaper")
df <- bind_cols( as.data.frame(reducedDims(tgfb)$UMAP), as.data.frame(colData(tgfb)[, -3]) ) %>% sample_frac(1) p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.treatment_id)) + geom_point(size = .7) + scale_color_brewer(palette = "Accent") + labs(col = "Treatment") p1
p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.spatial_id)) + geom_point(size = .7) + scale_color_brewer(palette = "Dark2") + labs(col = "Spatial ID") p2
scores <- condiments::imbalance_score( Object = df %>% select(UMAP_1, UMAP_2) %>% as.matrix(), conditions = df$pheno.treatment_id, k = 20, smooth = 40) df$scores <- scores$scaled_scores p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) + geom_point(size = .7) + scale_color_viridis_c(option = "C") + labs(col = "Scores") p3
To estimate the trajectory, we use slingshot [@Street2018a].
library(slingshot) tgfb <- slingshot(tgfb, reducedDim = 'UMAP', clusterLabels = colData(tgfb)$pheno$spatial_id, start.clus = 'inner', approx_points = 100)
set.seed(821) topologyTest(SlingshotDataSet(tgfb), tgfb$pheno$treatment_id, rep = 100, methods = "KS_mean", threshs = .01)
df <- bind_cols( as.data.frame(reducedDims(tgfb)$UMAP), as.data.frame(colData(tgfb)[,-3]) ) %>% sample_frac(1) curve <- slingCurves(tgfb)[[1]] p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = slingPseudotime_1)) + geom_point(size = .7) + scale_color_viridis_c() + labs(col = "Pseudotime") + geom_path(data = curve$s[curve$ord, ] %>% as.data.frame(), col = "black", size = 1.5) p4
p5 <- ggplot(df, aes(x = slingPseudotime_1)) + geom_density(alpha = .8, aes(fill = pheno.treatment_id), col = "transparent") + geom_density(aes(col = pheno.treatment_id), fill = "transparent", guide = FALSE, size = 1.5) + labs(x = "Pseudotime", fill = "Treatment") + guides(col = "none", fill = guide_legend( override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4")) )) + scale_fill_brewer(palette = "Accent") + scale_color_brewer(palette = "Accent") p5
progressionTest(SlingshotDataSet(tgfb), conditions = tgfb$pheno$treatment_id)
We use tradeSeq [@VandenBerge2020].
library(tradeSeq) set.seed(3) icMat <- evaluateK(counts = as.matrix(assays(tgfb)$counts), pseudotime = colData(tgfb)$slingshot$pseudotime, cellWeights = colData(tgfb)$slingshot$cellWeights.V1, conditions = factor(colData(tgfb)$pheno$treatment_id), nGenes = 300, k = 3:7)
set.seed(3) tgfb <- fitGAM(counts = tgfb, nknots = 5, conditions = factor(colData(tgfb)$pheno$treatment_id), ) mean(rowData(tgfb)$tradeSeq$converged)
condRes <- conditionTest(tgfb, l2fc = log2(2)) condRes$padj <- p.adjust(condRes$pvalue, "fdr") mean(condRes$padj <= 0.05, na.rm = TRUE) sum(condRes$padj <= 0.05, na.rm = TRUE) conditionGenes <- rownames(condRes)[condRes$padj <= 0.05] conditionGenes <- conditionGenes[!is.na(conditionGenes)]
library(RColorBrewer) scales <- brewer.pal(3, "Accent")[1:2] # plot genes oo <- order(condRes$waldStat, decreasing = TRUE) # most significant gene p6 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = rownames(assays(tgfb)$counts)[oo[1]], alpha = 1, border = TRUE, curvesCols = scales) + scale_color_manual(values = scales) + ggtitle(rownames(assays(tgfb)$counts)[oo[1]]) # Second most significant gene p7 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = rownames(assays(tgfb)$counts)[oo[2]], alpha = 1, border = TRUE, curvesCols = scales) + scale_color_manual(values = scales) + ggtitle(rownames(assays(tgfb)$counts)[oo[2]]) # least significant gene p8 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]], alpha = 1, border = TRUE, curvesCols = scales) + scale_color_manual(values = scales) + ggtitle(rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]]) p6 p7 p8
# TGF genes grep(x = conditionGenes, pattern = "TGF", value = TRUE) # plot TGFB receptors # Second most significant gene pr1 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR1", alpha = 1, border = TRUE, curvesCols = scales) + scale_color_manual(values = scales) + guides(col = 'none') + theme(plot.title = element_text(hjust = .5)) + ggtitle("TGFBR1") pr1 pr2 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR2", alpha = 1, border = TRUE, curvesCols = scales) + scale_color_manual(values = scales) + theme(plot.title = element_text(hjust = .5), legend.position = "top") + ggtitle("TGFBR2") pr2
Below we show heatmaps of the genes DE between conditions. The DE genes in the heatmaps are ordered according to a hierarchical clustering on the TGF-Beta condition.
library(cowplot) library(scales) ### based on mean smoother yhatSmooth <- predictSmooth(tgfb, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>% log1p() yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale)) heatSmooth_TGF <- pheatmap(yhatSmoothScaled[, 51:100], cluster_cols = FALSE, show_rownames = FALSE, show_colnames = FALSE, main = "TGF-Beta", legend = FALSE, silent = TRUE ) matchingHeatmap_mock <- pheatmap(yhatSmoothScaled[heatSmooth_TGF$tree_row$order, 1:50], cluster_cols = FALSE, cluster_rows = FALSE, show_rownames = FALSE, show_colnames = FALSE, main = "Mock", legend = FALSE, silent = TRUE ) p9 <- plot_grid(heatSmooth_TGF[[4]], matchingHeatmap_mock[[4]], ncol = 2) p9
This is done using the fgsea package [@Korotkevich2016].
geneSets <- msigdbr(species = "Mus musculus", category = "C5", subcategory = "BP") %>% mutate(gene_symbol = toupper(gene_symbol)) %>% filter(gene_symbol %in% names(tgfb)) m_list <- geneSets %>% split(x = .$gene_symbol, f = .$gs_name) statsCond <- condRes$waldStat names(statsCond) <- rownames(condRes) eaRes <- fgsea(pathways = m_list, stats = statsCond, nperm = 5e4, minSize = 10) eaRes <- eaRes %>% arrange(pval) knitr::kable(head(eaRes[, 1:3], n = 20))
clusters <- colData(tgfb)$pheno$spatial_id names(clusters) <- colnames(tgfb) clusters[reducedDim(tgfb, "UMAP")[,1] > 0] <- "outer" tgfb$condition <- tgfb$pheno$treatment_id cds <- condimentsPaper:::.running_monocle(tgfb, clusters, start = "inner", params = list(minimal_branch_len = 20), keep_cds = TRUE) cds$cds@colData$conditions <- tgfb$condition plot_cells(cds$cds, color_cells_by = "conditions", label_branch_points = FALSE, label_roots = FALSE, label_leaves = FALSE, cell_size = .7, label_groups_by_cluster = FALSE, trajectory_graph_segment_size = 1.5, trajectory_graph_color = "black", label_cell_groups = FALSE) + scale_color_brewer(palette = "Accent") + theme_classic() + theme(legend.position = "bottom", rect = element_blank())
df <- data.frame(pseudotime = cds$pseudotime[,1], Treatment = cds$conditions) ggplot(df, aes(x = pseudotime)) + geom_density(alpha = .8, aes(fill = Treatment), col = "transparent") + geom_density(aes(col = Treatment), fill = "transparent", guide = FALSE, size = 1.5) + guides(col = "none", fill = guide_legend( override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4")) )) + scale_fill_brewer(palette = "Accent") + scale_color_brewer(palette = "Accent") + theme(legend.position = "bottom")
progressionTest(cds$pseudotime, cds$cellWeights, conditions = cds$conditions)
set.seed(3) gamModels <- fitGAM(counts = as.matrix(assays(tgfb)$counts), pseudotime = cds$pseudotime[,1], cellWeights = cds$cellWeights[,1], conditions = factor(cds$conditions), nknots = 5) mean(rowData(gamModels)$tradeSeq$converged) condRes_monocle <- conditionTest(gamModels, l2fc = log2(2)) condRes_monocle$padj <- p.adjust(condRes_monocle$pvalue, "fdr") condRes_monocle <- condRes_monocle %>% mutate(gene = rownames(.))
data("condRes_monocle", package = "condimentsPaper") condRes <- bind_rows( "Monocle" = condRes_monocle, "Slingshot" = condRes %>% mutate(gene = rownames(.)), .id = "Trajectory" ) condRes %>% select(Trajectory, gene, waldStat) %>% pivot_wider(names_from = "Trajectory", values_from = "waldStat") %$% cor(.$`Monocle`, .$`Slingshot`, use = "complete.obs") condRes %>% select(Trajectory, gene, padj) %>% pivot_wider(names_from = "Trajectory", values_from = "padj") %>% filter(!is.na(Monocle) & !is.na(Slingshot)) %>% mutate(Monocle = if_else(Monocle <= .05, "sig", "not_sig"), Slingshot = if_else(Slingshot <= .05, "sig", "not_sig")) %>% group_by(Monocle, Slingshot) %>% summarise(n = n())
tgfb$condition <- tgfb$pheno$treatment_id colnames(tgfb) <- paste0("cell", seq_len(ncol(tgfb))) rownames(reducedDim(tgfb)) <- colnames(tgfb) tgfb$Sample <- sample(1:5, ncol(tgfb), replace = TRUE) tgfb$Sample[tgfb$condition == "TGFB"] <- tgfb$Sample[tgfb$condition == "TGFB"] + 10 tgfb$Sample <- as.character(tgfb$Sample) info <- colData(tgfb)[, c("Sample", "condition")] %>% as.data.frame() %>% distinct() da_cells <- getDAcells( X = reducedDim(tgfb, "UMAP"), cell.labels = tgfb$Sample, labels.1 = info$Sample[info$condition == "Mock"], labels.2 = info$Sample[info$condition == "TGFB"], k.vector = seq(50, 500, 50), do.plot = TRUE, plot.embedding = reducedDim(tgfb, "UMAP") ) da_regions <- getDAregion( X = reducedDim(tgfb, "UMAP"), da.cells = da_cells, cell.labels = tgfb$Sample, labels.1 = info$Sample[info$condition == "Mock"], labels.2 = info$Sample[info$condition == "TGFB"], do.plot = TRUE, plot.embedding = reducedDim(tgfb, "UMAP") ) df <- bind_cols( as.data.frame(reducedDims(tgfb)$UMAP), as.data.frame(colData(tgfb)[, -3]) ) df <- df[da_regions$cell.idx, ] df$region <- da_regions$da.region.label %>% as.character() df <- df %>% select(UMAP_1, UMAP_2, region, condition) %>% full_join(da_regions$DA.stat %>% as.data.frame %>% mutate(region = rownames(.))) ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = DA.score)) + geom_point(size = .7) + scale_colour_gradient2() + labs(col = "DA Score\nfrom DASeq") + theme(legend.position = "bottom")
logcounts(tgfb) <- log1p(counts(tgfb)) milo <- Milo(tgfb) milo <- buildGraph(milo, reduced.dim = "UMAP", d = 2, k = 20, BPPARAM = BiocParallel::SerialParam()) milo <- makeNhoods(milo, refined = TRUE, reduced_dims = "UMAP", d = 2) milo$Sample <- sample(1:5, ncol(tgfb), replace = TRUE) milo$Sample[milo$condition == "TGFB"] <- milo$Sample[milo$condition == "TGFB"] + 5 milo$Sample <- as.character(milo$Sample) design.df <- colData(milo)[, c("condition", "Sample")] %>% as.data.frame() milo <- miloR::countCells(milo, meta.data = design.df, sample = "Sample") milo <- calcNhoodDistance(x = milo, d = 2, reduced.dim = "UMAP") design.df <- design.df %>% distinct() rownames(design.df) <- design.df$Sample da_results <- testNhoods(milo, design = ~ condition, design.df = design.df) milo <- buildNhoodGraph(milo) da_results <- groupNhoods(milo, da_results, max.lfc.delta = 2) plotNhoodGraphDA(milo, da_results, layout = "UMAP", alpha = 0.1) + guides(edge_width = "none", size = "none", fill = guide_colorbar(title = "logFC\nfrom Milo")) + theme_classic() + labs(x = "UMAP_1", y = "UMAP_2") + theme(legend.position = "bottom", legend.title = element_text(size = 11))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.