Overview

We reproduce a lot of code from the Bioc 2020 trajectory workshop.

Dataset

The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to two treatments. McFaline-Figueroa et al. [@McFaline-Figueroa2019] studied the epithelial-to-mesenchymal transition (EMT), where cells spatially migrate from the epithelium to the mesenchyme during development.

libs <- c("dplyr", "tradeSeq", "SingleCellExperiment", "slingshot", "tidyr",
          "Seurat", "ggplot2", "condiments", "pheatmap", "fgsea", "msigdbr",
          "openxlsx", "monocle3", "miloR", "DAseq", "magrittr")
suppressMessages(
  suppressWarnings(sapply(libs, require, character.only = TRUE))
)
rm(libs)
knitr::opts_chunk$set(fig.width=7)
theme_set(theme_classic())

Load data

The data is imported using a function from the package. We then normalized using Seurat[@Stuart2019] and compute reduced dimension coordinates with UMAP [@Becht2019, @McInnes2018].

tgfb <- condimentsPaper::import_TGFB()
library(Seurat)
########################
### Split by condition and convert to Seurat
########################
assays(tgfb)$logcounts <- log1p(assays(tgfb)$counts)
tgfbMock <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='Mock']
tgfbTGFB <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='TGFB']
soMock <- as.Seurat(tgfbMock)
soTGFB <- as.Seurat(tgfbTGFB)

########################
### Normalize
########################
soMock <- SCTransform(soMock, verbose = FALSE)
soTGFB <- SCTransform(soTGFB, verbose = FALSE)

########################
### Integrate
########################
dtlist <- list(Mock = soMock, TGFB = soTGFB)
intfts <- SelectIntegrationFeatures(object.list = dtlist, 
                                    nfeatures = nrow(tgfb))
dtlist <- PrepSCTIntegration(object.list = dtlist,
                             anchor.features = intfts)
anchors <- FindIntegrationAnchors(object.list = dtlist, 
                                  normalization.method = "SCT",
                                  anchor.features = intfts)
integrated <- IntegrateData(anchorset = anchors, 
                            normalization.method = "SCT")
integrated <- RunPCA(integrated)
integrated <- RunUMAP(integrated, dims = 1:50)

## convert back to singleCellExperiment
tgfb <- as.SingleCellExperiment(integrated, assay = "RNA")
data("tgfb", package = "condimentsPaper")
df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[, -3])
  ) %>%
  sample_frac(1)
p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.treatment_id)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Accent") +
  labs(col = "Treatment")
p1
p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.spatial_id)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Dark2") +
  labs(col = "Spatial ID")
p2
scores <- condiments::imbalance_score(
  Object = df %>% select(UMAP_1, UMAP_2) %>% as.matrix(), 
  conditions = df$pheno.treatment_id,
  k = 20, smooth = 40)
df$scores <- scores$scaled_scores
p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) +
  geom_point(size = .7) +
  scale_color_viridis_c(option = "C") +
  labs(col = "Scores")
p3

Trajectory Inference and Differential Topology

To estimate the trajectory, we use slingshot [@Street2018a].

library(slingshot)
tgfb <- slingshot(tgfb, reducedDim = 'UMAP',
                  clusterLabels = colData(tgfb)$pheno$spatial_id,
                  start.clus = 'inner', approx_points = 100)
set.seed(821)
topologyTest(SlingshotDataSet(tgfb), tgfb$pheno$treatment_id, rep = 100,
             methods = "KS_mean", threshs = .01)
df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[,-3])
  ) %>%
  sample_frac(1)
curve <- slingCurves(tgfb)[[1]]
p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = slingPseudotime_1)) +
  geom_point(size = .7) +
  scale_color_viridis_c() +
  labs(col = "Pseudotime") +
  geom_path(data = curve$s[curve$ord, ] %>% as.data.frame(),
            col = "black", size = 1.5)
p4

Differential progression

p5 <- ggplot(df, aes(x = slingPseudotime_1)) +
  geom_density(alpha = .8, aes(fill = pheno.treatment_id), col = "transparent") +
  geom_density(aes(col = pheno.treatment_id), fill = "transparent",
               guide = FALSE, size = 1.5) +
  labs(x = "Pseudotime", fill = "Treatment") +
  guides(col = "none", fill = guide_legend(
    override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4"))
  )) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent")
p5
progressionTest(SlingshotDataSet(tgfb), conditions = tgfb$pheno$treatment_id)

Differential expression

We use tradeSeq [@VandenBerge2020].

Select number of knots

library(tradeSeq)
set.seed(3)
icMat <- evaluateK(counts = as.matrix(assays(tgfb)$counts),
                   pseudotime = colData(tgfb)$slingshot$pseudotime,
                   cellWeights = colData(tgfb)$slingshot$cellWeights.V1,
                   conditions = factor(colData(tgfb)$pheno$treatment_id),
                   nGenes = 300,
                   k = 3:7)

Fit GAM

set.seed(3)
tgfb <- fitGAM(counts = tgfb, nknots = 5,
               conditions = factor(colData(tgfb)$pheno$treatment_id),
               )
mean(rowData(tgfb)$tradeSeq$converged)

Differential expression between conditions

condRes <- conditionTest(tgfb, l2fc = log2(2))
condRes$padj <- p.adjust(condRes$pvalue, "fdr")
mean(condRes$padj <= 0.05, na.rm = TRUE)
sum(condRes$padj <= 0.05, na.rm = TRUE)
conditionGenes <- rownames(condRes)[condRes$padj <= 0.05]
conditionGenes <- conditionGenes[!is.na(conditionGenes)]

Visualize most and least significant gene

library(RColorBrewer)
scales <- brewer.pal(3, "Accent")[1:2]

# plot genes
oo <- order(condRes$waldStat, decreasing = TRUE)

# most significant gene
p6 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[1]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[1]])

# Second most significant gene
p7 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[2]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[2]])

# least significant gene
p8 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]])
p6
p7
p8

Biologically relevant genes

# TGF genes
grep(x = conditionGenes, pattern = "TGF", value = TRUE)

# plot TGFB receptors
# Second most significant gene
pr1 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR1", alpha = 1,
                     border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  guides(col = 'none') +
  theme(plot.title = element_text(hjust = .5)) +
  ggtitle("TGFBR1")
pr1
pr2 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR2",
                     alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  theme(plot.title = element_text(hjust = .5), legend.position = "top") +
  ggtitle("TGFBR2")
pr2

Heatmaps of genes DE between conditions

Below we show heatmaps of the genes DE between conditions. The DE genes in the heatmaps are ordered according to a hierarchical clustering on the TGF-Beta condition.

library(cowplot)
library(scales)
### based on mean smoother
yhatSmooth <- 
  predictSmooth(tgfb, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>%
  log1p()
yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale))
heatSmooth_TGF <- pheatmap(yhatSmoothScaled[, 51:100],
  cluster_cols = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "TGF-Beta", legend = FALSE,
  silent = TRUE
)

matchingHeatmap_mock <- 
  pheatmap(yhatSmoothScaled[heatSmooth_TGF$tree_row$order, 1:50],
  cluster_cols = FALSE, cluster_rows = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "Mock",
  legend = FALSE, silent = TRUE 
)

p9 <- plot_grid(heatSmooth_TGF[[4]], matchingHeatmap_mock[[4]], ncol = 2)
p9

Gene set enrichment analysis

This is done using the fgsea package [@Korotkevich2016].

geneSets <- msigdbr(species = "Mus musculus", category = "C5", subcategory = "BP") %>%
  mutate(gene_symbol = toupper(gene_symbol)) %>%
  filter(gene_symbol %in% names(tgfb)) 
m_list <- geneSets %>% split(x = .$gene_symbol, f = .$gs_name)
statsCond <- condRes$waldStat
names(statsCond) <- rownames(condRes)
eaRes <- fgsea(pathways = m_list, stats = statsCond, nperm = 5e4, minSize = 10)
eaRes <- eaRes %>% arrange(pval)
knitr::kable(head(eaRes[, 1:3], n = 20))

Running with Monocle 3

Trajectory inference and differential progression

clusters <- colData(tgfb)$pheno$spatial_id
names(clusters) <- colnames(tgfb)
clusters[reducedDim(tgfb, "UMAP")[,1] > 0] <- "outer"
tgfb$condition <- tgfb$pheno$treatment_id
cds <- condimentsPaper:::.running_monocle(tgfb, clusters, start = "inner",
                                          params = list(minimal_branch_len = 20),
                                          keep_cds = TRUE)
cds$cds@colData$conditions <- tgfb$condition
plot_cells(cds$cds, color_cells_by = "conditions", label_branch_points = FALSE,
           label_roots = FALSE, label_leaves = FALSE, cell_size = .7, 
           label_groups_by_cluster = FALSE, trajectory_graph_segment_size = 1.5,
           trajectory_graph_color = "black", label_cell_groups = FALSE) +
  scale_color_brewer(palette = "Accent") +
  theme_classic() +
  theme(legend.position = "bottom",
        rect = element_blank())
df <- data.frame(pseudotime = cds$pseudotime[,1],
                 Treatment = cds$conditions)
ggplot(df, aes(x = pseudotime)) +
  geom_density(alpha = .8, aes(fill = Treatment), col = "transparent") +
  geom_density(aes(col = Treatment), fill = "transparent",
               guide = FALSE, size = 1.5) +
  guides(col = "none", fill = guide_legend(
    override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4"))
  )) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom")
progressionTest(cds$pseudotime, cds$cellWeights, conditions = cds$conditions)

Fit GAM

set.seed(3)
gamModels <- fitGAM(counts = as.matrix(assays(tgfb)$counts),
              pseudotime = cds$pseudotime[,1],
              cellWeights = cds$cellWeights[,1],
              conditions = factor(cds$conditions),
              nknots = 5)
mean(rowData(gamModels)$tradeSeq$converged)
condRes_monocle <- conditionTest(gamModels, l2fc = log2(2))
condRes_monocle$padj <- p.adjust(condRes_monocle$pvalue, "fdr")
condRes_monocle <- condRes_monocle %>% mutate(gene = rownames(.))
data("condRes_monocle", package = "condimentsPaper")
condRes <- bind_rows(
  "Monocle" = condRes_monocle,
  "Slingshot" = condRes %>% mutate(gene = rownames(.)),
  .id = "Trajectory"
)
condRes %>% 
  select(Trajectory, gene, waldStat) %>%
  pivot_wider(names_from = "Trajectory", values_from = "waldStat") %$%
  cor(.$`Monocle`, .$`Slingshot`, use = "complete.obs")
condRes %>% 
  select(Trajectory, gene, padj) %>%
  pivot_wider(names_from = "Trajectory", values_from = "padj") %>%
  filter(!is.na(Monocle) & !is.na(Slingshot)) %>%
  mutate(Monocle = if_else(Monocle <= .05, "sig", "not_sig"),
         Slingshot = if_else(Slingshot <= .05, "sig", "not_sig")) %>%
  group_by(Monocle, Slingshot) %>%
  summarise(n = n())

DASeq

tgfb$condition <- tgfb$pheno$treatment_id
colnames(tgfb) <- paste0("cell", seq_len(ncol(tgfb)))
rownames(reducedDim(tgfb)) <- colnames(tgfb)
tgfb$Sample <- sample(1:5, ncol(tgfb), replace = TRUE)
tgfb$Sample[tgfb$condition == "TGFB"] <- 
  tgfb$Sample[tgfb$condition == "TGFB"] + 10
tgfb$Sample <- as.character(tgfb$Sample)
info <- colData(tgfb)[, c("Sample", "condition")] %>%
  as.data.frame() %>%
  distinct()
da_cells <- getDAcells(
  X = reducedDim(tgfb, "UMAP"),
  cell.labels = tgfb$Sample,
  labels.1 = info$Sample[info$condition == "Mock"],
  labels.2 = info$Sample[info$condition == "TGFB"],
  k.vector = seq(50, 500, 50),
  do.plot = TRUE,
  plot.embedding = reducedDim(tgfb, "UMAP")
)
da_regions <- getDAregion(
  X = reducedDim(tgfb, "UMAP"),
  da.cells = da_cells,
  cell.labels = tgfb$Sample,
  labels.1 = info$Sample[info$condition == "Mock"],
  labels.2 = info$Sample[info$condition == "TGFB"],
  do.plot = TRUE,
  plot.embedding = reducedDim(tgfb, "UMAP")
)
df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[, -3])
  )
df <- df[da_regions$cell.idx, ]
df$region <- da_regions$da.region.label %>% as.character()
df <- df %>%
  select(UMAP_1, UMAP_2, region, condition) %>%
  full_join(da_regions$DA.stat %>%
              as.data.frame %>% 
              mutate(region = rownames(.)))
ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = DA.score)) +
  geom_point(size = .7) +
  scale_colour_gradient2() +
  labs(col = "DA Score\nfrom DASeq") +
  theme(legend.position = "bottom")

Milo

logcounts(tgfb) <- log1p(counts(tgfb))
milo <- Milo(tgfb)
milo <- buildGraph(milo, reduced.dim = "UMAP", d = 2, k = 20,
                   BPPARAM = BiocParallel::SerialParam())
milo <- makeNhoods(milo, refined = TRUE, reduced_dims = "UMAP", d = 2)
milo$Sample <- sample(1:5, ncol(tgfb), replace = TRUE)
milo$Sample[milo$condition == "TGFB"] <- 
  milo$Sample[milo$condition == "TGFB"] + 5
milo$Sample <- as.character(milo$Sample)
design.df <- colData(milo)[, c("condition", "Sample")] %>%
  as.data.frame()
milo <- miloR::countCells(milo, meta.data = design.df, sample = "Sample")
milo <- calcNhoodDistance(x = milo, d = 2, reduced.dim = "UMAP")
design.df <- design.df %>% distinct()
rownames(design.df) <- design.df$Sample
da_results <- testNhoods(milo, design = ~ condition, design.df = design.df)
milo <- buildNhoodGraph(milo)
da_results <- groupNhoods(milo, da_results, max.lfc.delta = 2)
plotNhoodGraphDA(milo, da_results, layout = "UMAP", alpha = 0.1) +
  guides(edge_width = "none",
         size = "none",
         fill = guide_colorbar(title = "logFC\nfrom Milo")) +
  theme_classic() +
  labs(x = "UMAP_1", y = "UMAP_2") +
  theme(legend.position = "bottom",
        legend.title = element_text(size = 11))

Session info

sessionInfo()

References



HectorRDB/condimentsPaper documentation built on Oct. 16, 2021, 7:02 p.m.