In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

Overview

We reproduce a lot of code from the Bioc 2020 trajectory workshop.

Dataset

The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to two treatments. McFaline-Figueroa et al. [@McFaline-Figueroa2019] studied the epithelial-to-mesenchymal transition (EMT), where cells spatially migrate from the epithelium to the mesenchyme during development.

libs <- c("dplyr", "tradeSeq", "SingleCellExperiment", "slingshot", "tidyr",
          "Seurat", "ggplot2", "condiments", "pheatmap", "fgsea", "msigdbr",
          "openxlsx", "monocle3", "miloR", "DAseq", "magrittr")
suppressMessages(
  suppressWarnings(sapply(libs, require, character.only = TRUE))
)
rm(libs)
knitr::opts_chunk$set(fig.width=7)
theme_set(theme_classic())

Load data

The data is imported using a function from the package. We then normalized using Seurat[@Stuart2019] and compute reduced dimension coordinates with UMAP [@Becht2019, @McInnes2018].

tgfb <- condimentsPaper::import_TGFB()
library(Seurat)
########################
### Split by condition and convert to Seurat
########################
assays(tgfb)$logcounts <- log1p(assays(tgfb)$counts)
tgfbMock <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='Mock']
tgfbTGFB <- tgfb[ ,colData(tgfb)$pheno$treatment_id=='TGFB']
soMock <- as.Seurat(tgfbMock)
soTGFB <- as.Seurat(tgfbTGFB)

########################
### Normalize
########################
soMock <- SCTransform(soMock, verbose = FALSE)
soTGFB <- SCTransform(soTGFB, verbose = FALSE)

########################
### Integrate
########################
dtlist <- list(Mock = soMock, TGFB = soTGFB)
intfts <- SelectIntegrationFeatures(object.list = dtlist, 
                                    nfeatures = nrow(tgfb))
dtlist <- PrepSCTIntegration(object.list = dtlist,
                             anchor.features = intfts)
anchors <- FindIntegrationAnchors(object.list = dtlist, 
                                  normalization.method = "SCT",
                                  anchor.features = intfts)
integrated <- IntegrateData(anchorset = anchors, 
                            normalization.method = "SCT")
integrated <- RunPCA(integrated)
integrated <- RunUMAP(integrated, dims = 1:50)

## convert back to singleCellExperiment
tgfb <- as.SingleCellExperiment(integrated, assay = "RNA")

data("tgfb", package = "condimentsPaper")

df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[, -3])
  ) %>%
  sample_frac(1)
p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.treatment_id)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Accent") +
  labs(col = "Treatment")
p1

p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pheno.spatial_id)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Dark2") +
  labs(col = "Spatial ID")
p2

scores <- condiments::imbalance_score(
  Object = df %>% select(UMAP_1, UMAP_2) %>% as.matrix(), 
  conditions = df$pheno.treatment_id,
  k = 20, smooth = 40)
df$scores <- scores$scaled_scores
p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) +
  geom_point(size = .7) +
  scale_color_viridis_c(option = "C") +
  labs(col = "Scores")
p3

Trajectory Inference and Differential Topology

To estimate the trajectory, we use slingshot [@Street2018a].

library(slingshot)
tgfb <- slingshot(tgfb, reducedDim = 'UMAP',
                  clusterLabels = colData(tgfb)$pheno$spatial_id,
                  start.clus = 'inner', approx_points = 100)

set.seed(821)
topologyTest(SlingshotDataSet(tgfb), tgfb$pheno$treatment_id, rep = 100,
             methods = "KS_mean", threshs = .01)

df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[,-3])
  ) %>%
  sample_frac(1)
curve <- slingCurves(tgfb)[[1]]
p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = slingPseudotime_1)) +
  geom_point(size = .7) +
  scale_color_viridis_c() +
  labs(col = "Pseudotime") +
  geom_path(data = curve$s[curve$ord, ] %>% as.data.frame(),
            col = "black", size = 1.5)
p4

Differential progression

p5 <- ggplot(df, aes(x = slingPseudotime_1)) +
  geom_density(alpha = .8, aes(fill = pheno.treatment_id), col = "transparent") +
  geom_density(aes(col = pheno.treatment_id), fill = "transparent",
               guide = FALSE, size = 1.5) +
  labs(x = "Pseudotime", fill = "Treatment") +
  guides(col = "none", fill = guide_legend(
    override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4"))
  )) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent")
p5

progressionTest(SlingshotDataSet(tgfb), conditions = tgfb$pheno$treatment_id)

Differential expression

We use tradeSeq [@VandenBerge2020].

Select number of knots

library(tradeSeq)
set.seed(3)
icMat <- evaluateK(counts = as.matrix(assays(tgfb)$counts),
                   pseudotime = colData(tgfb)$slingshot$pseudotime,
                   cellWeights = colData(tgfb)$slingshot$cellWeights.V1,
                   conditions = factor(colData(tgfb)$pheno$treatment_id),
                   nGenes = 300,
                   k = 3:7)

Fit GAM

set.seed(3)
tgfb <- fitGAM(counts = tgfb, nknots = 5,
               conditions = factor(colData(tgfb)$pheno$treatment_id),
               )
mean(rowData(tgfb)$tradeSeq$converged)

Differential expression between conditions

condRes <- conditionTest(tgfb, l2fc = log2(2))
condRes$padj <- p.adjust(condRes$pvalue, "fdr")
mean(condRes$padj <= 0.05, na.rm = TRUE)
sum(condRes$padj <= 0.05, na.rm = TRUE)
conditionGenes <- rownames(condRes)[condRes$padj <= 0.05]
conditionGenes <- conditionGenes[!is.na(conditionGenes)]

Visualize most and least significant gene

library(RColorBrewer)
scales <- brewer.pal(3, "Accent")[1:2]

# plot genes
oo <- order(condRes$waldStat, decreasing = TRUE)

# most significant gene
p6 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[1]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[1]])

# Second most significant gene
p7 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[2]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[2]])

# least significant gene
p8 <- plotSmoothers(tgfb, assays(tgfb)$counts,
              gene = rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]],
              alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tgfb)$counts)[oo[nrow(tgfb)]])
p6
p7
p8

Biologically relevant genes

# TGF genes
grep(x = conditionGenes, pattern = "TGF", value = TRUE)

# plot TGFB receptors
# Second most significant gene
pr1 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR1", alpha = 1,
                     border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  guides(col = 'none') +
  theme(plot.title = element_text(hjust = .5)) +
  ggtitle("TGFBR1")
pr1
pr2 <- plotSmoothers(tgfb, assays(tgfb)$counts, gene = "TGFBR2",
                     alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  theme(plot.title = element_text(hjust = .5), legend.position = "top") +
  ggtitle("TGFBR2")
pr2

Heatmaps of genes DE between conditions

Below we show heatmaps of the genes DE between conditions. The DE genes in the heatmaps are ordered according to a hierarchical clustering on the TGF-Beta condition.

library(cowplot)
library(scales)
### based on mean smoother
yhatSmooth <- 
  predictSmooth(tgfb, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>%
  log1p()
yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale))
heatSmooth_TGF <- pheatmap(yhatSmoothScaled[, 51:100],
  cluster_cols = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "TGF-Beta", legend = FALSE,
  silent = TRUE
)

matchingHeatmap_mock <- 
  pheatmap(yhatSmoothScaled[heatSmooth_TGF$tree_row$order, 1:50],
  cluster_cols = FALSE, cluster_rows = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "Mock",
  legend = FALSE, silent = TRUE 
)

p9 <- plot_grid(heatSmooth_TGF[[4]], matchingHeatmap_mock[[4]], ncol = 2)
p9

Gene set enrichment analysis

This is done using the fgsea package [@Korotkevich2016].

geneSets <- msigdbr(species = "Mus musculus", category = "C5", subcategory = "BP") %>%
  mutate(gene_symbol = toupper(gene_symbol)) %>%
  filter(gene_symbol %in% names(tgfb)) 
m_list <- geneSets %>% split(x = .$gene_symbol, f = .$gs_name)
statsCond <- condRes$waldStat
names(statsCond) <- rownames(condRes)
eaRes <- fgsea(pathways = m_list, stats = statsCond, nperm = 5e4, minSize = 10)
eaRes <- eaRes %>% arrange(pval)
knitr::kable(head(eaRes[, 1:3], n = 20))

Running with Monocle 3

Trajectory inference and differential progression

clusters <- colData(tgfb)$pheno$spatial_id
names(clusters) <- colnames(tgfb)
clusters[reducedDim(tgfb, "UMAP")[,1] > 0] <- "outer"
tgfb$condition <- tgfb$pheno$treatment_id
cds <- condimentsPaper:::.running_monocle(tgfb, clusters, start = "inner",
                                          params = list(minimal_branch_len = 20),
                                          keep_cds = TRUE)
cds$cds@colData$conditions <- tgfb$condition
plot_cells(cds$cds, color_cells_by = "conditions", label_branch_points = FALSE,
           label_roots = FALSE, label_leaves = FALSE, cell_size = .7, 
           label_groups_by_cluster = FALSE, trajectory_graph_segment_size = 1.5,
           trajectory_graph_color = "black", label_cell_groups = FALSE) +
  scale_color_brewer(palette = "Accent") +
  theme_classic() +
  theme(legend.position = "bottom",
        rect = element_blank())

df <- data.frame(pseudotime = cds$pseudotime[,1],
                 Treatment = cds$conditions)
ggplot(df, aes(x = pseudotime)) +
  geom_density(alpha = .8, aes(fill = Treatment), col = "transparent") +
  geom_density(aes(col = Treatment), fill = "transparent",
               guide = FALSE, size = 1.5) +
  guides(col = "none", fill = guide_legend(
    override.aes = list(size = 1.5, col = c("#7FC97F", "#BEAED4"))
  )) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom")

progressionTest(cds$pseudotime, cds$cellWeights, conditions = cds$conditions)

Fit GAM

set.seed(3)
gamModels <- fitGAM(counts = as.matrix(assays(tgfb)$counts),
              pseudotime = cds$pseudotime[,1],
              cellWeights = cds$cellWeights[,1],
              conditions = factor(cds$conditions),
              nknots = 5)
mean(rowData(gamModels)$tradeSeq$converged)
condRes_monocle <- conditionTest(gamModels, l2fc = log2(2))
condRes_monocle$padj <- p.adjust(condRes_monocle$pvalue, "fdr")
condRes_monocle <- condRes_monocle %>% mutate(gene = rownames(.))

data("condRes_monocle", package = "condimentsPaper")
condRes <- bind_rows(
  "Monocle" = condRes_monocle,
  "Slingshot" = condRes %>% mutate(gene = rownames(.)),
  .id = "Trajectory"
)
condRes %>% 
  select(Trajectory, gene, waldStat) %>%
  pivot_wider(names_from = "Trajectory", values_from = "waldStat") %$%
  cor(.$`Monocle`, .$`Slingshot`, use = "complete.obs")
condRes %>% 
  select(Trajectory, gene, padj) %>%
  pivot_wider(names_from = "Trajectory", values_from = "padj") %>%
  filter(!is.na(Monocle) & !is.na(Slingshot)) %>%
  mutate(Monocle = if_else(Monocle <= .05, "sig", "not_sig"),
         Slingshot = if_else(Slingshot <= .05, "sig", "not_sig")) %>%
  group_by(Monocle, Slingshot) %>%
  summarise(n = n())

DASeq

tgfb$condition <- tgfb$pheno$treatment_id
colnames(tgfb) <- paste0("cell", seq_len(ncol(tgfb)))
rownames(reducedDim(tgfb)) <- colnames(tgfb)
tgfb$Sample <- sample(1:5, ncol(tgfb), replace = TRUE)
tgfb$Sample[tgfb$condition == "TGFB"] <- 
  tgfb$Sample[tgfb$condition == "TGFB"] + 10
tgfb$Sample <- as.character(tgfb$Sample)
info <- colData(tgfb)[, c("Sample", "condition")] %>%
  as.data.frame() %>%
  distinct()
da_cells <- getDAcells(
  X = reducedDim(tgfb, "UMAP"),
  cell.labels = tgfb$Sample,
  labels.1 = info$Sample[info$condition == "Mock"],
  labels.2 = info$Sample[info$condition == "TGFB"],
  k.vector = seq(50, 500, 50),
  do.plot = TRUE,
  plot.embedding = reducedDim(tgfb, "UMAP")
)
da_regions <- getDAregion(
  X = reducedDim(tgfb, "UMAP"),
  da.cells = da_cells,
  cell.labels = tgfb$Sample,
  labels.1 = info$Sample[info$condition == "Mock"],
  labels.2 = info$Sample[info$condition == "TGFB"],
  do.plot = TRUE,
  plot.embedding = reducedDim(tgfb, "UMAP")
)
df <- bind_cols(
  as.data.frame(reducedDims(tgfb)$UMAP),
  as.data.frame(colData(tgfb)[, -3])
  )
df <- df[da_regions$cell.idx, ]
df$region <- da_regions$da.region.label %>% as.character()
df <- df %>%
  select(UMAP_1, UMAP_2, region, condition) %>%
  full_join(da_regions$DA.stat %>%
              as.data.frame %>% 
              mutate(region = rownames(.)))
ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = DA.score)) +
  geom_point(size = .7) +
  scale_colour_gradient2() +
  labs(col = "DA Score\nfrom DASeq") +
  theme(legend.position = "bottom")

Milo

logcounts(tgfb) <- log1p(counts(tgfb))
milo <- Milo(tgfb)
milo <- buildGraph(milo, reduced.dim = "UMAP", d = 2, k = 20,
                   BPPARAM = BiocParallel::SerialParam())
milo <- makeNhoods(milo, refined = TRUE, reduced_dims = "UMAP", d = 2)
milo$Sample <- sample(1:5, ncol(tgfb), replace = TRUE)
milo$Sample[milo$condition == "TGFB"] <- 
  milo$Sample[milo$condition == "TGFB"] + 5
milo$Sample <- as.character(milo$Sample)
design.df <- colData(milo)[, c("condition", "Sample")] %>%
  as.data.frame()
milo <- miloR::countCells(milo, meta.data = design.df, sample = "Sample")
milo <- calcNhoodDistance(x = milo, d = 2, reduced.dim = "UMAP")
design.df <- design.df %>% distinct()
rownames(design.df) <- design.df$Sample
da_results <- testNhoods(milo, design = ~ condition, design.df = design.df)
milo <- buildNhoodGraph(milo)
da_results <- groupNhoods(milo, da_results, max.lfc.delta = 2)
plotNhoodGraphDA(milo, da_results, layout = "UMAP", alpha = 0.1) +
  guides(edge_width = "none",
         size = "none",
         fill = guide_colorbar(title = "logFC\nfrom Milo")) +
  theme_classic() +
  labs(x = "UMAP_1", y = "UMAP_2") +
  theme(legend.position = "bottom",
        legend.title = element_text(size = 11))

Session info

sessionInfo()

References

HectorRDB/condimentsPaper documentation built on Oct. 16, 2021, 7:02 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

HectorRDB/condimentsPaper
Package to accompany the condiments Paper

In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

Overview

Dataset

Load data

Trajectory Inference and Differential Topology

Differential progression

Differential expression

Select number of knots

Fit GAM

Differential expression between conditions

Visualize most and least significant gene

Biologically relevant genes

Heatmaps of genes DE between conditions

Gene set enrichment analysis

Running with Monocle 3

Trajectory inference and differential progression

Fit GAM

DASeq

Milo

Session info

References

R Package Documentation

Browse R Packages

We want your feedback!

HectorRDB/condimentsPaper Package to accompany the condiments Paper

In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

Overview

Dataset

Load data

Trajectory Inference and Differential Topology

Differential progression

Differential expression

Select number of knots

Fit GAM

Differential expression between conditions

Visualize most and least significant gene

Biologically relevant genes

Heatmaps of genes DE between conditions

Gene set enrichment analysis

Running with Monocle 3

Trajectory inference and differential progression

Fit GAM

DASeq

Milo

Session info

References

R Package Documentation

Browse R Packages

We want your feedback!

HectorRDB/condimentsPaper
Package to accompany the condiments Paper