In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

library(knitr)
opts_chunk$set(
  fig.pos = "!h", out.extra = "", warning = F, 
  fig.align = "center"
)

The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to treatment (TCDD) and control. Nault et al. [@Nault2021] studied the effect of that drug on the liver, along the central-portal axis.

libs <- c("dplyr", "stringr", "Seurat", "SingleCellExperiment", "slingshot",
          "condiments", "ggplot2", "cowplot", "tradeSeq", "RColorBrewer",
          "pheatmap", "scales")
suppressMessages(
  suppressWarnings(sapply(libs, require, character.only = TRUE))
)
rm(libs)
theme_set(theme_classic())
pal <- c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D", "#AD8BC9",
         "#A8786E", "#ED97CA", "#A2A2A2", "#CDCC5D", "#6DCCDA")

Load data

cds <- condimentsPaper::import_TCDD()
# Get zonal genes ----
url <- paste0("https://static-content.springer.com/esm/art%3A10.1038%2Fnature",
              "21065/MediaObjects/41586_2017_BFnature21065_MOESM62_ESM.xlsx")
df <- openxlsx::read.xlsx(url, startRow = 3, na.strings = "NaN")
df <- df[, c(1, 21)] %>%
  dplyr::rename("qvalues" = `q-values`) %>%
  dplyr::filter(!is.na(qvalues), qvalues < .005) %>%
  dplyr::select(Gene.Symbol)
zone.mat <- df$Gene.Symbol %>% stringr::str_split(";") %>% unlist()
# Filter to only keep hepatocytes
hepatocytes <- subset(cds, subset = celltype.ontology == "CL:0000182:Hepatocyte")
common.genes <- intersect(zone.mat, rownames(hepatocytes))
DefaultAssay(hepatocytes) <- "integrated"
hepatocytes <- ScaleData(hepatocytes, verbose = FALSE)
hepatocytes <- RunPCA(hepatocytes, features = common.genes, verbose = FALSE)
hepatocytes <- RunUMAP(hepatocytes, dims = 1:30, verbose = FALSE)
tcdd <- as.SingleCellExperiment(hepatocytes, assay = "RNA")
tcdd$celltype <- droplevels(tcdd$celltype) %>%
  stringr::str_remove("Hepatocytes - ") %>%
  unlist()
rowData(tcdd)$is_zonal <- rownames(tcdd) %in% zone.mat

data("tcdd", package = "condimentsPaper")

EDA

The data is imported using a function from the package. We then compute reduced dimension coordinates with UMAP [@Becht2019, @McInnes2018].

df <- bind_cols(
  as.data.frame(reducedDims(tcdd)$UMAP),
  as.data.frame(colData(tcdd)[, 1:60]))
p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = treatment)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Accent") +
  labs(col = "Treatment")
p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, fill = celltype)) +
  geom_point(size = 1, alpha = .65, col = "grey70", shape = 21) +
  scale_fill_manual(values = pal) +
  labs(fill = "Cell Type")
p1
p2

tcdd <- imbalance_score(tcdd, dimred = "UMAP", conditions = "treatment", smooth = 5)
df$scores <- tcdd$scores$scaled_scores
p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) +
  geom_point(size = .7) +
  scale_color_viridis_c(option = "C") +
  labs(col = "Score")
p3

Differential Topology

To estimate the trajectory, we use slingshot [@Street2018a].

tcdd <- slingshot(tcdd, reducedDim = "UMAP", clusterLabels = tcdd$celltype,
                  start.clus = "Central", end.clus  = "Portal")

set.seed(9764)
topologyTest(tcdd, tcdd$treatment)

rownames(df) <- colnames(tcdd)
df$cells <- rownames(df)
pst <- data.frame(cells = colnames(tcdd),
                  pst = slingPseudotime(tcdd)[, 1])
df <- dplyr::full_join(df, pst)
p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pst)) +
  geom_point(size = .7) +
  scale_color_viridis_c() +
  labs(col = "Pseudotime") +
  geom_path(data = slingCurves(tcdd, as.df = TRUE) %>% arrange(Order),
            col = "black", size = 1.5)
p4

Differential Progression

p5 <- ggplot(df, aes(x = pst)) +
  geom_density(alpha = .8, aes(fill = treatment), col = "transparent") +
  geom_density(aes(col = treatment), fill = "transparent", size = 1.5) +
  guides(col = "none", 
         fill = guide_legend(override.aes = 
                               list(size = 1.5, col = c("#7FC97F", "#BEAED4"),
                                    title.position = "top"))) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") +
  labs(x = "Pseudotime", fill = "Treatment")
p5

progressionTest(tcdd, conditions = tcdd$treatment)

Differential Expression

We use tradeSeq [@VandenBerge2020].

filter <- apply(counts(tcdd), 1, function(g) {
    sum(g >= 3) >= 10
})
tcdd <- tcdd[filter, ]

set.seed(3)
library(BiocParallel)
BPPARAM <- bpparam()
BPPARAM$workers <- 3
BPPARAM$progressbar <- TRUE
icMat <- evaluateK(counts = as.matrix(assays(tcdd)$counts),
                   pseudotime = slingPseudotime(sds),
                   cellWeights = rep(1, ncol(tcdd)),
                   conditions = factor(tcdd$treatment),
                   nGenes = 300,
                   k = 3:7, parallel = TRUE,
                   BPPARAM = BPPARAM)
set.seed(3)
tcdd <- fitGAM(counts = tcdd,
              conditions = factor(tcdd$treatment),
              nknots = 7,
              verbose = TRUE, 
              parallel = TRUE,
              BPPARAM = BPPARAM)

Assessing Differential Expression Accross conditions

condRes <- conditionTest(tcdd, l2fc = log2(2))
condRes$padj <- p.adjust(condRes$pvalue, "fdr")
mean(condRes$padj <= 0.05, na.rm = TRUE)
sum(condRes$padj <= 0.05, na.rm = TRUE)
conditionGenes <- rownames(condRes)[condRes$padj <= 0.05]
conditionGenes <- conditionGenes[!is.na(conditionGenes)]
scales <- brewer.pal(3, "Accent")[1:2]
oo <- order(condRes$waldStat, decreasing = TRUE)

# most significant gene
p1 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[1]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[1]])
# second most significant gene
p2 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[2]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[2]])
# least significant gene
p3 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]])
p1
p2
p3

### based on mean smoother
yhatSmooth <- predictSmooth(tcdd, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>%
  log1p()
yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale))
heatSmooth_TCDD <- pheatmap(yhatSmoothScaled[, 51:100],
  cluster_cols = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "TCDD", legend = FALSE,
  silent = TRUE
)

matchingHeatmap_control <- pheatmap(yhatSmoothScaled[heatSmooth_TCDD$tree_row$order, 1:50],
  cluster_cols = FALSE, cluster_rows = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "Control",
  legend = FALSE, silent = TRUE 
)

p4 <- plot_grid(heatSmooth_TCDD[[4]], matchingHeatmap_control[[4]], ncol = 2)
p4

kept <- rownames(tcdd)[rowData(tcdd)$is_zonal]
mat <- matrix(c(sum(kept %in% conditionGenes), length(conditionGenes),
                sum(kept %in% rownames(tcdd)[!rownames(tcdd) %in% conditionGenes]),
                nrow(tcdd) - length(conditionGenes)),
              byrow = FALSE, ncol = 2)
fisher.test(mat)