Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, knitr.table.format = "html")
library(ggridges)
library(cowplot)
library(kableExtra)
library(tidyverse)
library(ezRun)
library(Seurat)
library(SCpubr)
param = readRDS("param.rds")
scData <- readRDS("scData.rds")
Idents(scData) <- scData@meta.data[[param$grouping]]
scData <- subset(scData, idents=c(param$sampleGroup, param$refGroup))
#values need to be re-scaled after subsetting for plotting purposes
scData <- ScaleData(scData)
Idents(scData) <- scData[[param$CellIdentity]]
clusterNames <- sort(unique(Idents(scData)))
var_heigth <- 1

Analysis results {.tabset}

conservedMarkers <- readxl::read_xlsx("consMarkers.xlsx")
diffGenes <- readxl::read_xlsx("diffGenes.xlsx")
cat("### Dim Plot")
cat("\n")
cat(sprintf("We first plot a dimensionality plot of the data after subsetting the original integrated data to cells originating from only samples '%s' and '%s'. The plot colors the cells by the Seurat '%s' metadata column, and splits the plot by sample. Gray cells indicate cells absent from this sample, but present in the other.", param$sampleGroup, param$refGroup, param$CellIdentity))
cat("\n\n")
do_DimPlot(scData, split.by = param$grouping, group.by = param$CellIdentity)
cat("### Conserved Markers")
cat("\n")
cat("Identify cell type marker genes that are conserved across conditions. Differential gene expression tests are performed for each group and then, the p-values are combined using meta-analysis methods.")
cat("\n\n")
cat("#### Conserved cell type markers \n")
cat("\n")
ezInteractiveTableRmd(conservedMarkers, digits = 3)
top5 <- conservedMarkers %>% 
  mutate(cluster = as.numeric(as.character(cluster))) %>% group_by(cluster)
top5 <- slice_max(top5, n = 5, order_by = avg_avg_fc)
var_heigth <- nrow(top5)*0.2
cat("### Conserved Markers - Plots\n")
cat("\n")
cat("Here, we use a heatmap and a dotplot to visualize simultaneously the top 5 conserved markers in each cluster (with highest average fold change across the two groups). Be aware that some genes may be in the top markers for different clusters.\n")
cat("\n")
DoHeatmap(scData, features=top5$gene)
DotPlot(scData, features=unique(top5$gene)) + 
  coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust=1))
cat("### Differential Expression")
cat("\n")

cat(paste0("After identifying common cell types across conditions, we can look for genes that change in different conditions for cells of the same type. The interactive table can help you to explore the more relevant genes (maximum 1000 genes with the biggest diff_pct). The full list of tested genes is in the Data availability section of this report. The comparison made was ", paste(c(param$sampleGroup, param$refGroup), collapse = " vs ")))

cat("\n")

diffGenesTable <- diffGenes[diffGenes$p_val_adj<0.05, ]  #too many genes make the interactive table slow
diffGenesTable <- diffGenesTable[order(abs(diffGenes$avg_log2FC), decreasing = TRUE), ]
cutoff <- 1000

if(nrow(diffGenesTable) > cutoff) {
  diffGenesTable <- diffGenesTable[1:cutoff, ]
}

cat("\n")
cat("#### Differential expressed genes per cluster")
cat("\n")
ezInteractiveTableRmd(as.data.frame(diffGenesTable), digits=3) %>% formatSignif("p_val")
cat("### Differential Expression - Plots\n")
cat("\n")
cat("The 50 dysregulated genes with the highest difference in expression between the two conditions are represented in the heatmap and the violin plots. The heatmap shows global differences of these genes across the conditions, while the violin plots show cluster specific changes across conditions.")
cat("\n")
top5 <- diffGenes %>% group_by(cluster)
top5 <- slice_max(top5, n = 5, order_by = avg_log2FC)
scData$cluster.group <- paste0(scData@meta.data[[param$CellIdentity]], ".", scData@meta.data[[param$grouping]])
DoHeatmap(scData, features = top5$gene, group.by = param$grouping)
DoHeatmap(scData, features = top5$gene, group.by = "cluster.group") + NoLegend()
Idents(scData) <- scData@meta.data[[param$grouping]]
for (clusterName in clusterNames) {
  dt <- diffGenes %>% filter(cluster == clusterName)
  scd <- scData[, scData@meta.data[[param$CellIdentity]] == clusterName]
  if (nrow(dt) > 0) {
    dt <- dt[order(dt$p_val_adj), ]
    upGenes <- dt[dt$avg_log2FC > 0, ] %>% slice_max( n=20, order_by = avg_log2FC) %>% pull("gene")
    downGenes <- dt[dt$avg_log2FC < 0, ] %>% slice_min( n=20, order_by = avg_log2FC) %>% pull("gene")

    genesToPlot <- c(upGenes, downGenes)
    print(DoHeatmap(subset(scd, downsample = 2000), genesToPlot, slot="scale.data") + ggtitle(paste0("Cell type: ", clusterName)))
  }
  p <- do_VolcanoPlot(scData, dt, plot.title = clusterName, add_gene_tags = TRUE, order_tags_by = "pvalue")
  print(p)
}

Data availability

Conserved markers

conservedMarkers

Differentially expressed genes

diffGenes

Parameters

param[c("DE.method", "DE.regress", "grouping", "sampleGroup", "refGroup", "CellIdentity")]

SessionInfo

ezSessionInfo()


uzh/ezRun documentation built on May 4, 2024, 3:23 p.m.