Started on `r format(Sys.time(), "%Y-%m-%d %H:%M:%S")`

library(ggridges)
library(cowplot)
library(kableExtra)
library(tidyverse)
library(ezRun)
library(HDF5Array)
library(scater)

# Report setup: configure knitr, load the serialized inputs and the
# HDF5-backed SingleCellExperiment, and restrict it to the compared conditions.

# Chunk defaults: hide code, warnings and messages in the rendered report.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
# `knitr.table.format` is an R option consulted by knitr::kable()/kableExtra,
# not a chunk option, so it has to be set through options() to take effect.
options(knitr.table.format = "html")

input <- readRDS("input.rds")
param <- readRDS("param.rds")
URLs <- input$getColumn("Static Report")

# Translate the report URL into the matching directory below
# /srv/gstore/projects. The dots are escaped and the pattern is anchored so
# that only the literal host prefix is stripped.
filePath <- file.path(
  "/srv/gstore/projects",
  sub("^https://fgcz-(gstore|sushi)\\.uzh\\.ch/projects", "", dirname(URLs)),
  "sce_h5"
)
sce <- loadHDF5SummarizedExperiment(filePath)

# Subset the object to only contain the conditions we are interested in.
sce <- sce[, sce$Condition %in% c(param$sampleGroup, param$refGroup)]

# NOTE(review): name is misspelt ("heigth") but kept as-is; it is presumably
# referenced from chunk options that are not visible here -- confirm before
# renaming.
var_heigth <- 1

## Analysis results {.tabset}

Differential expression testing across conditions was performed on “pseudo-bulk” expression profiles (Tung et al. 2017), generated by summing counts together for all cells with the same combination of cluster and sample. We removed cluster-sample combinations containing fewer than 20 cells. We corrected for composition biases by computing normalization factors with the trimmed mean of M-values method (Robinson and Oshlack 2010). The design matrix was created including the Batch and the Condition tested as additive terms. The negative binomial and the quasi-likelihood (Chen, Lun, and Smyth 2016) dispersions were also estimated. Differentially expressed genes were defined as those with non-zero log-fold changes at a false discovery rate of 5%. These steps were performed using the functions `aggregateAcrossCells` from scater and `pseudoBulkDGE` from scran (Lun, McCarthy, and Marioni 2016).

# Load the differential expression results. Reading is best-effort: on
# failure `diffGenes` is NULL, the reason is surfaced as a warning, and the
# table below is skipped instead of aborting the report.
diffGenes <- tryCatch(
  read_tsv("differential_genes.tsv"),
  error = function(e) {
    warning(
      "Could not read differential_genes.tsv: ", conditionMessage(e),
      call. = FALSE
    )
    NULL
  }
)
cat("### Differential expressed genes")
cat("\n")

cat(paste0("After identifying common cell types across conditions, we can look for genes that change in different conditions for cells of the same type. The interactive table can help you to explore the more relevant genes (maximum 1000 genes with the biggest diff_pct). The full list of tested genes is in the Data availability section of this report. The comparison made was ", paste(c(param$sampleGroup, param$refGroup), collapse = " vs ")))

cat("\n")

# Too many genes make the interactive table slow.
cutoff <- 1000

# `nrow(NULL)` is NULL, which would make a bare `if (nrow(...) > cutoff)` fail
# with "argument is of length zero"; check for NULL first. head() keeps at
# most `cutoff` rows, in file order.
if (!is.null(diffGenes) && nrow(diffGenes) > cutoff) {
  diffGenes <- head(diffGenes, cutoff)
}

cat("#### Differential expressed genes per cluster")
cat("\n")
# Kept as a top-level `if` so the returned table is still auto-printed by
# knitr when results are available.
if (!is.null(diffGenes)) {
  ezInteractiveTableRmd(diffGenes)
} else {
  cat("No differential expression results are available.\n")
}
# Heatmap and per-gene violin plots for the top genes of every cluster.
cat("### Differential genes plots\n")
cat("\n")
# The intro text now matches the selection done below (lowest FDR, at most 5
# genes per cluster) instead of claiming a fixed total of 50 genes.
cat("The genes with the lowest FDR in each cluster (up to 5 per cluster) are represented in the heatmap and the violin plots. The heatmap shows global differences of these genes across the conditions, while the violin plots show cluster specific changes across conditions.")
cat("\n")

# Only plot when differential expression results were loaded and are non-empty.
if (!is.null(diffGenes) && nrow(diffGenes) > 0) {
  # with_ties = FALSE caps the selection at 5 genes per cluster even when
  # several genes share the same FDR; ungroup() avoids leaking the grouping.
  top5 <- diffGenes %>%
    group_by(Cluster) %>%
    slice_min(order_by = FDR, n = 5, with_ties = FALSE) %>%
    ungroup()

  # A gene can be among the top genes of several clusters; every gene is
  # drawn once because each violin plot already facets over all clusters.
  top_genes <- unique(top5$gene_name)

  plotHeatmap(sce, features = top_genes, center = TRUE, zlim = c(-2, 2),
              colour_columns_by = c("Condition"),
              order_columns_by = c("Condition"),
              cluster_rows = FALSE,
              fontsize_row = 8)

  for (gene in top_genes) {
    p <- plotExpression(sce, gene, x = "Condition", colour_by = "Condition",
                        other_fields = "ident") +
      facet_wrap(~ident) +
      ggtitle(gene)
    # Explicit print() is required for ggplot objects created inside a loop.
    print(p)
  }
}

### Parameters

# Show the DE.regress, sampleGroup and refGroup entries of `param` in the report.
param[c("DE.regress", "sampleGroup", "refGroup")]

### SessionInfo

# Report the session information for this run using ezSessionInfo().
ezSessionInfo()


uzh/ezRun documentation built on May 4, 2024, 3:23 p.m.