knitr::opts_chunk$set(echo = TRUE,
                      fig.height=6, fig.width=6)
library(ribiosUtils)
library(ribiosIO)
library(ribiosPlot)
library(ribiosNGS)
library(ribiosExpression)
library(ribiosGSEA)
library(limma)
library(edgeR)
library(openxlsx)
library(tidyverse)
library(ggplot2)
library(ComplexHeatmap)
library(corrplot)
library(Vennerable)
library(igraph)
library(BioQC)
library(ggrepel)
set.seed(1887)
theme_set(theme_light(base_size=13))
if(is.null(params$directory)) {
  workDir <- getwd()
} else {
  workDir <- normalizePath(params$directory)
}
wdFile <- function(x) file.path(workDir, x)

Analysis

Reading in

edgeRes <- loadObject(wdFile("ngsDge.RData"))
print(edgeRes)
edgeLfc <- logFCmatrix(edgeRes)
nctrt <- nContrast(edgeRes)
nctrtSqrt <- ceiling(sqrt(nctrt))

Volcano plots

vcpWidth <- 2 * nctrtSqrt
vcpHeight <- 2 * nctrtSqrt
volcanoPlot(edgeRes)

DGE count

dgepWidth <- pmax(6, 0.4 * nctrt)
sigGeneBarchart(edgeRes)

Top differentially expressed genes

nSigned <- params$nSignedTopGenes
groupSize <- ulen(dgeList(edgeRes)$samples$group)
tsgWidth <- pmax(6, 0.35 * nSigned * groupSize)
tsgHeight <- pmax(6, 2 * nctrt)
topSigGenesPlot <- plotTopSigGenes(edgeRes, nSigned = nSigned)
print(topSigGenesPlot)

PCA of contrasts

lfcCorSize <- pmax(6, 0.45 * nctrt)
diffpcaSize <-  pmax(6, 1.25 * nctrtSqrt)
lfcCor <- cor(edgeLfc, method="spearman")
corrplot::corrplot(lfcCor, method="ellipse", col=royalbluered(100), tl.cex=0.8,
         mar=c(0,0,3,0), addCoef.col = "cyan3", number.cex=0.85,
         title="Correlation of differential gene expression profiles")
diffPcaScore <- pcaScoresFromLogFC(edgeLfc)
diffPcaPlot <- ggplot(as.data.frame(diffPcaScore), 
       aes(x=PC1, y=PC2, label=contrastNames(edgeRes))) +
  geom_point(size=4) +
  xlab(expVarLabel(diffPcaScore, 1, compact = TRUE)) +
  ylab(expVarLabel(diffPcaScore, 2, compact = TRUE)) +
  ggtitle("PCA of differential gene expression") +
  geom_vline(xintercept = 0) +
  ggrepel::geom_text_repel() +
  geom_hline(yintercept = 0)
print(diffPcaPlot)

Pathway analysis

cameraRes <- readCameraResults(wdFile("geneset-analysis/camera-results.txt"))
cameraSig <- cameraRes %>% filter(PValue<0.01,
                                  NGenes>=5,
                                  NGenes<=200,
                                  abs(EffectSize)>=0.5,
                                  ! Namespace %in% c("goslim", "immunespace", 
                                                     "immunomics", "mbdisease",
                                                     "mbpathology", "mbtoxicity",
                                                     "msigdbC7", "msigdbC2",
                                                     "MolecularPhenotyping"))
cameraMat <- cameraRes %>% filter(GeneSet %in% cameraSig$GeneSet) %>%
  longdf2matrix(row.col="GeneSet", column.col="Contrast", value.col="Score")
cameraHeatmapWidth <- pmax(6, 0.18 * nctrt)
cameraHeatmapHeight <- pmax(7, log2(nrow(cameraMat)))
biosHeatmap(cameraMat, col="yellowmagenta", zlim=c(-2,2),
            Colv=FALSE, dendrogram="row",
            lwid=c(1,3), lhei=c(1,9), cexCol=1, labRow=NA,
            xlab="Contrast", ylab="Gene-sets",
            color.key.title = "ES")
goi <- rownames(cameraMat)
goiGenes <- parseCameraContributingGenes(cameraRes,goi)
repGeneSets <- kmeansGeneset(cameraMat, genesetGenes = goiGenes, optK=25,
                             thrCumJaccardIndex = 0.5, maxRepPerCluster = 3)
repGeneCount <- length(repGeneSets$repGenesets)
repGenesetsHeatmapWidth <- pmax(7L, 0.28 * nctrt)
repGenesetsHeatmapHeight <- pmax(7L, repGeneCount * 0.25)
rgHei <- c(1, repGenesetsHeatmapHeight-1)
rgWid <- c(1, repGenesetsHeatmapWidth-1)

Representative pathways

We visualize representative gene-sets.

repGsMat <- cameraMat[repGeneSets$repGenesets,]
repGsLabel <- prettyRonetGenesetNames(rownames(repGsMat), nchar=40)
ha <- ComplexHeatmap::HeatmapAnnotation(GeneSetCluster=repGeneSets$repGenesetClusters,
                                        col=list(GeneSetCluster=fcbase(fcbrewer(repGeneSets$repGenesetClusters))),
                                        which="row")
gsMaxRownamesWidth <- unit(6, "inch")
repGsHeatmap <- ComplexHeatmap::Heatmap(repGsMat,
                        right_annotation = ha,
                        row_names_side="left",
                        row_names_max_width=gsMaxRownamesWidth,
                        cluster_rows = FALSE, cluster_columns=FALSE,
                        name="Enrichment score",
                        col=circlize::colorRamp2(breaks=c(-3,0, 3),
                                 colors=yellowmagenta(3)),
                        row_labels = repGsLabel,
                        column_labels = colnames(repGsMat),
                        row_names_gp = gpar(fontsize=13),
                        column_names_gp = gpar(fontsize=13))
print(repGsHeatmap)

We visualize representative gene-sets and contrasts with only gene-sets clustered.

repGsHeatmapRowClustered <- ComplexHeatmap::Heatmap(repGsMat,
                        row_names_side="left",
                        row_names_max_width=gsMaxRownamesWidth,
                        cluster_rows = TRUE, cluster_columns=FALSE,
                        clustering_method_rows = "ward.D2",
                        name="Enrichment score",
                        col=circlize::colorRamp2(breaks=c(-3,0, 3),
                                 colors=yellowmagenta(3)),
                        row_labels = repGsLabel,
                        column_labels = colnames(repGsMat),
                        row_names_gp = gpar(fontsize=13),
                        column_names_gp = gpar(fontsize=13))
print(repGsHeatmapRowClustered)

We visualize representative gene-sets and contrasts with both dimensions clustered.

repGsHeatmapTwoWayClustered <- ComplexHeatmap::Heatmap(repGsMat,
                        row_names_side="left",
                        row_names_max_width=gsMaxRownamesWidth,
                        cluster_rows = TRUE, cluster_columns=TRUE,
                        clustering_method_rows = "ward.D2",
                        clustering_method_columns = "ward.D2",
                        name="Enrichment score",
                        col=circlize::colorRamp2(breaks=c(-3,0, 3),
                                 colors=yellowmagenta(3)),
                        row_labels = repGsLabel,
                        column_labels = colnames(repGsMat),
                        row_names_gp = gpar(fontsize=13),
                        column_names_gp = gpar(fontsize=13))
print(repGsHeatmapTwoWayClustered)

Exported heatmaps of individual genesets

We export heatmaps of differential gene expression of genes in these representative pathways. They can be found in this PDF file.

strEdgeRes <- updateSigFilter(edgeRes, logFC=0.5, FDR=0.05)
strEdgeSigGenes <- munion(sigGenes(strEdgeRes, value="GeneSymbol"))
repGoiGenes <- lapply(goiGenes[repGeneSets$repGenesets],
                      function(x) intersect(x, strEdgeSigGenes))
try({
  createDir(wdFile("figures"))
  openFileDevice(wdFile("figures/repGenesetHeatmaps.pdf"),
                  width=cameraHeatmapWidth,
                 height=repGenesetsHeatmapHeight)
  sapply(repGeneSets$repGenesets,
         function(gs) {
           genes <- repGoiGenes[[gs]]
           biosHeatmap(edgeLfc[genes,], main=gs, zlim=c(-2,2), 
                       lhei=c(1, repGenesetsHeatmapHeight-1),
                       color.key.title = "logFC", col="royalbluered")
         })
  closeFileDevice()
})

Save the environment

save(lfcCor, diffPcaScore, diffPcaPlot,
     repGsMat,repGsLabel,repGeneSets,gsMaxRownamesWidth,
     repGenesetsHeatmapWidth, repGenesetsHeatmapHeight,
     repGsHeatmap, repGsHeatmapRowClustered, repGsHeatmapTwoWayClustered,
     file="edgeR-report.RData")

Session information

bedaInfo()
sessionInfo()


bedapub/ribiosNGS documentation built on Feb. 10, 2025, 12:34 a.m.