knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
library(DT)
library(prora)


# Unpack the components of `GSEAResults` into top-level variables used by the
# rest of the report. `GSEAResults` itself is not defined in this file and is
# expected to be supplied by the rendering environment.
gseaResult <- GSEAResults$fgseaRes
# The plotting code further down reads the full fgsea table under the name
# `fgseaResult`; bind it here as well so that name is always defined.
fgseaResult <- gseaResult
geneSet <- GSEAResults$geneSet
rankList <- GSEAResults$rankList
relevantResult <- GSEAResults$relevantResult
gsName <- GSEAResults$gsName
threshold <- GSEAResults$threshold
mainPathways <- GSEAResults$mainPathways
map_summaries <- GSEAResults$map_summaries
score <- GSEAResults$score

Introduction

Gene Set Enrichment Analysis (GSEA) examines how the members of a gene set are distributed within a ranked list of proteins. The null hypothesis is that the members of the gene set are randomly distributed over the ranked list. The alternative is that most of them are concentrated in one of the tails of the ranked list. To estimate the probability of falsely rejecting the null hypothesis (p-value), a normalized enrichment score (NES) is computed for the gene set and compared with the null distribution of the NES. Since we examine dozens of gene sets and therefore perform multiple tests, the obtained p-values need to be adjusted for multiple testing, resulting in a false discovery rate (FDR).

Summary

To rank the proteins we used r score.

# Draw the ranking statistic as vertical bars, largest value first; the
# y-axis label is taken from `score`.
rankedScores <- sort(rankList, decreasing = TRUE)
plot(rankedScores, type = "h", ylab = score)
# Scatter plot of NES against -log10(FDR) with a horizontal line at the FDR
# threshold.
# NOTE(review): the device is split into two panels but only one plot is drawn
# in the code visible here -- confirm whether a second panel is intended.
par(mfrow = c(1, 2))

# Build the plotting table from `gseaResult` (bound from GSEAResults$fgseaRes
# above), exposing the adjusted p-value column `padj` under the name `FDR`.
fgseaResult <- dplyr::rename(gseaResult, FDR = padj)

negLogFDR <- -log10(fgseaResult$FDR)
negLogThreshold <- -log10(threshold)

# The upper y-limit must cover the threshold line as well as the data, and it
# must be finite: an FDR of exactly 0 gives Inf, which plot() rejects in ylim.
yUpper <- max(
  c(-log10(0.05), negLogThreshold, negLogFDR[is.finite(negLogFDR)]),
  na.rm = TRUE
)

plot(fgseaResult$NES,
     negLogFDR,
     ylim = c(0, yUpper),
     ylab = "-log10(FDR)", xlab = "NES", pch = 16, main = "")
abline(h = negLogThreshold, col = c(2, 3))
# Legend explaining the columns of the result tables; only rendered when there
# is at least one relevant result to show.
if (nrow(relevantResult) > 0) {
  tableLegend <- c(
    "GS" = "Gene-set Collection : for more details see msigdb.org",
    "FDR" = "False discovery rate",
    "nMoreExtreme" = "a number of times a random gene set had a more extreme enrichment score value",
    "leadingEdge" = "vector with Entrez Id's of leading edge genes that drive the enrichment.",
    "size" = "size of the pathway after removing genes not present in the protein list"
  )

  # `tableLegend` is a named vector, so data.frame() adopts its names as row
  # names; suppress them so the names are not printed twice (they are already
  # in the "Name" column).
  knitr::kable(
    data.frame("Name" = names(tableLegend), "Description" = tableLegend),
    row.names = FALSE,
    caption = "Column Descriptions."
  )
}

r if(nrow(relevantResult) ==0){ paste0("There are NO relevant results with FDR < ", threshold, ".")}

# Interactive table of all pathways that pass the FDR threshold. Pathway names
# and leading-edge gene ids are run through DT_makeURLfor() with the MSigDB
# and NCBI gene base URLs; escape = FALSE lets the table render the resulting
# cell contents unescaped.
if (nrow(relevantResult) > 0) {
  msigdbCardUrl <- "https://www.gsea-msigdb.org/gsea/msigdb/cards/"
  ncbiGeneUrl <- "https://www.ncbi.nlm.nih.gov/gene/"

  relevantResult <- dplyr::rename(relevantResult, FDR = padj)
  relevantResult$pathway <-
    DT_makeURLfor(relevantResult$pathway, path = msigdbCardUrl)
  relevantResult$leadingEdge <- lapply(
    relevantResult$leadingEdge,
    function(geneIds) DT_makeURLfor(geneIds, path = ncbiGeneUrl)
  )

  caption <- paste0("All Pathways with FDR < ", threshold, ".")
  relevantTable <- DT::datatable(relevantResult, caption = caption, escape = FALSE)
  # Show NES, p-value and FDR with two significant digits.
  relevantTable <- formatSignif(relevantTable, "NES", 2)
  formatSignif(relevantTable, c("pval", "FDR"), 2)
}
# Interactive table of the non-redundant ("main") pathways. A renamed copy,
# `mainPathways2`, is used for display so that `mainPathways` itself keeps its
# original `padj` column.
if (nrow(mainPathways) > 0) {
  # Gene sets belonging to the main pathways, selected by pathway name.
  mainGeneSets <- geneSet[mainPathways$pathway]

  caption <- paste0("Non redundant pathways with FDR < ",  threshold,".")

  mainPathways2 <- mainPathways %>%
    dplyr::rename(FDR = padj)
  mainPathways2$pathway <- DT_makeURLfor(
    mainPathways2$pathway,
    path = "https://www.gsea-msigdb.org/gsea/msigdb/cards/"
  )
  mainPathways2$leadingEdge <- lapply(
    mainPathways2$leadingEdge,
    DT_makeURLfor,
    path = "https://www.ncbi.nlm.nih.gov/gene/"
  )

  # escape = FALSE lets the table render the generated cell contents unescaped.
  mainTable <- DT::datatable(mainPathways2, caption = caption, escape = FALSE)
  mainTable <- formatSignif(mainTable, "NES", 2)
  formatSignif(mainTable, c("pval", "FDR"), 2)
}
# Figure height grows with the number of main pathways (3 + 2 per pathway),
# capped at 30.
nMainPathways <- nrow(mainPathways)
height.fig <- min(30, 3 + 2 * nMainPathways)

# GSEA table plot for the main pathways; skipped when there are none.
if (nMainPathways > 0) {
  plotGseaTable(mainGeneSets, rankList, mainPathways, gseaParam = 0.5)
}

References

Gene sets defined in the Molecular Signatures Database (MSigDB) are used. To generate the analysis, the following R packages are used: r BiocStyle::Biocpkg("msigdbr"), r BiocStyle::Biocpkg("fgsea") and r BiocStyle::Githubpkg("protviz/prora", "prora").

If you have further questions, please contact wew@fgcz.ethz.ch or protinf@fgcz.uzh.ch.



protViz/prora documentation built on Dec. 12, 2021, 12:32 a.m.