## nolint start
suppressPackageStartupMessages({
    library(basejump)
    library(AcidGSEA)
})
## nolint end
prepareTemplate()
source("_setup.R") # nolint
## This is required for tabset R Markdown headers to work in `lapply()` call.
knitr::opts_chunk[["set"]](message = FALSE)

Note that we have switched to MSigDB 7.0 (August 2019) from 6.2.

Load FgseaList object

Import the FgseaList S4 class object. This single object contains everything we need to visualize the GSEA results.

if (is.character(params[["object"]])) {
    file <- params[["object"]]
    object <- import(file)
    name <- params[["name"]]
    if (!isTRUE(nzchar(name))) {
        name <- basenameSansExt(file)
    }
    rm(file)
} else {
    object <- params[["object"]]
    name <- params[["name"]]
}
stopifnot(
    is(object, "FgseaList"),
    isTRUE(nzchar(name))
)
invisible(validObject(object))
print(object)

Ranked gene list input

A ranked numeric vector gene list is used as input for fast, parameterized GSEA. This is stored as a RankedList S4 class object by pfgsea, and internally named as rankedlist in the metadata() slot of the FgseaList object.

It requires three features:

  1. numeric: test statistic (preferred) or log2 fold change. For [DESeq2][], the test statistic is generated using the Wald test by default (see DESeq2::DESeq(), DESeq2::nbinomWaldTest()).
  2. named: every number has a name, the corresponding gene ID.
  3. sorted: number should be sorted in decreasing order.

Note that [MSigDb][] requires that the identifiers map either to HGNC names (gene symbols) or NCBI (Entrez) gene identifiers. We prefer to map using the gene symbols, but these are not 1:1 for all Ensembl gene IDs (e.g. ENSG00000000003 = TSPAN6). In the event that there are Ensembl gene IDs per HGNC symbol, we're taking the average of the values here.

MSigDb GMT files

Here we are using pathways from the MSigDB database. GSEA requires a gene list containing names that match the gene names in the MSigDb pathways list (i.e. gene symbols instead of Ensembl gene IDs).

Before proceeding, first download the MSigDb 6.2 release and extract the ZIP archive, which will contain GMT files. Note that e-mail registration is required to access resources at the Broad Institute.

Running GSEA

All genes can be used in Gene Set Enrichment Analysis (GSEA). GSEA aggregates the per gene statistics across genes within a gene set, therefore making it possible to detect situations where all genes in a predefined set change in a small but coordinated way. Since it is likely that many relevant phenotypic differences are manifested by small but consistent changes in a set of genes. GSEA uses a ranked list of genes (here ranked by the test statistic), without applying a per-gene P value significance cutoff. This allows the analysis to use more information to identify enriched biological processes. The GSEA introduction goes into more detail about some of the advantages of this approach.

This section is a modified version of Stephen Turner's excellent DESeq results to pathways in 60 Seconds with the fgsea package vignette. The fgsea vignette also has additional useful information.

Each row corresponds to a tested pathway. The columns are the following:

Here we are arranging the tables by normalized enrichment score (NES), positive to negative.

Export tables

Here we are exporting the GSEA tables from the object, organized per contrast.

export(
    object = object,
    name = name,
    dir = params[["output_dir"]]
)

Hallmark

First, let's use the Homo sapiens Hallmark collection from MSigDb. Hallmark gene sets summarize and represent specific well-defined biological states or processes and display coherent expression. These gene sets were generated by a computational methodology based on identifying overlaps between gene sets in other MSigDb collections and retaining genes that display coordinate expression.

The fgsea::gmtPathways() function will take a GMT file from MSigDb and turn it into a list. Each element in the list is a character vector of genes in the pathway.

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "h",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "h",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "h"
)

C1

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c1",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c1",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c1"
)

C2

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c2",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c2",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c2"
)

C3

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c3",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c3",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c3"
)

C4

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c4",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c4",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c4"
)

C5

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c5",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c5",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c5"
)

C6

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c6",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c6",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c6"
)

C7

Top tables {.tabset}

markdownTables(
    object = object,
    collection = "c7",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

Plot enriched gene sets {.tabset}

plotEnrichedGeneSets(
    object = object,
    collection = "c7",
    n = params[["n_pathways"]],
    headerLevel = 3L
)

UpSet plot

plotEnrichedUpset(
    object = object,
    collection = "c7"
)




steinbaugh/pfgsea documentation built on Oct. 17, 2023, 11:24 a.m.