knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                      dev="png", message=FALSE, error=FALSE, warning=TRUE)

Citation: if you use MAGeCKFlute in published research, please cite: Binbin Wang, Mei Wang, Wubing Zhang. "Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute." Nature Protocols (2019), doi: 10.1038/s41596-018-0113-7.

Install and load the packages

if(!"MAGeCKFlute" %in% installed.packages()) BiocManager::install("MAGeCKFlute")
if(!"ggplot2" %in% installed.packages()) BiocManager::install("ggplot2")
library(MAGeCKFlute)
library(ggplot2)

Input data

MAGeCKFlute requires a weighted gene list for enrichment analysis.

file1 = file.path(system.file("extdata", package = "MAGeCKFlute"),
                  "testdata/rra.gene_summary.txt")
gdata = ReadRRA(file1)
genelist = gdata$Score
names(genelist) = gdata$id
genelist = sort(genelist, decreasing = TRUE)
head(genelist)

Enrichment analysis methods

Hypergeometric test

# Alternative functions EnrichAnalyzer and enrich.HGT.
hgtRes1 = EnrichAnalyzer(genelist[1:200], method = "HGT", 
                         type = "Pathway", organism = "mmu")
head(hgtRes1@result)
# hgtRes2 = enrich.HGT(genelist[1:200])
# head(hgtRes2@result)

Over-representation test

The ORT and GSEA are implemented in the R package clusterProfiler [@Yu2012]. So please install it prior to the analysis.

if(!"clusterProfiler" %in% installed.packages()){
  BiocManager::install("clusterProfiler")
}
library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.ORT.
ortRes1 = EnrichAnalyzer(genelist[1:200], method = "ORT", 
                         type = "KEGG", organism = "mmu")
head(ortRes1@result)
# ortRes2 = enrich.ORT(genelist[genelist< -1])
# head(ortRes2@result)

Gene set enrichment analysis

library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.GSE.
gseRes1 = EnrichAnalyzer(genelist, method = "GSEA", type = "Pathway", organism = "mmu")

Visualize enrichment results.

Barplot

require(ggplot2)
df = hgtRes1@result
df$logFDR = -log10(df$p.adjust)
p = BarView(df[1:5,], "Description", 'logFDR')
p = p + labs(x = NULL) + coord_flip()
p

# Or use function barplot from enrichplot package
barplot(hgtRes1, showCategory = 5)

Dot plot

## top: up-regulated pathways; 
## bottom: down-regulated pathways
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 1)
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 2)
dotplot(hgtRes1, showCategory = 5)

Other visualization functions from enrichplot [@Yu2018].

library(enrichplot)
hgtRes1@result$geneID = hgtRes1@result$geneName
cnetplot(hgtRes1, 5)
heatplot(hgtRes1, showCategory = 5, foldChange = genelist)
tmp <- pairwise_termsim(hgtRes1)
emapplot(tmp, showCategory = 5, layout = "kk")

Visulization for GSEA enriched categories

# show GSEA results of one pathway
idx = which(gseRes1$NES>0)[1]
gseaplot(gseRes1, geneSetID = idx, title = gseRes1$Description[idx])
# show GSEA results of multiple pathways
gseaplot2(gseRes1, geneSetID = which(gseRes1$NES>0)[1:3])

Type of gene sets for enrichment analysis

For enrichment analysis, MAGeCKFlute signifies the public available gene sets, including Pathways (PID, KEGG, REACTOME, BIOCARTA, C2CP), GO terms (GOBP, GOCC, GOMF), Complexes (CORUM) and molecular signature from MsigDB (c1, c2, c3, c4, c5, c6, c7, HALLMARK).

Functional enrichment analysis on GO terms and pathways

Analysis of high-throughput data increasingly relies on pathway annotation and functional information derived from Gene Ontology, which is also useful in the analysis of CRISPR screens.

## combination of the gene sets
enrichComb = EnrichAnalyzer(genelist[1:200], type = "KEGG+REACTOME", organism = "mmu")
EnrichedView(enrichComb, top = 5)
## All pathways
enrich = EnrichAnalyzer(geneList = genelist[1:200], type = "REACTOME", organism = "mmu")
EnrichedView(enrich, top = 5)
## All gene ontology
enrichGo = EnrichAnalyzer(genelist[1:200], type = "GOBP", organism = "mmu")

Protein complex analysis

Functional annotations from the pathways and GO are powerful in the context of network dynamics. However, the approach has limitations in particular for the analysis of CRISPR screenings, in which elements within a protein complex rather than complete pathways might have a strong selection. So we incorporate protein complex resource from CORUM database, which enable the identification of essential protein complexes from the CRISPR screens.

enrichPro = EnrichAnalyzer(genelist[1:200], type = "Complex", organism = "mmu")
EnrichedView(enrichPro, top = 5)

Enrichment analysis on the combination of the gene sets

Limit the size of gene sets for testing

Usually, only the core genes in a pathway could be selected in a CRISPR screen, so we recommend to perform enrichment analysis on small pathways instead of pathways involving hundreds or even more genes.

enrich = EnrichAnalyzer(genelist[1:200], type = "GOBP", limit = c(2, 80), organism = "mmu")
EnrichedView(enrich, top = 5)

Remove redundant results using EnrichedFilter.

The EnrichedFilter function tries to eliminate redundant pathways from the enrichment results by calculating the Jaccard index between pathways.

enrich1 = EnrichAnalyzer(genelist[1:200], type = "Pathway", organism = "mmu")
enrich2 = EnrichedFilter(enrich1)
EnrichedView(enrich1, top = 15)
EnrichedView(enrich2, top = 15)

Session info

sessionInfo()

References



WubingZhang/MAGeCKFlute documentation built on Jan. 27, 2024, 2:43 p.m.