knitr::opts_chunk$set(tidy=FALSE, cache=TRUE, dev="png", message=FALSE, error=FALSE, warning=TRUE)
Citation: if you use MAGeCKFlute in published research, please cite: Binbin Wang, Mei Wang, Wubing Zhang. "Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute." Nature Protocols (2019), doi: 10.1038/s41596-018-0113-7.
if(!"MAGeCKFlute" %in% installed.packages()) BiocManager::install("MAGeCKFlute") if(!"ggplot2" %in% installed.packages()) BiocManager::install("ggplot2") library(MAGeCKFlute) library(ggplot2)
MAGeCKFlute requires a weighted gene list for enrichment analysis.
file1 = file.path(system.file("extdata", package = "MAGeCKFlute"), "testdata/rra.gene_summary.txt") gdata = ReadRRA(file1) genelist = gdata$Score names(genelist) = gdata$id genelist = sort(genelist, decreasing = TRUE) head(genelist)
# Alternative functions EnrichAnalyzer and enrich.HGT. hgtRes1 = EnrichAnalyzer(genelist[1:200], method = "HGT", type = "Pathway", organism = "mmu") head(hgtRes1@result) # hgtRes2 = enrich.HGT(genelist[1:200]) # head(hgtRes2@result)
The ORT and GSEA are implemented in the R package clusterProfiler [@Yu2012]. So please install it prior to the analysis.
if(!"clusterProfiler" %in% installed.packages()){ BiocManager::install("clusterProfiler") } library(clusterProfiler) # Alternative functions EnrichAnalyzer and enrich.ORT. ortRes1 = EnrichAnalyzer(genelist[1:200], method = "ORT", type = "KEGG", organism = "mmu") head(ortRes1@result) # ortRes2 = enrich.ORT(genelist[genelist< -1]) # head(ortRes2@result)
library(clusterProfiler) # Alternative functions EnrichAnalyzer and enrich.GSE. gseRes1 = EnrichAnalyzer(genelist, method = "GSEA", type = "Pathway", organism = "mmu")
require(ggplot2) df = hgtRes1@result df$logFDR = -log10(df$p.adjust) p = BarView(df[1:5,], "Description", 'logFDR') p = p + labs(x = NULL) + coord_flip() p # Or use function barplot from enrichplot package barplot(hgtRes1, showCategory = 5)
## top: up-regulated pathways; ## bottom: down-regulated pathways EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 1) EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 2) dotplot(hgtRes1, showCategory = 5)
library(enrichplot) hgtRes1@result$geneID = hgtRes1@result$geneName cnetplot(hgtRes1, 5) heatplot(hgtRes1, showCategory = 5, foldChange = genelist) tmp <- pairwise_termsim(hgtRes1) emapplot(tmp, showCategory = 5, layout = "kk")
# show GSEA results of one pathway idx = which(gseRes1$NES>0)[1] gseaplot(gseRes1, geneSetID = idx, title = gseRes1$Description[idx]) # show GSEA results of multiple pathways gseaplot2(gseRes1, geneSetID = which(gseRes1$NES>0)[1:3])
For enrichment analysis, MAGeCKFlute signifies the public available gene sets, including Pathways (PID, KEGG, REACTOME, BIOCARTA, C2CP), GO terms (GOBP, GOCC, GOMF), Complexes (CORUM) and molecular signature from MsigDB (c1, c2, c3, c4, c5, c6, c7, HALLMARK).
Analysis of high-throughput data increasingly relies on pathway annotation and functional information derived from Gene Ontology, which is also useful in the analysis of CRISPR screens.
## combination of the gene sets enrichComb = EnrichAnalyzer(genelist[1:200], type = "KEGG+REACTOME", organism = "mmu") EnrichedView(enrichComb, top = 5) ## All pathways enrich = EnrichAnalyzer(geneList = genelist[1:200], type = "REACTOME", organism = "mmu") EnrichedView(enrich, top = 5) ## All gene ontology enrichGo = EnrichAnalyzer(genelist[1:200], type = "GOBP", organism = "mmu")
Functional annotations from the pathways and GO are powerful in the context of network dynamics. However, the approach has limitations in particular for the analysis of CRISPR screenings, in which elements within a protein complex rather than complete pathways might have a strong selection. So we incorporate protein complex resource from CORUM database, which enable the identification of essential protein complexes from the CRISPR screens.
enrichPro = EnrichAnalyzer(genelist[1:200], type = "Complex", organism = "mmu") EnrichedView(enrichPro, top = 5)
Usually, only the core genes in a pathway could be selected in a CRISPR screen, so we recommend to perform enrichment analysis on small pathways instead of pathways involving hundreds or even more genes.
enrich = EnrichAnalyzer(genelist[1:200], type = "GOBP", limit = c(2, 80), organism = "mmu") EnrichedView(enrich, top = 5)
EnrichedFilter
.The EnrichedFilter
function tries to eliminate redundant pathways from the enrichment results by calculating the Jaccard index between pathways.
enrich1 = EnrichAnalyzer(genelist[1:200], type = "Pathway", organism = "mmu") enrich2 = EnrichedFilter(enrich1) EnrichedView(enrich1, top = 15) EnrichedView(enrich2, top = 15)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.