# set global chunk options library(knitr) opts_chunk$set(size='tiny')
The whole purpose of this vignette/cheatsheet is to show different approaches for testing for significance of certain over-representation in one group of genes vs another.
we'll take a look what is enriched in the sixth cluster compare to the rest
library(vp.misc) library(clusterProfiler) library(GOstats) library(ReactomePA)
data(gcSample) # there are 8 clusters sapply(gcSample, length) # the IDs are Entrez Gene IDs head(gcSample[[1]])
GOstats
packagep <- new("KEGGHyperGParams", geneIds=gcSample[[6]], universeGeneIds=unique(unlist(gcSample)), pvalueCutoff = 1, annotation="org.Hs.eg.db", testDirection="over") xke1 <- hyperGTest(p) res <- summary(xke1)[,c("KEGGID","Term","Pvalue","Count","Size")] kable(head(res))
clusterProfiler
packagexke2 <- enrichKEGG(gcSample[[6]], universe = unlist((gcSample)), pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0, use_internal_data=TRUE) # then uses KEGG.db as GOstats below res <- summary(xke2)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
Pretty much same thing as above, but with a few tweaks
GOstats
packagep <- new("GOHyperGParams", geneIds=gcSample[[6]], universeGeneIds=unique(unlist(gcSample)), pvalueCutoff = 1, annotation="org.Hs.eg.db", ontology="BP", conditional=FALSE, # influential option testDirection="over") xgobp1 <- hyperGTest(p) res <- summary(xgobp1)[,c("GOBPID","Term","Pvalue","Count","Size")] kable(head(res))
clusterProfiler
packagexgobp2 <- enrichGO(gcSample[[6]], OrgDb = "org.Hs.eg.db", universe = unique(unlist((gcSample))), ont = "BP", pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0) xgobp2 <- subset_by_size(xgobp2, maxObsSize = 50) res <- summary(xgobp2)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
There is a discrepancy between the results.
clusterProfiler does the test in the following chain:
enrichGO -> DOSE:::enrich.internal
GOstats chain:
hyperGTest -> GOstats:::.hyperGTestInternal -> Category:::.doHyperGTest -> Category:::.doHyperGInternal
It seems that GOstats does quite a bit of gene and GO term removal prior the test.
GOstats
packagep <- new("GOHyperGParams", geneIds=gcSample[[6]], universeGeneIds=unique(unlist(gcSample)), pvalueCutoff = 1, annotation="org.Hs.eg.db", ontology="CC", conditional=FALSE, # influential option testDirection="over") xgocc1 <- hyperGTest(p) res <- summary(xgocc1)[,c("GOCCID","Term","Pvalue","Count","Size")] kable(head(res))
clusterProfiler
packagexgocc2 <- enrichGO(gcSample[[6]], OrgDb = "org.Hs.eg.db", universe = unique(unlist((gcSample))), ont = "CC", pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0) xgocc2 <- subset_by_size(xgocc2, maxObsSize = 50) res <- summary(xgocc2)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
GOstats
packagep <- new("GOHyperGParams", geneIds=gcSample[[6]], universeGeneIds=unique(unlist(gcSample)), pvalueCutoff = 1, annotation="org.Hs.eg.db", ontology="MF", conditional=FALSE, # influential option testDirection="over") xgomf1 <- hyperGTest(p) res <- summary(xgomf1)[,c("GOMFID","Term","Pvalue","Count","Size")] kable(head(res))
clusterProfiler
packagexgomf2 <- enrichGO(gcSample[[6]], OrgDb = "org.Hs.eg.db", universe = unique(unlist((gcSample))), ont = "MF", pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0) xgomf2 <- subset_by_size(xgomf2, maxObsSize = 50) res <- summary(xgomf2)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
As for Reactome there is only one pre-canned solution - from ReactomePA
ReactomePA
packagexreac <- enrichPathway(gcSample[[6]], organism = "human", universe = unique(unlist((gcSample))), pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0) xreac <- subset_by_size(xreac, maxObsSize = 50) res <- summary(xreac)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
There is a warning because some genes map to both human and mouse pathways. Later this creates trouble. Here is my post about that on Bioconductor's mailing list.
GOstats
packagep <- new("PFAMHyperGParams", geneIds=gcSample[[6]], universeGeneIds=unique(unlist(gcSample)), pvalueCutoff = 1, annotation="org.Hs.eg.db", testDirection="over") xpfam <- hyperGTest(p) res <- summary(xpfam) kable(head(res))
Adding description to PFAM term is something to think about.
Taking advantage of enricher
function from clusterProfiler
# randomly assign genes to 20 clusters setnum <- replicate(length(unique(unlist(gcSample))), sample(1:20, 1)) setnum <- paste("set", setnum, sep = '') TERM2GENE <- data.frame(term=setnum, gene=unique(unlist(gcSample)), stringsAsFactors = FALSE) TERM2NAME <- data.frame(term=setnum, name=paste("Description: ", setnum, sep = ''), stringsAsFactors = FALSE) xfake <- enricher(gene = gcSample[[6]], universe = unique(unlist(gcSample)), pvalueCutoff = 1, qvalueCutoff = 1, minGSSize = 0, TERM2GENE = TERM2GENE, TERM2NAME = TERM2NAME) res <- summary(xfake)[,c("ID","Description","pvalue","GeneRatio","BgRatio")] rownames(res) <- NULL kable(head(res))
cnetplot(xfake, vertex.label.cex = 0.5)
A different layout
cnetplot(xfake, vertex.label.cex = 0.5, layout="kk")
Check out as well HTSanalyzeR and geecc Bioconductor packages.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.