inst/doc/Enrichment_Analysis_and_Visualization.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----load_InterMineR, message=FALSE, warning=FALSE----------------------------
library(InterMineR)

## ----source_function_load_data, message=FALSE, warning=FALSE------------------
# get HumanMine instance
im.human = initInterMine(listMines()["HumanMine"])

# retrieve data model for HumanMine
# hsa_model = getModel(im.human)  # temporarily removed

# all targets of HumanMine enrichment widgets possess InterMine ids
# subset(hsa_model, type %in% c("Gene", "Protein", "SNP") & child_name == "id") # temporarily removed

# load human genes which are associated with Diabetes (retrieved from HumanMine)
data("PL_DiabetesGenes")

head(PL_DiabetesGenes, 3)

# get Gene.symbol
hsa_gene_names = as.character(PL_DiabetesGenes$Gene.symbol)

head(hsa_gene_names, 3)

# get Gene.primaryIdentifiers (ENTREZ Gene identifiers)
hsa_gene_entrez = as.character(PL_DiabetesGenes$Gene.primaryIdentifier)

head(hsa_gene_entrez, 3)

## ----doEnrichment, message=FALSE, warning=FALSE-------------------------------
# get all HumanMine widgets
human.widgets = as.data.frame(getWidgets(im.human))

human.widgets

## ----enrichment_widgets_gene_targets, message=FALSE, warning=FALSE------------
# get enrichment, gene-related widgets for HumanMine
subset(human.widgets, widgetType == 'enrichment' & targets == "Gene")

## ----go_enrichment_analysis, message=FALSE, warning=FALSE---------------------
# Assign directly the doEnrichment.string from getGeneIds function to the ids argument of doEnrichment to perform the analysis

# Perform enrichment analysis
GO_enrichResult = doEnrichment(
  im = im.human,
  ids = hsa_gene_entrez,
  widget = "go_enrichment_for_gene"
  # organism = "Homo sapiens" # optional if results from more than one organism are retrieved
)

## ----doEnrichment_genelist, message=FALSE, warning=FALSE, eval=FALSE----------
#  # Perform enrichment analysis with genelist name instead of ids
#  GO_enrichResult = doEnrichment(
#    im = im.human,
#    genelist = "PL_DiabetesGenes",
#    # ids = hsa_gene_entrez,
#    widget = "go_enrichment_for_gene"
#  )

## ----data_frame_enrichResult, message=FALSE, warning=FALSE--------------------
# data.frame containing the results of the enrichment analysis
head(GO_enrichResult$data)

dim(GO_enrichResult$data)

## ----populationCount, message=FALSE, warning=FALSE----------------------------
GO_enrichResult$populationCount

## ----notAnalysed, message=FALSE, warning=FALSE--------------------------------
GO_enrichResult$notAnalysed

## ----im, message=FALSE, warning=FALSE-----------------------------------------
GO_enrichResult$im

## ----parameters, message=FALSE, warning=FALSE---------------------------------
GO_enrichResult$parameters

## ----go_filter, message=FALSE, warning=FALSE----------------------------------
# get available filter values for Gene Ontology Enrichment widget
as.character(subset(human.widgets, name == "go_enrichment_for_gene")$filters)

# Perform enrichment analysis for GO molecular function terms
GO_MF_enrichResult = doEnrichment(
  im = im.human,
  ids = hsa_gene_entrez,
  widget = "go_enrichment_for_gene",
  filter = "molecular_function")

head(GO_MF_enrichResult$data)

dim(GO_MF_enrichResult$data)

## ----prot_domain_changing_correction, message=FALSE, warning=FALSE------------
# Perform enrichment analysis for Protein Domains in list of genes
PD_FDR_enrichResult = doEnrichment(
  im = im.human,
  ids = hsa_gene_entrez,
  widget = "prot_dom_enrichment_for_gene",
  correction = "Benjamini Hochberg"
) 

head(PD_FDR_enrichResult$data)

# Perform enrichment analysis for Protein Domains in list of genes
# but without a correction algoritm this time
PD_None_enrichResult = doEnrichment(
  im = im.human,
  ids = hsa_gene_entrez,
  widget = "prot_dom_enrichment_for_gene",
  correction = "None"
)

head(PD_None_enrichResult$data)

## ----convertToGeneAnswers, message=FALSE, warning=FALSE-----------------------
# load GeneAnswers package
library(GeneAnswers)

# convert InterMineR Gene Ontology Enrichment analysis results to GeneAnswers object
geneanswer_object = convertToGeneAnswers(
  
  # assign with doEnrichment result:
  enrichmentResult = GO_enrichResult,
  
  # assign with list of genes:
  geneInput = data.frame(GeneID = as.character(hsa_gene_entrez), 
                         stringsAsFactors = FALSE),
  
  # assign with the type of gene identifier
  # in our example we use Gene.primaryIdentifier values (ENTREZ IDs):
  geneInputType = "Gene.primaryIdentifier",
  
  # assign with Bioconductor annotation package:
  annLib = 'org.Hs.eg.db',
  
  # assign with annotation category type
  # in our example we use Gene Ontology (GO) terms:
  categoryType = "GO"
  
  #optional define manually if 'enrichIdentifier' is missing from getWidgets:
  #enrichCategoryChildName = "Gene.goAnnotation.ontologyTerm.parents.identifier"
)

class(geneanswer_object)
summary(geneanswer_object)
#slotNames(geneanswer_object)

## ----filters_GO_&_pathways, message=FALSE, warning=FALSE----------------------
as.character(
subset(human.widgets,
       title %in% c("Gene Ontology Enrichment",
                    "Pathway Enrichment"))$filters
)

## ----GO_terms_MF, message=FALSE, warning=FALSE--------------------------------
# convert to GeneAnswers results for GO terms associated with molecular function
geneanswer_MF_object = convertToGeneAnswers(
  enrichmentResult = GO_MF_enrichResult,
  geneInput = data.frame(GeneID = as.character(hsa_gene_entrez), 
                         stringsAsFactors = FALSE),
  geneInputType = "Gene.primaryIdentifier",
  annLib = 'org.Hs.eg.db',
  categoryType = "GO.MF"
  #enrichCategoryChildName = "Gene.goAnnotation.ontologyTerm.parents.identifier"
)

class(geneanswer_MF_object)
summary(geneanswer_MF_object)
#slotNames(geneanswer_MF_object)

## ----geneanswer_readable, message=FALSE, warning=FALSE, eval = FALSE----------
#  # Make GeneAnswers Instance readable
#  geneanswer_object_readable = geneAnswersReadable(geneanswer_object)

## ----piechart, message=FALSE, warning=FALSE, eval = FALSE---------------------
#  # GeneAnswers pieChart
#  geneAnswersChartPlots(geneanswer_object_readable,
#                        chartType='pieChart',
#                        sortBy = 'correctedPvalue',
#                        top = 3)

## ----barplot, message=FALSE, warning=FALSE, eval = FALSE----------------------
#  # GeneAnswers barplot
#  geneAnswersChartPlots(geneanswer_object_readable,
#                        chartType='barPlot',
#                        sortBy = 'correctedPvalue',
#                        top = 5)

## ----geneAnswersConceptNet, message=FALSE, warning=FALSE, eval = FALSE--------
#  # generate concept-gene network
#  geneAnswersConceptNet(geneanswer_object,
#                        colorValueColumn=NULL,
#                        centroidSize='correctedPvalue',
#                        output='interactive',
#                        geneSymbol = TRUE,
#                        catTerm = TRUE,
#                        catID = TRUE,
#                        showCats = 1:5)

## ----pseudo_foldChange, message=FALSE, warning=FALSE, eval = FALSE------------
#  # generate concept-gene network
#  # for visualization purposes add a column of RANDOM numbers in geneInput
#  set.seed(123)
#  geneanswer_object@geneInput$random_values = rnorm(nrow(geneanswer_object@geneInput))
#  
#  geneAnswersConceptNet(geneanswer_object,
#                        colorValueColumn=2,
#                        centroidSize='correctedPvalue',
#                        output='interactive',
#                        geneSymbol = TRUE,
#                        catTerm = TRUE,
#                        catID = TRUE,
#                        showCats = 1:5)

## ----geneAnswersConceptRelation, message=FALSE, warning=FALSE, eval = FALSE----
#  # visualize the relations between the first two GO terms
#  geneAnswersConceptRelation(geneanswer_object,
#                             directed=TRUE,
#                             netMode='connection',
#                             catTerm=TRUE,
#                             catID=TRUE,
#                             showCats = 1:2)

## ----buildNet, message=FALSE, warning=FALSE, eval = FALSE---------------------
#  # visualize interactions between the first five genes
#  buildNet(getGeneInput(geneanswer_object)[1:5,1],
#           idType='GeneInteraction',
#           layers=2,
#           filterGraphIDs=getGeneInput(geneanswer_object)[,1],
#           filterLayer=2,
#           netMode='connection')

## ----geneAnswersHeatmap, message=FALSE, warning=FALSE, eval = TRUE------------
# generate GO terms - Genes cross tabulation
geneAnswersHeatmap(geneanswer_object, 
                   catTerm=TRUE, 
                   geneSymbol=TRUE,
                   showCats = 5:10,
                   cex.axis = 0.75)

## ----publication_enrichment, message=FALSE, warning=FALSE---------------------
# publication enrichment analysis
publication_enrichResult = doEnrichment(
  im = im.human,
  ids = hsa_gene_entrez,
  widget = "publication_enrichment")

## ----publication_conversion, message=FALSE, warning=FALSE---------------------
# convert to GeneAnswer-class
# use Gene.symbol values instead of Gene.primaryIdentifier (ENTREZ)
geneanswer_pubs_object = convertToGeneAnswers(
  enrichmentResult = publication_enrichResult,
  geneInput = data.frame(GeneID = as.character(hsa_gene_names),
                         stringsAsFactors = FALSE),
  geneInputType = "Gene.symbol",
  annLib = 'org.Hs.eg.db',
  categoryType = NULL,
  enrichCategoryChildName = "Gene.publications.pubMedId"
)

class(geneanswer_pubs_object)
summary(geneanswer_pubs_object)
#slotNames(geneanswer_pubs_object)

## ----pub_piechart, message=FALSE, warning=FALSE, eval=FALSE-------------------
#  # GeneAnswers pieChart
#  geneAnswersChartPlots(geneanswer_pubs_object,
#                        chartType='pieChart',
#                        sortBy = 'correctedPvalue',
#                        top = 5)

## ----pub_barplot, message=FALSE, warning=FALSE, eval=FALSE--------------------
#  # GeneAnswers barplot
#  geneAnswersChartPlots(geneanswer_pubs_object,
#                        chartType='barPlot',
#                        sortBy = 'correctedPvalue',
#                        top = 5)

## ----pub_concept_gene_network, message=FALSE, warning=FALSE, eval=FALSE-------
#  # generate concept-gene network
#  geneAnswersConceptNet(geneanswer_pubs_object,
#                        colorValueColumn=NULL,
#                        centroidSize='correctedPvalue',
#                        output='interactive',
#                        catTerm = FALSE,
#                        catID = FALSE,
#                        showCats = 1:5)

## ----pub_gene_interaction, message=FALSE, warning=FALSE, eval=FALSE-----------
#  # copy the existing GeneAnswer-class object
#  geneanswer_pubs_object2 = geneanswer_pubs_object
#  
#  # replace Gene.symbol with Gene.primaryIdentifier values
#  geneanswer_pubs_object2@geneInput = data.frame(
#    GeneID = as.character(hsa_gene_entrez),
#    stringsAsFactors = FALSE)
#  
#  # generate gene interaction network
#  buildNet(getGeneInput(geneanswer_pubs_object2)[1:5,1],
#           idType='GeneInteraction',
#           layers=2,
#           filterGraphIDs=getGeneInput(geneanswer_pubs_object)[,1],
#           filterLayer=2,
#           netMode='connection')

## ----sessioInfo---------------------------------------------------------------
sessionInfo()

Try the InterMineR package in your browser

Any scripts or data that you put into this service are public.

InterMineR documentation built on Nov. 8, 2020, 5:58 p.m.