# Set the console width used for printed output to 750 characters.
options(width = 750)

# Default knitr chunk options for this document:
#   comment = "#>"  prefix for lines of printed output
#   error   = FALSE do not keep going after an error in a chunk
#   tidy    = FALSE do not reformat the displayed code
knitr::opts_chunk$set(
  comment = "#>",
  error = FALSE,
  tidy = FALSE
)
Evolutionary Transcriptomics aims to predict stages or periods of evolutionary conservation in biological processes on the transcriptome level. However, finding genes sharing a common evolutionary history could reveal how the biological process might have evolved in the first place.
In this Use Case
we will combine functional and biological annotation obtained with biomartr
with enriched genes obtained with PlotEnrichment().
For the following example we will use the dataset and enrichment analyses found in PlotEnrichment().
Install and load the myTAI package:
# install myTAI (only needed once per R library)
install.packages("myTAI")

# load myTAI
library(myTAI)
Download the Phylostratigraphic Map
of D. rerio:
# download the Phylostratigraphic Map of Danio rerio # from Sestak and Domazet-Loso, 2015
The dataset comes from Supplementary file 3
of this publication: https://academic.oup.com/mbe/article/32/2/299/1058654#77837069
After downloading Supplementary file 3
, you will find the file TableS3-2.xlsx
which can be used in the following biomartr
functions.
Read the *.xlsx
file storing the Phylostratigraphic Map
of D. rerio and format it for the use with myTAI
:
# install the readxl package
install.packages("readxl")

# load package readxl
library(readxl)

# read the excel file: first sheet, skipping the first 4 rows
# NOTE(review): the skipped rows are presumably a table caption/header in the
# supplementary file - confirm against TableS3-2.xlsx
DrerioPhyloMap.MBEa <- read_excel("TableS3-2.xlsx", sheet = 1, skip = 4)

# format Phylostratigraphic Map for use with myTAI:
# keep only the first two columns
Drerio.PhyloMap <- DrerioPhyloMap.MBEa[ , 1:2]

# have a look at the final format
head(Drerio.PhyloMap)
Phylostrata ZFIN_ID 1 1 ZDB-GENE-000208-13 2 1 ZDB-GENE-000208-17 3 1 ZDB-GENE-000208-18 4 1 ZDB-GENE-000208-23 5 1 ZDB-GENE-000209-3 6 1 ZDB-GENE-000209-4
Now, Drerio.PhyloMap
stores the Phylostratigraphic Map
of D. rerio which is used
as background set to perform enrichment analyses with PlotEnrichment()
from myTAI
.
Now, the PlotEnrichment()
function visualizes the over- and underrepresented Phylostrata
of brain specific genes when compared with the total number of genes stored in the Phylostratigraphic Map
of D. rerio.
library(readxl)

# read expression data (organ specific genes) from Sestak and Domazet-Loso, 2015
# (second sheet of the workbook, skipping the first 3 rows)
Drerio.OrganSpecificExpression <- read_excel("TableS3-2.xlsx", sheet = 2, skip = 3)

# select only brain specific genes:
# take the "brain" column, drop missing entries and duplicates,
# and flatten the result into a plain vector of gene ids
Drerio.Brain.Genes <- unlist(unique(na.omit(Drerio.OrganSpecificExpression[ , "brain"])))

# visualize enriched Phylostrata of genes annotated as brain specific
PlotEnrichment(Drerio.PhyloMap,
               test.set     = Drerio.Brain.Genes,
               measure      = "foldchange",
               use.only.map = TRUE,
               legendName   = "PS")
Users will observe that for example brain genes deriving from PS5 are significantly enriched.
Now we can select all brain genes originating in PS5 using the SelectGeneSet()
function from myTAI
. Please note that SelectGeneSet()
can be used with phylostratigraphic maps only (use.only.map = TRUE
argument) since myTAI version > 0.3.0.
# restrict the Phylostratigraphic Map to the brain specific genes
# (use.only.map = TRUE: the input is a map, not a full expression set)
BrainGenes <- SelectGeneSet(ExpressionSet = Drerio.PhyloMap,
                            gene.set      = Drerio.Brain.Genes,
                            use.only.map  = TRUE)

# select only brain genes originating in PS5
# (which() also drops rows where the comparison yields NA)
BrainGenes.PS5 <- BrainGenes[which(BrainGenes[ , "Phylostrata"] == 5), ]

# look at the results
head(BrainGenes.PS5)
Phylostrata ZFIN_ID 14851 5 ZDB-GENE-000210-6 14852 5 ZDB-GENE-000210-7 14853 5 ZDB-GENE-000328-4 14856 5 ZDB-GENE-000411-1 14857 5 ZDB-GENE-000427-4 14860 5 ZDB-GENE-000526-1
Now users can use the biomart()
function to obtain the functional annotation of brain genes originating in PS5.
For this purpose, first we need to find the filter name of the corresponding gene ids such as ZDB-GENE-000210-6
.
# find filter for zfin.org ids:
# list the filters available for Danio rerio that match the topic "zfin_id"
organismFilters("Danio rerio", topic = "zfin_id")
name description dataset 52 with_zfin_id with ZFIN ID(s) drerio_gene_ensembl 53 with_zfin_id_transcript_name with ZFIN transcript name(s) drerio_gene_ensembl 103 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-060825-136] drerio_gene_ensembl 274 with_zfin_id with ZFIN ID(s) drerio_gene_vega 286 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-121214-212] drerio_gene_vega 366 with_zfin_id with ZFIN ID(s) drerio_gene_ensembl 367 with_zfin_id_transcript_name with ZFIN transcript name(s) drerio_gene_ensembl 417 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-060825-136] drerio_gene_ensembl 588 with_zfin_id with ZFIN ID(s) drerio_gene_vega 600 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-121214-212] drerio_gene_vega 680 with_zfin_id with ZFIN ID(s) drerio_gene_ensembl 681 with_zfin_id_transcript_name with ZFIN transcript name(s) drerio_gene_ensembl 731 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-060825-136] drerio_gene_ensembl 902 with_zfin_id with ZFIN ID(s) drerio_gene_vega 914 zfin_id ZFIN ID(s) [e.g. ZDB-GENE-121214-212] drerio_gene_vega mart 52 ENSEMBL_MART_ENSEMBL 53 ENSEMBL_MART_ENSEMBL 103 ENSEMBL_MART_ENSEMBL 274 ENSEMBL_MART_ENSEMBL 286 ENSEMBL_MART_ENSEMBL 366 ENSEMBL_MART_ENSEMBL 367 ENSEMBL_MART_ENSEMBL 417 ENSEMBL_MART_ENSEMBL 588 ENSEMBL_MART_ENSEMBL 600 ENSEMBL_MART_ENSEMBL 680 ENSEMBL_MART_ENSEMBL 681 ENSEMBL_MART_ENSEMBL 731 ENSEMBL_MART_ENSEMBL 902 ENSEMBL_MART_ENSEMBL 914 ENSEMBL_MART_ENSEMBL
Now users can retrieve the corresponding GO attribute of D. rerio with organismAttributes
.
# find go attribute term for D. rerio:
# list the attributes available for Danio rerio that match the topic "go"
organismAttributes("Danio rerio", topic = "go")
name description 33 go_id GO Term Accession 36 go_linkage_type GO Term Evidence Code 38 goslim_goa_accession GOSlim GOA Accession(s) 39 goslim_goa_description GOSlim GOA Description 516 ggorilla_homolog_ensembl_gene Gorilla Ensembl Gene ID 517 ggorilla_homolog_canomical_transcript_protein Canonical Protein or Transcript ID 518 ggorilla_homolog_ensembl_peptide Gorilla Ensembl Protein ID 519 ggorilla_homolog_chromosome Gorilla Chromosome Name 520 ggorilla_homolog_chrom_start Gorilla Chromosome Start (bp) 521 ggorilla_homolog_chrom_end Gorilla Chromosome End (bp) 522 ggorilla_homolog_orthology_type Homology Type 523 ggorilla_homolog_subtype Ancestor 524 ggorilla_homolog_orthology_confidence Orthology confidence [0 low, 1 high] 525 ggorilla_homolog_perc_id % Identity with respect to query gene 526 ggorilla_homolog_perc_id_r1 % Identity with respect to Gorilla gene 527 ggorilla_homolog_dn dN 528 ggorilla_homolog_ds dS 1240 go_id GO ID 1241 quick_go Quick GO ID 1370 go_id GO Term Accession 1373 go_linkage_type GO Term Evidence Code 1375 goslim_goa_accession GOSlim GOA Accession(s) 1376 goslim_goa_description GOSlim GOA Description 1853 ggorilla_homolog_ensembl_gene Gorilla Ensembl Gene ID 1854 ggorilla_homolog_canomical_transcript_protein Canonical Protein or Transcript ID 1855 ggorilla_homolog_ensembl_peptide Gorilla Ensembl Protein ID 1856 ggorilla_homolog_chromosome Gorilla Chromosome Name 1857 ggorilla_homolog_chrom_start Gorilla Chromosome Start (bp) 1858 ggorilla_homolog_chrom_end Gorilla Chromosome End (bp) 1859 ggorilla_homolog_orthology_type Homology Type 1860 ggorilla_homolog_subtype Ancestor 1861 ggorilla_homolog_orthology_confidence Orthology confidence [0 low, 1 high] 1862 ggorilla_homolog_perc_id % Identity with respect to query gene 1863 ggorilla_homolog_perc_id_r1 % Identity with respect to Gorilla gene 1864 ggorilla_homolog_dn dN 1865 ggorilla_homolog_ds dS 2577 go_id GO ID 2578 quick_go Quick GO ID 2707 go_id GO Term Accession 2710 go_linkage_type GO 
Term Evidence Code 2712 goslim_goa_accession GOSlim GOA Accession(s) 2713 goslim_goa_description GOSlim GOA Description 3190 ggorilla_homolog_ensembl_gene Gorilla Ensembl Gene ID 3191 ggorilla_homolog_canomical_transcript_protein Canonical Protein or Transcript ID 3192 ggorilla_homolog_ensembl_peptide Gorilla Ensembl Protein ID 3193 ggorilla_homolog_chromosome Gorilla Chromosome Name 3194 ggorilla_homolog_chrom_start Gorilla Chromosome Start (bp) 3195 ggorilla_homolog_chrom_end Gorilla Chromosome End (bp) 3196 ggorilla_homolog_orthology_type Homology Type 3197 ggorilla_homolog_subtype Ancestor 3198 ggorilla_homolog_orthology_confidence Orthology confidence [0 low, 1 high] 3199 ggorilla_homolog_perc_id % Identity with respect to query gene 3200 ggorilla_homolog_perc_id_r1 % Identity with respect to Gorilla gene 3201 ggorilla_homolog_dn dN 3202 ggorilla_homolog_ds dS 3914 go_id GO ID 3915 quick_go Quick GO ID dataset mart 33 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 36 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 38 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 39 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 516 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 517 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 518 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 519 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 520 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 521 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 522 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 523 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 524 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 525 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 526 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 527 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 528 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1240 drerio_gene_vega ENSEMBL_MART_ENSEMBL 1241 drerio_gene_vega ENSEMBL_MART_ENSEMBL 1370 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1373 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1375 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1376 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1853 drerio_gene_ensembl 
ENSEMBL_MART_ENSEMBL 1854 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1855 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1856 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1857 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1858 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1859 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1860 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1861 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1862 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1863 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1864 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 1865 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 2577 drerio_gene_vega ENSEMBL_MART_ENSEMBL 2578 drerio_gene_vega ENSEMBL_MART_ENSEMBL 2707 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 2710 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 2712 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 2713 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3190 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3191 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3192 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3193 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3194 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3195 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3196 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3197 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3198 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3199 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3200 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3201 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3202 drerio_gene_ensembl ENSEMBL_MART_ENSEMBL 3914 drerio_gene_vega ENSEMBL_MART_ENSEMBL 3915 drerio_gene_vega ENSEMBL_MART_ENSEMBL
Now users can specify the filter zfin_id
and attribute go_id
to retrieve the GO terms of corresponding gene ids (Please note that this will take some time).
# retrieve GO terms of D. rerio brain genes originating in PS5
# (queries BioMart with the ZFIN ids as filter values; this may take a while)
GO_tbl.BrainGenes <- biomart(genes      = unlist(BrainGenes.PS5[ , "ZFIN_ID"]),
                             mart       = "ENSEMBL_MART_ENSEMBL",
                             dataset    = "drerio_gene_ensembl",
                             attributes = "go_id",
                             filters    = "zfin_id")

# look at the first rows of the result
head(GO_tbl.BrainGenes)
zfin_id go_id 1 ZDB-GENE-000210-6 GO:0060037 2 ZDB-GENE-000210-6 GO:0046983 3 ZDB-GENE-000210-7 GO:0046983 4 ZDB-GENE-000328-4 GO:0007275 5 ZDB-GENE-000328-4 GO:0007166 6 ZDB-GENE-000328-4 GO:0035567
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.