ABAEnrichment.R
In ABAEnrichment: Gene expression enrichment in human brain regions

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----global_options, include=FALSE--------------------------------------------------------------------------
knitr::opts_chunk$set(fig.width=10, fig.height=7, warning=FALSE, message=FALSE)
options(width=110)
set.seed(123)

## -----------------------------------------------------------------------------------------------------------
## load ABAEnrichment package
library(ABAEnrichment)
## create input data.frame with candidate genes
gene_ids = c('NCAPG', 'APOL4', 'NGFR', 'NXPH4', 'C21orf59', 'CACNG2', 'AGTR1',
    'ANO1', 'BTBD3', 'MTUS1', 'CALB1', 'GYG1', 'PAX2')
input_hyper = data.frame(gene_ids, is_candidate=1)
head(input_hyper)

## ---- eval=FALSE--------------------------------------------------------------------------------------------
#  ## run enrichment analyses with default parameters
#  ## for the adult and developing human brain
#  res_adult = aba_enrich(input_hyper, dataset='adult')
#  res_devel = aba_enrich(input_hyper, dataset='5_stages')

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with less cutoffs and randomsets
## to save computation time
res_devel = aba_enrich(input_hyper, dataset='5_stages',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## extract first element from the output list, which contains the statistics
fwers_devel = res_devel[[1]]
## see results for the brain regions with highest enrichment
## for children (3-11 yrs, age_category 3)
fwers_3 = fwers_devel[fwers_devel[,1]==3, ]
head(fwers_3)

## -----------------------------------------------------------------------------------------------------------
res_devel[2:3]

## ----eval=FALSE---------------------------------------------------------------------------------------------
#  ## run enrichment analysis, with randomsets dependent on gene length
#  res_len = aba_enrich(input_hyper, gene_len=TRUE)
#  ## run the same analysis using gene-coordinates
#  ## from GRCh38 instead of the default GRCh37
#  res_len_grch38 = aba_enrich(input_hyper, gene_len=TRUE, ref_genome='grch38')

## -----------------------------------------------------------------------------------------------------------
## create input vector with a candidate region on chromosome 8
## and background regions on chromosome 7, 8 and 9
regions = c('8:82000000-83000000', '7:1300000-56800000',
    '7:74900000-148700000', '8:7400000-44300000', '8:47600000-146300000',
    '9:0-39200000', '9:69700000-140200000')
is_candidate = c(1, rep(0,6))
input_region = data.frame(regions, is_candidate)

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis for the adult human brain
res_region = aba_enrich(input_region, dataset='adult',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## look at the results from the enrichment analysis
fwers_region = res_region[[1]]
head(fwers_region)
## see which genes are located in the candidate region
input_genes = res_region[[2]]
candidate_genes = input_genes[input_genes[,2]==1,]
candidate_genes

## ----echo=FALSE---------------------------------------------------------------------------------------------
gene = c('NCAPG','APOL4','NGFR','NXPH4','C21orf59','CACNG2')
chr = c('chr4', 'chr22', 'chr17', 'chr12', 'chr21', 'chr22')
start = c(17812436, 36585176, 47572655, 57610578, 33954510, 36956916)
end = c(17846487, 36600879, 47592382, 57620232, 33984918, 37098690)
custom_coords = data.frame(gene, chr, start, end, stringsAsFactors=FALSE)

## -----------------------------------------------------------------------------------------------------------
## example for a dataframe with custom gene-coordinates
head(custom_coords)

## ----eval=FALSE---------------------------------------------------------------------------------------------
#  ## use correction for gene-length based on custom gene-coordinates
#  res_len_cc = aba_enrich(input_hyper, gene_len=TRUE, gene_coords=custom_coords)

## -----------------------------------------------------------------------------------------------------------
## assign random scores to the genes used above
scores = sample(1:50, length(gene_ids))
input_wicox = data.frame(gene_ids, scores)
head(input_wicox)

## ---- results='hide'----------------------------------------------------------------------------------------
## test for enrichment of expressed genes with high scores in the adult brain
## using the Wilcoxon rank-sum test
res_wilcox = aba_enrich(input_wicox, test='wilcoxon',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res_wilcox[[1]])

## -----------------------------------------------------------------------------------------------------------
## create a toy example dataset with two counts per gene
high_A_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2')
low_A_genes = c('GDA', 'ENC1', 'EGR4', 'VIPR1', 'DOC2A', 'OASL', 'FRY', 'NAV3')
A_counts = c(sample(20:30, length(high_A_genes)),
    sample(5:15, length(low_A_genes)))
B_counts = c(sample(5:15, length(high_A_genes)),
    sample(20:30, length(low_A_genes)))
input_binom = data.frame(gene_ids=c(high_A_genes, low_A_genes),
    A_counts, B_counts)
head(input_binom)

## -----------------------------------------------------------------------------------------------------------
## run binomial test
res_binom = aba_enrich(input_binom, cutoff_quantiles=c(0.2,0.9),
    test='binomial', n_randsets=100, silent=TRUE)
head(res_binom[[1]])

## -----------------------------------------------------------------------------------------------------------
## create a toy example with four counts per gene
high_substi_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2')
low_substi_genes = c('ENC1', 'EGR4', 'NPTX1', 'DOC2A', 'OASL', 'FRY', 'NAV3')
subs_non_syn = c(sample(5:15, length(high_substi_genes), replace=TRUE),
    sample(0:5, length(low_substi_genes), replace=TRUE))
subs_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)),
    replace=TRUE)
vari_non_syn = c(sample(0:5, length(high_substi_genes), replace=TRUE),
    sample(0:10, length(low_substi_genes), replace=TRUE))
vari_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)),
    replace=TRUE)
input_conti = data.frame(gene_ids=c(high_substi_genes, low_substi_genes),
    subs_non_syn, subs_syn, vari_non_syn, vari_syn)
head(input_conti)

## the corresponding contingency table for the first gene would be:
matrix(input_conti[1, 2:5], ncol=2, dimnames=list(c('non-synonymous',
    'synonymous'), c('substitution','variable')))

## ---- results='hide'----------------------------------------------------------------------------------------
res_conti = aba_enrich(input_conti, test='contingency',
    cutoff_quantiles=c(0.7,0.8,0.9), n_randset=100)


## -----------------------------------------------------------------------------------------------------------
head(res_conti[[1]])

## -----------------------------------------------------------------------------------------------------------
## get expression data for the top 5 regions
## and all input genes
## of the last aba_enrich analysis (res_conti)
top_regions = res_conti[[1]][1:5, 'structure_id']
gene_ids = res_conti[[2]][,1]
expr = get_expression(structure_ids=top_regions, gene_ids=gene_ids,
    dataset='adult')
expr[,1:6]

## -----------------------------------------------------------------------------------------------------------
get_expression(structure_ids=c('Allen:10657','Allen:10208'),
    gene_ids=c('RFFL', 'NTS', 'LIPE'), dataset='5_stages')

## -----------------------------------------------------------------------------------------------------------
## plot the expression data from above
plot_expression(expr, main="microarray expression data for adult brain")

## -----------------------------------------------------------------------------------------------------------
## plot expression data for the top 5 regions
## of age-category 2 and all input genes
## of the developmental stage aba_enrich analysis above (res_devel)

## extract brain regions
devel_stats = res_devel[[1]]
devel_stats_2 = devel_stats[devel_stats$age_category==2,]
top_regions_2 = devel_stats_2[1:5,'structure_id']
## extract genes
genes = res_devel[[2]][,1]
## get expression for all 5 age categories
expr_all = get_expression(top_regions_2, genes, "5_stages")
## subset to age-cateogry 2
expr_2 = expr_all[["age_category_2"]]
## plot heatmap
plot_expression(expr_2, main="RPKM from RNA-seq for age_category_2")

## -----------------------------------------------------------------------------------------------------------
## plot expression data for the top 5 regions
## and all input genes
## from the 2x2-contingency table analysis (res_conti)

## extract brain regions
top_regions = res_conti[[1]][1:5, 'structure_id']
## extract genes
gene_ids = res_conti[[2]][,1]
## get expression 
expr = get_expression(structure_ids=top_regions, gene_ids=gene_ids, dataset='adult')
## plot expression with sidebar
plot_expression(expr, gene_vars=res_conti[[2]], main="heatmap with sidebar")

## -----------------------------------------------------------------------------------------------------------
## use heatmap.2 to allow adjusting other parameters like color
gplots::heatmap.2(expr, scale="none", col=gplots::greenred(100),
    main="custom heatmap", trace="none",
    keysize=1.4, key.xlab="expression",
    dendrogram="row", margins=c(6, 8))

## -----------------------------------------------------------------------------------------------------------
## get IDs of the substructures of the frontal neocortex (Allen:10161)
## for which expression data are available
subs = get_sampled_substructures('Allen:10161')
subs
## get the full name of those substructures
get_name(subs)
## get the superstructures of the frontal neocortex (from general to special)
supers = get_superstructures('Allen:10161')
supers
## get the full names of those superstructures
get_name(supers)

## -----------------------------------------------------------------------------------------------------------
## get structure IDs of brain regions that contain 'accumbens' in their name
get_id('accumbens')
## get structure IDs of brain regions that contain 'telencephalon' in their name
get_id('telencephalon')

## -----------------------------------------------------------------------------------------------------------
## get all brain regions included in ABAEnrichment together with thier IDs
all_regions = get_id('')
head(all_regions)

## ---- results='hide'----------------------------------------------------------------------------------------
## run an enrichment analysis with 7 candidate and 7 background genes
## for the developing brain
gene_ids = c('FOXJ1', 'NTS', 'LTBP1', 'STON2', 'KCNJ6', 'AGT', 
    'ANO3', 'TTR', 'ELAVL4', 'BEAN1', 'TOX', 'EPN3', 'PAX2', 'KLHL1')
is_candidate = rep(c(1,0), each=7)
input_hyper = data.frame(gene_ids, is_candidate)
res = aba_enrich(input_hyper, dataset='5_stages',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res[[1]])
## see which candidate genes are annotated to brain regions with a FWER < 0.01
anno = get_annotated_genes(res, fwer_threshold=0.1)
anno

## -----------------------------------------------------------------------------------------------------------
## find out which of the above genes have expression
## above the 70% and 90% expression-cutoff
## in the basal ganglia of the adult human brain (Allen:4276)
anno2 = get_annotated_genes(structure_ids='Allen:4276', dataset='adult', 
    cutoff_quantiles=c(0.7,0.9), genes=gene_ids)
anno2

## -----------------------------------------------------------------------------------------------------------
## use previously defined input genes dataframe
head(input_hyper)

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with developmental effect score
res_dev_effect = aba_enrich(input_hyper, dataset='dev_effect',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## see the 5 brain regions with the lowest FWERs
top_regions = res_dev_effect[[1]][1:5, ]
top_regions

## ---- eval=F------------------------------------------------------------------------------------------------
#  ## plot developmental effect score of the 5 brain regions with the lowest FWERs
#  plot_expression(top_regions[ ,'structure_id'])

## -----------------------------------------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
ABAEnrichment documentation built on Nov. 8, 2020, 5:43 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ABAEnrichment
Gene expression enrichment in human brain regions

inst/doc/ABAEnrichment.R
In ABAEnrichment: Gene expression enrichment in human brain regions

Try the ABAEnrichment package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ABAEnrichment Gene expression enrichment in human brain regions

inst/doc/ABAEnrichment.R In ABAEnrichment: Gene expression enrichment in human brain regions

Try the ABAEnrichment package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ABAEnrichment
Gene expression enrichment in human brain regions

inst/doc/ABAEnrichment.R
In ABAEnrichment: Gene expression enrichment in human brain regions