pathwayRankBasedEnrichment: Gene set enrichment analysis

Description Usage Arguments Value Examples

View source: R/pathwayAnalysis.R

Description

This function performes gene set enrichment analysis using Wilcoxon Rank Sum test.

Usage

1
pathwayRankBasedEnrichment(geneStats, annotation, alter = "greater")

Arguments

geneStats

an array of statistics (e.g. log2 FC) of all quantified genes or phosphosite with names of the array as gene or phosphosite IDs.

annotation

a list of pathways with each element containing an array of gene IDs.

alter

test for enrichment ('greater', default), depletion ('less'), or 'two.sided'.

Value

A matrix of pathways and their associated substrates and p-values.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
library(limma)

data('phospho_L6_ratio')
data('SPSs')

grps = gsub('_.+', '', colnames(phospho.L6.ratio))

# Cleaning phosphosite label
phospho.site.names = rownames(phospho.L6.ratio)
L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
                                function(x){paste(toupper(x[2]), x[3], '',
                                                sep=';')}))
phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
                            colMeans))
phospho.site.names = split(phospho.site.names, L6.sites)

# Construct a design matrix by condition
design = model.matrix(~ grps - 1)

# phosphoproteomics data normalisation using RUV
ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
                                ctl = ctl)


# divides the phospho.L6.ratio data into groups by phosphosites
L6.sites <- gsub(' ', '', gsub('~[STY]', '~',
    sapply(strsplit(rownames(phospho.L6.ratio.RUV), '~'),
        function(x){paste(toupper(x[2]), x[3], sep='~')})))
phospho.L6.ratio.sites <- t(sapply(split(data.frame(phospho.L6.ratio.RUV),
                                        L6.sites), colMeans))

# fit linear model for each phosphosite
f <- gsub('_exp\\d', '', colnames(phospho.L6.ratio.RUV))
X <- model.matrix(~ f - 1)
fit <- lmFit(phospho.L6.ratio.RUV, X)

# extract top-ranked phosphosites for each condition compared to basal
table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1)
table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3)
table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2)

DE1.RUV <- c(sum(table.AICAR[,'adj.P.Val'] < 0.05),
    sum(table.Ins[,'adj.P.Val'] < 0.05),
    sum(table.AICARIns[,'adj.P.Val'] < 0.05))

# extract top-ranked phosphosites for each group comparison
contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X)
contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X)
fit1 <- contrasts.fit(fit, contrast.matrix1)
fit2 <- contrasts.fit(fit, contrast.matrix2)
table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf)
table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf)

DE2.RUV <- c(sum(table.AICARInsVSIns[,'adj.P.Val'] < 0.05),
    sum(table.AICARInsVSAICAR[,'adj.P.Val'] < 0.05))

o <- rownames(table.AICARInsVSIns)
Tc <- cbind(table.Ins[o,'logFC'], table.AICAR[o,'logFC'],
            table.AICARIns[o,'logFC'])
rownames(Tc) = gsub('(.*)(;[A-Z])([0-9]+)(;)', '\\1;\\3;', o)
colnames(Tc) <- c('Ins', 'AICAR', 'AICAR+Ins')

# summary phosphosite-level information to proteins for performing downstream
#  gene-centric analyses.
Tc.gene <- phosCollapse(Tc, id=gsub(';.+', '', rownames(Tc)),
    stat=apply(abs(Tc), 1, max), by = 'max')

# 1D gene-centric pathway analysis
path2 <- pathwayRankBasedEnrichment(Tc.gene[,1],
                                    annotation=Pathways.reactome,
                                    alter = 'greater')

PengyiYang/PhosR documentation built on June 21, 2020, 8:37 a.m.