setpath: Runs the Spiked Eigenvalue Test for Pathway data (SETPath) on...

Description Usage Arguments Value Author(s) References Examples

View source: R/setpath.R

Description

Compares a pathway's gene expression data from two classes, testing the null hypothesis that the first eigenvalue and the sum of the eigenvalues are equal between classes.

Usage

1
2
setpath(d1, d2, M = 1, transform = NULL, verbose = FALSE, minalpha = NULL, 
	normalize = TRUE, pvalue = "chisq", npermutations = 10000)

Arguments

d1

A n1*p matrix from the pathway's data in the first class

d2

A n2*p matrix from the pathway's data in the second class, with the same genes (column names) as d1

M

The null hypothesis specifies that the first M eigenvalues are equal between the two classes. SETPath was conceived as a test of just the first eigenvalue and the sum of the eigenvalues (M=1), but cases where multiple leading eigenvalues are of biological interest may indicate M>1.

transform

Default NULL. Otherwise, a matrix or vector specifying a linear transformation of the null hypothesis. The use of the transform argument modifies the standard SETPath test of equality of the first eigenvalue and the sum of eigenvalues between classes. Specifically, if the null hypothesis is that (alpha0.1, ..., alpha0.M,T0) = (alpha1.1, ..., alpha1.M,T1), the transform argument changes the null hypothesis to t(transform)%*%(alpha0.1, ..., alpha0.M,T0) = t(transform)%*%(alpha1.1, ..., alpha1.M,T1).

verbose

Indicates whether to return more than the p-value.

minalpha

Can be used to tweak the estimation of the leading eigenvalues under the null hypothesis. If NULL, eigenvalues are truncated below at 1 + 2*sqrt(gamma). Otherwise, eigenvalues are truncated below at 1 + sqrt(gamma) + minalpha.

normalize

SETPath assumes that the unspiked eigenvalues of each class's dataset are equal to 1. If the normalize argument is set to TRUE, the data will be rescaled by the average of the median of the non-zero eigenvalues of the two datasets. A further adjustment is performed when n<p.

pvalue

If pvalue=="chisq", the theoretical distribution of the test statistic will be used to compute a p-value. Alternatively, use pvalue=="permutation".

npermutations

If pvalue == "permutation", the number of permutations.

Value

pval

The p-value from SETPath

If verbose==TRUE, the additional values are output:

a0

Estimates of the leading M eigenvalues, assuming the null hypothesis is true.

correction

The correction factors applied to the between-class differences between the M leading eigenvalues. (Correction factors are needed to account for the sample size-dependent bias in empirical eigenvalues.)

covQ

An (M+1)*(M+1) matrix giving the estimated covariance of the between-class differences in the leading M eigenvalues and the sum of the eigenvalues.

m

M, the number of leading eigenvalues included in the null hypothesis.

Author(s)

Patrick Danaher

References

Patrick Danaher, Debashis Paul, and Pei Wang. "Covariance-based analyses of biological pathways." Biometrika (2015)

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#load data:
data(setpath.data)
# identify desired gene list:
genes.in.pathway = pathwaygenes[[1]]
# run test using theoretical quantiles to derive a p-value:
setpath(d1[,genes.in.pathway],d2[,genes.in.pathway],M=1,transform=NULL,verbose=TRUE,minalpha=NULL,
	normalize=TRUE,pvalue="chisq")
# now using a permutation test:
setpath(d1[,genes.in.pathway],d2[,genes.in.pathway],M=1,transform=NULL,verbose=TRUE,minalpha=NULL,
	normalize=TRUE,pvalue="permutation",npermutations=1000)
# now using the "transform" argument to test the null hypothesis that variability unrelated to the
#  first principal component (i.e. the sum of the second through final eigenvalues) is the same 
#  between classes:
setpath(d1[,genes.in.pathway],d2[,genes.in.pathway],M=1,transform=c(-1,1),verbose=TRUE,
	minalpha=NULL,normalize=TRUE,pvalue="chisq",npermutations=1000)
# now using the "transform" argument to test the compound null hypothesis that the second and third
#  eigenvalues are the same between classes:
linear.transformation = matrix(c(0,1,0,0,0,0,1,0),4)
print(linear.transformation)
setpath(d1[,genes.in.pathway],d2[,genes.in.pathway],M=3,transform=linear.transformation,
	verbose=TRUE,minalpha=NULL,normalize=TRUE,pvalue="chisq",npermutations=1000)



## The function is currently defined as
function (d1, d2, M = 1, transform = NULL, verbose = FALSE, minalpha = NULL, 
    normalize = TRUE, pvalue = "chisq", npermutations = 10000) 
{
    p = dim(d1)[2]
    n1 = dim(d1)[1]
    n2 = dim(d2)[1]
    if (normalize) {
      e1 = eigen(cov(d1),symmetric=TRUE,only.values=TRUE)$values
      e2 = eigen(cov(d2),symmetric=TRUE,only.values=TRUE)$values
      if(n1>p){medianeigen1 = median(e1)}
      if(n2>p){medianeigen2 = median(e2)}
      if(n1<=p){medianeigen1 = median(e1[e1>1e-12])*n1/p}
      if(n2<=p){medianeigen2 = median(e2[e2>1e-12])*n2/p}
      scaling.factor = mean(medianeigen1,medianeigen2)
      d1 = d1/sqrt(scaling.factor)
      d2 = d2/sqrt(scaling.factor)
    }
    p = dim(d1)[2]
    n1 = dim(d1)[1]
    n2 = dim(d2)[1]
    n = c(n1, n2)
    w = n/sum(n)
    d = list(d1, d2)
    g1 = p/n1
    g2 = p/n2
    g = c(g1, g2)
    covs = list()
    covs[[1]] = cov(d1)
    covs[[2]] = cov(d2)
    sighat = list(cov(d1), cov(d2))
    e1 = eigen(sighat[[1]], symmetric = TRUE, only.values = TRUE)$values
    e2 = eigen(sighat[[2]], symmetric = TRUE, only.values = TRUE)$values
    L = cbind(e1[1:M], e2[1:M])
    T1 = sum(e1)
    T2 = sum(e2)
    T = c(T1, T2)
    alphabar = matrix(NA, M, 2)
    a0 = QLcorrection = c()
    for (m in 1:M) {
        eigencorrect = unbias.eigens(L[m, ], g, w, minalpha)
        alphabar[m, ] = eigencorrect$a
        a0[m] = eigencorrect$a0
        QLcorrection[m] = eigencorrect$QLcorrection
    }
    thresh = (1 + sqrt(g))^2 + sqrt(2 * log(n)/n)
    mhat = c()
    mhat[1] = max(sum(e1 > thresh[1]), M)
    mhat[2] = max(sum(e2 > thresh[2]), M)
    spikes = list()
    for (k in 1:2) {
        spikes[[k]] = rep(NA, mhat[k])
    }
    for (k in 1:k) {
        for (m in 1:mhat[k]) {
            tempeigencorrect = unbias.eigens(c(e1[m], e2[m]), 
                g, w, minalpha)
            spikes[[k]][m] = tempeigencorrect$a[k]
        }
    }
    covQ = matrix(0, M + 1, M + 1)
    varT = c()
    for (k in 1:2) {
        varT[k] = 2 * (sum(a0^2)/n[k] + (p - M)/n[k])
        if (mhat[k] > M) {
            varT[k] = varT[k] + 2/n[k] * (sum((spikes[[k]][(M + 
                1):mhat[k]])^2) - (mhat[k] - M))
        }
    }
    covQ[M + 1, M + 1] = sum(varT)
    for (m in 1:M) {
        rho = theta = varLs = c()
        c0 = (1/g[1] + 1/g[2])^2 * (a0[m] - 1)^2
        for (k in 1:2) {
            rho[k] = a0[m] * (1 + g[k]/(a0[m] - 1))
            deriv.f.k = 0.5 * (1 + (rho[k] - 1 - g[k])/sqrt((rho[k] - 
                1 - g[k])^2 - 4 * g[k]))
            theta[k] = 1 + (g[1] - g[2])/c0 * deriv.f.k/g[k]
            varLs[k] = 2 * a0[m]/n[k] * theta[k]^2 * rho[k]/(1 + 
                a0[m] * g[k]/((a0[m] - 1)^2 - g[k]))
        }
        covQ[m, m] = sum(varLs)
    }
    for (m in 1:M) {
        rho = theta = covLTs = c()
        c0 = (1/g[1] + 1/g[2])^2 * (a0[m] - 1)^2
        for (k in 1:2) {
            rho[k] = a0[m] * (1 + g[k]/(a0[m] - 1))
            deriv.f.k = 0.5 * (1 + (rho[k] - 1 - g[k])/sqrt((rho[k] - 
                1 - g[k])^2 - 4 * g[k]))
            theta[k] = 1 + (g[1] - g[2])/c0 * deriv.f.k/g[k]
            covLTs[k] = 2 * a0[m]/n[k] * theta[k] * rho[k]/(1 + 
                a0[m] * g[k]/((a0[m] - 1)^2 - g[k]))
        }
        covLT = sum(covLTs)
        covQ[m, M + 1] = covQ[M + 1, m] = covLT
    }
    Q = c(L[, 1] - L[, 2] - QLcorrection, T1 - T2)
    A = transform
    if (length(transform) == 0) {
        A = diag(M + 1)
    }
    A=as.matrix(A)
    if(dim(A)[1]!=M+1){stop("Dimension of linear transformation does not match
					   the dimension of the null hypothesis.")}
    stat = t(Q) %*% A %*% solve(t(A) %*% covQ %*% A) %*% t(A) %*% 
        Q
    out = list()
    out$stat = stat
    if (pvalue == "chisq") {
        out$pval = 1 - pchisq(stat, dim(A)[2])
    }
    if (pvalue == "permutation") {
        d.combined = rbind(d1, d2)
        permstats = c()
        for (i in 1:npermutations) {
            prows1 = sample(1:dim(d.combined)[1], dim(d1)[1], 
                replace = FALSE)
            prows2 = setdiff(1:dim(d.combined)[1], prows1)
            permstats[i] = setpath(d1 = d.combined[prows1, ], d2 = d.combined[prows2, 
                ], M = M, transform = transform, verbose = FALSE, 
                minalpha = minalpha, normalize = FALSE, pvalue = "chisq")$stat
        }
        out$pval = mean(as.vector(stat) < permstats)
    }
    if (verbose) {
        out$stats = rbind(L, c(T1, T2))
        out$a0 = a0
        out$correction = QLcorrection
        out$covQ = covQ
        out$m = m
    }
    return(out)
  }

SETPath documentation built on May 2, 2019, 8:32 a.m.