Description Usage Arguments Details Value Note Author(s) References See Also Examples
MixTwice deploys large-scale hypothesis testing in the case when testing units provide estimated effects and estimated standard errors. It produces empirical Bayesian local false discovery and sign rates for tests of zero effect.
1 |
thetaHat |
Estimated effect size (vector over testing units)) |
s2 |
Estimated standard errors of thetaHat |
Btheta |
Grid size parameter for effect distribution |
Bsigma |
Grid size parameter for variance distribution |
df |
Degrees of freedom in chisquare associated with estimated standard error |
prop |
Proportion of units randomly selected to fit the distribution **??** |
mixtwice
is a R function that takes estimated effects and standard errors. It uses nonparametric MLE to fit the underlying distribution of effect size and standard error in a model that mixes over both unknowns.
grid.theta |
Support of the estimated mixing distribution on effects |
grid.sigma |
Support of the estimated mixing distribution of variances |
mix.theta |
Estimated distribution of effect size, on grid.theta |
mix.sigma |
Estimated distribution of standard error, on grid.sigma |
lfdr |
Local false discovery rate for each testing unit |
lfsr |
Local false sign rate for each testing unit |
cite the biorxiv/arxiv paper
Zihao Zheng, Michael A.Newton
cite our manuscript (on either github or ArXiv)
See Also as help(array1)
, help(array2)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | #### load the RA data
data(array1); data(array2)
#### pre-process the data
## effect size
delta1 = apply(array1[1:12], 1, mean) - apply(array1[13:24], 1, mean)
delta2 = apply(array2[1:8], 1, mean) - apply(array2[9:16], 1, mean)
## standard error
sd11 = apply(array1[,1:12], 1, sd)
sd10 = apply(array1[,13:24], 1, sd)
sd21 = apply(array2[,1:8], 1, sd)
sd20 = apply(array2[,9:16], 1, sd)
sigma1 = sqrt(sd11^2/12 + sd10^2/12)
sigma2 = sqrt(sd21^2/8 + sd20^2/8)
## z-score
get_zscore = function(x){
x1 = x[1:length(x)/2]
x2 = x[(length(x)/2+1):length(x)]
t = t.test(x1, x2, var.equal = T)$statistic
z = qnorm(pt(t, df = length(x)-2))
return(z)
}
z1 = apply(array1, 1, get_zscore)
z1[z1==Inf] = max(z1[z1==Inf])
z2 = apply(array2, 1, get_zscore)
## p-value
p1 = 2*(1-pnorm(abs(z1)))
p2 = 2*(1-pnorm(abs(z2)))
#### Apply on MixTwice
library(alabama)
mixtwice1 = mixtwice(theta = delta1, s2 = sigma1^2,
Btheta = 15, Bsigma = 10, df = 22, prop = 0.01)$lfsr
mixtwice2 = mixtwice(theta = delta2, s2 = sigma2^2,
Btheta = 15, Bsigma = 10, df = 14, prop = 0.01)$lfsr
#### Apply on other large-scale testing
## ASH
library(ashr)
ash1 = get_lfsr(ash(delta1, sigma1, df = 22))
ash2 = get_lfsr(ash(delta2, sigma2, df = 14))
## Efron's local fdr
library(fdrtool)
lfdr1 = fdrtool(p1, statistic = "pvalue", plot = F)$lfdr
lfdr2 = fdrtool(p2, statistic = "pvalue", plot = F)$lfdr
## Storey's qvalue
library(qvalue)
qvalue1 = qvalue(p1)$lfdr
qvalue2 = qvalue(p2)$lfdr
## BH
BH1 = p.adjust(p1, method = "BH")
BH2 = p.adjust(p2, method = "BH")
## two-step ASH (Lu and Stephens 2019)
library(edgeR)
library(limma)
condition = rep(c("A","B"),each = 12 )
colnames(array1) = condition
dgecounts = calcNormFactors(DGEList(counts=array1, group=condition))
design = model.matrix(~condition)
v = voom(dgecounts,design,plot=FALSE)
lim = lmFit(v)
lim = eBayes(lim)
betahat = lim$coefficients[,2]
se = lim$stdev.unscaled[,2]*sqrt(lim$s2.post)
df = lim$df.total[1]
ebash1 = get_lfsr(ash(betahat, se, df=df))
condition = rep(c("A","B"),each = 8 ) # group indicator
colnames(array2) = condition
dgecounts = calcNormFactors(DGEList(counts=array2, group=condition))
design = model.matrix(~condition)
v = voom(dgecounts,design,plot=FALSE)
lim = lmFit(v)
lim = eBayes(lim)
betahat = lim$coefficients[,2]
se = lim$stdev.unscaled[,2]*sqrt(lim$s2.post)
df = lim$df.total[1]
ebash2 = get_lfsr(ash(betahat, se, df=df))
#### Evalute the reproducibility
d1 = data.frame(ash1, lfdr1, mixtwice1, qvalue1, BH1, ebash1)
d2 = data.frame(ash2, lfdr2, mixtwice2, qvalue2, BH2, ebash2)
## a summay reproducibility function
s = function(l1, l2, cut, plot = T){
n1 = l1<=cut
n2 = l2<=cut
nboth = n1&n2
neither = n1|n2
return(list(n1 = sum(n1), n2 = sum(n2), nboth = sum(nboth), neither = sum(neither)))
}
C = 10^(-seq(3,5, by = 0.1))
Frac.ash <- Numb.ash <- Frac.2d <- Numb.2d <- Frac.efron <- Numb.efron <-
Frac.storey <- Numb.storey <- Frac.BH <- Numb.BH <- Frac.ebash <- Numb.ebash <- NULL
for (j in 1:length(C)) {
a.ash = s(ash1, ash2, cut = C[j], plot = F)
a.2d = s(mixtwice1, mixtwice2, cut = C[j], plot = F)
a.efron = s(lfdr1, lfdr2, cut = C[j], plot = F)
a.storey = s(qvalue1, qvalue2, cut = C[j], plot = F)
a.BH = s(BH1, BH2, cut = C[j], plot = F)
a.ebash = s(ebash1, ebash2, cut = C[j], plot = F)
Frac.ash[j] = a.ash$nboth/a.ash$neither
Numb.ash[j] = a.ash$nboth
Frac.2d[j] = a.2d$nboth/a.2d$neither
Numb.2d[j] = a.2d$nboth
Frac.efron[j] = a.efron$nboth/a.efron$neither
Numb.efron[j] = a.efron$nboth
Frac.storey[j] = a.storey$nboth/a.storey$neither
Numb.storey[j] = a.storey$nboth
Frac.BH[j] = a.BH$nboth/a.BH$neither
Numb.BH[j] = a.BH$nboth
Frac.ebash[j] = a.ebash$nboth/a.ebash$neither
Numb.ebash[j] = a.ebash$nboth
}
plot(C, Numb.ash, log = "x", ylim = c(0,max(Numb.2d)),
cex = 1, col = "blue", lwd = 2,
xlab = "Significance Level", ylab = "Number of Significance (In Common)")
lines(C, Numb.ash, lwd = 2, col = "blue")
points(C, Numb.2d,
cex = 1, col = "green")
lines(C, Numb.2d, lwd = 2, col = "green")
points(C, Numb.efron,
cex = 1, col = "orange")
lines(C, Numb.efron, lwd = 2, col = "orange")
points(C, Numb.storey,
cex = 1, col = "purple")
lines(C, Numb.storey, lwd = 2, col = "purple")
points(C, Numb.BH,
cex = 1, col = "black")
lines(C, Numb.BH, lwd = 2, col = "black")
points(C, Numb.ebash,
cex = 1, col = "pink")
lines(C, Numb.ebash, lwd = 2, col = "pink")
legend("topleft",
legend = c("BH", "Storey", "LocFDR", "ASH", "MixTwice", "VL+EB+ASH"),
col = c('black', "purple", "orange", "blue", "green", "pink"),
lwd = 1)
plot(C, Frac.ash, log = "x", ylim = c(0,0.7),
cex = 1, col = "blue", lwd = 2,
xlab = "Significance Level", ylab = "Common Fraction in Both Array")
lines(C, Frac.ash, lwd = 2, col = "blue")
points(C, Frac.2d,
cex = 1, col = "green")
lines(C, Frac.2d, lwd = 2, col = "green")
points(C, Frac.efron,
cex = 1, col = "orange")
lines(C, Frac.efron, lwd = 2, col = "orange")
points(C, Frac.storey,
cex = 1, col = "purple")
lines(C, Frac.storey, lwd = 2, col = "purple")
points(C, Frac.BH,
cex = 1, col = "black")
lines(C, Frac.BH, lwd = 2, col = "black")
points(C, Frac.ebash,
cex = 1, col = "pink")
lines(C, Frac.ebash, lwd = 2, col = "pink")
legend("topleft",
legend = c("BH", "Storey", "LocFDR", "ASH", "MixTwice", "VL+EB+ASH"),
col = c('black', "purple", "orange", "blue", "green", "pink"),
lwd = 1)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.