options(fig_caption=TRUE) library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center")
In this document, we show that the Wilcoxon-Mann-Whitney test is comparable or superior to alternative methods.
library(testthat) library(BioQC) library(hgu133plus2.db) ## to simulate an microarray expression dataset library(lattice) library(latticeExtra) library(gridExtra) library(gplots) library(reshape2) library(plyr) library(ggplot2) library(rbenchmark) pdf.options(family="ArialMT", useDingbats=FALSE) set.seed(1887) ## list human genes humanGenes <- unique(na.omit(unlist(as.list(hgu133plus2SYMBOL)))) ## read tissue-specific gene signatures gmtFile <- system.file("extdata/exp.tissuemark.affy.roche.symbols.gmt", package="BioQC") gmt <- readGmt(gmtFile)
Two alternative methods could be compared with the Wilcoxon-Mann-Whitney (WMW) test proposed by BioQC: the Kolmogorov-Smirnov (KS) test, and the Student’s t-test, or more particularly, the Welch’s test which does not assume equal sample number or equal variance, which is appropriate in the setting of gene expression studies.
Based on these considerations, BioQC implements a computationally efficient version of the WMW test. In order not to confuse end-users, no alternative methods are implemented.
Nevertheless, in order to demonstrate the power of WMW test in comparison with the KS-test or the t-test, we performed the sensitivity benchmark described in the simulation studies, for the two alternative tests respectively.
## Summarizes data. ## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%). ## data: a data frame. ## measurevar: the name of a column that contains the variable to be summariezed ## groupvars: a vector containing names of columns that contain grouping variables ## na.rm: a boolean that indicates whether to ignore NA's ## conf.interval: the percent range of the confidence interval (default is 95%) ## ## Source: http://www.cookbook-r.com/Manipulating_data/Summarizing_data/ summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) { # New version of length which can handle NA's: if na.rm==T, don't count them length2 <- function (x, na.rm=FALSE) { if (na.rm) sum(!is.na(x)) else length(x) } # This does the summary. For each group's data frame, return a vector with # N, mean, and sd datac <- ddply(data, groupvars, .drop=.drop, .fun = function(xx, col) { c(N = length2(xx[[col]], na.rm=na.rm), mean = mean (xx[[col]], na.rm=na.rm), sd = sd (xx[[col]], na.rm=na.rm) ) }, measurevar ) # Rename the "mean" column datac <- rename(datac, c("mean" = measurevar)) datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean # Confidence interval multiplier for standard error # Calculate t-statistic for confidence interval: # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 ciMult <- qt(conf.interval/2 + .5, datac$N-1) datac$ci <- datac$se * ciMult return(datac) }
randomMatrixButOneSignature <- function(rows=humanGenes, signatureGenes, amplitudes=seq(0, 3, by=0.5)) { nrow <- length(rows) ncol <- length(amplitudes) mat <- matrix(rnorm(nrow*ncol), nrow=nrow, byrow=FALSE) rownames(mat) <- rows sigInd <- na.omit(match(signatureGenes, humanGenes)) colClass <- factor(amplitudes) for(colInd in unique(colClass)) { isCurrCol <- colInd==colClass replaceMatrix <- matrix(rnorm(length(sigInd)*sum(isCurrCol), mean=amplitudes[isCurrCol][1]), nrow=length(sigInd), byrow=FALSE) mat[sigInd, isCurrCol] <- replaceMatrix } return(mat) } addNoise = function(matrix, fractionAffected=.1, stdv=1) { noise = matrix(rnorm(nrow(matrix)*ncol(matrix), sd=stdv), nrow=nrow(matrix), byrow=FALSE) addNoise = matrix(runif(nrow(matrix)*ncol(matrix)), nrow=nrow(matrix), byrow=FALSE) < fractionAffected matrix = matrix + addNoise*noise return(matrix) } do.test <- function(senseMat, tissueInd, test=t.test, alternative="greater") { bgInds = setdiff(1:nrow(senseMat), tissueInd) res = apply(senseMat, 2, function(col) { gs = col[tissueInd] bg = col[bgInds] return(test(gs, bg, alternative=alternative)$p.value) }) return(res) } selGeneSet <- "Ovary_NGS_RNASEQATLAS_0.6_3" selSignature <- gmt[[selGeneSet]]$genes tissueInds <- sapply(gmt, function(x) match(x$genes, humanGenes)) senseAmplitudes <- rep(c(seq(0, 1, by=0.25), seq(1.5, 3, by=0.5)), each=50) senseMat <- randomMatrixButOneSignature(rows=humanGenes, signatureGenes=selSignature, amplitudes=senseAmplitudes) senseMatOutlier = addNoise(senseMat, stdv=15, fractionAffected=.01) compare.tests = function(senseMat) { senseBioQC <- wmwTest(senseMat, tissueInds, valType="p.greater", simplify=TRUE)[selGeneSet,] senseTTest <- do.test(senseMat, tissueInds[[selGeneSet]], test=t.test) senseKS <- do.test(senseMat, tissueInds[[selGeneSet]], test=function(x, y, alternative) { ks.test(y, x, alternative=alternative)}) comp = data.frame(BioQC=-log10(senseBioQC), t.test=-log10(senseTTest), ks.test=-log10(senseKS), senseAmplitudes=senseAmplitudes) comp.molten = melt(comp, id="senseAmplitudes") comp.summary = summarySE(comp.molten, measurevar="value", groupvars=c("senseAmplitudes", "variable")) return(comp.summary) } comp.summary = compare.tests(senseMat) comp.summary.outlier = compare.tests(senseMatOutlier) plot.comp = ggplot(comp.summary, aes(x=senseAmplitudes, y=value, color=variable)) + geom_line() + geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1) + geom_point() + xlab("Mean expression differnce") + ylab("Enrichment score") + ggtitle("without noise") + theme_bw() + scale_color_brewer(palette="Dark2") plot.comp.outlier = ggplot(comp.summary.outlier, aes(x=senseAmplitudes, y=value, color=variable)) + geom_line() + geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1) + geom_point() + xlab("Mean expression differnce") + ylab("Enrichment score") + ggtitle("with noise") + theme_bw() + scale_color_brewer(palette="Dark2") grid.arrange(plot.comp, plot.comp.outlier, ncol=2)
As expected, the results suggest, that both the KS-test and the WMW-test are robust to noise, while the performance of the t-test drops significantly on noisy data. Additionally, the WMW-test appears to be superior to the KS-test for low expression differences.
runWMW = function() {return(wmwTest(senseMat, tissueInds, valType="p.greater", simplify=TRUE))} runKS = function() {return(do.test(senseMat, tissueInds[[selGeneSet]], test=function(x, y, alternative) { ks.test(y, x, alternative=alternative)}))} benchmark.res = benchmark(runWMW(), runKS(), columns=c("test", "replications", "elapsed", "relative"), replications=5)
Since the KS-test is so slow, we did not replicate the sensitivity benchmark from the simulation studies using the enrichment score rank. While it takes BioQC about r round(benchmark.res[2, 'elapsed']/benchmark.res[2, 'replications'])
seconds on a single thread to test all 155 signatures, it already takes the KS-test about r round(benchmark.res[1, 'elapsed']/benchmark.res[1, 'replications'])
seconds to test a single signature.
benchmark.res
sessionInfo()
[^1]: Irizarry, Rafael A., et al. "Gene set enrichment analysis made simple."Statistical methods in medical research 18.6 (2009): 565-575. [^2]: Filion, Guillaume J. "The signed Kolmogorov-Smirnov test: why it should not be used." GigaScience 4.1 (2015): 1.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.