#### metTest (mset) ####
#' Univariate hypothesis testing
#'
#' Provides numerical metrics and graphical overview of an ExpressionSet instance
#'
#' @param x An S4 object of class \code{MultiDataSet}
#' @param factorC Character: Factor of interest (name of a column from the
#' pData(x))
#' @param testC Character: One of the 6 available hypothesis tests can be selected
#' (either 'ttest', 'limma', 'wilcoxon', 'anova', 'kruskal', 'pearson', or 'spearman');
#' if set to 'param' (or 'nonparam' [default]), the appropriate (non-)parametric test will be used
#' @param adjustC Character: Name of the method for correction of multiple testing
#' (the p.adjust function is used)
#' @param adjustThreshN Numeric: Threshold for (corrected) p-values
#' @param fig.pdfC Character: Name of the file for the graphics from the significant features;
#' if NA, the graphics are displayed on the screen; if NULL, no plot is generated
#' @param info.txtC Character: File name for the printed results (call to
#' 'sink()'); if NA (default), messages will be printed on the screen; if NULL,
#' no verbose will be generated
#' @return \code{MultiDataSet} including the adjusted p-values in the fData data frames
#' @rdname metTest
#' @export
#' @examples
#' prometMset <- metRead(system.file("extdata/promet", package="metabolis"))
#' prometMset <- metTest(prometMset, "gene")
setMethod("metTest", signature(x = "MultiDataSet"),
function(x,
factorC,
testC = c("param", "nonparam", "ttest", "limma", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[2],
adjustC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[5],
adjustThreshN = 0.05,
fig.pdfC = NA,
info.txtC = NA) {
if (!is.null(info.txtC) && !is.na(info.txtC))
sink(info.txtC, append = TRUE)
infTxtC <- info.txtC
if (!is.null(infTxtC))
infTxtC <- NA
if (!is.null(fig.pdfC) && !is.na(fig.pdfC))
pdf(fig.pdfC)
figPdfC <- fig.pdfC
if (!is.null(figPdfC))
figPdfC <- NA
for (setC in names(x)) {
if (!is.null(info.txtC))
cat("\nHypothesis testing for the '",
setC,
"' dataset:\n",
sep = "")
ese <- x[[setC]]
if (!(factorC %in% colnames(Biobase::pData(ese)))) {
if (!is.null(info.txtC))
cat("The '",
factorC,
"' factor was not found in the pData columns from the '",
setC,
"' dataset.\nNo test will be computed for this dataset.\n",
sep = "")
} else {
ese <- metTest(ese,
factorC,
testC = testC,
adjustC = adjustC,
adjustThreshN = adjustThreshN,
plotMainC = setC,
fig.pdfC = figPdfC,
info.txtC = infTxtC)
x <- MultiDataSet::add_eset(x,
ese,
dataset.type = setC,
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
}
}
if (!is.null(fig.pdfC) && !is.na(fig.pdfC))
dev.off()
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink()
return(invisible(x))
})
#### metTest (eset) ####
#' Univariate hypothesis testing
#'
#' Provides numerical metrics and graphical overview of an ExpressionSet instance
#'
#' @param x An S4 object of class \code{ExpressionSet}
#' @param factorC Character: Factor of interest (name of a column from the
#' pData(x))
#' @param testC Character: One of the 6 available hypothesis tests can be selected
#' (either 'ttest', 'limma', 'wilcoxon', 'anova', 'kruskal', 'pearson', or 'spearman');
#' if set to 'param' (or 'nonparam' [default]), the appropriate (non-)parametric test will be used
#' @param adjustC Character: Name of the method for correction of multiple testing
#' (the p.adjust function is used)
#' @param adjustThreshN Numeric: Threshold for (corrected) p-values
#' @param fig.pdfC Character: Name of the file for the graphics from the significant features;
#' if NA, the graphics are displayed on the screen; if NULL, no plot is generated
#' @param plotMainC character: title to be displayed on the first plot
#' @param info.txtC Character: File name for the printed results (call to
#' 'sink()'); if NA (default), messages will be printed on the screen; if NULL,
#' no verbose will be generated
#' @return \code{ExpressionSet} including the adjusted p-values in fData (or the vector of adjusted p-values when returnAdjustOnlyL is TRUE)
#' @rdname metTest
#' @export
#' @examples
#' sacSet <- metRead(system.file("extdata/sacurine", package="metabolis"))
#' sacSet <- metCorrect(sacSet)
#' sacSet <- metTransform(sacSet)
#' sacSet <- metTest(sacSet, "gender", "ttest")
setMethod("metTest", signature(x = "ExpressionSet"),
function(x,
factorC,
testC = c("param", "nonparam", "ttest", "limma", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[2],
adjustC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[5],
adjustThreshN = 0.05,
fig.pdfC = NA,
plotMainC = NA,
info.txtC = NA) {
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink(info.txtC, append = TRUE)
if (!(factorC %in% colnames(Biobase::pData(x))))
stop("'", factorC, "' was not found in the column names of pData(x).",
call. = FALSE)
if (is.na(plotMainC))
plotMainC <- Biobase::experimentData(x)@title
fdaDF <- Biobase::fData(x)
## Getting the response (either a factor or a numeric)
tesLs <- metabolis:::.chooseTest(x, factorC, testC)
facFcVn <- tesLs[["facFcVn"]]
facLevVc <- tesLs[["facLevVc"]]
testC <- tesLs[["tesC"]]
varPfxC <- tesLs[["varPfxC"]]
if (!is.null(info.txtC))
cat("\nPerforming '", testC, "'\n", sep = "")
## Two-level or quantitative
if (testC %in% c("ttest", "limma", "wilcoxon",
"pearson", "spearman")) {
staC <- ifelse(testC %in% c("ttest", "limma", "wilcoxon"),
"dif",
"cor")
switch(testC,
ttest = {
staF <- function(y) diff(tapply(y,
facFcVn,
function(x)
mean(x, na.rm = TRUE)))
tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]]
},
limma = {
staF <- function(y) diff(tapply(y,
facFcVn,
function(x)
mean(x, na.rm = TRUE)))
},
wilcoxon = {
staF <- function(y) diff(tapply(y,
facFcVn,
function(x)
median(x, na.rm = TRUE)))
tesF <- function(y) wilcox.test(y ~ facFcVn)[["p.value"]]
},
pearson = {
staF <- function(y) cor(facFcVn, y, method = "pearson",
use = "pairwise.complete.obs")
tesF <- function(y) cor.test(facFcVn, y, method = "pearson",
use = "pairwise.complete.obs")[["p.value"]]
},
spearman = {
staF <- function(y) cor(facFcVn, y, method = "spearman",
use = "pairwise.complete.obs")
tesF <- function(y) cor.test(facFcVn, y, method = "spearman",
use = "pairwise.complete.obs")[["p.value"]]
})
staVn <- apply(Biobase::exprs(x), 1, staF)
optWrnN <- options()$warn
options(warn = -1)
if (testC != "limma") {
adjVn <- p.adjust(apply(Biobase::exprs(x), 1, tesF),
method = adjustC)
} else {
dsgMN <- cbind(lev2 = rep(1, Biobase::dims(x)["Samples", ]),
lev1.lev2 = as.numeric(tesLs[["facFcVn"]] == tesLs[["facLevVc"]][1]))
rownames(dsgMN) <- Biobase::sampleNames(x)
limFit <- limma::lmFit(x, dsgMN)
limFit <- limma::eBayes(limFit)
adjVn <- p.adjust(limFit$p.value[, "lev1.lev2"],
method = adjustC)
}
options(warn = optWrnN)
sigVn <- as.numeric(adjVn <= adjustThreshN)
resMN <- cbind(staVn, adjVn, sigVn)
rownames(resMN) <- Biobase::featureNames(x)
colnames(resMN) <- paste0(varPfxC, c(staC, adjustC, "sig"))
for (colC in colnames(resMN))
fdaDF[, colC] <- resMN[, colC]
if (sum(sigVn, na.rm = TRUE) > 0) {
resSigMN <- resMN[sigVn > 0, 1:2, drop = FALSE]
resSigMN <- resSigMN[order(adjVn[sigVn > 0]), , drop = FALSE]
} else {
resSigMN <- NULL
}
## graphic
if (!is.null(fig.pdfC)) {
if (!is.na(fig.pdfC))
pdf(fig.pdfC, onefile = TRUE)
.volcano(staVn = staVn,
adjVn = adjVn,
adjC = adjustC,
cexN = 1,
colC = "green4",
facC = factorC,
labVc = Biobase::featureNames(x),
levVc = facLevVc,
maiC = plotMainC,
staC = staC,
tesC = testC,
thrN = adjustThreshN)
## if (!is.na(fig.pdfC)) {
varVi <- which(sigVn > 0)
if (testC %in% c("ttest", "limma", "wilcoxon")) {
facVc <- as.character(facFcVn)
names(facVc) <- Biobase::sampleNames(x)
for (varI in varVi) {
varC <- Biobase::featureNames(x)[varI]
.boxplot(facFcVn,
t(Biobase::exprs(x))[, varI],
paste0(varC, " (", adjustC, " = ", signif(adjVn[varI], 2), ")"),
facVc)
}
} else {## pearson or spearman
for (varI in varVi) {
varC <- Biobase::featureNames(x)[varI]
mod <- lm(t(Biobase::exprs(x))[, varI] ~ facFcVn)
plot(facFcVn, t(Biobase::exprs(x))[, varI],
xlab = factorC,
ylab = "",
pch = 18,
main = paste0(varC, " (", adjustC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")"))
abline(mod, col = "red")
}
}
if (!is.na(fig.pdfC))
dev.off()
## }
}
## More than two levels
} else if (testC == "anova") {
## getting the names of the pairwise comparisons 'class1Vclass2'
prwVc <- rownames(TukeyHSD(aov(t(Biobase::exprs(x))[, 1] ~ facFcVn))[["facFcVn"]])
prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1
## omnibus and post-hoc tests
aovMN <- t(apply(t(Biobase::exprs(x)),
2,
function(varVn) {
aovMod <- aov(varVn ~ facFcVn)
pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"]
hsdMN <- TukeyHSD(aovMod)[["facFcVn"]]
c(pvaN, c(hsdMN[, c("diff", "p adj")]))
}))
difVi <- 1:length(prwVc) + 1
## difference of the means for each pairwise comparison
difMN <- aovMN[, difVi]
colnames(difMN) <- paste0(varPfxC, prwVc, "_dif")
## correction for multiple testing
aovMN <- aovMN[, -difVi, drop = FALSE]
aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjustC))
## significance coding (0 = not significant, 1 = significant)
adjVn <- aovMN[, 1]
sigVn <- as.numeric(adjVn < adjustThreshN)
aovMN <- aovMN[, -1, drop = FALSE]
colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjustC)
aovSigMN <- aovMN < adjustThreshN
mode(aovSigMN) <- "numeric"
colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig")
## final aggregated table
resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN)
rownames(resMN) <- Biobase::featureNames(x)
colnames(resMN)[1:2] <- paste0(varPfxC, c(adjustC, "sig"))
for (colC in colnames(resMN))
fdaDF[, colC] <- resMN[, colC]
if (sum(sigVn, na.rm = TRUE) > 0) {
resSigMN <- resMN[sigVn > 0, 1, drop = FALSE]
resSigMN <- resSigMN[order(adjVn[sigVn > 0]), , drop = FALSE]
} else {
resSigMN <- NULL
}
## graphic
if (!is.null(fig.pdfC)) {
if (!is.na(fig.pdfC))
pdf(fig.pdfC, onefile = TRUE)
for (varI in 1:nrow(Biobase::fData(x))) {
if (sum(aovSigMN[varI, ]) > 0) {
varC <- Biobase::featureNames(x)[varI]
boxplot(t(exprs(x))[, varI] ~ facFcVn,
main = paste0(varC, " (", adjustC, " = ", signif(adjVn[varI], 2), ")"))
for (prwI in 1:length(prwVc)) {
if (aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) {
claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE))
aovClaVl <- facFcVn %in% claVc
aovFc <- facFcVn[aovClaVl, drop = TRUE]
aovVc <- as.character(aovFc)
names(aovVc) <- Biobase::sampleNames(x)[aovClaVl]
.boxplot(aovFc,
t(Biobase::exprs(x))[aovClaVl, varI],
paste0(varC, " (", adjustC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjustC)], 2), ")"),
aovVc)
}
}
}
}
if (!is.na(fig.pdfC))
dev.off()
}
} else if (testC == "kruskal") {
## getting the names of the pairwise comparisons 'class1.class2'
optWrnN <- options()$warn
options(warn = -1)
nemMN <- PMCMRplus::kwAllPairsNemenyiTest(t(Biobase::exprs(x))[, 1], facFcVn, "Tukey")[["p.value"]]
options(warn = optWrnN)
nemVl <- c(lower.tri(nemMN, diag = TRUE))
nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl],
colnames(nemMN)[c(col(nemMN))][nemVl])
nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2])
pfxNemVc <- paste0(varPfxC, nemNamVc)
## omnibus and post-hoc tests
optWrnN <- options()$warn
options(warn = -1)
nemMN <- t(apply(Biobase::exprs(x), 1, function(varVn) {
pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]]
varNemMN <- PMCMRplus::kwAllPairsNemenyiTest(varVn, facFcVn, "Tukey")[["p.value"]]
c(pvaN, c(varNemMN))
}))
options(warn = optWrnN)
## correction for multiple testing
nemMN <- apply(nemMN, 2,
function(pvaVn) p.adjust(pvaVn, method = adjustC))
adjVn <- nemMN[, 1]
sigVn <- as.numeric(adjVn < adjustThreshN)
nemMN <- nemMN[, c(FALSE, nemVl)]
colnames(nemMN) <- paste0(pfxNemVc, "_", adjustC)
## significance coding (0 = not significant, 1 = significant)
nemSigMN <- nemMN < adjustThreshN
mode(nemSigMN) <- "numeric"
colnames(nemSigMN) <- paste0(pfxNemVc, "_sig")
nemSigMN[is.na(nemSigMN)] <- 0
## difference of the medians for each pairwise comparison
difMN <- sapply(1:nrow(nemClaMC), function(prwI) {
prwVc <- nemClaMC[prwI, ]
prwVi <- which(facFcVn %in% prwVc)
prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc)
apply(Biobase::exprs(x)[, prwVi], 1,
function(varVn)
-diff(as.numeric(tapply(varVn, prwFacFc,
function(x)
median(x, na.rm = TRUE)))))
})
colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN))
## final aggregated table
resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN)
rownames(resMN) <- Biobase::featureNames(x)
colnames(resMN)[1:2] <- paste0(varPfxC, c(adjustC, "sig"))
for (colC in colnames(resMN))
fdaDF[, colC] <- resMN[, colC]
if (sum(sigVn, na.rm = TRUE) > 0) {
resSigMN <- resMN[sigVn > 0, 1, drop = FALSE]
resSigMN <- resSigMN[order(adjVn[sigVn > 0]), , drop = FALSE]
} else {
resSigMN <- NULL
}
## graphic
if (!is.null(fig.pdfC)) {
if (!is.na(fig.pdfC))
pdf(fig.pdfC, onefile = TRUE)
for (varI in 1:nrow(Biobase::fData(x))) {
if (sum(nemSigMN[varI, ]) > 0) {
varC <- Biobase::featureNames(x)[varI]
boxplot(t(Biobase::exprs(x))[, varI] ~ facFcVn,
main = paste0(varC, " (", adjustC, " = ", signif(adjVn[varI], 2), ")"))
for (nemI in 1:length(nemNamVc)) {
if (nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) {
nemClaVc <- nemClaMC[nemI, ]
nemClaVl <- facFcVn %in% nemClaVc
nemFc <- facFcVn[nemClaVl, drop = TRUE]
nemVc <- as.character(nemFc)
names(nemVc) <- Biobase::sampleNames(x)[nemClaVl]
.boxplot(nemFc,
t(Biobase::exprs(x))[nemClaVl, varI],
paste0(varC, " (", adjustC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjustC)], 2), ")"),
nemVc)
}
}
}
}
if (!is.na(fig.pdfC))
dev.off()
}
}
## Finalizing
if (!is.null(info.txtC)) {
if (!is.null(resSigMN)) {
cat("\n", nrow(resSigMN), " variable",
ifelse(nrow(resSigMN) > 1, "s", ""),
" (", round(nrow(resSigMN) / length(sigVn) * 100), "%) ",
ifelse(nrow(resSigMN) > 1, "were", "was"),
" found significant at the ", adjustThreshN,
" level (after the '", adjustC, "' correction).\n",
ifelse(nrow(resSigMN) == 1,
"It is",
ifelse(nrow(resSigMN) <= 15,
"They are",
"The first 15 are")),
" displayed below",
ifelse(nrow(resSigMN) > 1,
" (sorted by increasing corrected p-values)",
""),
":\n",
sep = "")
print(resSigMN[1:min(15, nrow(resSigMN)), , drop = FALSE])
} else
cat("\nNo significant variable found at the selected ", adjustThreshN, " level\n", sep = "")
}
Biobase::fData(x) <- fdaDF
validObject(x)
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink()
return(invisible(x))
})
.boxplot <- function(xFc,
yVn,
maiC,
xVc) {
boxLs <- boxplot(yVn ~ xFc,
main = maiC)
outVn <- boxLs[["out"]]
if (length(outVn)) {
for (outI in 1:length(outVn)) {
levI <- which(levels(xFc) == xVc[names(outVn)[outI]])
text(levI,
outVn[outI],
labels = names(outVn)[outI],
pos = ifelse(levI == 2, 2, 4))
}
}
}
.chooseTest <- function(eset, facC, tesC){
if (mode(Biobase::pData(eset)[, facC]) == "character") {
facFcVn <- factor(Biobase::pData(eset)[, facC])
facLevVc <- levels(facFcVn)
facLevN <- nlevels(facFcVn)
if (tesC %in% c("param", "nonparam")) {
if (facLevN == 1) {
stop("'", facC, "' has only one level.", call. = FALSE)
} else if (facLevN == 2) {
tesC <- switch(tesC, param = "ttest", nonparam = "wilcoxon")
} else {
tesC <- switch(tesC, param = "anova", nonparam = "kruskal")
}
}
} else {
facFcVn <- Biobase::pData(eset)[, facC]
facLevVc <- NA
if (tesC %in% c("param", "nonparam"))
tesC <- switch(tesC, param = "pearson", nonparam = "spearman")
}
varPfxC <- paste0(make.names(facC), "_", tesC, "_")
if (tesC %in% c("ttest", "limma", "wilcoxon"))
varPfxC <- paste0(varPfxC, paste(facLevVc, collapse = "."), "_")
return(list(facFcVn = facFcVn,
facLevVc = facLevVc,
tesC = tesC,
varPfxC = varPfxC))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.