################################################################################
# Statistical testing
################################################################################
#' testAssoc
#'
#' Tests for association between two vectors. Based on \code{RnBeads:::test.traits}
#'
#' @param x Sample values for the first trait. This must be a vector of type \code{factor}, \code{integer} or
#' \code{numeric}.
#' @param y Sample values for the second trait. This must be a vector of type \code{factor}, \code{integer} or
#' \code{numeric}.
#' @param permMat Matrix of sample permutations (indices of x that will be used in permutation tests) in case none of the traits is a \code{factor}, and thus
#' permutation-based p-value from correlations is computed. If this parameter is \code{NULL} and
#' both \code{x} and \code{y} are sequences of numbers, no p-value is calculated.
#' @return List of four elements:
#' \describe{
#' \item{error}{Error, if any, that prevented this function from computing a p-value for trait
#' association.}
#' \item{test}{Type of test performed. This is one of \code{"Fisher"}, \code{"Wilcoxon"},
#' \code{"Kruskal-Wallis"}, \code{"Correlation"} or \code{NA}. The last value indicates that
#' the traits cannot be tested for association.}
#' \item{correlation}{Value of the pearson correlation coefficient between \code{x} and \code{y},
#' or \code{NA} if any of them is \code{factor}.}
#' \item{pvalue}{Calculated p-value, or \code{NA} if the traits cannot be tested for association.}
#' }
#' @export
testAssoc <- function(x, y, permMat=NULL) {
result <- list(
"error" = as.character(NA),
"test" = as.character(NA),
"statistic" = as.double(NA),
"pvalue" = as.double(NA))
if (length(x) != length(y)) logger.error(c("x and y must have matching lengths"))
if (class(x) == "Date") { x <- as.integer(x) }
if (class(y) == "Date") { y <- as.integer(y) }
if (is.character(x) || is.logical(x)) { x <- factor(x) }
if (is.character(y) || is.logical(y)) { y <- factor(y) }
disc <- is.na(x) | is.na(y)
if (sum(disc) > 0){
logger.warning(c("Discarding", sum(disc), "observations with NA (testAssoc)"))
inds <- which(!disc)
x <- x[inds]
y <- y[inds]
}
if (length(x) < 2) {
result[["error"]] <- "not enough shared values between x and y"
return(result)
}
if (is.factor(x)) {
x <- as.factor(as.character(x))
if (nlevels(x) < 2) {
## Not enough categories in y
result[["error"]] <- "not enough categories in x"
return(result)
}
}
if (is.factor(y)) {
y <- as.factor(as.character(y))
if (nlevels(y) < 2) {
## Not enough categories in y
result[["error"]] <- "not enough categories in y"
return(result)
}
}
## Perform a test or compute correlation
get.stat <- function(expr, statName) { tryCatch(suppressWarnings(expr[[statName]]), error = function(er) { as.double(NA) }) }
if (is.factor(x)) {
if (is.factor(y)) {
simulate <- (nlevels(x) > 2 || nlevels(y) > 2)
testRes <- fisher.test(x, y, conf.int = FALSE, simulate.p.value = simulate, B = 50000)
result[["test"]] <- "Fisher"
result[["pvalue"]] <- get.stat(testRes, "p.value")
result[["statistic"]] <- get.stat(testRes, "estimate")
} else if (nlevels(x) == 2) {
result[["test"]] <- "Wilcoxon"
values <- tapply(y, x, identity)
testRes <- wilcox.test(values[[1]], values[[2]], alternative = "two.sided")
result[["pvalue"]] <- get.stat(testRes, "p.value")
result[["statistic"]] <- get.stat(testRes, "statistic")
} else {
testRes <- kruskal.test(y, x)
result[["test"]] <- "Kruskal-Wallis"
result[["pvalue"]] <- get.stat(testRes, "p.value")
result[["statistic"]] <- get.stat(testRes, "statistic")
}
} else if (is.factor(y)) {
if (nlevels(y) == 2) {
result[["test"]] <- "Wilcoxon"
values <- tapply(x, y, identity)
testRes <- wilcox.test(values[[1]], values[[2]], alternative = "two.sided")
result[["pvalue"]] <- get.stat(testRes, "p.value")
result[["statistic"]] <- get.stat(testRes, "statistic")
} else {
testRes <- kruskal.test(x, y)
result[["test"]] <- "Kruskal-Wallis"
result[["pvalue"]] <- get.stat(testRes, "p.value")
result[["statistic"]] <- get.stat(testRes, "statistic")
}
} else {
result[["test"]] <- "Pearson correlation"
if (is.null(permMat)) {
result[["statistic"]] <- cor(x, y)
} else {
N <- length(x)
values <- apply(permMat, 2, function(i) { cor(x[i[i <= N]], y) })
result[["statistic"]] <- values[1]
values <- abs(values)
result[["pvalue"]] <- mean(values[1] <= values)
}
}
if (is.na(result[["pvalue"]])) {
result[["error"]] <- "test failed"
}
return(result)
}
#' plotFisherTest
#'
#' Conduct a Fisher's exact test and plot the results as a heatmap
#' @param x factor object or one that can be coerced to one. Alternative a 2x2 contingency matrix
#' @param y factor object or one that can be coerced to one
#' @param name.x optional character string specifying the name for the first grouping
#' @param name.y optional character string specifying the name for the second grouping
#' @param ... arguments passed on to \code{fisher.test}
#' @return an \code{S3} object containing the test result object as returned by \code{fisher.test} and a \code{ggplot} object
#' @author Fabian Mueller
#' @export
plotFisherTest <- function(x, y=NULL, name.x=NULL, name.y=NULL, ...){
testRes <- fisher.test(x=x, y=y, ...)
if (is.matrix(x) && nrow(x)==2 && ncol(x)==2){
ct <- as.table(x)
if (is.null(name.x)) name.x <- names(dimnames(conMat))[1]
if (is.null(name.y)) name.y <- names(dimnames(conMat))[2]
} else {
ct <- table(x, y)
if (is.null(name.x)) name.x <- deparse(substitute(x))
if (is.null(name.y)) name.y <- deparse(substitute(y))
}
# pp <- ggplot2.heatmap(ct) + xlab(name.x) + ylab(name.y)
df2p <- data.frame(ct, check.names=FALSE)
colnames(df2p) <- c("var1", "var2", "count")
pp <- ggplot(df2p) + aes(var2, var1) + geom_tile(aes(fill=count)) + scale_y_discrete(limits = rev(levels(df2p[,2]))) + coord_fixed() +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) + geom_text(aes(label=count)) + xlab(name.y) + ylab(name.x) +
labs(subtitle=paste0("OR=", round(testRes$estimate, 4), " (Alternative: '", testRes$alternative,"'; p-value=", round(testRes$p.value, 4), ")"))
res <- list(
testRes=testRes,
plot=pp
)
class(res) <- "FisherTestPlot"
return(res)
}
#' rowTtest
#'
#' performs a two-sided Welch's t-test (unequal variances, equal or unequal sample sizes) on each row of a matrix X with the indices inds.1 vs indices idx2 as group assignments.
#' @author Fabian Mueller
#' @param X Matrix on which the test is performed for every row
#' @param idx1 column indices of group 1 members
#' @param idx2 column indices of group 2 members
#' @param na.rm Should NAs be removed (logical)
#' @param alternative Testing alternative. Must be one of "two.sided" (default),"less","greater" or "all".
#' in case of "all" a data frome with corresping alternative variables is returned.
#' Otherwise the result is a vector.
#' @return vector (or data.frame if alternative=="all") of p-values resulting from the Welch's t-test
#' @export
#' @note Requires \code{matrixStats} package
rowTtest <- function(X, idx1, idx2=-idx1, na.rm=FALSE, alternative="two.sided"){
require(matrixStats)
if (!(alternative %in% c("two.sided","less","greater","all"))) {
stop("invalid value for testing alternative")
}
X.1 <- X[,idx1]
X.2 <- X[,idx2]
# sparse matrices not fully supported yet: convert to dense matrix
if (!is.matrix(X.1)) X.1 <- as.matrix(X.1)
if (!is.matrix(X.2)) X.2 <- as.matrix(X.2)
if (na.rm){
n.1 <- rowSums(!is.na(X.1), FALSE)
n.2 <- rowSums(!is.na(X.2), FALSE)
} else {
n.1 <- length(idx1)
n.2 <- length(idx2)
}
rm.1 <- rowMeans(X.1, na.rm = na.rm)
rm.2 <- rowMeans(X.2, na.rm = na.rm)
rv.1 <- rowVars(X.1, na.rm = na.rm)
rv.2 <- rowVars(X.2, na.rm = na.rm)
rq.1 <- rv.1/n.1
rq.2 <- rv.2/n.2
t.stat <- (rm.1 - rm.2)/sqrt(rq.1 + rq.2)
rdf <- (rq.1 + rq.2)^2/(rq.1^2/(n.1-1) + rq.2^2/(n.2-1)) #degrees of freedom
rp <- rep(NA,nrow(X))
if (alternative == "two.sided" || alternative == "all") {
rp.2s <- 2*pt(-abs(t.stat),rdf)
}
if (alternative == "less" || alternative == "all") {
rp.l <- pt(t.stat,rdf)
}
if (alternative == "greater" || alternative == "all") {
rp.g <- pt(t.stat,rdf,lower.tail=FALSE)
}
if (alternative == "two.sided") rp <- rp.2s
if (alternative == "greater") rp <- rp.g
if (alternative == "less") rp <- rp.l
if (alternative == "all") rp <- data.frame(less=rp.l,greater=rp.g,two.sided=rp.2s)
res <- data.frame(
pval = rp,
mean1 = rm.1,
mean2 = rm.2,
var1 = rv.1,
var2 = rv.2,
n1 = n.1,
n2 = n.2
)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.