#' Calculate and draw p-values in lattice plots
#'
#' This panel function allows to overlay p-values obtained from a statistical
#' significance test on plots, together with "significance" symbols such
#' as stars as it is often in encountered in the scientific literature. It supports
#' Student's t-test and Wilcoxon rank sum test (also called Mann-Whitney U-test
#' for two-sample comparisons). The Wilcoxon test is used for non-parametric (non
#' normally distributed data). Different input options for the tests can be specified.
#'
#' @import grid
#' @import lattice
#' @import latticeExtra
#' @importFrom grDevices grey
#' @importFrom methods new
#'
#' @param x,y (numeric, character) variables to be plotted. The x variable
#' is treated as a grouping variable, i.e. p-values are calculated between groups
#' of unique x values.
#' @param method (character) "t.test" for Student's t-test, or "wilcox.test" for Wilcoxon
#' ranked sum test. The latter is also called Mann-Whitney U-test for two-sample comparisons (the default).
#' @param alternative (character) passed to t.test(), one of "two.sided", "less", or "greater"
#' @param paired (logical) if the different groups of y values are paired (default: FALSE)
#' @param two_sample (logical) if TRUE a two-sample is made, else a one-sample comparison (default: TRUE)
#' @param mu (numeric) a number indicating the true value of the mean (or difference in
#' means if you are performing a two sample test). Passed to the test indicated by 'method'
#' @param std (character, numeric) the value of x that is used as standard. If NULL, the
#' first value is used as standard. If a numeric scalar i, std is determined
#' as the i'th element of unique(x). If character, it must match one of x. If
#' std = 'all_values', each unique group of x is compared to the total population.
#' Ignored for one sample tests, that are compared against 'mu'
#' @param symbol (logical) if '*' symbols should be drawn for significance
#' @param pvalue (logical) if numeric p-values should be drawn for significance
#' @param pval_digits (numeric) how many p-value digits should be printed (default is 2)
#' @param cex (numeric) character size of the symbol
#' @param offset (numeric) offset in native plot units used to adjust the position of text labels.
#' If offset is length 1, only the vertical position is adjusted; if it is length 2 the horizontal
#' and vertical position is adjusted
#' @param fixed_pos (numeric) fixed vertical position of the p-value,
#' if NULL determined from the data (the default)
#' @param verbose (logical) if a summary of the p-value calculation should be
#' printed to the terminal
#' @param col (character) the color to be used
#' @param ... other arguments passed to the function
#'
#' @examples
#' library(lattice)
#' data(mtcars)
#'
#' # p-values are calculated between groups of x, grouping is ignored
#' xyplot(mpg ~ factor(cyl), mtcars, groups = cyl, pch = 19, cex = 0.7,
#' panel = function(x, y, ...) {
#' panel.stripplot(x, y, jitter.data = TRUE,
#' horizontal = FALSE, amount = 0.15, ...)
#' panel.pvalue(x, y, std = 1, symbol = TRUE,
#' pvalue = TRUE, offset = 6)
#' })
#'
#' @export
# ------------------------------------------------------------------------------
panel.pvalue <- function(x, y,
method = "t.test",
alternative = "two.sided",
paired = FALSE,
two_sample = TRUE,
mu = 0,
std = NULL,
symbol = TRUE, pvalue = FALSE,
pval_digits = 2,
cex = 0.8, offset = NULL,
fixed_pos = NULL, verbose = FALSE,
col = trellis.par.get()$plot.symbol$col, ...
)
{
# if x is a factor, reorder x and y based on factor levels
if (is.factor(x)) {
ord <- order(as.numeric(x))
x <- x[ord]; y <- y[ord]
}
# two-sample test
# ---------------
if (two_sample) {
# if no standard is passed to function, just take the first unique x
if (is.null(std)) {
std <- 1
} else if (is.numeric(std)) {
stopifnot(std %in% seq_along(unique(x)))
} else if (std == "all_values") {
std <- seq_along(unique(x))
} else if (is.character(std)) {
stopifnot(std %in% x)
std <- which(unique(x) %in% std)
}
std_val <- unique(x)[std]
# calculate p-value between all x-variable groups and standard
pval <- tapply(y, x, function(y_test) {
result <- do.call(method, list(
x = y_test, y = y[x == std_val],
alternative = alternative,
paired = paired,
mu = mu)
)
result$p.value
})
} else {
# one-sample test
# ---------------
# calculate p-value between all x-variable groups and mu
std_val = "not applicable"
pval <- tapply(y, x, function(y_test) {
result <- do.call(method, list(
x = y_test, alternative = alternative,
paired = paired,
mu = mu)
)
result$p.value
})
}
# construct and print p-value symbols
pval_symbol <- sapply(pval, function(x) {
if (is.na(x)) "" else
if (x <= 0.001) "***" else
if (x <= 0.01 & x > 0.001) "**" else
if (x <= 0.05 & x > 0.01) "*" else ""
})
# collect all p-value data in a df
df_pval <- data.frame(row.names = seq_along(unique(x)),
Var_1 = rep(std_val, length(unique(x))),
Var_2 = unique(x),
p_value = pval,
p_value_text = paste0("\np = ", gsub("e", "x10^", format(pval, nsmall = 3, digits = pval_digits))),
p_sig = pval_symbol,
test = rep(paste0(method, ", ", alternative), length(unique(x))),
two_sample = two_sample,
paired = paired
)
# determine offset for X and Y position
# the default is to use 10% of the Y-range
if (is.null(offset)) {
offset <- c(0, diff(lattice::current.panel.limits()$ylim)*0.1)
} else if (is.numeric(offset)) {
if (length(offset) == 1) offset <- c(0, offset) else
if (length(offset) == 2) offset <- offset
} else {
stop("Offset must be NULL or a numeric value of length 1 or 2")
}
# determine X position of p-value text
df_pval$x_pos <- seq_along(unique(x))+offset[1]
# determine Y position of p-value text
if (is.null(fixed_pos)) {
ypos = tapply(y, x, function(z) {
mean(z, na.rm = TRUE) + sd(z, na.rm = TRUE)
})
df_pval$y_pos <- ypos+offset[2]
} else if (is.numeric(fixed_pos)){
df_pval$y_pos <- fixed_pos+offset[2]
} else {
stop("fixed_pos must be NULL for automatic y positioning, or a numeric value.")
}
# optionally print summary table
if (verbose) {
print(df_pval)
}
# finally print p-value text labels
with(df_pval[-std, ], {
if (symbol) {
panel.text(x_pos, y_pos, labels = p_sig,
cex = 2*cex, pos = 1, col = col, ...)
}
if (pvalue) {
panel.text(x_pos, y_pos, labels = p_value_text,
cex = cex, pos = 1, col = col, ...)
}
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.