#' @title Appnds formatted results of pairwise tests to data frame
#'
#' @description Split by some columns, further split by \dQuote{split.col},
#' and do pairwise comparisons of the \code{value.col} column based on
#' a statistical test. Results are stored in \code{testresult.col}.
#'
#' @param x [\code{data.frame}]\cr
#' Data frame.
#' @param by [\code{character}]\cr
#' Column names used for splitting the data frame.
#' This will usually be columns that describe the problems / problem parameters.
#' @param split.col [\code{character(1)}]\cr
#' Single column name. This will usually be the algorithm column.
#' @param value.col [\code{character(1)}]\cr
#' Single column name with numeric values to undergo statistical tests.
#' @param alpha [\code{numeric(1)}]\cr
#' Significance level.
#' Default is 0.05, i.e. five percent.
#' @param testresult.col [\code{character(1)}]\cr
#' Column name of new column where test results should be stored.
#' @param test.fun [\code{function(x, y, ...)}]\cr
#' Statistical test function.
#' Default is \code{wilcox.test}.
#' @param show.positive.only [\code{logical(1)}]\cr
#' Show entries for rejected zero hypothesis only?
#' Default is \code{FALSE}.
#' @param show.numbers.only [\code{logical(1)}]\cr
#' Show algorithm number only, i.e., without plus/minus indicator for test
#' result? The value \code{TRUE} is only meaningful
#' if \code{show.positive.only = TRUE}.
#' Default is \code{FALSE}.
#' @param colors [\code{character}]\cr
#' Vector of colors used to highlight test results. If one color is passed it
#' is simply repeated k times where k is the number of algorithms. Otherwise
#' the k-th color is used for latex output if the zero hypothesis is rejected
#' for this algorithm.
#' @param show.intervals [\code{logical(1)}]\cr
#' Show results in intervals when possible and not comma-seperated?
#' This might lead to narrower columns sometimes.
#' Default is \code{FALSE}.
#' @param ... [any]\cr
#' Further parameters passed down to \code{test.fun}.
#' E.g., \code{alternative = "less"}.
#' @return [\code{data.frame}]
#' @export
test.pairwise = function(x,
by, split.col, value.col, testresult.col,
show.positive.only = FALSE,
show.numbers.only = FALSE,
colors = "black",
show.intervals = FALSE,
alpha = 0.05, test.fun = stats::wilcox.test, ...) {
checkmate::assertDataFrame(x)
checkmate::assertCharacter(by, min.len = 1L, any.missing = FALSE, all.missing = FALSE)
checkmate::assertString(split.col)
checkmate::assertString(value.col)
checkmate::assertString(testresult.col)
checkmate::assertLogical(show.positive.only)
checkmate::assertLogical(show.numbers.only)
checkmate::assertLogical(show.intervals)
checkmate::assertNumber(alpha, lower = 0.0000001, upper = 0.9999999)
checkmate::assertFunction(test.fun)
if (!show.positive.only & show.numbers.only) {
BBmisc::stopf("[test.pairwise] show.positive.only = FALSE and show.numbers.only = TRUE does not allow to distinguish test results.")
}
# initialize results column
x[[testresult.col]] = NA_character_
#FIXME: C&P -> write helper
splits = lapply(by, function(b) x[[b]])
x.parts = split(x, splits)
# some combinations of by-values may be empty. Get rid of these!
idx.nonempty = which(sapply(x.parts, nrow) != 0)
x.parts = x.parts[idx.nonempty]
x.parts = lapply(x.parts, function(part) {
# now split by split.col
part2 = split(part, part[[split.col]])
idx.nonempty = which(sapply(part2, nrow) != 0)
part2 = part2[idx.nonempty]
part2.ns = names(part2)
n.part2 = length(part2.ns)
if (length(colors) != n.part2) {
if (length(colors) == 1)
colors = rep(colors, n.part2)
else
BBmisc::stopf("[test.pairwise] There are %i algorithms, but only %i colors passed.", n.part2, length(colors))
}
for (i in seq_len(n.part2)) {
n1 = part2.ns[i]
alternatives = c()
results = c()
for (j in seq_len(n.part2)) {
n2 = part2.ns[j]
if (n1 == n2) {
next
}
vals1 = part2[[n1]][[value.col]]
vals2 = part2[[n2]][[value.col]]
test.result = test.fun(x = vals1, y = vals2, ...)
alternatives = c(alternatives, n2)
results = c(results, test.result$p.value < alpha)
}
if (show.intervals) {
altpos = alternatives[results]
altneg = alternatives[!results]
#print(altpos)
#print(altneg)
alternatives = ""
if (length(altpos) > 0) {
alternatives = buildIntervalString(as.integer(altpos))
stopifnot(length(alternatives) == 1L)
if (!show.numbers.only) {
if (nchar(alternatives) > 1L) {
alternatives = sprintf("$[\\text{%s}]^{+}$", alternatives)
} else {
alternatives = sprintf("$\\text{%s}^{+}$", alternatives)
}
}
}
if (length(altneg) > 0 & !show.positive.only) {
interval.string.neg = buildIntervalString(as.integer(altneg))
if (!show.numbers.only) {
if (nchar(interval.string.neg) > 1L) {
interval.string.neg = sprintf("$[\\text{%s}]^{+}$", interval.string.neg)
} else {
interval.string.neg = sprintf("$\\text{%s}^{+}$", interval.string.neg)
}
}
alternatives = sprintf("%s%s%s", interval.string.neg, if(alternatives == "") "" else ",", alternatives)
}
#print(alternatives)
#stop()
} else {
if (show.numbers.only) {
alternatives = sprintf("$\\text{%s}$", alternatives)
} else {
alternatives = sprintf("$\\text{%s}^{%s}$", alternatives, ifelse(results, "+", "-"))
}
# assign colors
the.colors = colors[-i] # all but color of the first algorithm in this iteration
alternatives[results] = sprintf("\\textcolor{%s}{%s}", the.colors[results], alternatives[results])
# show positive and negative results?
if (show.positive.only) {
alternatives = alternatives[results]
}
alternatives = BBmisc::collapse(alternatives, sep = ", ")
}
part2[[n1]][[testresult.col]] = rep(alternatives, nrow(part2[[n1]]))
}
do.call(rbind, part2)
})
x.parts = do.call(rbind, x.parts)
return(x.parts)
}
buildIntervalString = function(x) {
#x = sort(x)
if (length(x) == 0)
return("")
st = ""
last.pos = -100
inInterval = FALSE
for (i in 1:length(x)) {
curr.pos = x[i]
if (last.pos < 0) {
st = paste0(st, curr.pos)
} else if(((curr.pos - 1L) == last.pos) & !inInterval) {
inInterval = TRUE
st = paste0(st, "-") # interval begins
} else if(((curr.pos - 1L) == last.pos) & inInterval) {
} else if (inInterval & ((curr.pos - 1L) != last.pos)) {
inInterval = FALSE
st = paste0(st, last.pos, ",", curr.pos)
} else {
st = paste0(st, ",", curr.pos)
}
last.pos = curr.pos
}
if (inInterval) {
st = paste0(st, last.pos)
}
return(st)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.