Nothing
#' @title
#' Discrete Test Results Class
#'
#' @description
#' This is the class used by the statistical test functions of this package for
#' returning not only p-values, but also the supports of their distributions and
#' the parameters of the respective tests. Objects of this class are obtained by
#' setting the `simple.output` parameter of a test function to `FALSE` (the
#' default). All data members of this class are private to avoid inconsistencies
#' by deliberate or inadvertent changes by the user. However, the results can be
#' read by public methods.
#'
#' @examples
#' ## one-sided binomial test
#' # parameters
#' x <- 2:4
#' n <- 5
#' p <- 0.4
#' m <- length(x)
#' # support (same for all three tests) and p-values
#' support <- sapply(0:n, function(k) binom.test(k, n, p, "greater")$p.value)
#' pv <- support[x + 1]
#' # DiscreteTestResults object
#' res <- DiscreteTestResults$new(
#' # string with name of the test
#' test_name = "Exact binomial test",
#' # list of data frames
#' inputs = list(
#' observations = data.frame(
#' `number of successes` = x,
#' # no name check of column header to have a speaking name for 'print'
#' check.names = FALSE
#' ),
#' parameters = data.frame(
#' # parameter 'n', needs to be replicated to length of 'x'
#' `number of trials` = rep(n, m),
#' # mandatory parameter 'alternative', needs to be replicated to length of 'x'
#' alternative = rep("greater", m),
#' # no name check of column header to have a speaking name for 'print'
#' check.names = FALSE
#' ),
#' nullvalues = data.frame(
#' # here: only one null value, 'p'; needs to be replicated to length of 'x'
#' `probability of success` = rep(p, m),
#' # no name check of column header to have a speaking name for 'print'
#' check.names = FALSE
#' )
#' ),
#' # numerical vector of p-values
#' p_values = pv,
#' # list of supports (here: only one support); values must be sorted and unique
#' pvalue_supports = list(unique(sort(support))),
#' # list of indices that indicate which p-value/hypothesis each support belongs to
#' support_indices = list(1:m),
#' # name of input data variables
#' data_name = "x, n and p"
#' )
#'
#' # print results without supports
#' print(res)
#' # print results with supports
#' print(res, supports = TRUE)
#'
#' @importFrom R6 R6Class
#' @importFrom checkmate assert_character assert_data_frame assert_integerish assert_list assert_numeric assert_subset qassert
#' @export
DiscreteTestResults <- R6Class(
"DiscreteTestResults",
## public ----
public = list(
#' @description
#' Creates a new `DiscreteTestResults` object.
#'
#' @param test_name single character string with the name of the
#' test(s).
#' @param inputs named list of **exactly three** elements
#' containing the observations, test parameters and
#' hypothesised null values **as data frames**;
#' names of these list fields must be
#' `observations`, `nullvalues` and `parameters`.
#' See details for further information about the
#' requirements for these fields.
#' @param p_values numeric vector of the p-values calculated by
#' each hypothesis test.
#' @param pvalue_supports list of **unique** numeric vectors containing
#' all p-values that are observable under the
#' respective hypothesis; each value of `p_values`
#' must occur in its respective p-value support.
#' @param support_indices list of numeric vectors containing the test
#' indices that indicates to which individual
#' testing scenario each unique parameter set and
#' each unique support belongs.
#' @param data_name single character string with the name of the
#' variable that contains the observed data.
#'
#' @details
#' The fields of the `inputs` have the following requirements:
#' \describe{
#' \item{`$observations`}{data frame that contains the observed data; if
#' the observed data is a matrix, it must be
#' converted to a data frame; must not be `NULL`,
#' only numerical and character values are
#' allowed.}
#' \item{`$nullvalues`}{data frame that contains the hypothesised values
#' of the tests, e.g. the rate parameters for Poisson
#' tests; must not be `NULL`, only numerical values
#' are allowed.}
#' \item{`$parameters`}{data frame that holds the parameter combinations
#' of the null distribution of each test (e.g.
#' numbers of Bernoulli trials for binomial tests, or
#' `m`, `n` and `k` for the hypergeometric
#' distribution used by Fisher's Exact Test, which
#' have to be derived from the observations first);
#' **must** include a mandatory column named
#' `alternative`; only numerical and character values
#' are allowed.}
#' }
#'
#' Missing values or `NULL`s are not allowed for any of these fields. All
#' data frames must have the same number of rows. Their column names are
#' used by the `print` method for producing text output, therefore they
#' should be informative, i.e. short and (if necessary) non-syntactic,
#' like e.g. `` `number of success` ``.
#'
initialize = function(
test_name,
inputs,
p_values,
pvalue_supports,
support_indices,
data_name
) {
# ensure that test name is a single character string
qassert(x = test_name, rules = "S1")
# ensure that inputs are given as lists, data frame or numeric/string vectors
assert_list(
x = inputs,
types = c("data.frame"),
any.missing = TRUE,
len = 3,
names = "named"
)
# ensure input list elements are named
# 'observations', 'parameters', 'null values' and 'alternatives'
if(any(!(c("observations", "parameters", "nullvalues") %in% names(inputs))))
stop("Names of list 'inputs' must be 'observations', 'parameters' and 'nullvalues'")
# ensure observations are a data frame with as many rows as p-values
assert_data_frame(
x = inputs$observations,
types = c("numeric", "character"),
any.missing = FALSE
)
# overall number of tests, i.e. observations that were tested
len <- nrow(inputs$observations)
# ensure that the parameters are in a data.frame with at least one row,
# containing only numbers or strings
assert_data_frame(
x = inputs$parameters,
types = c("numeric", "character"),
any.missing = FALSE,
nrows = len
)
# ensure that alternatives exist and that they are strings
if(exists("alternative", inputs$parameters)) {
assert_character(
x = inputs$parameters$alternative,
any.missing = FALSE
)
} else stop("'parameters' must have an 'alternative' column")
# ensure that all alternatives are from the 'usual' ones
assert_subset(
x = inputs$parameters$alternative,
choices = c("greater", "less", "two.sided", "minlike",
"blaker", "central", "absdist"),
empty.ok = FALSE
)
# ensure that all null (i.e. hypothesised) values are in a numeric data frame
assert_data_frame(
x = inputs$nullvalues,
types = "numeric",
any.missing = FALSE,
nrows = len
)
# ensure that vector of p-values is numeric with probabilities in [0, 1]
assert_numeric(
x = p_values,
lower = 0,
upper = 1,
any.missing = FALSE,
len = len
)
# ensure that list of support values is a list
assert_list(
x = pvalue_supports,
types = "numeric",
any.missing = FALSE,
min.len = 1,
max.len = len
)
for(i in seq_along(pvalue_supports)){
# ensure each list item contains sorted vectors of probabilities in [0, 1]
assert_numeric(
x = pvalue_supports[[i]],
lower = 0,
upper = 1,
any.missing = FALSE,
min.len = 1,
sorted = TRUE
)
}
# set of p-value indices for checking of correct indices in supports list
idx_set <- 1L:len
# ensure that list of support indices is a list
assert_list(
x = support_indices,
types = "numeric",
any.missing = FALSE,
min.len = 1
)
for(i in seq_along(support_indices)){
# ensure indices are integerish vectors (and coerce them)
support_indices[[i]] <- assert_integerish(
x = support_indices[[i]],
lower = 1,
upper = len,
any.missing = FALSE,
min.len = 1,
unique = TRUE,
coerce = TRUE
)
# remove indices of current list item from check set
idx_set <- setdiff(idx_set, support_indices[[i]])
# ensure that each p-value is taken from its respective distribution
if(!all(p_values[support_indices[[i]]] %in% c(0, pvalue_supports[[i]])))
stop("All observed p-values must occur in their respective distribution")
}
# ensure check set is empty
if(length(idx_set))
stop("All support set indices must be unique")
# ensure that data variable name is a single character string
qassert(x = data_name, c("S+", "0"))
# assign inputs
private$test_name <- test_name
private$inputs <- inputs
private$p_values <- p_values
private$pvalue_supports <- pvalue_supports
private$support_indices <- support_indices
private$data_name <- data_name
# add row names in input data (if present) to p-values
names(private$p_values) <- rownames(inputs$observations)
},
#' @description
#' Returns the computed p-values.
#'
#' @param named single logical value that indicates whether the vector is
#' to be returned as a named vector (if names are present)
#'
#' @return
#' A numeric vector of the p-values of all null hypotheses.
#'
get_pvalues = function(named = TRUE) {
if(named)
return(private$p_values) else
return(as.numeric(private$p_values))
},
#' @description
#' Return the list of the test inputs.
#'
#' @param unique single logical value that indicates whether only unique
#' combinations of parameter sets and null values are to be
#' returned. If `unique = FALSE` (the default), the returned
#' data frames may contain duplicate sets.
#'
#' @return
#' A list of three elements. The first one contains a data frame with the
#' observations for each tested null hypothesis, while the second is another
#' data frame with the hypothesised null values (e.g. `p` for binomial
#' tests). The third list field holds the parameter sets (e.g. `n` in case
#' of a binomial test). If `unique = TRUE`, only unique combinations of
#' parameter sets and null values are returned, but observations remain
#' unchanged.
#'
get_inputs = function(unique = FALSE) {
if(unique) {
lst <- private$inputs[c("observations", "nullvalues", "parameters")]
nc <- ncol(lst$nullvalues)
df <- unique(cbind(lst$nullvalues, lst$parameters))
lst$nullvalues <- df[ seq_len(nc)]
lst$parameters <- df[-seq_len(nc)]
return(lst)
} else return(private$inputs)
},
#' @description
#' Returns the p-value supports, i.e. all observable p-values under the
#' respective null hypothesis of each test.
#'
#' @param unique single logical value that indicates whether only unique
#' p-value supports are to be returned. If `unique = FALSE`
#' (the default), the returned supports may be duplicated.
#'
#' @return
#' A list of numeric vectors containing the supports of the p-value null
#' distributions.
#'
get_pvalue_supports = function(unique = FALSE) {
if(!unique) {
idx_scns <- unlist(private$support_indices)
idx_lens <- sapply(private$support_indices, length)
return(rep(private$pvalue_supports, idx_lens)[order(idx_scns)])
} else return(private$pvalue_supports)
},
#' @description
#' Returns the indices that indicate to which testing scenario each
#' unique support belongs.
#'
#' @return
#' A list of numeric vectors. Each one contains the indices of the null
#' hypotheses to which the respective support and/or unique parameter set
#' belongs.
#'
get_support_indices = function(){
return(private$support_indices)
},
#' @description
#' Prints the computed p-values.
#'
#' @param inputs single logical value that indicates if the inputs
#' values (i.e. observations and parameters) are to be
#' printed; defaults to `TRUE`.
#' @param pvalues single logical value that indicates if the resulting
#' p-values are to be printed; defaults to `TRUE`.
#' @param supports single logical value that indicates if the p-value
#' supports are to be printed; defaults to `FALSE`.
#' @param test_idx integer vector giving the indices of the tests whose
#' results are to be printed; if `NULL` (the default),
#' results of every test up to the index specified by
#' `limit` (see below) are printed
#' @param limit single integer that indicates the maximum number of
#' test results to be printed; if `limit = 0`, results of
#' every test are printed; ignored if `test_idx` is not
#' set to `NULL`
#' @param ... further arguments passed to `print.default`.
#'
#' @return
#' Prints a summary of the tested null hypotheses. The object itself is
#' invisibly returned.
#'
#' @importFrom checkmate assert_int qassert
print = function(
inputs = TRUE,
pvalues = TRUE,
supports = FALSE,
test_idx = NULL,
limit = 10,
...
) {
qassert(inputs, "B1")
qassert(pvalues, "B1")
qassert(supports, "B1")
if(!is.null(test_idx) && !all(is.na(test_idx)))
qassert(test_idx, "x+[0,)")
qassert(limit, c("0", "x1", "X1[0,)"))
assert_int(x = limit, na.ok = TRUE, lower = 0, null.ok = TRUE)
if(inputs || pvalues){
pars <- self$get_inputs(unique = FALSE)
idx <- which(names(pars$parameters) == "alternative")
}
if(supports)
supp <- self$get_pvalue_supports(unique = FALSE)
cat("\n")
cat(strwrap(private$test_name, prefix = "\t"), "\n")
cat("\n")
cat("data: ", private$data_name, "\n")
cat("number of tests: ", length(private$p_values), "\n")
if(any(inputs, pvalues, supports)) {
n <- length(private$p_values)
if(is.null(test_idx)) {
limit <- ifelse(is.null(limit) || is.na(limit), n, limit)
nums <- seq_len(ifelse(limit, min(limit, n), n))
} else nums <- unique(pmin(na.omit(test_idx), n))
names <- rownames(pars$observations)
for(i in nums) {
# number of digits of i
chars_i <- floor(log10(i)) + 1
cat("\n")
cat("Test", i)
if(!is.null(names) && names[i] != i) {
# how many characters can be printed to console (minus "Test", " ", ":")
chars_tst_line <- getOption("width") - 6
# how many characters can be used by tag (= row name)
chars_name_max <- chars_tst_line - 3
# legth of current row name
chars_name <- nchar(names[i])
# print row name in brackets behind test number
if(chars_name > chars_name_max) {
# abbreviate names that are too long and add "..."
cat(
" (",
substr(gsub("[\r\n]", "_", names[i]), 1, chars_name_max - 3),
"...)",
sep = ""
)
} else cat(" (", gsub("[\r\n]", "_", names[i]), ")", sep = "")
# length of names (plus " ", "(" and ")") for underscores
chars_name <- min(chars_name, chars_name_max) + 3
} else chars_name <- 0
cat(":\n")
for(j in 1:(6 + chars_name + chars_i)) cat("-")
cat("\n")
if(inputs) {
# print function with wrapping and indentation for observations,
# parameters and hypotheses
print_wrap <- function(alt, str, indent) {
cat(
paste0(alt, paste(rep(" ", indent - nchar(alt)), collapse = "")),
strwrap(
x = str,
width = getOption("width") - indent,
exdent = indent,
prefix = "\n",
initial = ""
),
"\n",
sep = ""
)
}
# print observations
str <- paste(
paste0(
names(pars$observations), " = ", pars$observations[i, ]
),
collapse = ", "
)
print_wrap("observations:", str, 18)
# print parameters
if(ncol(pars$parameters) > 1) {
str <- paste(
paste0(
names(pars$parameters)[-idx], " = ", pars$parameters[i, -idx]
),
collapse = ", "
)
print_wrap("parameters:", str, 18)
}
if(pars$parameters[[idx]][i] == "greater") {
alt <- "greater than"
null <- "less than or equal to"
} else if(pars$parameters[[idx]][i] == "less") {
alt <- "less than"
null <- "greater than or equal to"
} else {
alt <- "not equal to"
null <- "equal to"
}
str <- paste("true", names(pars$nullvalues)[1], "is", null,
pars$nullvalues[[1]][i])
print_wrap("null hypothesis:", str, 18)
str <- paste("true", names(pars$nullvalues)[1], "is", alt,
pars$nullvalues[[1]][i])
print_wrap("alternative:", str, 18)
}
if(pvalues) {
meth <- switch(
EXPR = pars$parameters[[idx]][i],
minlike = "(minimum likelihood)",
blaker = "(combined tails)",
absdist = "(absolute distance from mean)",
central = "(minimum tail doubling)",
""
)
cat("p-value: ")
cat(private$p_values[i], meth, "\n")
}
if(supports) {
cat("support:\n")
print(supp[[i]], ...)
}
}
cat("\n")
if(is.null(test_idx) && limit > 0 && limit < n)
cat(
paste(
"[ print limit reached --", n - limit, "results omitted --",
"use print parameter 'limit' for more results ]\n"
)
)
if(!is.null(test_idx))
cat(paste("[", length(nums), "out of", n, "results printed ]\n"))
}
self
}
),
## private ----
private = list(
## @field test_name single character string with the name of the
## test(s), e.g. "Fisher's Exact Test"
test_name = character(),
## @field inputs named list containing the observations and the
## tests' parameters
inputs = list(),
## @field p_values numeric vector of the p-values calculated for
## each discrete test setting
p_values = numeric(),
## @field pvalue_supports list of UNIQUE numeric vectors containing all
## p-values that can be observed in the respective
## discrete test setting
pvalue_supports = list(),
## @field support_indices list of numeric vectors containing the test
## indices that indicate to which individual test
## setting each unique support belongs
support_indices = list(),
## @field data_name single character string with the name of the
## variable that contains the observed data
data_name = character()
)
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.