#' Anderson--Darling K-Sample Test
#'
#' @description
#' This function performs an Anderson--Darling k-sample test. This is used to
#' determine if several samples (groups) share a common (unspecified)
#' distribution.
#'
#' @param data a data.frame (optional; if omitted, `x` and `groups` are
#'             supplied directly as vectors)
#' @param x the variable in the data.frame on which to perform the
#' Anderson--Darling k-Sample test (usually strength)
#' @param groups a variable in the data.frame that defines the groups
#' @param alpha the significance level (default 0.025)
#'
#' @return
#' Returns an object of class `adk`. This object has the following fields:
#' - `call` the expression used to call this function
#' - `data` the original data used to compute the ADK
#' - `groups` a vector of the groups used in the computation
#' - `alpha` the value of alpha specified
#' - `n` the total number of observations
#' - `k` the number of groups
#' - `sigma` the computed standard deviation of the test statistic
#' - `ad` the value of the Anderson--Darling k-Sample test statistic
#' - `p` the computed p-value
#' - `reject_same_dist` a boolean value indicating whether the null
#' hypothesis that all samples come from the same distribution is rejected
#' - `raw` the original results returned from
#' [ad.test][kSamples::ad.test]
#'
#'
#' @details
#' This function is a wrapper for the [ad.test][kSamples::ad.test] function from
#' the package `kSamples`. The method "exact" is specified in the call to
#' `ad.test`. Refer to that package's documentation for details.
#'
#' There is a minor difference in the formulation of the Anderson--Darling
#' k-Sample test in CMH-17-1G, compared with that in Scholz and
#' Stephens (1987). This difference affects the test statistic and the
#' critical value in the same proportion, and therefore the conclusion of
#' the test is unaffected. When
#' comparing the test statistic generated by this function to that generated
#' by software that uses the CMH-17-1G formulation (such as ASAP, CMH17-STATS,
#' etc.), the test statistic reported by this function will be greater by
#' a factor of \eqn{(k - 1)}, with a corresponding change in the critical
#' value.
#'
#' For more information about the difference between this function and
#' the formulation in CMH-17-1G, see the vignette on the subject, which
#' can be accessed by running `vignette("adktest")`
#'
#' @references
#' F. W. Scholz and M. Stephens, “K-Sample Anderson--Darling Tests,” Journal
#' of the American Statistical Association, vol. 82, no. 399. pp. 918–924,
#' Sep-1987.
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @examples
#' library(dplyr)
#'
#' carbon.fabric %>%
#' filter(test == "WT") %>%
#' filter(condition == "RTD") %>%
#' ad_ksample(strength, batch)
#' ##
#' ## Call:
#' ## ad_ksample(data = ., x = strength, groups = batch)
#' ##
#' ## N = 18 k = 3
#' ## ADK = 0.912 p-value = 0.95989
#' ## Conclusion: Samples come from the same distribution ( alpha = 0.025 )
#'
#' @importFrom rlang enquo eval_tidy
#' @importFrom kSamples ad.test
#' @export
ad_ksample <- function(data = NULL, x, groups, alpha = 0.025) {
  # Capture the call once: it is stored on the result and also handed to the
  # input checks below.
  cl <- match.call()

  res <- list()
  class(res) <- "adk"
  res$call <- cl

  # `x` and `groups` are passed through `verify_tidy_input` (defined
  # elsewhere in the package) and then evaluated with `data` as the data
  # mask, so they may be given as columns of `data` or as plain vectors.
  verify_tidy_input(
    df = data,
    x = x,
    c = cl,
    arg_name = "x")
  res$data <- eval_tidy(enquo(x), data)

  verify_tidy_input(
    df = data,
    x = groups,
    c = cl,
    arg_name = "groups")
  res$groups <- eval_tidy(enquo(groups), data)

  if (length(res$data) != length(res$groups)) {
    stop("Error: `x` and `groups` must be of same length.")
  }

  res$alpha <- alpha

  # Build one vector of observations per group, in factor-level order.
  # `split()` leaves out observations whose group label is NA, whereas
  # logical subsetting with `==` would add an NA value to every group for
  # each such observation. Names are dropped so `ad.test` receives a plain
  # (unnamed) list of samples.
  grps <- unname(split(res$data, as.factor(res$groups)))

  raw <- ad.test(grps, method = "exact")

  res$n <- raw$N
  res$k <- raw$k
  res$sigma <- raw$sig
  # Row 2 of the `ad` matrix returned by `ad.test`; column 1 is taken as the
  # test statistic and column 3 as the p-value.
  # NOTE(review): per the kSamples documentation row 2 is "version 2" of the
  # statistic and column 3 the asymptotic p-value -- confirm against the
  # installed kSamples version.
  res$ad <- raw$ad[2, 1]
  res$p <- raw$ad[2, 3]
  # Reject the null hypothesis (common distribution) when p falls below alpha
  res$reject_same_dist <- res$p < alpha
  res$raw <- raw

  res
}
#' Glance at an `adk` (Anderson--Darling k-Sample) object
#'
#' @description
#' Glance accepts an object of type `adk` and returns a
#' [tibble::tibble()] with
#' one row of summaries.
#'
#' Glance does not do any calculations: it just gathers the results in a
#' tibble.
#'
#' @param x an `adk` object
#' @param ... Additional arguments. Not used. Included only to match generic
#' signature.
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `alpha` the significance level for the test
#' - `n` the sample size for the test
#' - `k` the number of samples
#' - `sigma` the computed standard deviation of the test statistic
#' - `ad` the test statistic
#' - `p` the p-value of the test
#' - `reject_same_dist` whether the test concludes that the samples
#' are drawn from different populations
#'
#'
#' @seealso
#' [ad_ksample()]
#'
#' @examples
#' x <- c(rnorm(20, 100, 5), rnorm(20, 105, 6))
#' k <- c(rep(1, 20), rep(2, 20))
#' a <- ad_ksample(x = x, groups = k)
#' glance(a)
#'
#' ## A tibble: 1 x 7
#' ## alpha n k sigma ad p reject_same_dist
#' ## <dbl> <int> <int> <dbl> <dbl> <dbl> <lgl>
#' ## 1 0.025 40 2 0.727 4.37 0.00487 TRUE
#'
#' @method glance adk
#' @importFrom tibble tibble
#'
#' @export
glance.adk <- function(x, ...) { # nolint
  # Pure repackaging, no computation: copy the summary fields stored on the
  # `adk` object into a one-row tibble, one column per field.
  tibble::tibble(
    alpha = x[["alpha"]],
    n = x[["n"]],
    k = x[["k"]],
    sigma = x[["sigma"]],
    ad = x[["ad"]],
    p = x[["p"]],
    reject_same_dist = x[["reject_same_dist"]]
  )
}
#' @export
print.adk <- function(x, ...) {
  # Print method for `adk` objects: writes the originating call, the sample
  # counts, the test statistic with its p-value, and a one-line conclusion.
  # `...` is forwarded to `format_row_equal`. Returns `x` invisibly, as is
  # conventional for print methods, so the call can be used in a pipeline.

  # The deparsed call may span several lines; rejoin them with newlines.
  cat("\nCall:\n",
      paste(deparse(x$call), collapse = "\n"), "\n\n", sep = "")

  justify <- c("left", "left")
  width <- c(16L, 16L)

  cat(format_row_equal(list("N", x$n, "k", x$k),
                       justify, width, ...))
  cat(format_row_equal(list("ADK", x$ad, "p-value", x$p),
                       justify, width, ...))

  if (x$reject_same_dist) {
    cat("Conclusion: Samples do not come from the same distribution (alpha =",
        x$alpha, ")\n\n")
  } else {
    cat("Conclusion: Samples come from the same distribution ( alpha =",
        x$alpha, ")\n\n")
  }

  invisible(x)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.