R/adk.R
In cmstatr: Statistical Methods for Composite Material Data

Documented in ad_ksample glance.adk

#' Anderson--Darling K-Sample Test
#'
#' @description
#' This function performs an Anderson--Darling k-sample test. This is used to
#' determine if several samples (groups) share a common (unspecified)
#' distribution.
#'
#' @param data a data.frame
#' @param x the variable in the data.frame on which to perform the
#'          Anderson--Darling k-Sample test (usually strength)
#' @param groups a variable in the data.frame that defines the groups
#' @param alpha the significance level (default 0.025)
#'
#' @return
#' Returns an object of class `adk`. This object has the following fields:
#' - `call` the expression used to call this function
#' - `data` the original data used to compute the ADK
#' - `groups` a vector of the groups used in the computation
#' - `alpha` the value of alpha specified
#' - `n` the total number of observations
#' - `k` the number of groups
#' - `sigma` the computed standard deviation of the test statistic
#' - `ad` the value of the Anderson--Darling k-Sample test statistic
#' - `p` the computed p-value
#' - `reject_same_dist` a boolean value indicating whether the null
#'   hypothesis that all samples come from the same distribution is rejected
#' - `raw` the original results returned from
#'   [ad.test][kSamples::ad.test]
#'
#'
#' @details
#' This function is a wrapper for the [ad.test][kSamples::ad.test] function from
#' the package `kSamples`. The method "exact" is specified in the call to
#' `ad.test`. Refer to that package's documentation for details.
#'
#' There is a minor difference in the formulation of the Anderson--Darling
#' k-Sample test in CMH-17-1G, compared with that in the Scholz and
#' Stephens (1987). This difference affects the test statistic and the
#' critical value in the same proportion, and therefore the conclusion of
#' the test is unaffected. When
#' comparing the test statistic generated by this function to that generated
#' by software that uses the CMH-17-1G formulation (such as ASAP, CMH17-STATS,
#' etc.), the test statistic reported by this function will be greater by
#' a factor of \eqn{(k - 1)}, with a corresponding change in the critical
#' value.
#'
#' For more information about the difference between this function and
#' the formulation in CMH-17-1G, see the vignette on the subject, which
#' can be accessed by running `vignette("adktest")`
#'
#' @references
#' F. W. Scholz and M. Stephens, “K-Sample Anderson--Darling Tests,” Journal
#' of the American Statistical Association, vol. 82, no. 399. pp. 918–924,
#' Sep-1987.
#'
#' “Composite Materials Handbook, Volume 1. Polymer Matrix Composites
#' Guideline for Characterization of Structural Materials,” SAE International,
#' CMH-17-1G, Mar. 2012.
#'
#' @examples
#' library(dplyr)
#'
#' carbon.fabric %>%
#'   filter(test == "WT") %>%
#'   filter(condition == "RTD") %>%
#'   ad_ksample(strength, batch)
#' ##
#' ## Call:
#' ## ad_ksample(data = ., x = strength, groups = batch)
#' ##
#' ## N = 18          k = 3
#' ## ADK = 0.912     p-value = 0.95989
#' ## Conclusion: Samples come from the same distribution ( alpha = 0.025 )
#'
#' @importFrom rlang enquo eval_tidy
#' @importFrom kSamples ad.test
#' @export
ad_ksample <- function(data = NULL, x, groups, alpha = 0.025) {
  res <- list()
  class(res) <- "adk"

  res$call <- match.call()

  verify_tidy_input(
    df = data,
    x = x,
    c = match.call(),
    arg_name = "x")
  res$data <- eval_tidy(enquo(x), data)

  verify_tidy_input(
    df = data,
    x = groups,
    c = match.call(),
    arg_name = "groups")
  res$groups <- eval_tidy(enquo(groups), data)

  if (length(res$data) != length(res$groups)) {
    stop("Error: `x` and `groups` must be of same length.")
  }

  res$alpha <- alpha

  td <- NULL

  res$transformed_data <- td

  grps <- lapply(levels(as.factor(res[["groups"]])),
                 function(l) {
                   res[["data"]][res[["groups"]] == l]
                 }
  )

  raw <- ad.test(grps, method = "exact")
  res$n <- raw$N
  res$k <- raw$k
  res$sigma <- raw$sig
  res$ad <- raw$ad[2, 1]
  res$p <- raw$ad[2, 3]
  res$reject_same_dist <- res$p < alpha

  res$raw <- raw

  return(res)
}

#' Glance at a `adk` (Anderson--Darling k-Sample) object
#'
#' @description
#' Glance accepts an object of type `adk` and returns a
#' [tibble::tibble()] with
#' one row of summaries.
#'
#' Glance does not do any calculations: it just gathers the results in a
#' tibble.
#'
#' @param x an `adk` object
#' @param ... Additional arguments. Not used. Included only to match generic
#'            signature.
#'
#'
#' @return
#' A one-row [tibble::tibble()] with the following
#' columns:
#'
#' - `alpha` the significance level for the test
#' - `n` the sample size for the test
#' - `k` the number of samples
#' - `sigma` the computed standard deviation of the test statistic
#' - `ad` the test statistic
#' - `p` the p-value of the test
#' - `reject_same_dist` whether the test concludes that the samples
#'   are drawn from different populations
#'
#'
#' @seealso
#' [ad_ksample()]
#'
#' @examples
#' x <- c(rnorm(20, 100, 5), rnorm(20, 105, 6))
#' k <- c(rep(1, 20), rep(2, 20))
#' a <- ad_ksample(x = x, groups = k)
#' glance(a)
#'
#' ## A tibble: 1 x 7
#' ##   alpha     n     k sigma    ad       p reject_same_dist
#' ##   <dbl> <int> <int> <dbl> <dbl>   <dbl> <lgl>
#' ## 1 0.025    40     2 0.727  4.37 0.00487 TRUE
#'
#' @method glance adk
#' @importFrom tibble tibble
#'
#' @export
glance.adk <- function(x, ...) {  # nolint
  # nolint start: object_usage_linter
  with(
    x,
    tibble::tibble(
      alpha = alpha,
      n = n,
      k = k,
      sigma = sigma,
      ad = ad,
      p = p,
      reject_same_dist = reject_same_dist
    )
  )
  # nolint end
}

#' @export
print.adk <- function(x, ...) {
  cat("\nCall:\n",
      paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")

  justify <- c("left", "left")
  width <- c(16L, 16L)

  cat(format_row_equal(list("N", x$n, "k", x$k),
                       justify, width, ...))
  cat(format_row_equal(list("ADK", x$ad, "p-value", x$p),
                       justify, width, ...))
  if (x$reject_same_dist) {
    cat("Conclusion: Samples do not come from the same distribution (alpha =",
        x$alpha, ")\n\n")
  } else {
    cat("Conclusion: Samples come from the same distribution ( alpha =",
        x$alpha, ")\n\n")
  }
}