#' Inference on the Mean for Multivariate and Functional Data
#'
#' These functions implement testing procedures for the mean of populations of
#' vectors or functions. In particular, when the optional parameter \code{B =
#' 0}, a parametric test based on Hotelling's statistic is used, while, if
#' \code{B > 0}, a permutation test is instead used and more statistics can be
#' chosen by the user. The parametric test assumes that the data is normally
#' distributed or that the sample size is large such that the central limit
#' theorem can be applied. The permutation test assumes that the distribution
#' from which the data come is symmetric.
#'
#' All the statistics of the \pkg{fdahotelling} package can be used with the
#' permutation version of this test. In addition, the user can implement its own
#' statistic to be used with the test, provided that: \itemize{ \item the
#' user-defined statistic takes the same input arguments as the ones in the
#' \pkg{fdahotelling} package, i.e., the mandatory first dataset \code{x}, the
#' optional second dataset \code{y} and the mean function under the null
#' hypothesis \code{mu} that can have a default value, \item high values of the
#' statistic yield rejection of the null hypothesis. }
#'
#' @inheritParams statistics
#' @param skip_check Flag to skip checks on input types. Default is off. This
#' parameter should always be set to its default \code{FALSE}. It is set to
#' \code{TRUE} only internally, in power calculation functions, in which data
#' generated by the hypothesized model is already in matrix form.
#' @param B Number of bootstrap permutations (default: 1000).
#' @param statistic Statistic to be used within the permutation framework.
#' Choices are Hotelling (default), L1, L2, Linf, StandardizedL1,
#' StandardizedL2, StandardizedLinf and All.
#' @param verbose Activate verbose (default: \code{TRUE}).
#'
#' @return A 3-column \code{\link[tibble]{tibble}} containing the names of the
#' required statistics and their values, along with an estimate of the p-value
#' of the corresponding statistical test.
#'
#' @seealso The parametric test is described in details in Secchi, P., Stamm,
#' A., & Vantini, S. (2013). \emph{Inference for the mean of large \eqn{p}
#' small \eqn{n} data: A finite-sample high-dimensional generalization of
#' Hotelling theorem}. Electronic Journal of Statistics, 7, pp. 2005-2031.
#' doi:10.1214/13-EJS833, available online at
#' \url{http://projecteuclid.org/euclid.ejs/1375708877}. The permutation test
#' is described in details in the technical report by Pini, A., Stamm, A., &
#' Vantini, S. (2015). \emph{Hotelling \eqn{T^2} in functional Hilbert
#' spaces}, available online at
#' \url{https://mox.polimi.it/publication-results/?id=524&tipo=add_qmox}.
#'
#' @examples
#' ####################
#' # One-Sample Tests #
#' ####################
#' x <- aneurisk %>%
#' dplyr::filter(variable == "curvature" & group == "low") %>%
#' dplyr::select(data) %>%
#' purrr::flatten_df()
#' # Parametric test
#' test_onesample(x = x, mu = 0, step_size = 0.01, B = 0)
#' # Permutation test
#' test_onesample(x = x, mu = 0, step_size = 0.01, B = 100)
#' ####################
#' # Two-Sample Tests #
#' ####################
#' y <- aneurisk %>%
#' dplyr::filter(variable == "curvature" & group == "up") %>%
#' dplyr::select(data) %>%
#' purrr::flatten_df()
#' # Parametric test
#' test_twosample(x = x, y = y, mu = 0, step_size = 0.01, B = 0)
#' # Permutation test
#' test_twosample(x = x, y = y, mu = 0, step_size = 0.01, B = 100)
#'
#' @name tests
NULL
#' @rdname tests
#' @export
test_onesample <- function(x,
mu = 0, step_size = 0, B = 1000,
statistic = "Hotelling", verbose = TRUE, skip_check = FALSE) {
if (B == 0)
return(parametric_test(
x = x, mu = mu,
step_size = step_size,
skip_check = skip_check
))
permutation_test(
x = x, mu = mu,
step_size = step_size,
B = B,
statistic = statistic,
verbose = verbose,
skip_check = skip_check
)
}
#' @rdname tests
#' @export
test_twosample <- function(x, y,
mu = 0, paired = FALSE, step_size = 0, B = 1000,
statistic = "Hotelling", verbose = TRUE, skip_check = FALSE) {
if (B == 0)
return(parametric_test(
x = x, y = y, mu = mu,
paired = paired,
step_size = step_size,
skip_check = skip_check
))
permutation_test(
x = x, y = y, mu = mu,
paired = paired,
step_size = step_size,
B = B,
statistic = statistic,
verbose = verbose,
skip_check = skip_check
)
}
parametric_test <- function(x, y = NULL, mu = NULL,
paired = FALSE, step_size = 0, skip_check = FALSE) {
if (!skip_check) {
l <- check_arguments(x = x, y = y, mu = mu, step_size = step_size)
x <- l$x
y <- l$y
mu <- l$mu
}
oneSample <- dim(y)[2] == 0
p <- dim(x)[2]
if (!oneSample && dim(y)[2] != p)
stop("Both groups should have the same number of components.")
nx <- dim(x)[1]
ny <- ifelse(oneSample, 1, dim(y)[1])
stat <- stat_hotelling_impl(x = x, y = y, mu = mu,
paired = paired, step_size = step_size,
use_correction = TRUE)
df1 <- min(nx + ny - 2, p)
df2 <- abs(nx + ny - 2 - p) + 1
pvalue <- 1 - stats::pf(stat, df1, df2)
dplyr::data_frame(
statName = names(stat),
statVal = stat,
pValue = pvalue
)
}
permutation_test <- function(x, y = NULL, mu = 0,
paired = FALSE, step_size = 0,
B = 1000, statistic = "Hotelling", verbose = TRUE,
skip_check = FALSE) {
if (!skip_check) {
l <- check_arguments(x = x, y = y, mu = mu, step_size = step_size)
x <- l$x
y <- l$y
mu <- l$mu
}
oneSample <- dim(y)[2] == 0
stat_fun <- switch (statistic,
"Hotelling" = stat_hotelling_impl,
"L1" = stat_L1_impl,
"L2" = stat_L2_impl,
"Linf" = stat_Linf_impl,
"StandardizedL1" = stat_L1_std_impl,
"StandardizedL2" = stat_L2_std_impl,
"StandardizedLinf" = stat_Linf_std_impl,
"All" = stat_all_impl,
stop("Unrecognised statistic. Please choose between Hotelling, L1, L2, Linf,
StandardizedL1, StandardizedL2, StandardizedLinf and All.")
)
# Get statistic value
stat <- stat_fun(x = x, y = y, mu = mu, paired = paired, step_size = step_size)
if (oneSample)
boot_data <- perm_onesample(x, mu, step_size, B, stat_fun, verbose)
else
boot_data <- perm_twosample(x, y, mu, paired, step_size, B, stat_fun, verbose)
boot_data %>%
dplyr::mutate(test = purrr::map(stat_boot, `>=`, stat)) %>%
`$`(test) %>%
purrr::transpose() %>%
purrr::simplify_all() %>%
purrr::map_dbl(mean) %>%
tibble::tibble(statName = names(.), statVal = stat, pValue = .)
}
perm_onesample <- function(x,
mu,
step_size,
B,
statistic,
verbose) {
n <- dim(x)[1]
p <- dim(x)[2]
# Decide on bootstrap vs exact test
n_comb <- 2^n
p_min <- 1 / min(B, n_comb)
if (verbose)
writeLines(paste(" - P-value resolution:", p_min))
exactTest <- B >= n_comb
if (exactTest) { # Case of exact test
B <- t(expand.grid(rep(list(c(0, 1)), n)))
if (verbose) {
writeLines(" - Computing exact p-value.")
writeLines(paste(" - P-value will never drop below", p_min))
}
} else { # Case of approximate test
n_comb <- B
if (verbose) {
writeLines(paste(" - Computing approximate p-value using",
B, "random reflexions."))
writeLines(paste(" - P-value will not drop below", 1 / n_comb, "in average."))
}
}
matrixMu <- matrix(data = mu, nrow = n, ncol = p, byrow = TRUE)
y <- matrix(nrow = 0, ncol = 0)
tibble::tibble(bootId = seq_len(n_comb)) %>%
dplyr::mutate(
r = purrr::map(bootId, ~ if (exactTest) {
B[, .x]
} else {
sample.int(n = 2, size = n, replace = TRUE) - 1
}),
data = purrr::map(r, ~ matrixMu + diag((-1)^(.x)) %*% (x - matrixMu)),
stat_boot = purrr::map(data, statistic, y = y, mu = mu, step_size = step_size)
)
}
perm_twosample <- function(x,
y,
mu,
paired,
step_size,
B,
statistic,
verbose) {
if (dim(y)[2] != dim(x)[2])
stop("Both groups should have the same number of components.")
# Get sample sizes
nx <- dim(x)[1]
ny <- dim(y)[1]
px <- nx / (nx + ny)
py <- ny / (nx + ny)
# Decide on MC vs exact test
n_comb <- choose(nx + ny, nx)
p_min <- 1 / min(B, n_comb)
if (verbose)
writeLines(paste(" - P-value resolution:", p_min))
if (B >= n_comb) { # Case of exact test
B <- combn(nx + ny, nx)
if (verbose) {
writeLines(" - Computing exact p-value.")
writeLines(paste(" - P-value will never drop below", p_min))
}
} else { # Case of approximate test
if (verbose) {
writeLines(paste(" - Computing approximate p-value using",
B, "random permutations."))
writeLines(paste(" - P-value will not drop below",
1 / n_comb, "in average."))
}
}
# Generate boostrap data
rbind(x, y) %>%
partition(B, c(D1 = px, D2 = py)) %>%
dplyr::mutate(
stat_boot = purrr::map2(D1, D2, statistic, mu = mu,
paired = paired,
step_size = step_size)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.