Nothing
#' @title Measures of association for contingency tables
#' @name crosstable_statistics
#'
#' @description This function calculates various measure of association for
#' contingency tables and returns the statistic and p-value.
#' Supported measures are Cramer's V, Phi, Spearman's rho,
#' Kendall's tau and Pearson's r.
#'
#' @param data A data frame or a table object. If a table object, `x1` and
#' `x2` will be ignored. For Kendall's _tau_, Spearman's _rho_ or Pearson's
#' product moment correlation coefficient, `data` needs to be a data frame.
#' If `x1` and `x2` are not specified, the first two columns of the data
#' frames are used as variables to compute the crosstab.
#' @param formula A formula of the form `lhs ~ rhs` where `lhs` is a
#' numeric variable giving the data values and `rhs` a factor giving the
#' corresponding groups.
#' @param tab A [`table()`] or [`ftable()`]. Tables of class [`xtabs()`] and
#' other will be coerced to `ftable` objects.
#' @param x1 Name of first variable that should be used to compute the
#' contingency table. If `data` is a table object, this argument will be
#' irgnored.
#' @param x2 Name of second variable that should be used to compute the
#' contingency table. If `data` is a table object, this argument will be
#' irgnored.
#' @param statistics Name of measure of association that should be computed. May
#' be one of `"auto"`, `"cramer"`, `"phi"`, `"spearman"`, `"kendall"`,
#' `"pearson"` or `"fisher"`. See 'Details'.
#' @param ci.lvl Scalar between 0 and 1. If not `NULL`, returns a data
#' frame including lower and upper confidence intervals.
#' @param weights Name of variable in `x` that indicated the vector of weights
#' that will be applied to weight all observations. Default is `NULL`, so no
#' weights are used.
#' @param ... Other arguments, passed down to the statistic functions
#' [`chisq.test()`], [`fisher.test()`] or [`cor.test()`].
#'
#' @inheritParams bootstrap
#' @inheritParams boot_ci
#'
#' @return For [`phi()`], the table's Phi value. For [`cramers_v()]`, the
#' table's Cramer's V.
#'
#' For `crosstable_statistics()`, a list with following components:
#'
#' - `estimate`: the value of the estimated measure of association.
#' - `p.value`: the p-value for the test.
#' - `statistic`: the value of the test statistic.
#' - `stat.name`: the name of the test statistic.
#' - `stat.html`: if applicable, the name of the test statistic, in HTML-format.
#' - `df`: the degrees of freedom for the contingency table.
#' - `method`: character string indicating the name of the measure of association.
#' - `method.html`: if applicable, the name of the measure of association, in HTML-format.
#' - `method.short`: the short form of association measure, equals the `statistics`-argument.
#' - `fisher`: logical, if Fisher's exact test was used to calculate the p-value.
#'
#' @details The p-value for Cramer's V and the Phi coefficient are based
#' on `chisq.test()`. If any expected value of a table cell is smaller than 5,
#' or smaller than 10 and the df is 1, then `fisher.test()` is used to compute
#' the p-value, unless `statistics = "fisher"`; in this case, the use of
#' `fisher.test()` is forced to compute the p-value. The test statistic is
#' calculated with `cramers_v()` resp. `phi()`.
#'
#' Both test statistic and p-value for Spearman's rho, Kendall's tau and
#' Pearson's r are calculated with `cor.test()`.
#'
#' When `statistics = "auto"`, only Cramer's V or Phi are calculated, based on
#' the dimension of the table (i.e. if the table has more than two rows or
#' columns, Cramer's V is calculated, else Phi).
#'
#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
#' \doi{10.3390/math11091982}
#'
#' @examples
#' # Phi coefficient for 2x2 tables
#' tab <- table(sample(1:2, 30, TRUE), sample(1:2, 30, TRUE))
#' phi(tab)
#'
#' # Cramer's V for nominal variables with more than 2 categories
#' tab <- table(sample(1:2, 30, TRUE), sample(1:3, 30, TRUE))
#' cramer(tab)
#'
#' # formula notation
#' data(efc)
#' cramer(e16sex ~ c161sex, data = efc)
#'
#' # bootstrapped confidence intervals
#' cramer(e16sex ~ c161sex, data = efc, ci.lvl = .95, n = 100)
#'
#' # 2x2 table, compute Phi automatically
#' crosstable_statistics(efc, e16sex, c161sex)
#'
#' # more dimensions than 2x2, compute Cramer's V automatically
#' crosstable_statistics(efc, c172code, c161sex)
#'
#' # ordinal data, use Kendall's tau
#' crosstable_statistics(efc, e42dep, quol_5, statistics = "kendall")
#'
#' # calcilate Spearman's rho, with continuity correction
#' crosstable_statistics(efc,
#' e42dep,
#' quol_5,
#' statistics = "spearman",
#' exact = FALSE,
#' continuity = TRUE
#' )
#' @export
crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), weights = NULL, ...) {
# match arguments
statistics <- match.arg(statistics)
# name for test statistics in HTML
stat.html <- NULL
# check if data is a table
if (is.table(data)) {
# 'data' is a table - copy to table object
tab <- data
# check if statistics are possible to compute
if (statistics %in% c("spearman", "kendall", "pearson")) {
stop(
sprintf(
"Need arguments `data`, `x1` and `x2` to compute %s-statistics.",
statistics
),
call. = FALSE
)
}
} else {
# evaluate unquoted names
x1 <- deparse(substitute(x1))
x2 <- deparse(substitute(x2))
weights <- deparse(substitute(weights))
# if names were quotes, remove quotes
x1 <- gsub("\"", "", x1, fixed = TRUE)
x2 <- gsub("\"", "", x2, fixed = TRUE)
weights <- gsub("\"", "", weights, fixed = TRUE)
if (insight::is_empty_object(weights) || weights == "NULL")
weights <- NULL
else
weights <- data[[weights]]
# check for "NULL" and get data
if (x1 != "NULL" && x2 != "NULL")
data <- data[, c(x1, x2)]
else
data <- data[, 1:2]
if (!is.null(weights)) data <- cbind(data, weights)
# make table
if (!is.null(weights)) {
tab <- as.table(round(stats::xtabs(data[[3]] ~ data[[1]] + data[[2]])))
class(tab) <- "table"
} else {
tab <- table(data)
}
}
# get expected values
tab.val <- table_values(tab)
# remember whether fisher's exact test was used or not
use.fisher <- FALSE
# select statistics automatically, based on number of rows/columns
if (statistics %in% c("auto", "cramer", "phi", "fisher")) {
# get chisq-statistics, for df and p-value
chsq <- suppressWarnings(stats::chisq.test(tab, ...))
pv <- chsq$p.value
test <- chsq$statistic
# set statistics name
names(test) <- "Chi-squared"
stat.html <- "χ<sup>2</sup>"
# check row/columns
if ((nrow(tab) > 2 || ncol(tab) > 2 || statistics %in% c("cramer", "fisher")) && statistics != "phi") {
# get cramer's V
s <- cramer(tab)
# if minimum expected values below 5, compute fisher's exact test
if (statistics == "fisher" ||
min(tab.val$expected) < 5 ||
(min(tab.val$expected) < 10 && chsq$parameter == 1)) {
pv <- stats::fisher.test(tab, simulate.p.value = TRUE, ...)$p.value
use.fisher <- TRUE
}
# set statistics
statistics <- "cramer"
} else {
# get Phi
s <- phi(tab)
# if minimum expected values below 5 and df=1, compute fisher's exact test
if (min(tab.val$expected) < 5 ||
(min(tab.val$expected) < 10 && chsq$parameter == 1)) {
pv <- stats::fisher.test(tab, ...)$p.value
use.fisher <- TRUE
}
# set statistics
statistics <- "phi"
}
} else {
# compute correlation coefficient
cv <- stats::cor.test(x = data[[1]], y = data[[2]], method = statistics, ...)
# get statistics and p-value
s <- cv$estimate
pv <- cv$p.value
test <- cv$statistic
stat.html <- names(test)
}
# compute method string
method <- ifelse(statistics == "kendall", "Kendall's tau",
ifelse(statistics == "spearman", "Spearman's rho", # nolint
ifelse(statistics == "pearson", "Pearson's r", # nolint
ifelse(statistics == "cramer", "Cramer's V", "Phi") # nolint
)
)
)
# compute method string
method.html <- ifelse(statistics == "kendall", "Kendall's τ",
ifelse(statistics == "spearman", "Spearman's ρ", # nolint
ifelse(statistics == "pearson", "Pearson's r", # nolint
ifelse(statistics == "cramer", "Cramer's V", "&phi") # nolint
)
)
)
# return result
structure(class = "sj_xtab_stat", list(
estimate = s,
p.value = pv,
statistic = test,
stat.name = names(test),
stat.html = stat.html,
df = (nrow(tab) - 1) * (ncol(tab) - 1),
n_obs = sum(tab, na.rm = TRUE),
method = method,
method.html = method.html,
method.short = statistics,
fisher = use.fisher
))
}
#' @rdname crosstable_statistics
#' @export
xtab_statistics <- crosstable_statistics
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.