Nothing
#' (Non-)Linear Tests for Null Hypotheses, Joint Hypotheses, Equivalence, Non Superiority, and Non Inferiority
#'
#' @description
#' Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, [`hypotheses`] emulates the behavior of the excellent and well-established [car::deltaMethod] and [car::linearHypothesis] functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.
#'
#' To learn more, read the hypothesis tests vignette, visit the
#' package website:
#'
#' * <https://marginaleffects.com/chapters/hypothesis.html>
#' * <https://marginaleffects.com/>
#'
#' Warning #1: Tests are conducted directly on the scale defined by the `type` argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the `"link"` scale instead of the `"response"` scale which is often the default.
#'
#' Warning #2: For hypothesis tests on objects produced by the `marginaleffects` package, it is safer to use the `hypothesis` argument of the original function. Using `hypotheses()` may not work in certain environments, in lists, or when working programmatically with *apply style functions.
#'
#' Warning #3: The tests assume that the `hypothesis` expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the \code{inferences()} function with `method = "boot"`.
#'
#' @inheritParams comparisons
#' @param model Model object or object generated by the `comparisons()`, `slopes()`, or `predictions()` functions.
#' @param joint Joint test of statistical significance. The null hypothesis value can be set using the `hypothesis` argument.
#' - FALSE: Hypotheses are not tested jointly.
#' - TRUE: All parameters are tested jointly.
#' - String: A regular expression to match parameters to be tested jointly. `grep(joint, perl = TRUE)`
#' - Character vector of parameter names to be tested. Characters refer to the names of the vector returned by `marginaleffects::get_coef(object)`.
#' - Integer vector of indices. Which parameters positions to test jointly.
#' - Note: When using the `joint` argument, the `hypothesis` argument is limited to `NULL` or numeric values. Users can chain multiple `hypotheses()` for complex joint hypothesis tests.
#' @param conf_level NULL or numeric value between 0 and 1. Confidence level to use to build a confidence interval. When `NULL` and `model` was generated by `marginaleffects`, the confidence level is taken from the `conf_level` attribute of the model. Otherwise, the default value is 0.95.
#' @param joint_test A character string specifying the type of test, either "f" or "chisq". The null hypothesis is set by the `hypothesis` argument, with default null equal to 0 for all parameters.
#' @param df Degrees of freedom used to compute p values and confidence intervals.
#' - A single numeric value between 1 and `Inf`, or a numeric vector with length equal to the number of rows in the output. When `df` is `Inf`, the normal distribution is used. When `df` is finite, the `t` distribution is used.
#' - "residual": Calls [insight::get_df] to extract degrees of freedom from the model automatically.
#' - When using `joint_test="f"`, the `df` argument should be a numeric vector of length 2.
#' @param multcomp Logical or string. If `TRUE` or string, apply multiple comparison adjustment to the p values and report family-wise confidence intervals. Valid strings: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "single-step", "Shaffer", "Westfall", "free". When `multcomp` is `TRUE`, the "holm" method is used.
#'
#' @section Joint hypothesis tests:
#' The test statistic for the joint Wald test is calculated as (R * theta_hat - r)' * inv(R * V_hat * R') * (R * theta_hat - r) / Q,
#' where theta_hat is the vector of estimated parameters, V_hat is the estimated covariance matrix, R is a Q x P matrix for testing Q hypotheses on P parameters,
#' r is a Q x 1 vector for the null hypothesis, and Q is the number of rows in R. If the test is a Chi-squared test, the test statistic is not normalized.
#'
#' The p-value is then calculated based on either the F-distribution (for F-test) or the Chi-squared distribution (for Chi-squared test).
#' For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters.
#' For the Chi-squared test, the degrees of freedom are Q.
#'
#' @template equivalence
#' @examples
#' mod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)
#'
#' hypotheses(mod)
#'
#' # Test of equality between coefficients
#' hypotheses(mod, hypothesis = "hp = wt")
#'
#' # Non-linear function
#' hypotheses(mod, hypothesis = "exp(hp + wt) = 0.1")
#'
#' # Robust standard errors
#' hypotheses(mod, hypothesis = "hp = wt", vcov = "HC3")
#'
#' # b1, b2, ... shortcuts can be used to identify the position of the
#' # parameters of interest in the output of
#' hypotheses(mod, hypothesis = "b2 = b3")
#'
#' # wildcard
#' hypotheses(mod, hypothesis = "b* / b2 = 1")
#'
#' # term names with special characters have to be enclosed in backticks
#' hypotheses(mod, hypothesis = "`factor(cyl)6` = `factor(cyl)8`")
#'
#' mod2 <- lm(mpg ~ hp * drat, data = mtcars)
#' hypotheses(mod2, hypothesis = "`hp:drat` = drat")
#'
#' # predictions(), comparisons(), and slopes()
#' mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)
#' cmp <- comparisons(mod, newdata = "mean")
#' hypotheses(cmp, hypothesis = "b1 = b2")
#'
#' mfx <- slopes(mod, newdata = "mean")
#' hypotheses(cmp, hypothesis = "b2 = 0.2")
#'
#' pre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))
#' hypotheses(pre, hypothesis = "b1 = b2")
#'
#' # The `hypothesis` argument can be used to compute standard errors for fitted values
#' mod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)
#'
#' f <- function(x) predict(x, type = "link", newdata = mtcars)
#' p <- hypotheses(mod, hypothesis = f)
#' head(p)
#'
#' f <- function(x) predict(x, type = "response", newdata = mtcars)
#' p <- hypotheses(mod, hypothesis = f)
#' head(p)
#'
#' # Complex aggregation
#' # Step 1: Collapse predicted probabilities by outcome level, for each individual
#' # Step 2: Take the mean of the collapsed probabilities by group and `cyl`
#' library(dplyr)
#' library(MASS)
#' library(dplyr)
#' library(magrittr)
#'
#' dat <- transform(mtcars, gear = factor(gear))
#' mod <- polr(gear ~ factor(cyl) + hp, dat)
#'
#' aggregation_fun <- function(x) {
#' predictions(x, vcov = FALSE) %>%
#' mutate(group = ifelse(group %in% c("3", "4"), "3 & 4", "5")) %>%
#' summarize(estimate = sum(estimate), .by = c("rowid", "cyl", "group")) %>%
#' summarize(estimate = mean(estimate), .by = c("cyl", "group")) %>%
#' rename(term = cyl)
#' }
#'
#' hypotheses(mod, hypothesis = aggregation_fun)
#'
#' # Equivalence, non-inferiority, and non-superiority tests
#' mod <- lm(mpg ~ hp + factor(gear), data = mtcars)
#' p <- predictions(mod, newdata = "median")
#' hypotheses(p, equivalence = c(17, 18))
#'
#' mfx <- avg_slopes(mod, variables = "hp")
#' hypotheses(mfx, equivalence = c(-.1, .1))
#'
#' cmp <- avg_comparisons(mod, variables = "gear", hypothesis = ~pairwise)
#' hypotheses(cmp, equivalence = c(0, 10))
#'
#' # joint hypotheses: character vector
#' model <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)
#' hypotheses(model, joint = c("as.factor(cyl)6:hp", "as.factor(cyl)8:hp"))
#'
#' # joint hypotheses: regular expression
#' hypotheses(model, joint = "cyl")
#'
#' # joint hypotheses: integer indices
#' hypotheses(model, joint = 2:3)
#'
#' # joint hypotheses: different null hypotheses
#' hypotheses(model, joint = 2:3, hypothesis = 1)
#' hypotheses(model, joint = 2:3, hypothesis = 1:2)
#'
#' # joint hypotheses: marginaleffects object
#' cmp <- avg_comparisons(model)
#' hypotheses(cmp, joint = "cyl")
#'
#' # Multiple comparison adjustment
#' # p values and family-wise confidence intervals
#' cmp <- avg_comparisons(model)
#' hypotheses(cmp, multcomp = "hochberg")
#'
#' @export
hypotheses <- function(
model = NULL,
hypothesis = NULL,
vcov = TRUE,
conf_level = NULL,
df = NULL,
equivalence = NULL,
joint = FALSE,
joint_test = "f",
multcomp = FALSE,
numderiv = "fdforward",
...) {
call <- construct_call(model, "hypotheses")
# Early validation and setup
if (is.null(model)) model <- ...get("model_perturbed")
if (is.null(model)) stop_sprintf("`model` is missing.")
sanity_multcomp(multcomp, hypothesis, joint)
# Early returns for special cases - removed mice check, moved later
if (!isFALSE(joint)) {
return(joint_test(
object = model,
joint_index = joint,
joint_test = joint_test,
hypothesis = hypothesis,
df = df,
vcov = vcov
))
}
# Handle marginaleffects objects vs regular models
internal_classes <- c("predictions", "comparisons", "slopes", "hypotheses")
if (inherits(model, internal_classes)) {
if (!isTRUE(checkmate::check_flag(vcov))) {
stop_sprintf(
"The `vcov` argument is not available when `model` is a `predictions`, `comparisons`, `slopes`, or `hypotheses` object. Please specify the type of standard errors in the initial `marginaleffects` call."
)
}
mfx <- components(model, "all")
if (!is.null(mfx) && !is.null(mfx@draws)) {
stop_sprintf(
"The `hypotheses()` function cannot be used to post-process `marginaleffects` objects that include draws from a bootstrap, simulation, or bayesian posterior distribution. Users should call the `get_draws()` function and process draws manually."
)
}
if (is.null(conf_level)) {
conf_level <- mfx@conf_level
}
}
# init
# in this function we need to sanitize_model() later
if (inherits(model, "marginaleffects_internal")) {
mfx <- model
} else {
# hypotheses() supports raw data frames
if (!inherits(model, c(internal_classes, "data.frame"))) {
model <- sanitize_model(model, call = call, wts = wts, vcov = vcov, by = by, ...)
}
mfx <- new_marginaleffects_internal(
call = call,
model = model
)
}
# multiple imputation - moved here after mfx object creation
if (inherits(model, c("mira", "amest"))) {
return(process_imputation(mfx))
}
mfx@conf_level <- sanitize_conf_level(conf_level, ...)
hypothesis_is_formula <- isTRUE(checkmate::check_formula(hypothesis))
# Setup degrees of freedom and hypothesis
df <- if (is.null(df)) Inf else df
mfx <- add_degrees_of_freedom(mfx = mfx, df = df, vcov = vcov)
mfx <- add_hypothesis(mfx, hypothesis)
mfx <- add_numderiv(mfx, numderiv)
vcov_false <- isFALSE(vcov)
if (!isTRUE(checkmate::check_matrix(vcov))) {
mfx@vcov_type <- get_vcov_label(vcov = vcov)
vcov <- get_vcov(model = model, vcov = vcov)
mfx@vcov_model <- vcov
} else {
mfx@vcov_type <- "matrix"
}
b <- get_hypotheses(
model_perturbed = model,
hypothesis = mfx@hypothesis,
hypothesis_is_formula = hypothesis_is_formula,
...
)
# Store hypothesis group-by variables in S4 slot
hypothesis_by_vars <- attr(b, "hypothesis_function_by")
if (!is.null(hypothesis_by_vars)) {
mfx@variable_names_by_hypothesis <- hypothesis_by_vars
}
# bayesian posterior
mfx@draws <- attr(b, "posterior_draws")
if (!is.null(mfx@draws)) {
J <- NULL
se <- rep(NA, length(b))
# standard errors via delta method
} else if (!vcov_false && isTRUE(checkmate::check_matrix(vcov))) {
args <- list(
mfx = mfx,
model_perturbed = model,
vcov = vcov,
hypothesis = mfx@hypothesis,
FUN = get_hypotheses,
hypothesis_is_formula = hypothesis_is_formula
)
if (...length() > 0) {
args <- utils::modifyList(args, list(...))
}
se <- do.call("get_se_delta", args)
J <- attr(se, "jacobian")
mfx@jacobian <- J
attr(se, "jacobian") <- NULL
mfx@draws <- NULL
# no standard error
} else {
J <- mfx@draws <- NULL
se <- rep(NA, length(b))
}
# Build output data frame
hyplab <- attr(b, "label")
if (is.null(hyplab) && !is.null(mfx@hypothesis)) {
hyplab <- attr(mfx@hypothesis, "label")
}
if (!is.null(hyplab) && nrow(hyplab) == length(b)) {
out <- cbind(hyplab, data.frame(estimate = b, std.error = se))
} else {
term_name <- if (!is.null(mfx@hypothesis)) "custom" else paste0("b", seq_along(b))
out <- data.frame(term = term_name, estimate = b, std.error = se)
}
# Remove std.error column when not computing st.errors and in bootstrap
if (vcov_false) {
out$std.error <- NULL
}
out[["group"]] <- attr(b, "grouplab")
out <- get_ci(out, mfx)
if (!is.null(equivalence)) {
out <- equivalence(
out,
df = mfx@df,
equivalence = equivalence,
draws = mfx@draws
)
}
out <- sort_columns(out)
data.table::setDF(out)
class(out) <- c("hypotheses", class(out))
# Add common attributes from mfx S4 slots
out <- add_attributes(out, mfx)
out <- prune_attributes(out)
# must be after attributes for vcov
out <- multcomp_test(
out,
multcomp = multcomp,
conf_level = mfx@conf_level,
df = mfx@df
)
# Issue #1102: hypotheses() should not be called twice on the same object
attr(out, "hypotheses_call") <- TRUE
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.