#' @rdname get_emmeans
#' @export
get_marginalcontrasts <- function(model,
contrast = NULL,
by = NULL,
predict = NULL,
ci = 0.95,
comparison = "pairwise",
estimate = NULL,
p_adjust = "none",
transform = NULL,
keep_iterations = FALSE,
verbose = TRUE,
...) {
# check if available
insight::check_if_installed("marginaleffects", minimum_version = "0.25.0")
# temporarily overwrite settings that error on "too many" rows
me_option <- getOption("marginaleffects_safe")
options(marginaleffects_safe = FALSE)
on.exit(options(marginaleffects_safe = me_option))
# First step: prepare arguments ---------------------------------------------
# ---------------------------------------------------------------------------
# set default, if NULL
if (is.null(contrast)) {
contrast <- "auto"
}
if (is.null(estimate)) {
estimate <- getOption("modelbased_estimate", "typical")
}
# check whether contrasts should be made for numerics or categorical
model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
on_the_fly_factors <- attributes(model_data)$factors
# model details
model_info <- insight::model_info(model, verbose = FALSE)
# Guess arguments
my_args <- .guess_marginaleffects_arguments(
model,
by,
contrast,
verbose = verbose,
...
)
# sanitize comparison argument, to ensure compatibility between different
# marginaleffects versions - newer versions don't accept a string argument,
# only formulas (older versions don't accept formulas)
my_args <- .get_marginaleffects_hypothesis_argument(
comparison,
my_args,
model_data,
estimate,
...
)
# extract first focal term
first_focal <- my_args$contrast[1]
# sanity check - is it a list? if so, use name
if (is.list(first_focal)) {
first_focal <- names(first_focal)
}
# sanity check: `contrast` and `by` cannot be the same
cleaned_by <- gsub("=.*", "\\1", my_args$by)
cleaned_contrast <- gsub("=.*", "\\1", my_args$contrast)
if (length(cleaned_by) && length(cleaned_contrast) && (all(cleaned_by %in% cleaned_contrast) || all(cleaned_contrast %in% cleaned_by))) { # nolint
insight::format_error(
"You cannot specifiy the same variables in `contrast` and `by`. Either omit `by`, or choose a different variable for `contrast` or `by`." # nolint
)
}
# Second step: compute contrasts, for slopes or categorical -----------------
# ---------------------------------------------------------------------------
# if first focal term is numeric, we contrast slopes, but slopes only for
# # numerics with many values, not for binary or likert-alike
if (is.numeric(model_data[[first_focal]]) && !.is_likert(model_data[[first_focal]], ...) && !first_focal %in% on_the_fly_factors) { # nolint
# sanity check - contrast for slopes only makes sense when we have a "by" argument
if (is.null(my_args$by)) {
insight::format_error("Please specify the `by` argument to calculate contrasts of slopes.") # nolint
}
# call slopes with hypothesis argument
out <- estimate_slopes(
model = model,
trend = my_args$contrast,
by = my_args$by,
ci = ci,
hypothesis = my_args$comparison_slopes,
backend = "marginaleffects",
transform = transform,
keep_iterations = keep_iterations,
verbose = verbose,
...
)
} else {
# for contrasts of categorical predictors, we call avg_predictions
out <- estimate_means(
model = model,
by = unique(c(my_args$contrast, my_args$by)),
ci = ci,
hypothesis = my_args$comparison,
predict = predict,
backend = "marginaleffects",
estimate = estimate,
transform = transform,
keep_iterations = keep_iterations,
verbose = verbose,
...
)
}
# filter results - for `estimate_contrasts()`, and when `estimate = "average"`,
# we don't filter using the data grid; due to the flexible way of defining
# comparisons, we need the full data grid and filter here (e.g., when we have
# `by="Petal.Width=c(1, 2)"`)
out <- .filter_contrasts_average(out, my_args)
# adjust p-values
if (!model_info$is_bayesian) {
out <- .p_adjust(model, out, p_adjust, verbose, ...)
}
# Last step: Save information in attributes --------------------------------
# ---------------------------------------------------------------------------
out <- .add_attributes(
out,
by = my_args$by,
model_info = model_info,
info = list(
contrast = my_args$contrast,
predict = predict,
comparison = my_args$comparison,
estimate = estimate,
p_adjust = p_adjust,
contrast_filter = my_args$contrast_filter,
keep_iterations = keep_iterations
)
)
# remove "estimate_means" class attribute
class(out) <- setdiff(
unique(c("marginaleffects_contrasts", class(out))),
"estimate_means"
)
out
}
# filter "contrasts" for `estimate = "average"` -------------------------------
.filter_contrasts_average <- function(out, my_args) {
# filter results - for `estimate_contrasts()`, and when `estimate = "average"`,
# we don't filter using the data grid; due to the flexible way of defining
# comparisons, we need the full data grid and filter here (e.g., when we have
# `by="Petal.Width=c(1, 2)"`)
if (!is.null(my_args$by_filter) && all(names(my_args$by_filter) %in% colnames(out))) {
for (i in names(my_args$by_filter)) {
filter_ok <- any(my_args$by_filter[[i]] %in% out[[i]])
# stop if not...
if (!filter_ok) {
# set up informative message
example_values <- sample(unique(out[[i]]), pmin(3, insight::n_unique(out[[i]])))
# tell user...
insight::format_error(paste0(
"None of the values specified for the predictor `", i,
"` are available in the data. This is required for `estimate=\"average\"`.",
" Either use a different option for the `estimate` argument, or use values that",
" are present in the data, such as ",
datawizard::text_concatenate(example_values, last = " or ", enclose = "`"),
"."
))
}
out <- out[out[[i]] %in% my_args$by_filter[[i]], ]
}
}
out
}
# make "comparison" argument compatible -----------------------------------
# this function has two major tasks: format the "comparison" argument for use
# in the marginaleffects package, and extract the potential filter values used
# in `by` and `contrast` (if any), to "clean" these arguments and save the levels
# or values at which rows should be filtered later...
.get_marginaleffects_hypothesis_argument <- function(comparison,
my_args,
model_data = NULL,
estimate = NULL,
...) {
# init
comparison_slopes <- by_filter <- contrast_filter <- by_token <- NULL
# save original `by`
original_by <- my_args$by
# make sure "by" is a valid column name, and no filter-directive, like
# "Species='setosa'". If `by` is also used for filtering, split and extract
# filter value for later - we have to filter rows manually after calculating
# contrasts (but only for `estimate = "average"`!). Furthermore, "clean" `by`
# argument (remove filter), because we need the pure variable name for setting
# up the hypothesis argument, where variables in `by` are used in the formula
if (!is.null(my_args$by) && any(grepl("=", my_args$by, fixed = TRUE))) { # "[^0-9A-Za-z\\._]"
# find which element in `by` has a filter
filter_index <- grep("=", my_args$by, fixed = TRUE)
for (f in filter_index) {
# look for filter values
filter_value <- insight::trim_ws(unlist(
strsplit(my_args$by[f], "=", fixed = TRUE),
use.names = FALSE
))
if (length(filter_value) > 1) {
# parse filter value and save for later use - we create a named list,
# because we need to know *which* variables in `by` used a filter. we
# could have `by = c("x", "y=c(1,2)")`, but also `by = c("x=c('a','b')", "y")`.
# the list has the variable name as name, and the filter values as element
by_value <- stats::setNames(
list(.safe(eval(str2lang(filter_value[2])))),
filter_value[1]
)
by_filter <- c(by_filter, by_value)
# check if evaluation was possible, or if we had a "token", like
# "[sd]" or "[fivenum]". If not, update `by`, else preserve
if (is.null(by_value[[1]]) && !grepl("[\\[\\]]", filter_value[2])) {
by_token <- c(by_token, stats::setNames(list(filter_value[2]), filter_value[1]))
}
# copy "cleaned" variable
my_args$by[f] <- filter_value[1]
}
}
}
# if filtering is requested for contrasts, we also want to extract the filter
# values for later use. we only need this for `estimate = "average"`, because
# that is the only situation where we do *not* use a data grid, which we else
# could use for filtering, by dropping not-wanted rows from the grid.
if (identical(estimate, "average") && !is.null(my_args$contrast) && any(grepl("=", my_args$contrast, fixed = TRUE))) { # nolint
# find which element in `by` has a filter
filter_index <- grep("=", my_args$contrast, fixed = TRUE)
for (f in filter_index) {
# look for filter values
filter_value <- insight::trim_ws(unlist(
strsplit(my_args$contrast[f], "=", fixed = TRUE),
use.names = FALSE
))
if (length(filter_value) > 1) {
# parse filter value and save for later user - we need as named list
# for the same reason as mentioned above...
contrast_filter <- c(
contrast_filter,
stats::setNames(list(.safe(eval(str2lang(filter_value[2])))), filter_value[1])
)
}
}
}
# convert comparison and by into a formula
if (is.null(comparison)) {
# default to pairwise, if comparison = NULL
comparison <- comparison_slopes <- ~pairwise
} else if (.is_custom_comparison(comparison)) {
# we have not set "comparison_slopes" yet - we also set it to custom hypothesis
comparison_slopes <- comparison
} else {
# only proceed if we don't have custom comparisons
# if we have a formula as comparison, we convert it into strings in order
# to extract the information for "comparison" and "by", because we
# recombine the formula later - we always use variables in `by` as group
# in the formula-definition for marginaleffects
if (inherits(comparison, "formula")) {
# check if we have grouping in the formula, indicated via "|". we split
# the formula into the three single components: lhs ~ rhs | group
f <- insight::trim_ws(unlist(strsplit(insight::safe_deparse(comparison), "[~|]")))
# extract formula parts
formula_lhs <- f[1]
formula_rhs <- f[2]
formula_group <- f[3]
# can be NA when no group
if (is.na(formula_group) || !nzchar(formula_group)) {
# no grouping via formula
formula_group <- NULL
} else {
# else, if we have groups, update by-argument
my_args$by <- formula_group
}
} else {
# if comparison is a string, do sanity check for "comparison" argument
insight::validate_argument(comparison, .valid_hypothesis_strings())
formula_lhs <- "difference"
formula_rhs <- comparison
}
# we put "by" into the formula. user either provided "by", or we put the
# group variable from the formula into "by" (see code above), hence,
# "my_args$by" definitely contains the requested groups
formula_group <- my_args$by
# compose formula
f <- paste(formula_lhs, "~", paste(formula_rhs, collapse = "+"))
# for contrasts of slopes, we don *not* want the group-variable in the formula
comparison_slopes <- stats::as.formula(f)
# for contrasts of categorical, we add the group variable and update `by`
if (!is.null(formula_group)) {
f <- paste(f, "|", paste(formula_group, collapse = "+"))
my_args$by <- formula_group
}
comparison <- stats::as.formula(f)
}
# remove "by" from "contrast"
my_args$contrast <- setdiff(my_args$contrast, my_args$by)
# for `estimate = "average"`, we cannot create a data grid, thus we need to
# filter manually. However, for all other `estimate` options, we can simply
# use the data grid for filtering
if (!identical(estimate, "average") && !is.null(original_by)) {
my_args$by <- original_by
by_filter <- NULL
} else if (!is.null(by_token)) {
# add back token to `by`
for (i in names(by_token)) {
my_args$by[my_args$by == i] <- paste(i, by_token[[i]], sep = "=")
}
}
c(
# the "my_args" argument, containing "by" and "contrast"
my_args,
list(
# the modifed comparison, as formula, which also includes "by" as group
comparison = comparison,
# the modifed comparison, as formula, excluding "by" as group
comparison_slopes = comparison_slopes,
# the filter-value, in case `by` or contrast indicated any filtering
by_filter = insight::compact_list(by_filter),
contrast_filter = insight::compact_list(contrast_filter)
)
)
}
.valid_hypothesis_strings <- function() {
c(
"pairwise", "reference", "sequential", "meandev", "meanotherdev",
"revpairwise", "revreference", "revsequential", "poly", "helmert",
"trt_vs_ctrl"
)
}
# check for custom hypothesis --------------------------------------
.is_custom_comparison <- function(comparison) {
!is.null(comparison) &&
length(comparison) == 1 &&
is.character(comparison) &&
grepl("=", comparison, fixed = TRUE) &&
grepl("\\bb\\d+\\b", comparison)
}
.extract_custom_comparison <- function(comparison) {
# find all "b" strings
matches <- gregexpr("\\bb\\d+\\b", comparison)[[1]]
match_lengths <- attr(matches, "match.length")
# extract all "b" strings, so we have a vector of all "b" used in the comparison
unlist(lapply(seq_along(matches), function(i) {
substr(comparison, matches[i], matches[i] + match_lengths[i] - 1)
}), use.names = FALSE)
}
.reorder_custom_hypothesis <- function(comparison, datagrid, focal) {
# create a data frame with the same sorting as the data grid, but only
# for the focal terms terms
datagrid <- data.frame(expand.grid(lapply(datagrid[focal], unique)))
# this is the row-order we use in modelbased
datagrid$.rowid <- 1:nrow(datagrid)
# this is the row-order in marginaleffects
datagrid <- datawizard::data_arrange(
datagrid,
colnames(datagrid)[1:(length(datagrid) - 1)]
)
# we need to extract all b's and the former parameter numbers
b <- .extract_custom_comparison(comparison)
old_b_numbers <- as.numeric(gsub("b", "", b, fixed = TRUE))
# these are the new numbers of the b-values
new_b_numbers <- match(old_b_numbers, datagrid$.rowid)
new_b <- paste0("b", new_b_numbers)
# we need to replace all occurences of "b" in comparison with "new_b".
# however, to avoid overwriting already replaced values with "gsub()", we
# first replace with a non-existing pattern "new_b_letters", which we will
# replace with "new_b" in a second step
new_b_letters <- paste0("b", letters[new_b_numbers])
# first, numbers to letters
for (i in seq_along(b)) {
comparison <- gsub(b[i], new_b_letters[i], comparison, fixed = TRUE)
}
# next, letters to new numbers
for (i in seq_along(b)) {
comparison <- gsub(new_b_letters[i], new_b[i], comparison, fixed = TRUE)
}
comparison
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.