Nothing
#' Group-specific parameters of mixed models random effects
#'
#' Extract random parameters of each individual group in the context of mixed
#' models, commonly referred to as BLUPs (Best Linear Unbiased Predictors).
#' Can be reshaped to be of the same dimensions as the original data,
#' which can be useful to add the random effects to the original data.
#'
#' @param model A mixed model with random effects.
#' @param type String, describing the type of estimates that should be returned.
#' Can be `"random"`, `"total"`, or `"marginal"` (experimental).
#' - If `"random"` (default), the coefficients correspond to the conditional
#' estimates of the random effects (as they are returned by `lme4::ranef()`).
#' They typically correspond to the deviation of each individual group from
#' their fixed effect (assuming the random effect is also included as a fixed
#' effect). As such, a coefficient close to 0 means that the participants'
#' effect is the same as the population-level effect (in other words, it is
#' "in the norm").
#' - If `"total"`, it will return the sum of the random effect and its
#' corresponding fixed effects, which internally relies on the `coef()` method
#' (see `?coef.merMod`). Note that `type = "total"` yet does not return
#' uncertainty indices (such as SE and CI) for models from *lme4* or
#' *glmmTMB*, as the necessary information to compute them is not yet
#' available. However, for Bayesian models, it is possible to compute them.
#' - If `"marginal"` (experimental), it returns marginal group-levels
#' estimates. The random intercepts are computed using marginal means (see
#' [estimate_means()]), and the random slopes using marginal effects (see
#' [estimate_slopes()]). This method does not directly extract the parameters
#' estimated by the model, but recomputes them using model predictions. While
#' this is more computationally intensive, one of the benefits include
#' interpretability: the random intercepts correspond to the "mean" value of
#' the outcome for each group, and the random slopes correspond to the direct
#' average "effect" of the predictor for each random group. Note that in this
#' case, the group-level estimates are not technically "intercepts" or model
#' parameters, but marginal average levels and effects.
#' @param dispersion,test,diagnostic Arguments passed to
#' [parameters::model_parameters()] for Bayesian models. By default, it won't
#' return significance or diagnostic indices (as it is not typically very
#' useful).
#' @param ... Other arguments passed to [parameters::model_parameters()].
#'
#' @details
#' Unlike raw group means, BLUPs apply shrinkage: they are a compromise between
#' the group estimate and the population estimate. This improves generalizability
#' and prevents overfitting.
#'
#'
#' @examplesIf all(insight::check_if_installed(c("see", "lme4"), quietly = TRUE)) && packageVersion("insight") > "1.1.0" && packageVersion("parameters") > "0.24.1"
#' # lme4 model
#' data(mtcars)
#' model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars)
#' random <- estimate_grouplevel(model)
#'
#' # Show group-specific effects
#' random
#'
#' # Visualize random effects
#' plot(random)
#'
#' # Reshape to wide data...
#' reshaped <- reshape_grouplevel(random, group = "carb", indices = c("Coefficient", "SE"))
#'
#' # ...and can be easily combined with the original data
#' alldata <- merge(mtcars, reshaped)
#'
#' # overall coefficients
#' estimate_grouplevel(model, type = "total")
#' @export
estimate_grouplevel <- function(model, ...) {
UseMethod("estimate_grouplevel")
}
#' @rdname estimate_grouplevel
#' @export
estimate_grouplevel.default <- function(model, type = "random", ...) {
# validate argument
type <- insight::validate_argument(type, c("random", "total", "marginal"))
# sanity check
if (is.null(insight::find_random(model))) {
insight::format_error("Model must be a mixed model with random effects.")
}
# Extract params
params <- parameters::model_parameters(
model,
effects = ifelse(type == "random", "grouplevel", "total"),
...
)
if (type %in% c("random", "total")) {
# get cleaned parameter names with additional information
clean_parameters <- attributes(params)$clean_parameters
# Re-add info
if (!"Group" %in% names(params) && !is.null(clean_parameters)) {
params$Group <- clean_parameters$Group
}
if (!"Level" %in% names(params) && !is.null(clean_parameters)) {
params$Level <- clean_parameters$Cleaned_Parameter
}
# TODO: improve / add new printing that groups by group/level?
random <- as.data.frame(params)
# Remove columns with only NaNs (as these are probably those of fixed effects)
random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL
} else if (type == "marginal") {
# EXPERIMENTAL
random <- .grouplevel_marginal(model)
}
# Clean and Reorganize columns
random <- .clean_grouplevel(random)
# Sort
random <- .sort_random_effects(random)
# add data and attributes
.add_grouplevel_attributes(model, type, params, random)
}
#' @rdname estimate_grouplevel
#' @export
estimate_grouplevel.brmsfit <- function(
model,
type = "random",
dispersion = TRUE,
test = NULL,
diagnostic = NULL,
...
) {
# validate argument
type <- insight::validate_argument(type, c("random", "total"))
# sanity check
if (is.null(insight::find_random(model))) {
insight::format_error("Model must be a mixed model with random effects.")
}
# Extract params
params <- parameters::model_parameters(
model,
effects = ifelse(type == "random", "grouplevel", "total"),
dispersion = dispersion,
test = test,
diagnostic = diagnostic,
...
)
# get cleaned parameter names with additional information
clean_parameters <- attributes(params)$clean_parameters
# match parameters
if (!is.null(clean_parameters)) {
# fmt: skip
clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint
}
# Re-add info
if (!"Group" %in% names(params) && !is.null(clean_parameters)) {
params$Group <- clean_parameters$Group
}
if (!"Level" %in% names(params) && !is.null(clean_parameters)) {
params$Level <- clean_parameters$Cleaned_Parameter
}
# TODO: improve / add new printing that groups by group/level?
random <- as.data.frame(params)
# Remove columns with only NaNs (as these are probably those of fixed effects)
random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL
# Clean and Reorganize columns
random <- .clean_grouplevel(random)
# Clean-up brms output
if (type == "random") {
# Save brms name (just in case)
random$Name <- random$Parameter
# Filter out non-random effects
random <- random[startsWith(random$Parameter, "r_"), ]
# Remove Group from Level
random$Level <- sapply(seq_len(nrow(random)), function(i) {
gsub(paste0("^", random$Group[i], "\\."), "", random$Level[i])
})
# Find the group name (what follows "r_" and before the first "[" or "__")
random$Group <- gsub("^r_(.*?)(\\[.*|__.*)", "\\1", random$Name)
# Keep Parameter what's in between [ and ]
random$Parameter <- gsub("^r_.*?\\[(.*?)\\].*", "\\1", random$Name)
# Remove Level from it
random$Parameter <- sapply(seq_len(nrow(random)), function(i) {
gsub(paste0("^", random$Level[i], "\\,"), "", random$Parameter[i])
})
# remove temporary name column
random$Name <- NULL
}
# Sort
random <- .sort_random_effects(random)
# add data and attributes
.add_grouplevel_attributes(model, type, params, random)
}
#' @export
estimate_grouplevel.stanreg <- function(
model,
type = "random",
dispersion = TRUE,
test = NULL,
diagnostic = NULL,
...
) {
# validate argument
type <- insight::validate_argument(type, c("random", "total"))
# sanity check
if (is.null(insight::find_random(model))) {
insight::format_error("Model must be a mixed model with random effects.")
}
# Extract params
params <- parameters::model_parameters(
model,
effects = ifelse(type == "random", "grouplevel", "total"),
dispersion = dispersion,
test = test,
diagnostic = diagnostic,
drop = "^Sigma\\[",
...
)
# get cleaned parameter names with additional information
clean_parameters <- attributes(params)$clean_parameters
## TODO: for now, rstanarm has no random effects on the sigma parameter
## however, if this changes, we need another solution here (and in
## insight::clean_parameter())
if (!is.null(clean_parameters)) {
# fix for rstanarm, which contains a sigma columns
# fmt: skip
clean_parameters <- clean_parameters[clean_parameters$Component != "sigma" & !startsWith(clean_parameters$Parameter, "Sigma["), , drop = FALSE] # nolint
# fmt: skip
clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint
params$Parameter <- insight::trim_ws(sub(":.*", "", clean_parameters$Group))
params$Group <- insight::trim_ws(sub("^[^:]*:", "", clean_parameters$Group))
params$Level <- insight::trim_ws(sub("^[^:]*:", "", clean_parameters$Cleaned_Parameter))
}
# TODO: improve / add new printing that groups by group/level?
random <- as.data.frame(params)
# Remove columns with only NaNs (as these are probably those of fixed effects)
random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL
# Clean and Reorganize columns
random <- .clean_grouplevel(random)
# Sort
random <- .sort_random_effects(random)
# add data and attributes
.add_grouplevel_attributes(model, type, params, random)
}
# helpers ----------------------------------------------------------
.add_grouplevel_attributes <- function(model, type, params, random) {
model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
model_random <- insight::find_random(model, split_nested = TRUE, flatten = TRUE)
# Assign new class
attr(random, "type") <- type
attr(random, "model") <- model
attr(random, "parameters") <- params
attr(random, "coef_name") <- intersect(.valid_coefficient_names(model), colnames(random))
attr(random, "data") <- .safe(model_data[model_random])
class(random) <- c("estimate_grouplevel", class(random))
random
}
.clean_grouplevel <- function(random) {
row.names(random) <- NULL
random$Effects <- NULL
if ("Component" %in% names(random) &&
insight::has_single_value(random$Component, remove_na = TRUE) &&
unique(random$Component) == "conditional"
) {
random$Component <- NULL
}
# Reorganize columns
random <- datawizard::data_relocate(
random,
c("Component", "Group", "Level", "Parameter"),
verbose = FALSE
)
random
}
.sort_random_effects <- function(random) {
if ("Component" %in% names(random)) {
random <- random[
order(
random$Component,
random$Group,
datawizard::coerce_to_numeric(random$Level),
random$Parameter
),
]
} else {
random <- random[
order(random$Group, datawizard::coerce_to_numeric(random$Level), random$Parameter),
]
}
random
}
# Experimental ------------------------------------------------------------
# Quick tests:
# model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars)
# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb), data = mtcars)
# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb) + (1 | gear), data = mtcars)
# m1 <- estimate_grouplevel(model, type = "random")
# m2 <- estimate_grouplevel(model, type = "total")
# m3 <- estimate_grouplevel(model, type = "marginal")
# cor.test(m1$Coefficient, m3$Coefficient) # r = 1
# cor.test(m2$Coefficient, m3$Coefficient) # r = 1
.grouplevel_marginal <- function(model) {
insight::check_if_installed("lme4")
out <- list()
# TODO: currently only takes random effects for main component into account
# we could also look for random effects in zero-inflated, dispersion, etc.
# Analyze random effect structure
randomgroups <- insight::find_random(model, split_nested = TRUE)$random
# extract random slopes and their related grouping variable
randomslopes <- list()
p <- lme4::ranef(model)
for (g in names(p)) {
s <- names(p[[g]])[names(p[[g]]) != "(Intercept)"]
# Only pick non-factor random slopes for now
# TODO: correctly deal with the case when the random slope is a factor
pred <- insight::find_predictors(model, effects = "all", flatten = TRUE)
s <- s[s %in% pred]
if (length(s) > 0) {
randomslopes[[g]] <- s
}
}
# Extract coefs
for (g in randomgroups) {
intercepts <- estimate_means(model, by = g, include_random = TRUE, estimate = "average")
out[[g]] <- data.frame(
Group = g,
Level = intercepts[[g]],
Parameter = "(Intercept)",
Coefficient = intercepts$Mean,
CI_low = intercepts$CI_low,
CI_high = intercepts$CI_high,
stringsAsFactors = FALSE
)
if (g %in% names(randomslopes)) {
for (s in randomslopes[[g]]) {
slopes <- estimate_slopes(model, by = g, trend = s, include_random = TRUE)
out[[paste(g, s, sep = "_")]] <- data.frame(
Group = g,
Level = slopes[[g]],
Parameter = s,
Coefficient = slopes$Slope,
CI_low = slopes$CI_low,
CI_high = slopes$CI_high,
stringsAsFactors = FALSE
)
}
}
}
params <- do.call(rbind, out)
row.names(params) <- NULL
# TODO: robustify this function and integrate into the above
params
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.