R/estimate_grouplevel.R

Defines functions .grouplevel_marginal .sort_random_effects .clean_grouplevel .add_grouplevel_attributes estimate_grouplevel.stanreg estimate_grouplevel.brmsfit estimate_grouplevel.default estimate_grouplevel

Documented in estimate_grouplevel estimate_grouplevel.brmsfit estimate_grouplevel.default

#' Group-specific parameters of mixed models random effects
#'
#' Extract random parameters of each individual group in the context of mixed
#' models, commonly referred to as BLUPs (Best Linear Unbiased Predictors).
#' Can be reshaped to be of the same dimensions as the original data,
#' which can be useful to add the random effects to the original data.
#'
#' @param model A mixed model with random effects.
#' @param type String, describing the type of estimates that should be returned.
#'   Can be `"random"`, `"total"`, or `"marginal"` (experimental).
#'   - If `"random"` (default), the coefficients correspond to the conditional
#'   estimates of  the random effects (as they are returned by `lme4::ranef()`).
#'   They typically correspond to the deviation of each individual group from
#'   their fixed effect (assuming the random effect is also included as a fixed
#'   effect). As such, a coefficient close to 0 means that the participants'
#'   effect is the same as the population-level effect (in other words, it is
#'   "in the norm").
#'   - If `"total"`, it will return the sum of the random effect and its
#'   corresponding fixed effects, which internally relies on the `coef()` method
#'   (see `?coef.merMod`). Note that `type = "total"` yet does not return
#'   uncertainty indices (such as SE and CI) for models from *lme4* or
#'   *glmmTMB*, as the necessary information to compute them is not yet
#'   available. However, for Bayesian models, it is possible to compute them.
#'   - If `"marginal"` (experimental), it returns marginal group-levels
#'   estimates. The random intercepts are computed using marginal means (see
#'   [estimate_means()]), and the random slopes using marginal effects (see
#'   [estimate_slopes()]). This method does not directly extract the parameters
#'   estimated by the model, but recomputes them using model predictions. While
#'   this is more computationally intensive, one of the benefits include
#'   interpretability: the random intercepts correspond to the "mean" value of
#'   the outcome for each group, and the random slopes correspond to the direct
#'   average "effect" of the predictor for each random group. Note that in this
#'   case, the group-level estimates are not technically "intercepts" or model
#'   parameters, but marginal average levels and effects.
#' @param dispersion,test,diagnostic Arguments passed to
#'    [parameters::model_parameters()] for Bayesian models. By default, it won't
#'    return significance or diagnostic indices (as it is not typically very
#'    useful).
#' @param ... Other arguments passed to [parameters::model_parameters()].
#'
#' @details
#' Unlike raw group means, BLUPs apply shrinkage: they are a compromise between
#' the group estimate and the population estimate. This improves generalizability
#' and prevents overfitting.
#'
#'
#' @examplesIf all(insight::check_if_installed(c("see", "lme4"), quietly = TRUE)) && packageVersion("insight") > "1.1.0" && packageVersion("parameters") > "0.24.1"
#' # lme4 model
#' data(mtcars)
#' model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars)
#' random <- estimate_grouplevel(model)
#'
#' # Show group-specific effects
#' random
#'
#' # Visualize random effects
#' plot(random)
#'
#' # Reshape to wide data...
#' reshaped <- reshape_grouplevel(random, group = "carb", indices = c("Coefficient", "SE"))
#'
#' # ...and can be easily combined with the original data
#' alldata <- merge(mtcars, reshaped)
#'
#' # overall coefficients
#' estimate_grouplevel(model, type = "total")
#' @export
estimate_grouplevel <- function(model, ...) {
  UseMethod("estimate_grouplevel")
}


#' @rdname estimate_grouplevel
#' @export
estimate_grouplevel.default <- function(model, type = "random", ...) {
  # validate argument
  type <- insight::validate_argument(type, c("random", "total", "marginal"))

  # sanity check
  if (is.null(insight::find_random(model))) {
    insight::format_error("Model must be a mixed model with random effects.")
  }

  # Extract params
  params <- parameters::model_parameters(
    model,
    effects = ifelse(type == "random", "grouplevel", "total"),
    ...
  )

  if (type %in% c("random", "total")) {
    # get cleaned parameter names with additional information
    clean_parameters <- attributes(params)$clean_parameters

    # Re-add info
    if (!"Group" %in% names(params) && !is.null(clean_parameters)) {
      params$Group <- clean_parameters$Group
    }
    if (!"Level" %in% names(params) && !is.null(clean_parameters)) {
      params$Level <- clean_parameters$Cleaned_Parameter
    }

    # TODO: improve / add new printing that groups by group/level?
    random <- as.data.frame(params)

    # Remove columns with only NaNs (as these are probably those of fixed effects)
    random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL
  } else if (type == "marginal") {
    # EXPERIMENTAL
    random <- .grouplevel_marginal(model)
  }

  # Clean and Reorganize columns
  random <- .clean_grouplevel(random)

  # Sort
  random <- .sort_random_effects(random)

  # add data and attributes
  .add_grouplevel_attributes(model, type, params, random)
}


#' @rdname estimate_grouplevel
#' @export
estimate_grouplevel.brmsfit <- function(
  model,
  type = "random",
  dispersion = TRUE,
  test = NULL,
  diagnostic = NULL,
  ...
) {
  # validate argument
  type <- insight::validate_argument(type, c("random", "total"))

  # sanity check
  if (is.null(insight::find_random(model))) {
    insight::format_error("Model must be a mixed model with random effects.")
  }

  # Extract params
  params <- parameters::model_parameters(
    model,
    effects = ifelse(type == "random", "grouplevel", "total"),
    dispersion = dispersion,
    test = test,
    diagnostic = diagnostic,
    ...
  )

  # get cleaned parameter names with additional information
  clean_parameters <- attributes(params)$clean_parameters

  # match parameters
  if (!is.null(clean_parameters)) {
    # fmt: skip
    clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint
  }

  # Re-add info
  if (!"Group" %in% names(params) && !is.null(clean_parameters)) {
    params$Group <- clean_parameters$Group
  }
  if (!"Level" %in% names(params) && !is.null(clean_parameters)) {
    params$Level <- clean_parameters$Cleaned_Parameter
  }

  # TODO: improve / add new printing that groups by group/level?
  random <- as.data.frame(params)

  # Remove columns with only NaNs (as these are probably those of fixed effects)
  random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL

  # Clean and Reorganize columns
  random <- .clean_grouplevel(random)

  # Clean-up brms output
  if (type == "random") {
    # Save brms name (just in case)
    random$Name <- random$Parameter
    # Filter out non-random effects
    random <- random[startsWith(random$Parameter, "r_"), ]
    # Remove Group from Level
    random$Level <- sapply(seq_len(nrow(random)), function(i) {
      gsub(paste0("^", random$Group[i], "\\."), "", random$Level[i])
    })
    # Find the group name (what follows "r_" and before the first "[" or "__")
    random$Group <- gsub("^r_(.*?)(\\[.*|__.*)", "\\1", random$Name)
    # Keep Parameter what's in between [ and ]
    random$Parameter <- gsub("^r_.*?\\[(.*?)\\].*", "\\1", random$Name)
    # Remove Level from it
    random$Parameter <- sapply(seq_len(nrow(random)), function(i) {
      gsub(paste0("^", random$Level[i], "\\,"), "", random$Parameter[i])
    })
    # remove temporary name column
    random$Name <- NULL
  }

  # Sort
  random <- .sort_random_effects(random)

  # add data and attributes
  .add_grouplevel_attributes(model, type, params, random)
}


#' @export
estimate_grouplevel.stanreg <- function(
  model,
  type = "random",
  dispersion = TRUE,
  test = NULL,
  diagnostic = NULL,
  ...
) {
  # validate argument
  type <- insight::validate_argument(type, c("random", "total"))

  # sanity check
  if (is.null(insight::find_random(model))) {
    insight::format_error("Model must be a mixed model with random effects.")
  }

  # Extract params
  params <- parameters::model_parameters(
    model,
    effects = ifelse(type == "random", "grouplevel", "total"),
    dispersion = dispersion,
    test = test,
    diagnostic = diagnostic,
    drop = "^Sigma\\[",
    ...
  )

  # get cleaned parameter names with additional information
  clean_parameters <- attributes(params)$clean_parameters

  ## TODO: for now, rstanarm has no random effects on the sigma parameter
  ## however, if this changes, we need another solution here (and in
  ## insight::clean_parameter())
  if (!is.null(clean_parameters)) {
    # fix for rstanarm, which contains a sigma columns
    # fmt: skip
    clean_parameters <- clean_parameters[clean_parameters$Component != "sigma" & !startsWith(clean_parameters$Parameter, "Sigma["), , drop = FALSE] # nolint
    # fmt: skip
    clean_parameters <- clean_parameters[clean_parameters$Parameter %in% params$Parameter, , drop = FALSE] # nolint

    params$Parameter <- insight::trim_ws(sub(":.*", "", clean_parameters$Group))
    params$Group <- insight::trim_ws(sub("^[^:]*:", "", clean_parameters$Group))
    params$Level <- insight::trim_ws(sub("^[^:]*:", "", clean_parameters$Cleaned_Parameter))
  }

  # TODO: improve / add new printing that groups by group/level?
  random <- as.data.frame(params)

  # Remove columns with only NaNs (as these are probably those of fixed effects)
  random[vapply(random, function(x) all(is.na(x)), TRUE)] <- NULL

  # Clean and Reorganize columns
  random <- .clean_grouplevel(random)

  # Sort
  random <- .sort_random_effects(random)

  # add data and attributes
  .add_grouplevel_attributes(model, type, params, random)
}


# helpers ----------------------------------------------------------

.add_grouplevel_attributes <- function(model, type, params, random) {
  model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
  model_random <- insight::find_random(model, split_nested = TRUE, flatten = TRUE)

  # Assign new class
  attr(random, "type") <- type
  attr(random, "model") <- model
  attr(random, "parameters") <- params
  attr(random, "coef_name") <- intersect(.valid_coefficient_names(model), colnames(random))
  attr(random, "data") <- .safe(model_data[model_random])

  class(random) <- c("estimate_grouplevel", class(random))
  random
}


.clean_grouplevel <- function(random) {
  row.names(random) <- NULL
  random$Effects <- NULL
  if ("Component" %in% names(random) &&
    insight::has_single_value(random$Component, remove_na = TRUE) &&
    unique(random$Component) == "conditional"
  ) {
    random$Component <- NULL
  }

  # Reorganize columns
  random <- datawizard::data_relocate(
    random,
    c("Component", "Group", "Level", "Parameter"),
    verbose = FALSE
  )
  random
}


.sort_random_effects <- function(random) {
  if ("Component" %in% names(random)) {
    random <- random[
      order(
        random$Component,
        random$Group,
        datawizard::coerce_to_numeric(random$Level),
        random$Parameter
      ),
    ]
  } else {
    random <- random[
      order(random$Group, datawizard::coerce_to_numeric(random$Level), random$Parameter),
    ]
  }
  random
}


# Experimental ------------------------------------------------------------

# Quick tests:
# model <- lme4::lmer(mpg ~ hp + (1 | carb), data = mtcars)
# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb), data = mtcars)
# model <- lme4::lmer(mpg ~ hp + (1 + hp | carb) + (1 | gear), data = mtcars)

# m1 <- estimate_grouplevel(model, type = "random")
# m2 <- estimate_grouplevel(model, type = "total")
# m3 <- estimate_grouplevel(model, type = "marginal")
# cor.test(m1$Coefficient, m3$Coefficient)  # r = 1
# cor.test(m2$Coefficient, m3$Coefficient)  # r = 1
.grouplevel_marginal <- function(model) {
  insight::check_if_installed("lme4")

  out <- list()

  # TODO: currently only takes random effects for main component into account
  # we could also look for random effects in zero-inflated, dispersion, etc.

  # Analyze random effect structure
  randomgroups <- insight::find_random(model, split_nested = TRUE)$random
  # extract random slopes and their related grouping variable
  randomslopes <- list()
  p <- lme4::ranef(model)
  for (g in names(p)) {
    s <- names(p[[g]])[names(p[[g]]) != "(Intercept)"]

    # Only pick non-factor random slopes for now
    # TODO: correctly deal with the case when the random slope is a factor
    pred <- insight::find_predictors(model, effects = "all", flatten = TRUE)
    s <- s[s %in% pred]

    if (length(s) > 0) {
      randomslopes[[g]] <- s
    }
  }

  # Extract coefs
  for (g in randomgroups) {
    intercepts <- estimate_means(model, by = g, include_random = TRUE, estimate = "average")
    out[[g]] <- data.frame(
      Group = g,
      Level = intercepts[[g]],
      Parameter = "(Intercept)",
      Coefficient = intercepts$Mean,
      CI_low = intercepts$CI_low,
      CI_high = intercepts$CI_high,
      stringsAsFactors = FALSE
    )

    if (g %in% names(randomslopes)) {
      for (s in randomslopes[[g]]) {
        slopes <- estimate_slopes(model, by = g, trend = s, include_random = TRUE)
        out[[paste(g, s, sep = "_")]] <- data.frame(
          Group = g,
          Level = slopes[[g]],
          Parameter = s,
          Coefficient = slopes$Slope,
          CI_low = slopes$CI_low,
          CI_high = slopes$CI_high,
          stringsAsFactors = FALSE
        )
      }
    }
  }
  params <- do.call(rbind, out)
  row.names(params) <- NULL

  # TODO: robustify this function and integrate into the above

  params
}

Try the modelbased package in your browser

Any scripts or data that you put into this service are public.

modelbased documentation built on Dec. 8, 2025, 5:13 p.m.