#' @title Compare performance of different models
#' @name compare_performance
#'
#' @description `compare_performance()` computes indices of model
#' performance for different models at once and hence allows comparison of
#' indices across models.
#'
#' @param ... Multiple model objects (also of different classes).
#' @param metrics Can be `"all"`, `"common"` or a character vector of
#' metrics to be computed. See the related
#' [documentation][model_performance] of the object's class for details.
#' @param rank Logical, if `TRUE`, models are ranked according to 'best'
#' overall model performance. See 'Details'.
#' @inheritParams performance_aic
#'
#' @return A data frame with one row per model and one column per "index" (see
#' `metrics`).
#'
#' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/performance.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
#'
#' @details \subsection{Model Weights}{
#' When information criteria (IC) are requested in `metrics` (i.e., any of `"all"`,
#' `"common"`, `"AIC"`, `"AICc"`, `"BIC"`, `"WAIC"`, or `"LOOIC"`), model
#' weights based on these criteria are also computed. For all IC except LOOIC,
#' weights are computed as `w = exp(-0.5 * delta_ic) / sum(exp(-0.5 * delta_ic))`,
#' where `delta_ic` is the difference between the model's IC value and the
#' smallest IC value in the model set (Burnham and Anderson, 2002).
#' For LOOIC, weights are computed as "stacking weights" using
#' [loo::stacking_weights()].
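#'
#' For illustration, here is a minimal sketch of the weight calculation for,
#' say, AIC (with made-up AIC values):
#'
#' ```r
#' aic <- c(210.3, 212.1, 215.8) # hypothetical AIC values of three models
#' delta_aic <- aic - min(aic)
#' exp(-0.5 * delta_aic) / sum(exp(-0.5 * delta_aic)) # Akaike weights
#' ```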
#' }
#'
#' \subsection{Ranking Models}{
#' When `rank = TRUE`, a new column `Performance_Score` is returned.
#' This score ranges from 0\% to 100\%, with higher values indicating better
#' model performance. Note that the scores do not necessarily sum up to 100\%.
#' Rather, the calculation is based on normalizing all indices (i.e. rescaling
#' them to a range from 0 to 1) and taking the mean value of all indices for
#' each model. This is a rather quick heuristic, but it might be helpful as an
#' exploratory index.
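#'
#' For illustration, a minimal sketch of this heuristic (assuming a
#' hypothetical data frame `indices` with one row per model and only complete,
#' numeric index columns for which higher values mean better fit):
#'
#' ```r
#' normalized <- lapply(indices, function(i) (i - min(i)) / diff(range(i)))
#' rowMeans(as.data.frame(normalized)) # performance score per model
#' ```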
#' \cr \cr
#' In particular when models are of different types (e.g. mixed models,
#' classical linear models, logistic regression, ...), not all indices will be
#' computed for each model. In cases where an index cannot be calculated for a
#' specific model type, this model gets an `NA` value. All indices that
#' have any `NA` values are excluded from the calculation of the performance score.
#' \cr \cr
#' There is a `plot()`-method for `compare_performance()`,
#' which creates a "spiderweb" plot, where the different indices are
#' normalized and larger values indicate better model performance.
#' Hence, points closer to the center indicate worse fit indices
#' (see [online-documentation](https://easystats.github.io/see/articles/performance.html)
#' for more details).
#' }
#'
#' \subsection{REML versus ML estimator}{
#' By default, `estimator = "ML"`, which means that values from information
#' criteria (AIC, AICc, BIC) for specific model classes (like models from *lme4*)
#' are based on the ML-estimator, while the default behaviour of `AIC()` for
#' such classes is setting `REML = TRUE`. This default is intentional, because
#' comparing information criteria based on REML fits is usually not valid
#' (it might be useful, though, if all models share the same fixed effects -
#' however, this is usually not the case for nested models, which is a
#' prerequisite for the LRT). Set `estimator = "REML"` explicitly to return the
#' same (AIC/...) values as the defaults of `AIC.merMod()`.
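#'
#' For example, with two hypothetical mixed models `m1` and `m2`, the
#' following call would return the same AIC values as `AIC.merMod()`:
#'
#' ```r
#' compare_performance(m1, m2, estimator = "REML")
#' ```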
#' }
#'
#' @references
#' Burnham, K. P., and Anderson, D. R. (2002).
#' _Model selection and multimodel inference: A practical information-theoretic approach_ (2nd ed.).
#' Springer-Verlag. \doi{10.1007/b97636}
#'
#' @examplesIf require("lme4")
#' data(iris)
#' lm1 <- lm(Sepal.Length ~ Species, data = iris)
#' lm2 <- lm(Sepal.Length ~ Species + Petal.Length, data = iris)
#' lm3 <- lm(Sepal.Length ~ Species * Petal.Length, data = iris)
#' compare_performance(lm1, lm2, lm3)
#' compare_performance(lm1, lm2, lm3, rank = TRUE)
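#'
#' # only compute the "common" subset of metrics
#' compare_performance(lm1, lm2, lm3, metrics = "common")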
#'
#' m1 <- lm(mpg ~ wt + cyl, data = mtcars)
#' m2 <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
#' m3 <- lme4::lmer(Petal.Length ~ Sepal.Length + (1 | Species), data = iris)
#' compare_performance(m1, m2, m3)
#' @inheritParams model_performance.lm
#' @export
compare_performance <- function(..., metrics = "all", rank = FALSE, estimator = "ML", verbose = TRUE) {
# process input
model_objects <- insight::ellipsis_info(..., only_models = TRUE)
# ensure proper object names
model_objects <- .check_objectnames(model_objects, sapply(match.call(expand.dots = FALSE)[["..."]], as.character))
# drop unsupported models
supported_models <- sapply(model_objects, function(i) insight::is_model_supported(i) | inherits(i, "lavaan"))
object_names <- names(model_objects)
if (!all(supported_models)) {
insight::format_alert(
"Following objects are not supported:",
datawizard::text_concatenate(object_names[!supported_models], enclose = "`")
)
model_objects <- model_objects[supported_models]
object_names <- object_names[supported_models]
}
# iterate over all models, i.e. model-performance for each model
m <- mapply(function(.x, .y) {
dat <- model_performance(.x, metrics = metrics, estimator = estimator, verbose = FALSE)
model_name <- gsub("\"", "", insight::safe_deparse(.y), fixed = TRUE)
perf_df <- data.frame(Name = model_name, Model = class(.x)[1], dat, stringsAsFactors = FALSE)
attributes(perf_df) <- c(attributes(perf_df), attributes(dat)[!names(attributes(dat)) %in% c("names", "row.names", "class")])
perf_df
}, model_objects, object_names, SIMPLIFY = FALSE)
attri <- lapply(m, function(x) {
attri <- attributes(x)
attri[!names(attri) %in% c("names", "row.names", "class")]
})
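# merge the per-model performance data frames into one data frame;
# indices not available for a model become NA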
dfs <- Reduce(function(x, y) merge(x, y, all = TRUE, sort = FALSE), m)
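# add Akaike-type weights for the information criteria (see 'Model Weights' in the documentation)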
if (any(c("AIC", "AICc", "BIC", "WAIC") %in% names(dfs))) {
dfs$AIC_wt <- .ic_weight(dfs[["AIC"]])
dfs$AICc_wt <- .ic_weight(dfs[["AICc"]])
dfs$BIC_wt <- .ic_weight(dfs[["BIC"]])
dfs$WAIC_wt <- .ic_weight(dfs[["WAIC"]])
}
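# LOOIC weights are "stacking weights", computed from the pointwise ELPD values via loo::stacking_weights()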
if ("LOOIC" %in% names(dfs)) {
lpd_point <- do.call(cbind, lapply(attri, function(x) x$loo$pointwise[, "elpd_loo"]))
dfs$LOOIC_wt <- as.numeric(loo::stacking_weights(lpd_point))
}
# check if all models were fit from same data
if (!isTRUE(attributes(model_objects)$same_response) && verbose) {
insight::format_alert(
"When comparing models, please note that probably not all models were fit from same data."
)
}
# create "ranking" of models
if (isTRUE(rank)) {
dfs <- .rank_performance_indices(dfs, verbose)
}
# Reorder columns: move each weight column (and the BF column) so that it
# appears directly after its corresponding information criterion
column_pairs <- list(
c("BIC", "BF"),
c("AIC", "AIC_wt"),
c("BIC", "BIC_wt"),
c("AICc", "AICc_wt"),
c("WAIC", "WAIC_wt"),
c("LOOIC", "LOOIC_wt")
)
for (pair in column_pairs) {
if (all(pair %in% names(dfs))) {
# match column names exactly, so that e.g. "AIC_wt" does not also pick up "WAIC_wt"
idx1 <- which(names(dfs) == pair[1])
idx2 <- which(names(dfs) == pair[2])
last_part <- (idx1 + 1):ncol(dfs)
dfs <- dfs[, c(1:idx1, idx2, last_part[last_part != idx2])]
}
}
# for REML fits, warn user
if (isTRUE(verbose) &&
# only warn for REML fit
identical(estimator, "REML") &&
# only for IC comparison
any(grepl("(AIC|BIC)", names(dfs))) &&
# only when mixed models are involved, others probably don't have problems with REML fit
any(sapply(model_objects, insight::is_mixed_model)) &&
# only if not all models have same fixed effects (else, REML is ok)
!isTRUE(attributes(model_objects)$same_fixef)) {
insight::format_alert(
"Information criteria (like AIC) are based on REML fits (i.e. `estimator=\"REML\"`).",
"Please note that information criteria are probably not directly comparable and that it is not recommended comparing models with different fixed effects in such cases."
)
}
# dfs[order(sapply(object_names, as.character), dfs$Model), ]
class(dfs) <- c("compare_performance", "see_compare_performance", class(dfs))
dfs
}
# methods ----------------------------
#' @export
print.compare_performance <- function(x, digits = 3, layout = "horizontal", ...) {
layout <- match.arg(layout, choices = c("horizontal", "vertical"))
table_caption <- c("# Comparison of Model Performance Indices", "blue")
formatted_table <- format(x = x, digits = digits, format = "text", ...)
if ("Performance_Score" %in% colnames(formatted_table)) {
footer <- c(sprintf("\nModel `%s` (of class `%s`) performed best with an overall performance score of %s.", formatted_table$Model[1], formatted_table$Type[1], formatted_table$Performance_Score[1]), "yellow")
} else {
footer <- NULL
}
# switch to vertical layout
if (layout == "vertical") {
formatted_table <- datawizard::rownames_as_column(as.data.frame(t(formatted_table)), "Metric")
formatted_table <- datawizard::row_to_colnames(formatted_table)
colnames(formatted_table)[1] <- "Metric"
}
cat(insight::export_table(x = formatted_table, digits = digits, format = "text", caption = table_caption, footer = footer, ...))
invisible(x)
}
#' @export
plot.compare_performance <- function(x, ...) {
insight::check_if_installed("see", "for model comparison plots")
NextMethod()
}
# utilities ------------------------------
.rank_performance_indices <- function(x, verbose) {
# all models comparable?
if (length(unique(x$Type)) > 1 && isTRUE(verbose)) {
insight::format_alert(
"Models are not of same type. Comparison of indices might be not meaningful."
)
}
# set reference for Bayes factors to 1
if ("BF" %in% colnames(x)) x$BF[is.na(x$BF)] <- 1
# don't include test statistic in ranking
x$p_CochransQ <- NULL
x$p_Omnibus <- NULL
x$p <- NULL
x$p_LRT <- NULL
# use weights instead of information criteria
x$AIC <- NULL
x$AICc <- NULL
x$BIC <- NULL
x$LOOIC <- NULL
x$WAIC <- NULL
# remove extra columns from LOO criteria
x$ELPD <- NULL
x$ELPD_SE <- NULL
x$LOOIC_SE <- NULL
# don't rank with BF when there is also BIC (same information)
if ("BF" %in% colnames(x) && "BIC_wt" %in% colnames(x)) {
x$BF <- NULL
}
out <- x
# normalize indices, for comparison
out[] <- lapply(out, function(i) {
if (is.numeric(i)) i <- .normalize_vector(i)
i
})
# recode some indices, so higher values = better fit
for (i in c("RMSE", "Sigma")) {
if (i %in% colnames(out)) {
out[[i]] <- 1 - out[[i]]
}
}
# any indices with NA?
missing_indices <- sapply(out, anyNA)
if (any(missing_indices) && isTRUE(verbose)) {
insight::format_alert(sprintf(
"Following indices with missing values are not used for ranking: %s",
toString(colnames(out)[missing_indices])
))
}
# create rank-index, only for complete indices
numeric_columns <- sapply(out, function(i) is.numeric(i) & !anyNA(i))
rank_index <- rowMeans(out[numeric_columns], na.rm = TRUE)
x$Performance_Score <- rank_index
x <- x[order(rank_index, decreasing = TRUE), ]
rownames(x) <- NULL
x
}
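# rescale a numeric vector to the range 0 to 1 (min-max normalization);
# NAs are kept, and all-NA or all-infinite vectors are returned unchanged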
.normalize_vector <- function(x) {
if (all(is.na(x)) || all(is.infinite(x))) {
return(x)
}
as.vector((x - min(x, na.rm = TRUE)) / diff(range(x, na.rm = TRUE)))
}
.ic_weight <- function(ic) {
# ic should be in the deviance metric (-2 * loglik)
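# Akaike-type weights (Burnham & Anderson 2002):
# w_i = exp(-0.5 * delta_i) / sum(exp(-0.5 * delta_j)), with delta_i = IC_i - min(IC)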
if (is.null(ic)) {
return(NULL)
}
diffs <- ic - min(ic)
f <- exp(-0.5 * diffs)
f / sum(f)
}