Nothing
#' Run the same \pkg{brms} model on multiple datasets
#'
#' Run the same \pkg{brms} model on multiple datasets and then combine the
#' results into one fitted model object. This is useful in particular for
#' multiple missing value imputation, where the same model is fitted on multiple
#' imputed data sets. Models can be run in parallel using the \pkg{future}
#' package.
#'
#' @inheritParams brm
#' @param data A \emph{list} of data.frames each of which will be used to fit a
#' separate model. Alternatively, a \code{mids} object from the \pkg{mice}
#' package.
#' @param data2 A \emph{list} of named lists each of which will be used to fit a
#' separate model. Each of the named lists contains objects representing data
#' which cannot be passed via argument \code{data} (see \code{\link{brm}} for
#' examples). The length of the outer list should match the length of the list
#' passed to the \code{data} argument.
#' @param recompile Logical, indicating whether the Stan model should be
#' recompiled for every imputed data set. Defaults to \code{FALSE}. If
#' \code{NULL}, \code{brm_multiple} tries to figure out internally, if recompilation
#' is necessary, for example because data-dependent priors have changed.
#' Using the default of no recompilation should be fine in most cases.
#' @param combine Logical; Indicates if the fitted models should be combined
#' into a single fitted model object via \code{\link{combine_models}}.
#' Defaults to \code{TRUE}.
#' @param fit An instance of S3 class \code{brmsfit_multiple} derived from a
#' previous fit; defaults to \code{NA}. If \code{fit} is of class
#' \code{brmsfit_multiple}, the compiled model associated with the fitted
#' result is re-used and all arguments modifying the model code or data are
#' ignored. It is not recommended to use this argument directly, but to call
#' the \code{\link[brms:update.brmsfit_multiple]{update}} method, instead.
#' @param ... Further arguments passed to \code{\link{brm}}.
#'
#' @details The combined model may issue false positive convergence warnings, as
#' the MCMC chains corresponding to different datasets may not necessarily
#' overlap, even if each of the original models did converge. To find out
#' whether each of the original models converged, subset the draws belonging
#' to the individual models and then run convergence diagnostics.
#' See Examples below for details.
#'
#' @return If \code{combine = TRUE} a \code{brmsfit_multiple} object, which
#' inherits from class \code{brmsfit} and behaves essentially the same. If
#' \code{combine = FALSE} a list of \code{brmsfit} objects.
#'
#' @examples
#' \dontrun{
#' library(mice)
#' m <- 5
#' imp <- mice(nhanes2, m = m)
#'
#' # fit the model using mice and lm
#' fit_imp1 <- with(lm(bmi ~ age + hyp + chl), data = imp)
#' summary(pool(fit_imp1))
#'
#' # fit the model using brms
#' fit_imp2 <- brm_multiple(bmi ~ age + hyp + chl, data = imp, chains = 1)
#' summary(fit_imp2)
#' plot(fit_imp2, variable = "^b_", regex = TRUE)
#'
#' # investigate convergence of the original models
#' library(posterior)
#' draws <- as_draws_array(fit_imp2)
#' # every dataset has just one chain here
#' draws_per_dat <- lapply(1:m, \(i) subset_draws(draws, chain = i))
#' lapply(draws_per_dat, summarise_draws, default_convergence_measures())
#'
#' # use the future package for parallelization
#' library(future)
#' plan(multisession, workers = 4)
#' fit_imp3 <- brm_multiple(bmi ~ age + hyp + chl, data = imp, chains = 1)
#' summary(fit_imp3)
#' }
#'
#' @export
brm_multiple <- function(formula, data, family = gaussian(), prior = NULL,
data2 = NULL, autocor = NULL, cov_ranef = NULL,
sample_prior = c("no", "yes", "only"),
sparse = NULL, knots = NULL, stanvars = NULL,
stan_funs = NULL, silent = 1, recompile = FALSE,
combine = TRUE, fit = NA,
algorithm = getOption("brms.algorithm", "sampling"),
seed = NA, file = NULL, file_compress = TRUE,
file_refit = getOption("brms.file_refit", "never"),
...) {
combine <- as_one_logical(combine)
file_refit <- match.arg(file_refit, file_refit_options())
if (!is.null(file)) {
if (file_refit == "on_change") {
stop2("file_refit = 'on_change' is not supported for brm_multiple yet.")
}
# optionally load saved model object
if (!combine) {
stop2("Cannot use 'file' if 'combine' is FALSE.")
}
if (file_refit != "always") {
fits <- read_brmsfit(file)
if (!is.null(fits)) {
return(fits)
}
}
}
algorithm <- match.arg(algorithm, algorithm_choices())
silent <- validate_silent(silent)
recompile <- as_one_logical(recompile)
data_name <- substitute_name(data)
if (inherits(data, "mids")) {
require_package("mice", version = "3.0.0")
data <- lapply(seq_len(data$m), mice::complete, data = data)
} else if (!is_data_list(data)) {
stop2("'data' must be a list of data.frames.")
}
if (!is.null(data2)) {
if (!is_data2_list(data2)) {
stop2("'data2' must be a list of named lists.")
}
if (length(data2) != length(data)) {
stop2("'data2' must have the same length as 'data'.")
}
}
if (is.brmsfit(fit)) {
# avoid complications when updating the model
class(fit) <- setdiff(class(fit), "brmsfit_multiple")
} else {
args <- nlist(
formula, data = data[[1]], family, prior, data2 = data2[[1]],
autocor, cov_ranef, sample_prior, sparse, knots, stanvars,
stan_funs, algorithm, silent, seed, ...
)
args$chains <- 0
if (silent < 2) {
message("Compiling the C++ model")
}
fit <- suppressMessages(do_call(brm, args))
}
dots <- list(...)
# allow compiling the model without sampling (#671)
if (isTRUE(dots$chains == 0) || isTRUE(dots$iter == 0)) {
class(fit) <- c("brmsfit_multiple", class(fit))
return(fit)
}
.brm <- function(i, ...) {
if (silent < 2) {
message("Fitting imputed model ", i)
}
update(fit, newdata = data[[i]], data2 = data2[[i]],
recompile = recompile, silent = silent, ...)
}
fits <- future.apply::future_lapply(
seq_along(data), .brm, ..., future.seed = TRUE
)
if (combine) {
fits <- combine_models(mlist = fits, check_data = FALSE)
attr(fits$data, "data_name") <- data_name
# attribute to remember how many imputed datasets where used
attr(fits, "nimp") <- length(data)
class(fits) <- c("brmsfit_multiple", class(fits))
}
if (!is.null(file)) {
fits <- write_brmsfit(fits, file, compress = file_compress)
}
fits
}
#' Combine Models fitted with \pkg{brms}
#'
#' Combine multiple \code{brmsfit} objects, which fitted the same model.
#' This is usefully for instance when having manually run models in parallel.
#'
#' @param ... One or more \code{brmsfit} objects.
#' @param mlist Optional list of one or more \code{brmsfit} objects.
#' @param check_data Logical; indicates if the data should be checked
#' for being the same across models (defaults to \code{TRUE}).
#' Setting it to \code{FALSE} may be useful for instance
#' when combining models fitted on multiple imputed data sets.
#'
#' @details This function just takes the first model and replaces
#' its \code{stanfit} object (slot \code{fit}) by the combined
#' \code{stanfit} objects of all models.
#'
#' @return A \code{brmsfit} object.
#'
#' @export
combine_models <- function(..., mlist = NULL, check_data = TRUE) {
models <- c(list(...), mlist)
check_data <- as_one_logical(check_data)
if (!length(models)) {
stop2("No models supplied to 'combine_models'.")
}
for (i in seq_along(models)) {
if (!is.brmsfit(models[[i]])) {
stop2("Model ", i, " is no 'brmsfit' object.")
}
models[[i]] <- restructure(models[[i]])
}
ref_formula <- formula(models[[1]])
ref_pars <- variables(models[[1]])
ref_mf <- model.frame(models[[1]])
for (i in seq_along(models)[-1]) {
if (!is_equal(formula(models[[i]]), ref_formula)) {
stop2("Models 1 and ", i, " have different formulas.")
}
if (!is_equal(variables(models[[i]]), ref_pars)) {
stop2("Models 1 and ", i, " have different parameters.")
}
if (check_data && !is_equal(model.frame(models[[i]]), ref_mf)) {
stop2(
"Models 1 and ", i, " have different data. ",
"Set 'check_data' to FALSE to turn off checking of the data."
)
}
}
sflist <- from_list(models, "fit")
out <- models[[1]]
out$fit <- rstan::sflist2stanfit(sflist)
# fixes issue #1603
out <- save_old_par_order(out, models[[1]])
if (out$backend == "cmdstanr") {
att <- attributes(models[[1]]$fit)
attributes(out$fit)$CmdStanModel <- att$CmdStanModel
attributes(out$fit)$metadata <- att$metadata
}
out
}
# validity check for 'data' input of 'brm_multiple'
is_data_list <- function(x) {
# see also issue #1383
is.list(x) && (is.vector(x) || all(vapply(x, is.data.frame, logical(1L))))
}
# validity check for 'data2' input of 'brm_multiple'
is_data2_list <- function(x) {
is.list(x) && all(ulapply(x, function(y) is.list(y) && is_named(y)))
}
warn_brmsfit_multiple <- function(x, newdata = NULL) {
if (is.brmsfit_multiple(x) && is.null(newdata)) {
warning2(
"Using only the first imputed data set. Please interpret the results ",
"with caution until a more principled approach has been implemented."
)
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.