#' @title Create default priors for brms-models
#' @name auto_prior
#'
#' @description This function creates default priors for brms-regression
#' models, based on the same automatic prior-scale adjustment as in
#' \pkg{rstanarm}.
#'
#' @param formula A formula describing the model, which just needs to contain
#' the model terms, but no notation of interaction, splines etc. Usually,
#' you want only those predictors in the formula, for which automatic
#' priors should be generated. Add informative priors afterwards to the
#' returned \code{brmsprior}-object.
#' @param data The data that will be used to fit the model.
#' @param gaussian Logical, if the outcome is gaussian or not.
#' @param locations A numeric vector with location values for the priors. If
#' \code{locations = NULL}, \code{0} is used as location parameter.
#'
#' @return A \code{brmsprior}-object.
#'
#' @details \code{auto_prior()} is a small, convenient function to create
#' some default priors for brms-models with automatically adjusted prior
#' scales, in a similar way like \pkg{rstanarm} does. The default scale for
#' the intercept is 10, for coefficients 2.5. If the outcome is gaussian,
#' both scales are multiplied with \code{sd(y)}. Then, for categorical
#' variables, nothing more is changed. For numeric variables, the scales
#' are divided by the standard deviation of the related variable.
#' \cr \cr
#' All prior distributions are \emph{normal} distributions. \code{auto_prior()}
#' is intended to quickly create default priors with feasible scales. If
#' more precise definitions of priors is necessary, this needs to be done
#' directly with brms-functions like \code{set_prior()}.
#'
#' @note As \code{auto_prior()} also sets priors on the intercept, the model
#' formula used in \code{brms::brm()} must be rewritten to something like
#' \code{y ~ 0 + intercept ...}, see \code{\link[brms]{set_prior}}.
#'
#' @examplesIf requireNamespace("brms")
#' data(efc)
#' efc$c172code <- as.factor(efc$c172code)
#' efc$c161sex <- as.factor(efc$c161sex)
#'
#' mf <- formula(neg_c_7 ~ c161sex + c160age + c172code)
#' auto_prior(mf, efc, TRUE)
#'
#' ## compare to
#' # m <- rstanarm::stan_glm(mf, data = efc, chains = 2, iter = 200)
#' # ps <- prior_summary(m)
#' # ps$prior_intercept$adjusted_scale
#' # ps$prior$adjusted_scale
#'
#' ## usage
#' # ap <- auto_prior(mf, efc, TRUE)
#' # brm(mf, data = efc, prior = ap)
#'
#' # add informative priors
#' mf <- formula(neg_c_7 ~ c161sex + c172code)
#'
#' auto_prior(mf, efc, TRUE) +
#' brms::prior(normal(.1554, 40), class = "b", coef = "c160age")
#'
#' # example with binary response
#' efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1)
#' mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age)
#' auto_prior(mf, efc, FALSE)
#' @export
auto_prior <- function(formula, data, gaussian, locations = NULL) {
insight::check_if_installed("brms")
scale.b <- 2.5
scale.y <- 10
pred <- insight::find_predictors(formula, effects = "all", flatten = TRUE)
y.name <- insight::find_response(formula, combine = TRUE)
data <- stats::na.omit(data[c(y.name, pred)])
y <- data[[y.name]]
# check if response is binary
if (missing(gaussian) && insight::n_unique(y) == 2) gaussian <- FALSE
if (isTRUE(gaussian) && insight::n_unique(y) == 2)
insight::format_alert("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.") # nolint
if (gaussian) {
scale.factor <- stats::sd(y, na.rm = TRUE)
scale.b <- scale.b * scale.factor
scale.y <- scale.y * scale.factor
}
if (!is.null(locations))
location.y <- locations[1]
else
location.y <- 0
priors <- brms::set_prior(
sprintf("normal(%s, %s)", round(location.y, 2), round(scale.y, 2)),
class = "Intercept"
)
is.fac <- NULL
term.names <- NULL
scale.pred <- NULL
# we need to check which predictors are categorical and then "mimic"
# their coefficient name as it is represented in the model (i.e. variable
# name + category name)
for (i in pred) {
f <- data[[i]]
if (is.factor(f)) {
i <- sprintf("%s%s", i, levels(f)[2:nlevels(f)])
is.fac <- c(is.fac, rep(TRUE, nlevels(f) - 1))
scale.pred <- c(scale.pred, rep(scale.b, nlevels(f) - 1))
} else {
is.fac <- c(is.fac, FALSE)
scale.pred <- c(scale.pred, scale.b / stats::sd(f, na.rm = TRUE))
}
term.names <- c(term.names, i)
}
for (i in seq_along(term.names)) {
if (!is.null(locations) && length(locations) >= (i + 1))
location.b <- locations[i + 1]
else
location.b <- 0
priors <- priors + brms::set_prior(
sprintf("normal(%s, %s)", round(location.b, 2), round(scale.pred[i], 2)),
class = "b",
coef = term.names[i]
)
}
priors
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.