#' Obtain information about factors in regression models
#'
#' This function takes a \code{\link[brms]{brms}} model fit for a
#' factorial design and returns all design cells along with their encoding in
#' the regression model.
#'
#' @param fit An object of class \code{\link[brms]{brmsfit}}.
#'
#' @return A \code{\link[tibble]{tibble}} containing the minimal design matrix.
#'
#' @note
#' The \pkg{faintr} package currently does not support multivariate models and
#' models that use families \code{categorical}, \code{dirichlet}, \code{multinomial},
#' and \code{logistic_normal}. Furthermore, models must not include special effect
#' terms \code{mo()}, \code{mi()}, \code{me()}, and \code{cs()} for fixed effects.
#' Also note that \pkg{faintr} currently does not support models where the intercept
#' is a population-level parameter (class \code{b}), as is the case when using the
#' \code{0 + Intercept} syntax in the \code{brm} function call.
#'
#' @references
#' Bürkner, P.-C. (2017). brms: An R Package for Bayesian Multilevel Models
#' Using Stan. \emph{Journal of Statistical Software}, \emph{80}(1), 1-28.
#' \doi{10.18637/jss.v080.i01}
#'
#' @examples
#' \dontrun{
#' # fit a linear mixed effects model on data from a 2 x 2 factorial design
#' ## regress voice pitch against gender and context
#' fit <- brms::brm(formula = pitch ~ gender * context + (1 | subject + sentence),
#' data = politeness)
#'
#' # check encoding scheme used in the model
#' get_cell_definitions(fit)
#'}
#'
#' @export
get_cell_definitions <- function(fit) {
check_model(fit)
# get fixed effects names
fixef <- all.vars(brms::brmsterms(stats::formula(fit))$dpars$mu$fe)
# stop if the intercept is a population-level parameter
if ("Intercept" %in% fixef || "intercept" %in% fixef) {
stop('Models with the intercept as population-level effect are currently not supported.', call. = FALSE)
}
# concatenate design matrix and actual data
cell_defs <- dplyr::bind_cols(
fit$data %>% dplyr::select(dplyr::all_of(fixef)),
as.data.frame(brms::standata(fit)$X)
) %>% unique() %>%
tibble::rowid_to_column(var = 'cell')
return(tibble::as_tibble(cell_defs))
}
#' Extract posterior draws for all factorial design cells
#'
#' This function takes as input a \code{\link[brms]{brms}} model fit for a factorial
#' design and returns posterior draws for each factor level combination.
#'
#' @param fit An object of class \code{\link[brms]{brmsfit}}.
#'
#' @return A \code{\link[posterior]{draws_df}} object containing posterior draws
#' and additional metadata.
#'
#' @note
#' The \pkg{faintr} package currently does not support multivariate models and
#' models that use families \code{categorical}, \code{dirichlet}, \code{multinomial},
#' and \code{logistic_normal}. Furthermore, models must not include special effect
#' terms \code{mo()}, \code{mi()}, \code{me()}, and \code{cs()} for fixed effects.
#' Also note that \pkg{faintr} currently does not support models where the intercept
#' is a population-level parameter (class \code{b}), as is the case when using the
#' \code{0 + Intercept} syntax in the \code{brm} function call.
#'
#' @references
#' Bürkner, P.-C. (2017). brms: An R Package for Bayesian Multilevel Models
#' Using Stan. \emph{Journal of Statistical Software}, \emph{80}(1), 1-28.
#' \doi{10.18637/jss.v080.i01}
#'
#' @examples
#' \dontrun{
#' # fit a linear mixed effects model on data from a 2 x 2 factorial design
#' ## regress voice pitch against gender and context
#' fit <- brms::brm(formula = pitch ~ gender * context + (1 | subject + sentence),
#' data = politeness)
#'
#' # extract draws for all four factor cells
#' extract_cell_draws(fit)
#' }
#'
#' @export
extract_cell_draws <- function(fit) {
# get minimal design matrix as tibble with row numbers in column
design_matrix <- get_cell_definitions(fit)
# get fixed effects names
fixef <- all.vars(brms::brmsterms(stats::formula(fit))$dpars$mu$fe)
# extract coefficient names of fixed effects
coeff_names <- paste0('b_', brms::standata(fit)$X %>% colnames())
# extract posterior draws
draws <- posterior::as_draws_df(fit, variable = coeff_names)
# store meta information
meta_info <- data.frame(.chain = draws$.chain, .draw = draws$.draw, .iteration = draws$.iteration)
# re-extract minimal design matrix as matrix
X <- design_matrix %>%
tibble::column_to_rownames(var = "cell") %>%
dplyr::select(!dplyr::all_of(fixef)) %>%
as.matrix(rownames.force = TRUE)
# extract relevant draws as matrix
Y <- tibble::as_tibble(draws) %>%
dplyr::select(dplyr::all_of(coeff_names)) %>%
as.matrix()
# use matrix product to get draws for each cell
draws_for_cells <- Y %*% t(X)
# attach meta information
out <- draws_for_cells %>%
as.data.frame() %>%
dplyr::bind_cols(meta_info)
# add intelligible column names
colnames(out)[match(design_matrix$cell, colnames(out))] <- do.call(paste, c(design_matrix[fixef], sep = ":"))
# return draws in 'draws_df' format
return(posterior::as_draws_df(out))
}
#' Get posterior draws for one subset of factorial design cells
#'
#' This function takes as input a \code{\link[brms]{brms}} model fit for a
#' factorial design and a group specification (one subset of the design cells)
#' and returns the posterior draws for that group. If no group is specified,
#' the returned draws are grand means.
#'
#' @param fit An object of class \code{\link[brms]{brmsfit}}.
#' @param group An expression specifying the group to filter the draws for.
#' @param colname A string specifying the column name of the returned data frame;
#' defaults to 'draws'.
#'
#' @return A \code{\link[posterior]{draws_df}} object containing posterior draws
#' for the specified group and additional metadata.
#'
#' @note
#' The \pkg{faintr} package currently does not support multivariate models and
#' models that use families \code{categorical}, \code{dirichlet}, \code{multinomial},
#' and \code{logistic_normal}. Furthermore, models must not include special effect
#' terms \code{mo()}, \code{mi()}, \code{me()}, and \code{cs()} for fixed effects.
#' Also note that \pkg{faintr} currently does not support models where the intercept
#' is a population-level parameter (class \code{b}), as is the case when using the
#' \code{0 + Intercept} syntax in the \code{brm} function call.
#'
#' @references
#' Bürkner, P.-C. (2017). brms: An R Package for Bayesian Multilevel Models
#' Using Stan. \emph{Journal of Statistical Software}, \emph{80}(1), 1-28.
#' \doi{10.18637/jss.v080.i01}
#'
#' @examples
#' \dontrun{
#' # fit a linear mixed effects model on data from a 2 x 2 factorial design
#' ## regress voice pitch against gender and context
#' fit <- brms::brm(formula = pitch ~ gender * context + (1 | subject + sentence),
#' data = politeness)
#'
#' # get the draws for female speakers in informal contexts
#' filter_cell_draws(fit, gender == "F" & context == "inf")
#'
#' # get the draws for male speakers or informal contexts
#' filter_cell_draws(fit, gender == "M" | context == "inf")
#'
#' # averaged over gender, get the draws for all but polite contexts
#' filter_cell_draws(fit, context != "pol")
#'
#' # get the posterior draws averaged over all factors (grand mean)
#' filter_cell_draws(fit)
#' }
#'
#' @importFrom rlang .data `:=`
#'
#' @export
filter_cell_draws <- function(fit, group=NULL, colname="draws") {
## extract draws for each design cell ----
# get minimal design matrix as tibble with row numbers in column
design_matrix <- get_cell_definitions(fit)
# get draws for each cell
draws_for_cells <- extract_cell_draws(fit)
## filter the draws ----
group_spec <- rlang::enquo(group)
if (rlang::quo_is_null(group_spec)) {
cell_numbers <- design_matrix$cell
} else {
# get cell numbers based on specification
cell_numbers <- design_matrix %>%
dplyr::filter(!!group_spec) %>%
dplyr::select(.data$cell) %>%
dplyr::pull()
}
# get names of relevant columns
relevant_columns <- posterior::variables(draws_for_cells)[cell_numbers]
if (length(relevant_columns) == 1) {
out <- draws_for_cells %>%
posterior::subset_draws(variable = relevant_columns)
posterior::variables(out) <- colname
} else if (length(relevant_columns) > 1) {
out <- draws_for_cells %>%
posterior::subset_draws(variable = relevant_columns) %>%
posterior::mutate_variables(!!colname := suppressWarnings(rowMeans(draws_for_cells[relevant_columns]))) %>%
posterior::subset_draws(variable = colname)
} else {
stop("Level specification '", rlang::quo_get_expr(group_spec) %>% deparse(), "' unknown.")
}
return(out)
}
#' Compare two subsets of factorial design cells
#'
#' Convenience function for comparing two subsets of factorial design cells.
#' It takes a \code{\link[brms]{brms}} model fit, two group specifications
#' (two subsets of design cells), and the probability mass within the highest
#' density interval of the difference in groups. It outputs the posterior mean
#' of the 'higher' minus the 'lower' group, a credible interval of the mean difference,
#' as well as the posterior probability and odds that the mean estimate of the 'higher'
#' group is higher than that of the 'lower' group. A comparison of one group against
#' the grand mean can be obtained by leaving out one of the two group specifications
#' in the function call.
#'
#' @param fit An object of class \code{\link[brms]{brmsfit}}.
#' @param higher An expression specifying the 'higher' group to filter the draws for.
#' @param lower An expression specifying the 'lower' group to filter the draws for.
#' @param hdi A single value (0, 1) defining the probability mass within the
#' highest density interval; defaults to 0.95.
#' @param include_bf A Boolean flag indicating whether Bayes Factors should be
#' approximated (required additional sampling); defaults to FALSE.
#'
#' @return An object of class 'faintCompare' containing summary statistics of the comparison.
#'
#' @note
#' The \pkg{faintr} package currently does not support multivariate models and
#' models that use families \code{categorical}, \code{dirichlet}, \code{multinomial},
#' and \code{logistic_normal}. Furthermore, models must not include special effect
#' terms \code{mo()}, \code{mi()}, \code{me()}, and \code{cs()} for fixed effects.
#' Also note that \pkg{faintr} currently does not support models where the intercept
#' is a population-level parameter (class \code{b}), as is the case when using the
#' \code{0 + Intercept} syntax in the \code{brm} function call.
#'
#' @references
#' Bürkner, P.-C. (2017). brms: An R Package for Bayesian Multilevel Models
#' Using Stan. \emph{Journal of Statistical Software}, \emph{80}(1), 1-28.
#' \doi{10.18637/jss.v080.i01}
#'
#' @examples
#' \dontrun{
#' # fit a linear mixed effects model on data from a 2 x 2 factorial design
#' ## regress voice pitch against gender and context
#' fit <- brms::brm(formula = pitch ~ gender * context + (1 | subject + sentence),
#' data = politeness)
#'
#' # compare female speakers in informal contexts against male speakers in polite contexts
#' compare_groups(
#' fit = fit,
#' higher = gender == "F" & context == "inf",
#' lower = gender == "M" & context == "pol"
#' )
#'
#' # compare informal contexts against polite contexts, averaged over gender
#' compare_groups(
#' fit = fit,
#' higher = context == "inf",
#' lower = context == "pol"
#' )
#'
#' # compare female speakers against the grand mean
#' compare_groups(
#' fit = fit,
#' higher = gender == "F",
#' hdi = 0.8
#' )
#' }
#'
#' @export
compare_groups <- function(fit, higher=NULL, lower=NULL, hdi=0.95, include_bf=FALSE) {
# check for invalid 'hdi' input
if(!is.numeric(hdi) || length(hdi) != 1 || hdi <= 0 || hdi >= 1) {
stop("Argument 'hdi' must be a single value between 0 and 1.")
}
higher <- rlang::enquo(higher)
lower <- rlang::enquo(lower)
# get cell draws for both group specifications
post_samples_higher <- filter_cell_draws(fit = fit, group = !!higher)
post_samples_lower <- filter_cell_draws(fit = fit, group = !!lower)
# get names of group specification
get_group_names <- function(group){
if (rlang::quo_is_null(group)) {
return('grand mean')
}
rlang::quo_get_expr(group) %>% deparse()
}
# compute summary statistics
diff <- post_samples_higher$draws - post_samples_lower$draws
mean_diff <- mean(diff)
ci <- HDInterval::hdi(diff, credMass = hdi)
post_prob <- mean(post_samples_higher$draws > post_samples_lower$draws)
post_odds <- post_prob / (1 - post_prob)
# if BF is requested, get prior-only fit, compute
if (include_bf) {
suppressMessages(
fit_prior_only <- stats::update(
fit,
silent = TRUE,
refresh = 0,
sample_prior = "only"
))
prior_samples_higher <- filter_cell_draws(fit = fit_prior_only, group = !!higher)
prior_samples_lower <- filter_cell_draws(fit = fit_prior_only, group = !!lower)
prior_prob <- mean(prior_samples_higher$draws > prior_samples_lower$draws)
prior_odds <- prior_prob / (1 - prior_prob)
bayes_factor = post_odds / prior_odds
} else {
bayes_factor = NA
}
outlist <- list(
hdi = hdi,
higher = get_group_names(higher),
lower = get_group_names(lower),
mean_diff = mean_diff,
l_ci = as.vector(ci[1]),
u_ci = as.vector(ci[2]),
post_prob = post_prob,
post_odds = post_odds,
include_bf = include_bf,
bayes_factor = bayes_factor
)
class(outlist) <- 'faintCompare'
return(outlist)
}
#' Print group comparison object
#' @param x An object containing the summary statistics of a group comparison.
#' @param ... Ignored.
#'
#' @export
print.faintCompare <- function(x, ...) {
cat("Outcome of comparing groups: \n")
cat(" * higher: ", x$higher, "\n")
cat(" * lower: ", x$lower, "\n")
cat("Mean 'higher - lower': ", signif(x$mean_diff, 4), "\n")
cat(paste0(x$hdi*100, "% HDI: "), "[", signif(x$l_ci, 4), ";", signif(x$u_ci, 4), "]\n")
cat("P('higher - lower' > 0): ", signif(x$post_prob, 4), "\n")
cat("Posterior odds: ", signif(x$post_odds, 4), "\n")
if (x$include_bf) {
cat("Bayes factor: ", signif(x$bayes_factor, 4), "\n")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.