tidybayes.R
In tidybayes: Tidy Data and 'Geoms' for Bayesian Models

params <-
list(EVAL = TRUE)

## ----chunk_options, include=FALSE-------------------------------------------------------------------------------------
if (requireNamespace("pkgdown", quietly = TRUE) && pkgdown::in_pkgdown()) {
  tiny_width = small_width = med_width = 6.75
  tiny_height = small_height = med_height = 4.5
  large_width = 8
  large_height = 5.25
} else {
  tiny_width = 5.5
  tiny_height = 3 + 2/3
  small_width = med_width = 6.75
  small_height = med_height = 4.5
  large_width = 8
  large_height = 5.25
}

eval_chunks = if (isTRUE(exists("params"))) params$EVAL else FALSE
knitr::opts_chunk$set(
  fig.width = small_width,
  fig.height = small_height,
  eval = eval_chunks
)
if (capabilities("cairo") && Sys.info()[['sysname']] != "Darwin") {
  knitr::opts_chunk$set(
    dev.args = list(png = list(type = "cairo"))
  )
}
dir.create("models", showWarnings = FALSE)

## ----setup, message = FALSE, warning = FALSE--------------------------------------------------------------------------
library(magrittr)
library(dplyr)
library(forcats)
library(modelr)
library(ggdist)
library(tidybayes)
library(ggplot2)
library(cowplot)
library(broom)
library(rstan)
library(rstanarm)
library(brms)
library(bayesplot)
library(RColorBrewer)

theme_set(theme_tidybayes() + panel_border())

## ----eval=FALSE-------------------------------------------------------------------------------------------------------
#  rstan_options(auto_write = TRUE)
#  options(mc.cores = parallel::detectCores())

## ----hidden_options, include=FALSE------------------------------------------------------------------------------------
# While the previous code chunk is the actual recommended approach,
# CRAN vignette building policy limits us to 2 cores, so we use at most
# 2 to build this vignette (but show the previous chunk to
# the reader as a best pratice example)
rstan_options(auto_write = TRUE)
options(mc.cores = 1) #min(2, parallel::detectCores()))

options(width = 120)

## ---------------------------------------------------------------------------------------------------------------------
set.seed(5)
n = 10
n_condition = 5
ABC =
  tibble(
    condition = factor(rep(c("A","B","C","D","E"), n)),
    response = rnorm(n * 5, c(0,1,2,1,-1), 0.5)
  )

## ---------------------------------------------------------------------------------------------------------------------
head(ABC, 10)

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
ABC %>%
  ggplot(aes(x = response, y = fct_rev(condition))) +
  geom_point(alpha = 0.5) +
  ylab("condition")

## ---------------------------------------------------------------------------------------------------------------------
compose_data(ABC)

## ----message = FALSE, results = 'hide'--------------------------------------------------------------------------------
m = sampling(ABC_stan, data = compose_data(ABC), control = list(adapt_delta = 0.99))

## ---------------------------------------------------------------------------------------------------------------------
m

## ---------------------------------------------------------------------------------------------------------------------
str(rstan::extract(m))

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  recover_types(ABC) %>%
  spread_draws(condition_mean[condition]) %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %<>% recover_types(ABC)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(overall_mean, response_sd) %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(overall_mean, response_sd) %>%
  median_qi(overall_mean, response_sd)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(overall_mean, response_sd) %>%
  median_qi()

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  median_qi()

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  group_by(condition) %>%    # this line not necessary (done automatically by spread_draws)
  median_qi(condition_mean)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  summarise_draws()

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  ggplot(aes(y = fct_rev(condition), x = condition_mean)) +
  stat_pointinterval()

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  ggplot(aes(y = fct_rev(condition), x = condition_mean)) +
  stat_halfeye(.width = c(.90, .5))

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  ggplot(aes(y = fct_rev(condition), x = condition_mean, fill = after_stat(abs(x) < .8))) +
  stat_halfeye() +
  geom_vline(xintercept = c(-.8, .8), linetype = "dashed") +
  scale_fill_manual(values = c("gray80", "skyblue"))

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  median_qi(.width = c(.95, .8, .5))

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  median_qi(.width = c(.95, .66)) %>%
  ggplot(aes(
    y = fct_rev(condition), x = condition_mean, xmin = .lower, xmax = .upper,
    # size = -.width means smaller probability interval => thicker line
    # this can be omitted, geom_pointinterval includes it automatically
    # if a .width column is in the input data.
    linewidth = -.width
  )) +  
  geom_pointinterval()

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  ggplot(aes(x = condition_mean, y = fct_rev(condition))) +
  stat_dotsinterval(quantiles = 100)

## ---------------------------------------------------------------------------------------------------------------------
set.seed(123)
multimodal_draws = tibble(
  x = c(rnorm(5000, 0, 1), rnorm(2500, 4, 1))
)

## ---------------------------------------------------------------------------------------------------------------------
multimodal_draws %>%
  mode_hdi(x, .width = .80)

## ----fig.width = large_width, fig.height = large_height/2-------------------------------------------------------------
multimodal_draws %>%
  ggplot(aes(x = x)) +
  stat_slab(aes(y = 0)) +
  stat_pointinterval(aes(y = -0.5), point_interval = median_qi, .width = c(.95, .80)) +
  annotate("text", label = "median, 80% and 95% quantile intervals", x = 6, y = -0.5, hjust = 0, vjust = 0.3) +
  stat_pointinterval(aes(y = -0.25), point_interval = mode_hdi, .width = c(.95, .80)) +
  annotate("text", label = "mode, 80% and 95% highest-density intervals", x = 6, y = -0.25, hjust = 0, vjust = 0.3) +
  xlim(-3.25, 18) +
  scale_y_continuous(breaks = NULL)

## ---------------------------------------------------------------------------------------------------------------------
m %>% 
  spread_draws(overall_mean, condition_mean[condition]) %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(overall_mean, condition_mean[condition]) %>%
  mutate(condition_offset = condition_mean - overall_mean) %>%
  median_qi(condition_offset)

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition], response_sd) %>%
  mutate(y_rep = rnorm(n(), condition_mean, response_sd)) %>%
  median_qi(y_rep, .width = c(.95, .8, .5)) %>%
  ggplot(aes(y = fct_rev(condition), x = y_rep)) +
  geom_interval(aes(xmin = .lower, xmax = .upper)) + #auto-sets aes(color = fct_rev(ordered(.width)))
  geom_point(aes(x = response), data = ABC) +
  scale_color_brewer()

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
draws = m %>%
  spread_draws(condition_mean[condition], response_sd)

reps = draws %>%
  mutate(y_rep = rnorm(n(), condition_mean, response_sd))

ABC %>%
  ggplot(aes(y = condition)) +
  stat_interval(aes(x = y_rep), .width = c(.95, .8, .5), data = reps) +
  stat_pointinterval(aes(x = condition_mean), .width = c(.95, .66), position = position_nudge(y = -0.3), data = draws) +
  geom_point(aes(x = response)) +
  scale_color_brewer()

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  compare_levels(condition_mean, by = condition) %>%
  ggplot(aes(y = condition, x = condition_mean)) +
  stat_halfeye()

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  gather_draws(overall_mean, condition_mean[condition]) %>%
  median_qi()

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(overall_mean, condition_mean[condition]) %>%
  mutate(condition_offset = condition_mean - overall_mean) %>%
  gather_variables() %>%
  median_qi()

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  tidy_draws() %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  tidy_draws() %>%
  gather_variables() %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(`condition_.*`[condition], regex = TRUE) %>%
  head(10)

## ----m_mpg_brms, results = "hide", message = FALSE, warning = FALSE, cache = TRUE-------------------------------------
m_mpg = brm(
  mpg ~ hp * cyl, 
  data = mtcars, 
  
  file = "models/tidybayes_m_mpg.rds"  # cache model (can be removed)
)

## ---------------------------------------------------------------------------------------------------------------------
mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 51)) %>%
  add_epred_draws(m_mpg) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
  stat_lineribbon(aes(y = .epred)) +
  geom_point(data = mtcars) +
  scale_fill_brewer(palette = "Greys") +
  scale_color_brewer(palette = "Set2")

## ---------------------------------------------------------------------------------------------------------------------
mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 101)) %>%
  # NOTE: this shows the use of ndraws to subsample within add_epred_draws()
  # ONLY do this IF you are planning to make spaghetti plots, etc.
  # NEVER subsample to a small sample to plot intervals, densities, etc.
  add_epred_draws(m_mpg, ndraws = 100) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
  geom_line(aes(y = .epred, group = paste(cyl, .draw)), alpha = .1) +
  geom_point(data = mtcars) +
  scale_color_brewer(palette = "Dark2")

## ---------------------------------------------------------------------------------------------------------------------
m_linear = lm(response ~ condition, data = ABC)

## ----eval = eval_chunks && requireNamespace("emmeans", quietly = TRUE)------------------------------------------------
linear_results = m_linear %>% 
  emmeans::emmeans(~ condition) %>%
  tidy(conf.int = TRUE) %>%
  mutate(model = "OLS")

linear_results

## ---------------------------------------------------------------------------------------------------------------------
bayes_results = m %>%
  spread_draws(condition_mean[condition]) %>%
  median_qi(estimate = condition_mean) %>%
  to_broom_names() %>%
  mutate(model = "Bayes")

bayes_results

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
bind_rows(linear_results, bayes_results) %>%
  mutate(condition = fct_rev(condition)) %>%
  ggplot(aes(y = condition, x = estimate, xmin = conf.low, xmax = conf.high, color = model)) +
  geom_pointinterval(position = position_dodge(width = .3))

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  compare_levels(condition_mean, by = condition) %>%
  head(10)

## ---------------------------------------------------------------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  compare_levels(condition_mean, by = condition) %>%
  unspread_draws(condition_mean[condition]) %>%
  head(10)

## ----fig.width = tiny_width, fig.height = tiny_height-----------------------------------------------------------------
m %>%
  spread_draws(condition_mean[condition]) %>%
  compare_levels(condition_mean, by = condition) %>%
  unspread_draws(condition_mean[condition], drop_indices = TRUE) %>%
  bayesplot::mcmc_areas()

## ----results = "hide", message = FALSE--------------------------------------------------------------------------------
m_rst = stan_glm(response ~ condition, data = ABC)

## ----eval = eval_chunks && requireNamespace("emmeans", quietly = TRUE)------------------------------------------------
m_rst %>%
  emmeans::emmeans( ~ condition) %>%
  gather_emmeans_draws() %>%
  median_qi()

## ----eval = eval_chunks && requireNamespace("emmeans", quietly = TRUE)------------------------------------------------
m_rst %>%
  emmeans::emmeans( ~ condition) %>%
  emmeans::contrast(method = "pairwise") %>%
  gather_emmeans_draws() %>%
  median_qi()

## ----fig.width = tiny_width, fig.height = tiny_height, eval = eval_chunks && requireNamespace("emmeans", quietly = TRUE)----
m_rst %>%
  emmeans::emmeans( ~ condition) %>%
  emmeans::contrast(method = "pairwise") %>%
  gather_emmeans_draws() %>%
  ggplot(aes(x = .value, y = contrast)) +
  stat_halfeye()

## ----eval = eval_chunks && requireNamespace("emmeans", quietly = TRUE)------------------------------------------------
m_rst %>%
  emmeans::emmeans(pairwise ~ condition) %>%
  gather_emmeans_draws() %>%
  median_qi()