In stefanocoretta/bayes.handson: Learning materials for the learnB4SS workshop

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(tidyverse)
theme_set(theme_minimal())
library(ggrepel)
library(learnB4SS)
library(extraDistr)
library(HDInterval)
library(tidybayes)
library(bayesplot)
library(modelr)
library(broom.mixed)
library(brms)

data("incomplete")

# Set custom b4ss colours
b4ss_colors <- c(purple = "#8970FF", orange = "#FFA70B")
scale_fill_b4ss <- function(...) {
  scale_fill_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]]))
}
scale_color_b4ss <- function(...) {
  scale_color_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]]))
}

Study overview and data

glimpse(incomplete)

Model formula

m1_bf <- brmsformula(
  correct ~
    correct_voicing *
    repetitiontype +
    # random slopes for interaction across listeners
    (correct_voicing * repetitiontype | listener) +
    # random slopes for interaction across speaker voices
    (correct_voicing * repetitiontype | speaker_voice) +
    # random slopes for interaction across minimal pairs
    (correct_voicing * repetitiontype | item_pair),
  family = bernoulli()
)

Priors and prior predictive checks

Prior distribution of the outcome variable (likelihood, family)

$$correct_i \sim Bernoulli(p)$$

y <- dbern(c(0, 1), p = 0.75)

ggplot() +
  aes(c("0", "1"), y) +
  geom_linerange(aes(ymin = 0, ymax = y), size = 2, colour = b4ss_colors) +
  geom_point(size = 5, colour = b4ss_colors) +
  ylim(0, 1) +
  labs(x = "outcome", y = "p")

Get priors

# get_prior(m1_bf, data = incomplete)
get_prior(m1_bf, data = incomplete) %>%
  as_tibble() %>%
  select(prior:group)

Log-odds space

dots <- tibble(
  p = seq(0.1, 0.9, by = 0.1),
  log_odds = qlogis(p)
)

tibble(
  p = seq(0, 1, by = 0.001),
  log_odds = qlogis(p)
) %>%
  ggplot(aes(p, log_odds)) +
  geom_hline(yintercept = 0, alpha = 0.5) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  geom_line(size = 2, colour = b4ss_colors[2]) +
  geom_point(data = dots, size = 4, colour = b4ss_colors[1]) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.1), minor_breaks = NULL) +
  scale_y_continuous(breaks = seq(-6, 6, by = 2), minor_breaks = NULL) +
  labs(
    title = "Correspondence between probabilities and log-odds",
    x = "probability",
    y = "log-odds"
  )

# log-odds = 0; get probability
glue::glue("Log-odds 0 = p(", plogis(0), ")")
# p = 0.5; get log-odds
glue::glue("p(0.5) = log-odds ", qlogis(0.5))

cat("\n\n")

# log-odds = 1.5; get probability
glue::glue("Log-odds 1.5 = p(", round(plogis(1.5), 4), ")")
# p = 0.5; get log-odds
glue::glue("p(0.8176) = log-odds ", round(qlogis(0.8175745), 4))

Prior for the intercept

# Log-odds to p
plogis(-6) # ~= 0 
plogis(6) # ~= 1

x <- seq(-15, 15, by = 0.1)
# 95% Cri [-6, +6] => SD = 6/2 = 3
y <- dnorm(x, mean = 0, sd = 3)

ggplot() +
  aes(x, y) +
  geom_line(size = 1.5, colour = b4ss_colors[1]) +
  labs(x = "log-odds")

Prior for b

# Log-odds to odds
exp(-2) # ~= 0
exp(2) # ~= 7

x <- seq(-4, 4, by = 0.1)
# 95% Cri [-2, +2] => SD = 2/2 = 1
y <- dnorm(x, mean = 0, sd = 1)

ggplot() +
  aes(x, y) +
  geom_line(size = 1.5, colour = b4ss_colors[1]) +
  labs(x = "log-odds")

Prior for sd

inverseCDF(c(0.025, 0.975), phcauchy, sigma = 0.1)

x <- seq(0, 3, by = 0.01)
y <- dhcauchy(x, sigma = 0.1)

ggplot() +
  aes(x, y) +
  geom_line(size = 1, colour = b4ss_colors[2]) +
  labs(x = "log-odds")

Set priors

priors <- c(
  prior(normal(0, 3), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(cauchy(0, 0.1), class = sd),
  prior(lkj(2), class = cor)
)

Prior predictive checks

m1_priorpc <- brm(
  m1_bf,
  data = incomplete,
  prior = priors,
  sample_prior = "only",
  file = system.file("extdata/m1_priorpc.rds", package = "learnB4SS")
)

conditional_effects(m1_priorpc, effects = "correct_voicing:repetitiontype")

Let's try VERY strong priors for comparison

priors_strong <- c(
  prior(normal(2, 0.1), class = Intercept),
  prior(normal(1, 0.1), class = b),
  prior(cauchy(0, 0.1), class = sd),
  prior(lkj(2), class = cor)
)

m1_priorpc_strong <- brm(
  m1_bf,
  data = incomplete,
  prior = priors_strong,
  sample_prior = "only",
  cores = 4,
  file = system.file("extdata/m1_priorpc_strong.rds", package = "learnB4SS")
)

conditional_effects(m1_priorpc_strong, effects = "correct_voicing:repetitiontype")

Run model

m1_full <- brm(
  m1_bf,
  data = incomplete,
  prior = priors,
  cores = parallel::detectCores(), 
  chains = 4, 
  iter = 2000, 
  warmup = 1000, 
  file = system.file("extdata/m1_full.rds", package = "learnB4SS")
)

Model output and interpretation

Checks and diagnostics

# Sample data from the generative model and compare with real data
pp_check(m1_full, nsamples = 200)

# Generate trace and density plots for MCMC samples
# Only looking at pop-level parameters (no SDs or cor)
plot(m1_full, pars = "^b_")

# Pairs plots to help spot degeneracies (doesn't apply here but 
# useful if you have divergent transitions)
pairs(m1_full, pars = "^b_")

# Check Rhat and ESS (remove the rest of the output so it doesn't distract)
summary(m1_full)$fixed[, 5:7]

Prior sensitivity analysis

m1_fixed <- tidy(m1_full, effects = "fixed", conf.level = 0.95, fix.intercept = FALSE) %>%
  mutate(
    ci_width = abs(conf.low - conf.high)
  )
m1_fixed

m1_fixed %>%
  mutate(
    theta = c(0, 0, 0, 0),
    sigma_prior = c(3, 1, 1, 1),
    z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation
    s = 1 - (std.error^2 / sigma_prior^2)
  ) %>%
  ggplot(aes(s, z, label = term)) +
  geom_point() +
  geom_label_repel(arrow = arrow()) +
  xlim(0, 1) + ylim(0, 5)

labels <- tibble(
  x = c(0.25, 0.25, 0.6, 0.75),
  y = c(1.25, 3.75, 1.25, 3.75),
  labs = c("Poorly identified", "Prior/Posterior\nconflict", "Ideal", "Overfit")
)

m1_fixed %>%
  mutate(
    theta = c(0, 0, 0, 0),
    sigma_prior = c(3, 1, 1, 1),
    z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation
    s = 1 - (std.error^2 / sigma_prior^2)
  ) %>%
  ggplot(aes(s, z, label = term)) +
  annotate("rect", xmin = 0, ymin = 0, xmax = 0.5, ymax = 2.5, alpha = 0.5, fill = "#e66101") +
  annotate("rect", xmin = 0, ymin = 2.5, xmax = 0.5, ymax = 5, alpha = 0.5, fill = "#fdb863") +
  annotate("rect", xmin = 0.5, ymin = 0, xmax = 1, ymax = 2.5, alpha = 0.5, fill = "#b2abd2") +
  annotate("rect", xmin = 0.5, ymin = 2.5, xmax = 1, ymax = 5, alpha = 0.5, fill = "#5e3c99") +
  geom_label(data = labels, aes(x, y, label = labs), colour = "white", fill = "black") +
  geom_label_repel(fill = NA) +
  geom_point() +
  xlim(0, 1) + ylim(0, 5)

Visual effects

A quick and dirty way to assess the posterior predictions is using the conditional_effects() function. It is also useful because it plots the data into the original scale.

# quick and dirty plot on the original scale
plot(conditional_effects(m1_full), ask = FALSE)

We can also plot the posterior distributions of our population-level coefficients. This can be conveniently done with the bayesplot package.

posterior <- as.matrix(m1_full)

mcmc_areas(posterior,
           pars = c("b_Intercept", 
                    "b_correct_voicingvoiceless", 
                    "b_repetitiontyperepeated", 
                    "b_correct_voicingvoiceless:repetitiontyperepeated"),
           # arbitrary threshold for shading probability mass
           prob = 0.83)

For more traditional plotting of the actual levels of the predictors, we can use the data_grid() and add_fitted_draws() functions. Here is an example.

post_data <- incomplete %>% 
  data_grid(correct_voicing, repetitiontype) %>%
  add_fitted_draws(m1_full, n = 4000, re_formula = NA)

post_plot <- post_data %>%
  # plot
  ggplot(aes(y = .value, x = correct_voicing, 
             fill = correct_voicing)) +
  # density plus CrIs
  stat_halfeye() +
  # reference line at chance level = 0.5
  geom_hline(yintercept = 0.5, lty = "dashed") +
  # split by repetition type
  facet_grid(~repetitiontype) +
  # color code
  scale_fill_manual(values = c("#8970FF", "#FFA70B")) + 
  #scale_color_manual(values = c("#8970FF", "#FFA70B")) +
  # rename y axis
  labs(y = "probability of correct",
       x = "underlying voicing")
post_plot

If you want to add the actual aggregated accuracy for each listener to the plot (for example), you can add that information on top. Makes for a very informative plot.

#aggregate
incomplete_agg <- incomplete %>% 
  group_by(listener, correct_voicing, repetitiontype) %>% 
  summarise(.value = mean(as.numeric(correct)), .groups = "drop")

post_plot +
  geom_point(data = incomplete_agg,
             aes(y = .value, 
                 x = correct_voicing, 
                 fill = correct_voicing),
             pch = 21, alpha = 0.5,
             position = position_nudge(x = -0.1))

Inference

Interpreting model output

# Describe posterior using fixef (to get simple, clean output... 
# focus on understanding each estimate in context and examining CrI's)
# Use in conjuction with levels_plot-2
fixef(m1_full)

# Focus on describing posterior
hdi_range <- bayestestR::hdi(m1_full, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
hdi_range

plot(hdi_range, show_intercept = T)

stefanocoretta/bayes.handson documentation built on Oct. 3, 2023, 11:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

stefanocoretta/bayes.handson
Learning materials for the learnB4SS workshop

In stefanocoretta/bayes.handson: Learning materials for the learnB4SS workshop

Study overview and data

Model formula

Priors and prior predictive checks

Prior distribution of the outcome variable (likelihood, family)

Get priors

Log-odds space

Prior for the intercept

Prior for b

Prior for sd

Set priors

Prior predictive checks

Let's try VERY strong priors for comparison

Run model

Model output and interpretation

Checks and diagnostics

Prior sensitivity analysis

Visual effects

Inference

Interpreting model output

R Package Documentation

Browse R Packages

We want your feedback!

stefanocoretta/bayes.handson Learning materials for the learnB4SS workshop

In stefanocoretta/bayes.handson: Learning materials for the learnB4SS workshop

Study overview and data

Model formula

Priors and prior predictive checks

Prior distribution of the outcome variable (likelihood, family)

Get priors

Log-odds space

Prior for the intercept

Prior for b

Prior for sd

Set priors

Prior predictive checks

Let's try VERY strong priors for comparison

Run model

Model output and interpretation

Checks and diagnostics

Prior sensitivity analysis

Visual effects

Inference

Interpreting model output

R Package Documentation

Browse R Packages

We want your feedback!

stefanocoretta/bayes.handson
Learning materials for the learnB4SS workshop