knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(tidyverse)
theme_set(theme_minimal())
library(ggrepel)
library(learnB4SS)
library(extraDistr)
library(HDInterval)
library(tidybayes)
library(bayesplot)
library(modelr)
library(broom.mixed)
library(brms)

data("incomplete")

# Set custom b4ss colours
b4ss_colors <- c(purple = "#8970FF", orange = "#FFA70B")
scale_fill_b4ss <- function(...) {
  scale_fill_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]]))
}
scale_color_b4ss <- function(...) {
  scale_color_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]]))
}

Study overview and data

glimpse(incomplete)

Model formula

m1_bf <- brmsformula(
  correct ~
    correct_voicing *
    repetitiontype +
    # random slopes for interaction across listeners
    (correct_voicing * repetitiontype | listener) +
    # random slopes for interaction across speaker voices
    (correct_voicing * repetitiontype | speaker_voice) +
    # random slopes for interaction across minimal pairs
    (correct_voicing * repetitiontype | item_pair),
  family = bernoulli()
)

Priors and prior predictive checks

Prior distribution of the outcome variable (likelihood, family)

$$correct_i \sim Bernoulli(p)$$

y <- dbern(c(0, 1), p = 0.75)

ggplot() +
  aes(c("0", "1"), y) +
  geom_linerange(aes(ymin = 0, ymax = y), size = 2, colour = b4ss_colors) +
  geom_point(size = 5, colour = b4ss_colors) +
  ylim(0, 1) +
  labs(x = "outcome", y = "p")

Get priors

# get_prior(m1_bf, data = incomplete)
get_prior(m1_bf, data = incomplete) %>%
  as_tibble() %>%
  select(prior:group)

Log-odds space

dots <- tibble(
  p = seq(0.1, 0.9, by = 0.1),
  log_odds = qlogis(p)
)

tibble(
  p = seq(0, 1, by = 0.001),
  log_odds = qlogis(p)
) %>%
  ggplot(aes(p, log_odds)) +
  geom_hline(yintercept = 0, alpha = 0.5) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  geom_line(size = 2, colour = b4ss_colors[2]) +
  geom_point(data = dots, size = 4, colour = b4ss_colors[1]) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.1), minor_breaks = NULL) +
  scale_y_continuous(breaks = seq(-6, 6, by = 2), minor_breaks = NULL) +
  labs(
    title = "Correspondence between probabilities and log-odds",
    x = "probability",
    y = "log-odds"
  )
# log-odds = 0; get probability
glue::glue("Log-odds 0 = p(", plogis(0), ")")
# p = 0.5; get log-odds
glue::glue("p(0.5) = log-odds ", qlogis(0.5))

cat("\n\n")

# log-odds = 1.5; get probability
glue::glue("Log-odds 1.5 = p(", round(plogis(1.5), 4), ")")
# p = 0.5; get log-odds
glue::glue("p(0.8176) = log-odds ", round(qlogis(0.8175745), 4))

Prior for the intercept

# Log-odds to p
plogis(-6) # ~= 0 
plogis(6) # ~= 1
x <- seq(-15, 15, by = 0.1)
# 95% Cri [-6, +6] => SD = 6/2 = 3
y <- dnorm(x, mean = 0, sd = 3)

ggplot() +
  aes(x, y) +
  geom_line(size = 1.5, colour = b4ss_colors[1]) +
  labs(x = "log-odds")

Prior for b

# Log-odds to odds
exp(-2) # ~= 0
exp(2) # ~= 7
x <- seq(-4, 4, by = 0.1)
# 95% Cri [-2, +2] => SD = 2/2 = 1
y <- dnorm(x, mean = 0, sd = 1)

ggplot() +
  aes(x, y) +
  geom_line(size = 1.5, colour = b4ss_colors[1]) +
  labs(x = "log-odds")

Prior for sd

inverseCDF(c(0.025, 0.975), phcauchy, sigma = 0.1)
x <- seq(0, 3, by = 0.01)
y <- dhcauchy(x, sigma = 0.1)

ggplot() +
  aes(x, y) +
  geom_line(size = 1, colour = b4ss_colors[2]) +
  labs(x = "log-odds")

Set priors

priors <- c(
  prior(normal(0, 3), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(cauchy(0, 0.1), class = sd),
  prior(lkj(2), class = cor)
)

Prior predictive checks

m1_priorpc <- brm(
  m1_bf,
  data = incomplete,
  prior = priors,
  sample_prior = "only",
  file = system.file("extdata/m1_priorpc.rds", package = "learnB4SS")
)
conditional_effects(m1_priorpc, effects = "correct_voicing:repetitiontype")

Let's try VERY strong priors for comparison

priors_strong <- c(
  prior(normal(2, 0.1), class = Intercept),
  prior(normal(1, 0.1), class = b),
  prior(cauchy(0, 0.1), class = sd),
  prior(lkj(2), class = cor)
)
m1_priorpc_strong <- brm(
  m1_bf,
  data = incomplete,
  prior = priors_strong,
  sample_prior = "only",
  cores = 4,
  file = system.file("extdata/m1_priorpc_strong.rds", package = "learnB4SS")
)
conditional_effects(m1_priorpc_strong, effects = "correct_voicing:repetitiontype")

Run model

m1_full <- brm(
  m1_bf,
  data = incomplete,
  prior = priors,
  cores = parallel::detectCores(), 
  chains = 4, 
  iter = 2000, 
  warmup = 1000, 
  file = system.file("extdata/m1_full.rds", package = "learnB4SS")
)

Model output and interpretation

Checks and diagnostics

# Sample data from the generative model and compare with real data
pp_check(m1_full, nsamples = 200)
# Generate trace and density plots for MCMC samples
# Only looking at pop-level parameters (no SDs or cor)
plot(m1_full, pars = "^b_")
# Pairs plots to help spot degeneracies (doesn't apply here but 
# useful if you have divergent transitions)
pairs(m1_full, pars = "^b_")
# Check Rhat and ESS (remove the rest of the output so it doesn't distract)
summary(m1_full)$fixed[, 5:7]

Prior sensitivity analysis

m1_fixed <- tidy(m1_full, effects = "fixed", conf.level = 0.95, fix.intercept = FALSE) %>%
  mutate(
    ci_width = abs(conf.low - conf.high)
  )
m1_fixed
m1_fixed %>%
  mutate(
    theta = c(0, 0, 0, 0),
    sigma_prior = c(3, 1, 1, 1),
    z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation
    s = 1 - (std.error^2 / sigma_prior^2)
  ) %>%
  ggplot(aes(s, z, label = term)) +
  geom_point() +
  geom_label_repel(arrow = arrow()) +
  xlim(0, 1) + ylim(0, 5)
labels <- tibble(
  x = c(0.25, 0.25, 0.6, 0.75),
  y = c(1.25, 3.75, 1.25, 3.75),
  labs = c("Poorly identified", "Prior/Posterior\nconflict", "Ideal", "Overfit")
)

m1_fixed %>%
  mutate(
    theta = c(0, 0, 0, 0),
    sigma_prior = c(3, 1, 1, 1),
    z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation
    s = 1 - (std.error^2 / sigma_prior^2)
  ) %>%
  ggplot(aes(s, z, label = term)) +
  annotate("rect", xmin = 0, ymin = 0, xmax = 0.5, ymax = 2.5, alpha = 0.5, fill = "#e66101") +
  annotate("rect", xmin = 0, ymin = 2.5, xmax = 0.5, ymax = 5, alpha = 0.5, fill = "#fdb863") +
  annotate("rect", xmin = 0.5, ymin = 0, xmax = 1, ymax = 2.5, alpha = 0.5, fill = "#b2abd2") +
  annotate("rect", xmin = 0.5, ymin = 2.5, xmax = 1, ymax = 5, alpha = 0.5, fill = "#5e3c99") +
  geom_label(data = labels, aes(x, y, label = labs), colour = "white", fill = "black") +
  geom_label_repel(fill = NA) +
  geom_point() +
  xlim(0, 1) + ylim(0, 5)

Visual effects

A quick and dirty way to assess the posterior predictions is using the conditional_effects() function. It is also useful because it plots the data into the original scale.

# quick and dirty plot on the original scale
plot(conditional_effects(m1_full), ask = FALSE)

We can also plot the posterior distributions of our population-level coefficients. This can be conveniently done with the bayesplot package.

posterior <- as.matrix(m1_full)

mcmc_areas(posterior,
           pars = c("b_Intercept", 
                    "b_correct_voicingvoiceless", 
                    "b_repetitiontyperepeated", 
                    "b_correct_voicingvoiceless:repetitiontyperepeated"),
           # arbitrary threshold for shading probability mass
           prob = 0.83) 

For more traditional plotting of the actual levels of the predictors, we can use the data_grid() and add_fitted_draws() functions. Here is an example.

post_data <- incomplete %>% 
  data_grid(correct_voicing, repetitiontype) %>%
  add_fitted_draws(m1_full, n = 4000, re_formula = NA)

post_plot <- post_data %>%
  # plot
  ggplot(aes(y = .value, x = correct_voicing, 
             fill = correct_voicing)) +
  # density plus CrIs
  stat_halfeye() +
  # reference line at chance level = 0.5
  geom_hline(yintercept = 0.5, lty = "dashed") +
  # split by repetition type
  facet_grid(~repetitiontype) +
  # color code
  scale_fill_manual(values = c("#8970FF", "#FFA70B")) + 
  #scale_color_manual(values = c("#8970FF", "#FFA70B")) +
  # rename y axis
  labs(y = "probability of correct",
       x = "underlying voicing")
post_plot

If you want to add the actual aggregated accuracy for each listener to the plot (for example), you can add that information on top. Makes for a very informative plot.

#aggregate
incomplete_agg <- incomplete %>% 
  group_by(listener, correct_voicing, repetitiontype) %>% 
  summarise(.value = mean(as.numeric(correct)), .groups = "drop")

post_plot +
  geom_point(data = incomplete_agg,
             aes(y = .value, 
                 x = correct_voicing, 
                 fill = correct_voicing),
             pch = 21, alpha = 0.5,
             position = position_nudge(x = -0.1))

Inference

Interpreting model output

# Describe posterior using fixef (to get simple, clean output... 
# focus on understanding each estimate in context and examining CrI's)
# Use in conjuction with levels_plot-2
fixef(m1_full)
# Focus on describing posterior
hdi_range <- bayestestR::hdi(m1_full, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
hdi_range

plot(hdi_range, show_intercept = T)


stefanocoretta/bayes.handson documentation built on Oct. 3, 2023, 11:03 p.m.