knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(tidyverse) theme_set(theme_minimal()) library(ggrepel) library(learnB4SS) library(extraDistr) library(HDInterval) library(tidybayes) library(bayesplot) library(modelr) library(broom.mixed) library(brms) data("incomplete") # Set custom b4ss colours b4ss_colors <- c(purple = "#8970FF", orange = "#FFA70B") scale_fill_b4ss <- function(...) { scale_fill_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]])) } scale_color_b4ss <- function(...) { scale_color_manual(..., values = c(b4ss_colors[[1]], b4ss_colors[[2]])) }
glimpse(incomplete)
m1_bf <- brmsformula( correct ~ correct_voicing * repetitiontype + # random slopes for interaction across listeners (correct_voicing * repetitiontype | listener) + # random slopes for interaction across speaker voices (correct_voicing * repetitiontype | speaker_voice) + # random slopes for interaction across minimal pairs (correct_voicing * repetitiontype | item_pair), family = bernoulli() )
$$correct_i \sim Bernoulli(p)$$
y <- dbern(c(0, 1), p = 0.75) ggplot() + aes(c("0", "1"), y) + geom_linerange(aes(ymin = 0, ymax = y), size = 2, colour = b4ss_colors) + geom_point(size = 5, colour = b4ss_colors) + ylim(0, 1) + labs(x = "outcome", y = "p")
# get_prior(m1_bf, data = incomplete) get_prior(m1_bf, data = incomplete) %>% as_tibble() %>% select(prior:group)
dots <- tibble( p = seq(0.1, 0.9, by = 0.1), log_odds = qlogis(p) ) tibble( p = seq(0, 1, by = 0.001), log_odds = qlogis(p) ) %>% ggplot(aes(p, log_odds)) + geom_hline(yintercept = 0, alpha = 0.5) + geom_vline(xintercept = 0.5, linetype = "dashed") + geom_line(size = 2, colour = b4ss_colors[2]) + geom_point(data = dots, size = 4, colour = b4ss_colors[1]) + scale_x_continuous(breaks = seq(0, 1, by = 0.1), minor_breaks = NULL) + scale_y_continuous(breaks = seq(-6, 6, by = 2), minor_breaks = NULL) + labs( title = "Correspondence between probabilities and log-odds", x = "probability", y = "log-odds" )
# log-odds = 0; get probability glue::glue("Log-odds 0 = p(", plogis(0), ")") # p = 0.5; get log-odds glue::glue("p(0.5) = log-odds ", qlogis(0.5)) cat("\n\n") # log-odds = 1.5; get probability glue::glue("Log-odds 1.5 = p(", round(plogis(1.5), 4), ")") # p = 0.5; get log-odds glue::glue("p(0.8176) = log-odds ", round(qlogis(0.8175745), 4))
# Log-odds to p plogis(-6) # ~= 0 plogis(6) # ~= 1
x <- seq(-15, 15, by = 0.1) # 95% Cri [-6, +6] => SD = 6/2 = 3 y <- dnorm(x, mean = 0, sd = 3) ggplot() + aes(x, y) + geom_line(size = 1.5, colour = b4ss_colors[1]) + labs(x = "log-odds")
# Log-odds to odds exp(-2) # ~= 0 exp(2) # ~= 7
x <- seq(-4, 4, by = 0.1) # 95% Cri [-2, +2] => SD = 2/2 = 1 y <- dnorm(x, mean = 0, sd = 1) ggplot() + aes(x, y) + geom_line(size = 1.5, colour = b4ss_colors[1]) + labs(x = "log-odds")
inverseCDF(c(0.025, 0.975), phcauchy, sigma = 0.1)
x <- seq(0, 3, by = 0.01) y <- dhcauchy(x, sigma = 0.1) ggplot() + aes(x, y) + geom_line(size = 1, colour = b4ss_colors[2]) + labs(x = "log-odds")
priors <- c( prior(normal(0, 3), class = Intercept), prior(normal(0, 1), class = b), prior(cauchy(0, 0.1), class = sd), prior(lkj(2), class = cor) )
m1_priorpc <- brm( m1_bf, data = incomplete, prior = priors, sample_prior = "only", file = system.file("extdata/m1_priorpc.rds", package = "learnB4SS") )
conditional_effects(m1_priorpc, effects = "correct_voicing:repetitiontype")
priors_strong <- c( prior(normal(2, 0.1), class = Intercept), prior(normal(1, 0.1), class = b), prior(cauchy(0, 0.1), class = sd), prior(lkj(2), class = cor) )
m1_priorpc_strong <- brm( m1_bf, data = incomplete, prior = priors_strong, sample_prior = "only", cores = 4, file = system.file("extdata/m1_priorpc_strong.rds", package = "learnB4SS") )
conditional_effects(m1_priorpc_strong, effects = "correct_voicing:repetitiontype")
m1_full <- brm( m1_bf, data = incomplete, prior = priors, cores = parallel::detectCores(), chains = 4, iter = 2000, warmup = 1000, file = system.file("extdata/m1_full.rds", package = "learnB4SS") )
# Sample data from the generative model and compare with real data pp_check(m1_full, nsamples = 200)
# Generate trace and density plots for MCMC samples # Only looking at pop-level parameters (no SDs or cor) plot(m1_full, pars = "^b_")
# Pairs plots to help spot degeneracies (doesn't apply here but # useful if you have divergent transitions) pairs(m1_full, pars = "^b_")
# Check Rhat and ESS (remove the rest of the output so it doesn't distract) summary(m1_full)$fixed[, 5:7]
m1_fixed <- tidy(m1_full, effects = "fixed", conf.level = 0.95, fix.intercept = FALSE) %>% mutate( ci_width = abs(conf.low - conf.high) ) m1_fixed
m1_fixed %>% mutate( theta = c(0, 0, 0, 0), sigma_prior = c(3, 1, 1, 1), z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation s = 1 - (std.error^2 / sigma_prior^2) ) %>% ggplot(aes(s, z, label = term)) + geom_point() + geom_label_repel(arrow = arrow()) + xlim(0, 1) + ylim(0, 5)
labels <- tibble( x = c(0.25, 0.25, 0.6, 0.75), y = c(1.25, 3.75, 1.25, 3.75), labs = c("Poorly identified", "Prior/Posterior\nconflict", "Ideal", "Overfit") ) m1_fixed %>% mutate( theta = c(0, 0, 0, 0), sigma_prior = c(3, 1, 1, 1), z = abs((estimate - theta) / std.error), # it's called here std.error but is the standard deviation s = 1 - (std.error^2 / sigma_prior^2) ) %>% ggplot(aes(s, z, label = term)) + annotate("rect", xmin = 0, ymin = 0, xmax = 0.5, ymax = 2.5, alpha = 0.5, fill = "#e66101") + annotate("rect", xmin = 0, ymin = 2.5, xmax = 0.5, ymax = 5, alpha = 0.5, fill = "#fdb863") + annotate("rect", xmin = 0.5, ymin = 0, xmax = 1, ymax = 2.5, alpha = 0.5, fill = "#b2abd2") + annotate("rect", xmin = 0.5, ymin = 2.5, xmax = 1, ymax = 5, alpha = 0.5, fill = "#5e3c99") + geom_label(data = labels, aes(x, y, label = labs), colour = "white", fill = "black") + geom_label_repel(fill = NA) + geom_point() + xlim(0, 1) + ylim(0, 5)
A quick and dirty way to assess the posterior predictions is using the conditional_effects() function. It is also useful because it plots the data into the original scale.
# quick and dirty plot on the original scale plot(conditional_effects(m1_full), ask = FALSE)
We can also plot the posterior distributions of our population-level coefficients. This can be conveniently done with the bayesplot package.
posterior <- as.matrix(m1_full) mcmc_areas(posterior, pars = c("b_Intercept", "b_correct_voicingvoiceless", "b_repetitiontyperepeated", "b_correct_voicingvoiceless:repetitiontyperepeated"), # arbitrary threshold for shading probability mass prob = 0.83)
For more traditional plotting of the actual levels of the predictors, we can use the data_grid() and add_fitted_draws() functions. Here is an example.
post_data <- incomplete %>% data_grid(correct_voicing, repetitiontype) %>% add_fitted_draws(m1_full, n = 4000, re_formula = NA) post_plot <- post_data %>% # plot ggplot(aes(y = .value, x = correct_voicing, fill = correct_voicing)) + # density plus CrIs stat_halfeye() + # reference line at chance level = 0.5 geom_hline(yintercept = 0.5, lty = "dashed") + # split by repetition type facet_grid(~repetitiontype) + # color code scale_fill_manual(values = c("#8970FF", "#FFA70B")) + #scale_color_manual(values = c("#8970FF", "#FFA70B")) + # rename y axis labs(y = "probability of correct", x = "underlying voicing") post_plot
If you want to add the actual aggregated accuracy for each listener to the plot (for example), you can add that information on top. Makes for a very informative plot.
#aggregate incomplete_agg <- incomplete %>% group_by(listener, correct_voicing, repetitiontype) %>% summarise(.value = mean(as.numeric(correct)), .groups = "drop") post_plot + geom_point(data = incomplete_agg, aes(y = .value, x = correct_voicing, fill = correct_voicing), pch = 21, alpha = 0.5, position = position_nudge(x = -0.1))
# Describe posterior using fixef (to get simple, clean output... # focus on understanding each estimate in context and examining CrI's) # Use in conjuction with levels_plot-2 fixef(m1_full)
# Focus on describing posterior hdi_range <- bayestestR::hdi(m1_full, ci = c(0.65, 0.70, 0.80, 0.89, 0.95)) hdi_range plot(hdi_range, show_intercept = T)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.