7.1. Building an interaction.

Here we load the rugged data.

library(rethinking)
data(rugged)
d <- rugged

And here we switch out rethinking for brms.

detach(package:rethinking, unload = T)
library(brms)
rm(rugged)

We'll continue to use tidyverse-style syntax to wrangle the data.

library(tidyverse)

# make log version of outcome
d <- 
  d %>%
  mutate(log_gdp = log(rgdppc_2000))

# extract countries with GDP data
dd <-
  d %>%
  filter(complete.cases(rgdppc_2000))

# split countries into Africa and not-Africa
d.A1 <-
  dd %>%
  filter(cont_africa == 1)

d.A0 <-
  dd %>%
  filter(cont_africa == 0)

Here are the first two univariable models, predicting log_gdp.

b7.1 <-
  brm(data = d.A1, family = gaussian,
      log_gdp ~ 1 + rugged,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4)

b7.2 <-
  brm(data = d.A0, family = gaussian,
      log_gdp ~ 1 + rugged,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4)

In the text, McElreath more or less dares us to figure out how to make Figure 7.2. Here's the brms-relevant data processing.

p7.1 <- posterior_samples(b7.1)
p7.2 <- posterior_samples(b7.2)

nd <- 
  tibble(rugged = seq(from = 0, 
                      to = 6.3, 
                      length.out = 30))

fit.7.1 <-
  fitted(b7.1, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd)

fit.7.2 <-
  fitted(b7.2, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd)

# Here we'll put both in a single data object, with fit.7.1 stacked atop fit.7.2
fit.both <-
  full_join(fit.7.1, fit.7.2) %>%
  mutate(cont_africa = rep(c("Africa", "not Africa"), each = 30))

For this chapter, we'll take our plot theme from the ggthemes package, which you can learn more about here.

# install.packages("ggthemes", dependencies = T)
library(ggthemes)

Here's the plot code for our version of Figure 7.2.

dd %>%
  mutate(cont_africa = ifelse(cont_africa == 1, "Africa", "not Africa")) %>%

  ggplot(aes(x = rugged)) +
  theme_pander() + 
  scale_colour_pander() +
  scale_fill_pander() +
  geom_ribbon(data = fit.both,
              aes(ymin = `2.5%ile`, 
                  ymax = `97.5%ile`,
                  fill = cont_africa),
              alpha = 1/4) +
  geom_line(data = fit.both,
              aes(y = Estimate, 
                  color = cont_africa)) +
  geom_point(aes(y = log_gdp, color = cont_africa),
             size = 2/3) +
  scale_x_continuous(expand = c(0, 0)) +
  labs(x = "Terrain Ruggedness Index",
       y = "log GDP from year 2000") +
  facet_wrap(~cont_africa) +
  theme(text = element_text(family = "Times"),
        legend.position = "none")

7.1.1. Adding a dummy variable doesn't work.

Here's our model with all the countries, but without our cont_africa dummy.

b7.3 <-
  brm(data = dd, family = gaussian,
      log_gdp ~ 1 + rugged,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      iter = 2000, warmup = 1000, cores = 4, chains = 4)

Now we'll add the dummy.

b7.4 <-
  brm(data = dd, family = gaussian,
      log_gdp ~ 1 + rugged + cont_africa,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      iter = 2000, warmup = 1000, cores = 4, chains = 4)

And here we can compare them with information criteria.

waic(b7.3, b7.4)

loo(b7.3, b7.4)

Happily, the WAIC and the LOO are in agreement. The model with the dummy, b7.4, fit the data much better. Here are the WAIC model weights.

model_weights(b7.3, b7.4,
              weights = "waic") %>% 
  round(digits = 3)

As in the text, no contest.

Here we'll plot the model for b7.4. First, we process the data.

p7.4 <- posterior_samples(b7.4)

nd <- 
  tibble(rugged = rep(seq(from = 0,
                          to = 6.3, 
                          length.out = 30),
                      times = 2),
         cont_africa = rep(0:1, each = 30))

fit.7.4 <-
  fitted(b7.4, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd) %>%
  mutate(cont_africa = ifelse(cont_africa == 1, "Africa", "not Africa"))

The plot.

dd %>%
  mutate(cont_africa = ifelse(cont_africa == 1, "Africa", "not Africa")) %>%

ggplot(aes(x = rugged)) +
  theme_pander() + 
  scale_colour_pander() +
  scale_fill_pander() +
  geom_ribbon(data = fit.7.4,
              aes(ymin = `2.5%ile`, 
                  ymax = `97.5%ile`,
                  fill = cont_africa,
                  group = cont_africa),
              alpha = 1/4) +
  geom_line(data = fit.7.4,
              aes(y = Estimate, 
                  color = cont_africa,
                  group = cont_africa)) +
  geom_point(aes(y = log_gdp, color = cont_africa),
             size = 2/3) +
  scale_x_continuous(expand = c(0, 0)) +
  labs(x = "Terrain Ruggedness Index",
       y = "log GDP from year 2000") +
  theme(text = element_text(family = "Times"),
        legend.position = c(.69, .94),
        legend.title = element_blank(),
        legend.direction = "horizontal")

7.1.2. Adding a linear interaction does work.

Yes, it sure does.

b7.5 <-
  brm(data = dd, family = gaussian,
      log_gdp ~ 1 + rugged*cont_africa,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4)

For kicks, we'll just use the LOO to compare the last three models.

loo(b7.3, b7.4, b7.5,
    cores = 4)  # We can speed things up using the `cores` argument

And we can weigh the models based on the LOO rather than the WAIC, too.

model_weights(b7.3, b7.4, b7.5,
              weights = "loo") %>% 
  round(digits = 3)
Overthinking: Conventional form of interaction.

Instead of the y ~ 1 + x1*x2 approach, which will work fine with brm(), you can use this syntax.

b7.5b <-
  brm(data = dd, family = gaussian,
      log_gdp ~ 1 + rugged + cont_africa + rugged:cont_africa,
      prior = c(set_prior("normal(8, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4)

Same model, same WAIC and LOO:

waic(b7.5, b7.5b)

loo(b7.5, b7.5b)

7.1.3. Plotting the interaction.

Here's our prep work for the figure.

p7.5 <- posterior_samples(b7.5)

nd <- 
  tibble(rugged = rep(seq(from = 0, 
                          to = 6.3, 
                          length.out = 30),
                      times = 2),
         cont_africa = rep(0:1, each = 30))

fit.7.5 <-
  fitted(b7.5, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd) %>%
  mutate(cont_africa = ifelse(cont_africa == 1, "Africa", "not Africa"))

And here's the code for our version of Figure 7.4.

dd %>%
  mutate(cont_africa = ifelse(cont_africa == 1, "Africa", "not Africa")) %>%

ggplot(aes(x = rugged)) +
  theme_pander() + 
  scale_colour_pander() +
  scale_fill_pander() +
  geom_ribbon(data = fit.7.5,
              aes(ymin = `2.5%ile`, 
                  ymax = `97.5%ile`,
                  fill = cont_africa,
                  group = cont_africa),
              alpha = 1/4) +
  geom_line(data = fit.7.5,
              aes(y = Estimate, 
                  color = cont_africa,
                  group = cont_africa)) +
  geom_point(aes(y = log_gdp, color = cont_africa),
             size = 2/3) +
  scale_x_continuous(expand = c(0, 0)) +
  labs(x = "Terrain Ruggedness Index",
       y = "log GDP from year 2000") +
  theme(text = element_text(family = "Times"),
        legend.position = "none") +
  facet_wrap(~cont_africa)

7.1.4.1. Parameters change meaning.

We have already extracted samples from b7.5, but it doesn't hurt to repeat the code.

p7.5 <-
  posterior_samples(b7.5) 

p7.5 %>%
  mutate(gamma_Africa = b_rugged + `b_rugged:cont_africa`,
         gamma_notAfrica = b_rugged) %>%
  select(gamma_Africa:gamma_notAfrica) %>%
  gather(key, value) %>%
  group_by(key) %>%
  summarise(mean = mean(value))

And here is our version of Figure 7.5.

p7.5 %>%
  mutate(gamma_Africa = b_rugged + `b_rugged:cont_africa`,
         gamma_notAfrica = b_rugged) %>%
  select(gamma_Africa:gamma_notAfrica) %>%
  gather(key, value) %>%

  ggplot(aes(x = value, group = key, color = key, fill = key)) +
  theme_pander() + 
  scale_colour_pander() +
  scale_fill_pander() +
  geom_density(alpha = 1/4) +
  scale_x_continuous(expression(gamma), expand = c(0, 0)) +
  scale_y_continuous(NULL, breaks = NULL) +
  labs(title = "Terraine Ruggedness slopes",
       subtitle = "Blue = African nations, Green = others") +
  theme(text = element_text(family = "Times"),
        legend.position = "none")

What proportion of these differences is below zero?

p7.5 %>%
  mutate(gamma_Africa = b_rugged + `b_rugged:cont_africa`,
         gamma_notAfrica = b_rugged,
         diff = gamma_Africa -gamma_notAfrica) %>%
  summarise(Proportion_of_the_difference_below_0 = sum(diff < 0)/length(diff))

Here is our version of McElreath's Figure 7.6.

nd <- 
  tibble(rugged = rep(range(dd$rugged),
                      times = 2),
         cont_africa = rep(0:1, each = 2),
         ox = rep(c(-0.05, 0.05), times = 2))

fit.7.5 <-
  fitted(b7.5, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd)

dd %>% 
  mutate(ox = ifelse(rugged > median(rugged), 0.05, -0.05),
         cont_africa = cont_africa + ox) %>%
  select(cont_africa, everything()) %>%

ggplot(aes(x = cont_africa)) +
  theme_pander() +
  scale_colour_pander() +
  scale_fill_pander() +
  geom_ribbon(data = fit.7.5,
              aes(ymin = `2.5%ile`, 
                  ymax = `97.5%ile`,
                  fill = factor(ox),
                  group = factor(ox)),
              alpha = 1/4) +
  geom_line(data = fit.7.5,
              aes(y = Estimate,
                  color = factor(ox),
                  group = factor(ox),
                  linetype = factor(ox))) +
  geom_point(aes(y = log_gdp, color = factor(ox)),
             alpha = 1/2, shape = 1) +
  scale_x_continuous(breaks = 0:1, 
                     labels = c("other", "Africa")) +
  coord_cartesian(xlim = c(-.2, 1.2)) +
  labs(x = "Continent",
       y = "log GDP from year 2000") +
  theme(text = element_text(family = "Times"),
        legend.position = "none")

7.3. Continuous interactions

7.3.1. The data.

Look at the tulips.

library(rethinking)
data(tulips)
d <- tulips
str(d)

Load brms.

detach(package:rethinking, unload = T)
library(brms)
rm(tulips)

Here we continue with McElreath's very flat priors for the multivariable and interaction models.

b7.6 <-
  brm(data = d, family = gaussian,
      blooms ~ 1 + water + shade,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 100)", class = "b"),
                set_prior("uniform(0, 100)", class = "sigma")),
      iter = 2000, warmup = 1000, cores = 4, chains = 4)

b7.7 <- update(b7.6, 
               formula = blooms ~ 1 + water + shade + water:shade)

Note our use of the update() function. It can come in real handy when all you want is to make a mild adjustment to an existing model. Not only is it compact, it often cuts out the compilation time altogether.

Much like in the text, these models yielded divergent transitions. Here, we'll try to combat them by following Stan's advice and "[increase] adapt_delta above 0.8." While we're at it, we'll put better priors on $\sigma$.

b7.6 <-
  brm(data = d, family = gaussian,
      blooms ~ 1 + water + shade,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 100)", class = "b"),
                set_prior("cauchy(0, 10)", class = "sigma")),
      iter = 2000, warmup = 1000, cores = 4, chains = 4,
      control = list(adapt_delta = 0.9))

b7.7 <- update(b7.6, 
               formula = blooms ~ 1 + water + shade + water:shade)

Increasing adapt_delta did the trick. Instead of coeftab(), we can also use posterior_summary(), which gets us most of the way there.

posterior_summary(b7.6) %>% round(digits = 2)
posterior_summary(b7.7) %>% round(digits = 2)

This is an example where HMC yielded point estimates notably different from MAP. However, look at the size of those posterior standard deviations (i.e., Est.Error)! The MAP estimates are well within a fraction of those SDs.

Anyway, let's look at WAIC.

waic(b7.6, b7.7)

7.3.3. Center and re-estimate.

Here's a tidyverse way to center the predictors.

d <-
  d %>%
  mutate(shade.c = shade - mean(shade),
         water.c = water - mean(water))

Refitting the models with our shiny new centered predictors.

b7.8 <-
  brm(data = d, family = gaussian,
      blooms ~ 1 + water.c + shade.c,
      prior = c(set_prior("normal(130, 100)", class = "Intercept"),
                set_prior("normal(0, 100)", class = "b"),
                set_prior("cauchy(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4,
      control = list(adapt_delta = 0.9))

b7.9 <- update(b7.8, 
               formula = blooms ~ 1 + water.c + shade.c + water.c:shade.c)
posterior_summary(b7.8) %>% round(digits = 2)
posterior_summary(b7.9) %>% round(digits = 2)

And okay fine, if you really want a coeftab()-like summary, here's a grotesque way to do it.

# First, we reformat b7.8
posterior_summary(b7.8) %>% 
  data.frame() %>% 
  rownames_to_column() %>% 
  # Now we bind the rows of b7.9 to it
  bind_rows(
    posterior_summary(b7.9) %>% 
      data.frame() %>%
      rownames_to_column()
  ) %>% 
  # Miscellaneous wrangling
  mutate(model = c(rep("b7.8", times = 5),
                   rep("b7.9", times = 6))) %>% 
  select(rowname, Estimate, model) %>% 
  filter(rowname != "lp__") %>% 
  spread(key = model, value = Estimate) %>% 
  rename(parameter = rowname) %>% 
  mutate_if(is.double, round, digits = 2)

Anyway, centering helped a lot. Now, not only do the results in the text match up better than those from Stan, but the Est.Error values are uniformly smaller.

7.3.3.2. Estimates changed less across models.

k <- fixef(b7.7)
k[1] + k[2]*2 + k[3]*2 + k[4]*2*2
k <- fixef(b7.9)
k[1] + k[2]*0 + k[3]*0 + k[4]*0*0
print(b7.9)

7.3.4. Plotting implied predictions.

Now we're ready for the bottom row of Figure 7.7. Here's our variation on McElreath's tryptych loop code, adjusted for brms and ggplot2.

# loop over values of water.c and plot predictions
shade.seq <- -1:1

for(w in -1:1){
  # defining the subset of the original data
  dt <- d[d$water.c == w, ]
  # defining our new data
  nd <- tibble(water.c = w, shade.c = shade.seq)
  # using our sampling skills, like before
  fit.7.9 <- fitted(b7.9, newdata = nd) %>%
    as_tibble() %>%
    bind_cols(nd)

  # specifying our custom plot
  fig <- ggplot() +
    theme_pander() + 
    geom_ribbon(data = fit.7.9, 
                aes(x = shade.c,
                    ymin = `2.5%ile`,
                    ymax = `97.5%ile`), 
                fill = "#CC79A7", alpha = 1/5) +
    geom_line(data = fit.7.9, aes(x = shade.c, y = Estimate), 
              color = "#CC79A7") +
    geom_point(data = dt, aes(x = shade.c, y = blooms),
               shape = 1, color = "#CC79A7") +
    coord_cartesian(xlim = range(d$shade.c), 
                    ylim = range(d$blooms)) +
    scale_x_continuous(breaks = c(-1, 0, 1)) +
    labs(x = "Shade (centered)", y = "Blooms", 
         title = paste("Water (centered) =", w)) +
    theme(text = element_text(family = "Times"))

  # plotting that joint
  plot(fig)
}

But we don't necessarily need a loop. We can achieve all of McElreath's Figure 7.7 with fitted(), a some data wrangling, and a little help from ggplot2::facet_grid().

# fitted() for model b7.8
fitted(b7.8) %>%
  as_tibble() %>%
  # adding fitted() for model b7.9
  bind_rows(
    fitted(b7.9) %>% 
      as_tibble()
  ) %>% 
  # We'll want to index the models
  mutate(fit  = rep(c("b7.8", "b7.9"), each = 27)) %>% 
  # Here we add the data, d
  bind_cols(bind_rows(d, d)) %>% 
  # These will come in handy for ggplot2::facet_grid()
  mutate(x_grid = paste("water.c =", water.c),
         y_grid = paste("model: ", fit)) %>% 

  ggplot(aes(x = shade.c)) +
  geom_ribbon(aes(ymin = `2.5%ile`,
                  ymax = `97.5%ile`), 
              fill = "#CC79A7", alpha = 1/5) +
  geom_line(aes(y = Estimate), 
            color = "#CC79A7") +
  geom_point(aes(y = blooms, group = x_grid), 
             shape = 1, color = "#CC79A7") +
  coord_cartesian(xlim = range(d$shade.c), 
                  ylim = range(d$blooms)) +
  scale_x_continuous(breaks = c(-1, 0, 1)) +
  labs(x = "Shade (centered)", y = "Blooms") +
  theme_pander() + 
  theme(text = element_text(family = "Times"),
        panel.background = element_rect(color = "black")) +
  facet_grid(y_grid ~ x_grid)
Bonus: marginal_effects()

The brms package includes the marginal_effects() function as a convenient way to look at simple effects and two-way interactions. Recall the simple univariable model, b7.3:

b7.3$formula

We can look at the regression line and its percentile-based intervals like so:

marginal_effects(b7.3)

If we nest marginal_effects() within plot() with a points = T argument, we can add the original data to the figure.

plot(marginal_effects(b7.3), points = T)

We can further customize the plot. For example, we can replace the intervals with a spaghetti plot. While we're at it, we can use point_args to adjust the geom_jitter() parameters.

plot(marginal_effects(b7.3,
                      spaghetti = T, nsamples = 200),
     points = T,
     point_args = c(alpha = 1/2, size = 1))

With multiple predictors, things get more complicated. Consider our multivariable, non-interaction model, b7.4.

b7.4$formula
marginal_effects(b7.4)

We got one plot for each predictor, controlling the other predictor at zero. Note how the plot for cont_africa treated it as a continuous variable. This is because the variable was saved as an integer in the original data set:

b7.4$data %>% 
  glimpse()

One way to fix that is to adjust the data set and refit the model.

d_factor <-
  b7.4$data %>% 
  mutate(cont_africa = factor(cont_africa))

b7.4_factor <- update(b7.4, newdata = d_factor)

Using the update() syntax often speeds up the re-fitting process.

marginal_effects(b7.4_factor)

Now our second marginal plot more clearly expresses the cont_africa predictor as categorical.

Things get more complicated with the interaction model, b7.5.

b7.5$formula
marginal_effects(b7.5)

The marginal_effects() function defaults to expressing interactions such that the first variable in the term--in this case, rugged--is on the x axis and the second variable in the term--cont_africa, treated as an integer--is depicted in three lines corresponding its mean and its mean +/- one standard deviation. This is great for continuous variables, but incoherent for categorical ones. The fix is, you guessed it, to refit the model after adjusting the data.

d_factor <-
  b7.5$data %>% 
  mutate(cont_africa = factor(cont_africa))

b7.5_factor <- update(b7.5, newdata = d_factor)

Just for kicks, we'll use probs = c(.25, .75) to return 50% intervals, rather than the conventional 95%.

marginal_effects(b7.5_factor,
                 probs = c(.25, .75))

With the effects argument, we can just return the interaction effect, which is where all the action's at. While we're at it, we'll use plot() to change some of the settings.

plot(marginal_effects(b7.5_factor,
                      effects = "rugged:cont_africa", 
                      spaghetti = T, nsamples = 150),
     points = T,
     point_args = c(alpha = 2/3, size = 1), mean = F)

Note, the ordering of the variables matters for the interaction term. Consider our interaction model for the tulips data.

b7.9$formula

The plot tells a slightly different story, depending on whether you specify effects = "shade.c:water.c" or effects = "water.c:shade.c".

plot(marginal_effects(b7.9, 
                      effects = "shade.c:water.c"),
     points = T)

plot(marginal_effects(b7.9, 
                      effects = "water.c:shade.c"),
     points = T)

One might want to evaluate the effects of the second term in the interaction--water.c, in this case--at values other than the mean and the mean +/- one standard deviation. When we reproduced the bottom row of Figure 7.7, we expressed the interaction based on values -1, 0, and 1 for water.c. We can do that, here, by using the int_conditions argument. It expects a list, so we'll put our desired water.c values in just that.

ic <- 
  list(water.c = c(-1, 0, 1))

plot(marginal_effects(b7.9, 
                      effects = "shade.c:water.c",
                      int_conditions = ic),
     points = T)
rm(d, dd, d.A1, d.A0, b7.1, b7.2, p7.1, p7.2, nd, fit.7.1, fit.7.2, fit.both, b7.3, b7.4, p7.4, fit.7.4, b7.5, b7.5b, p7.5, fit.7.5, b7.6, b7.7, b7.8, b7.9, k, shade.seq, dt, fit.7.9, fig, w, d_factor, b7.4_factor, b7.5_factor, ic)

Note. The analyses in this document were done with:

Reference

McElreath, R. (2016). Statistical rethinking: A Bayesian course with examples in R and Stan. Chapman & Hall/CRC Press.



joepowers16/rethinking documentation built on June 2, 2019, 6:52 p.m.