In jvparidon/lmerMultiMember: Multiple membership random effects

General idea

Consider a situation where we have multiple items of some sort which have associated effects, but we can't association observations with single items; instead, each observation is associated with a group of items. (Two examples which have come up are (1) authorship of papers and (2) hockey players on a team.) These are not quite identical to "multi-membership models" (in part because group membership is binary, i.e., not weighted), although similar techniques to those shown here could work for multi-membership models.

Simple example

Load packages (we don't really need anything beyond lmerMultiMember; the rest are for convenience/drawing pictures).

library(lmerMultiMember)
library(broom.mixed)
library(ggplot2)
theme_set(theme_bw())
library(Matrix)

Construct a simulated example: first, simulate the design (structure).

nm <- 20 # number of groups
nobs <- 1000
set.seed(101)
## choose items for observations
pres <- matrix(rbinom(nobs * nm, prob = 0.25, size = 1), nrow = nobs, ncol = nm)
dimnames(pres) <- list(NULL, LETTERS[seq(nm)])
pres[1:5, ]

image(Matrix(pres),
  ylim = c(1, 10), sub = "", ylab = "Observation",
  xlab = "Item",
  ## draw tick labels at top
  scales = list(at = 1:20, x = list(
    labels = colnames(pres),
    alternating = 2
  ))
)

We can look at how many times a particular number of groups occurs (which should match $Binomial(n=20, 0, p=0.25)$)

hist(rowSums(pres),
  main = "Multi-Group Membership",
  xlab = "Number of groups associated with an observation"
)

Since we chose which items/individuals to include for each observation randomly and independently (Bernoulli with probability 0.25), we have a highly variable number of individuals present for different observations (1-11). This would be realistic for some examples (authorship), unrealistic for others (hockey) ... I don't think it really makes much difference computationally (statistically, having some observations with a single member must make estimation more powerful ...) The 0/1 matrix (indicator variable for whether item $i$ is included in observation $j$) is convenient, and will turn out to be the form we need for inclusion in the model. It should be fairly straightforward to convert other forms (e.g. a list of sets of items associated with each observation) to this form ...

Now simulate the response variable.

b <- rnorm(nm, sd = 2) ## item-level effects
## n.b. get in trouble if we don't add residual error
## (theta is scaled relative to residual error)
y <- c(pres %*% b) + rnorm(nobs, sd = 1.5)

Fit and the results look OK (correct item and residual variance estimated):

m1 <- lmer(y ~ 1 + (1 | membership),
  data = data.frame(y = y, x = rep(1, nobs)),
  memberships = list(membership = t(pres))
)
summary(m1)

The conditional modes by item look good:

dd <- tidy(m1, effects = "ran_vals")
dd <- transform(dd, level = reorder(level, estimate))
truth <- data.frame(level = LETTERS[seq(nm)], estimate = b)
ggplot(dd, aes(x = level, y = estimate)) +
  geom_pointrange(aes(
    ymin = estimate - 2 * std.error,
    ymax = estimate + 2 * std.error
  )) +
  coord_flip() +
  geom_point(data = truth, colour = "red")

Zooming In: Equal, indepent contributions

Here, we look at a model where each observation is associated with precisely two groups to get a better feel.

set.seed(42)
nobs <- 1000
nm <- 10
dat <- data.frame(
  x = runif(nobs),
  z = runif(nobs),
  # TODO: double check that all(g1 != g2)
  g1 = sample(1:nm, nobs, TRUE),
  g2 = sample(1:nm, nobs, TRUE)
)

dat$grps <- paste(LETTERS[dat$g1], LETTERS[dat$g2], sep = ",")

# fixed effects
beta <- c(1, 2, 3)
# random effects
bint <- rnorm(nm, sd = 3.14)
# we could be clever about setting the model matrix here, but it's
# easier and clearer how we're constructing bits if we're inefficient

fe <- beta[1] + beta[2] * dat$x + beta[3] * dat$z

# note that the group contributions for each observation are assumed to be
# independent and equally weighted.
# TODO: add example of unequal weights
re <- bint[dat$g1] + bint[dat$g2]

dat$y <- fe + re + rnorm(nobs, sd = 1)
weights <- weights_from_vector(dat$grps)
head(dat)

image(t(weights),
  ylim = c(1, 10), sub = "", ylab = "Observation",
  xlab = "Item",
  ## draw tick labels at top
  scales = list(at = 1:20, x = list(
    labels = colnames(pres),
    alternating = 2
  ))
)

m2 <- lmer(y ~ 1 + x + z + (1 | g),
  data = dat,
  memberships = list(g = weights), REML = FALSE
)
summary(m2)

Not ideal, but not horrible -- it looks like there is an upward bias, but this is inline with the estimate for the group standard deviation being a bit off.

dd <- tidy(m2, effects = "ran_vals")
dd <- transform(dd, level = reorder(level, estimate))
truth <- data.frame(level = LETTERS[seq(nm)], estimate = bint)
ggplot(dd, aes(x = level, y = estimate)) +
  geom_pointrange(aes(
    ymin = estimate - 2 * std.error,
    ymax = estimate + 2 * std.error
  )) +
  coord_flip() +
  geom_point(data = truth, colour = "red")

A slightly more complex example

Here we build on our last example by adding a random slope.

set.seed(42)
nobs <- 10000
nm <- 26
dat <- data.frame(
  x = runif(nobs),
  z = runif(nobs),
  # TODO: double check that all(g1 != g2)
  g1 = sample(1:nm, nobs, TRUE),
  g2 = sample(1:nm, nobs, TRUE)
)

dat$grps <- paste(LETTERS[dat$g1], LETTERS[dat$g2], sep = ",")

# fixed effects
beta <- c(1, 2, 3)
# random effects
bint <- rnorm(nm, sd = 3.14)
bx <- rnorm(nm, sd = 0.2)

# we could be clever about setting the model matrix here, but it's
# easier and clearer how we're constructing bits if we're inefficient

fe <- beta[1] + beta[2] * dat$x + beta[3] * dat$z

# note that the group contributions for each observation are assumed to be
# independent and equally weighted.
# TODO: add example of unequal weights
re <- bint[dat$g1] + bint[dat$g2] + (bx[dat$g1] + bx[dat$g2]) * dat$x

dat$y <- fe + re + rnorm(nobs, sd = 1)
head(dat)

image(t(weights_from_vector(dat$grps)),
  ylim = c(1, 10), sub = "", ylab = "Observation",
  xlab = "Item",
  ## draw tick labels at top
  scales = list(at = 1:20, x = list(
    labels = colnames(pres),
    alternating = 2
  ))
)

m3 <- lmer(y ~ 1 + x + z + (1 + x | g),
  data = dat, REML = FALSE,
  memberships = list(g = weights_from_vector(dat$grps))
)
summary(m3)

dd <- tidy(m3, effects = "ran_vals")
dd <- transform(dd, level = reorder(level, estimate))
truth <- rbind(
  data.frame(level = LETTERS[seq(nm)], estimate = bint, term = "(Intercept)"),
  data.frame(level = LETTERS[seq(nm)], estimate = bx, term = "x")
)
ggplot(dd, aes(x = level, y = estimate)) +
  geom_pointrange(aes(
    ymin = estimate - 2 * std.error,
    ymax = estimate + 2 * std.error
  )) +
  coord_flip() +
  geom_point(data = truth, colour = "red") +
  facet_wrap(~term)

# note that we have a random slope for which we didn't introduce any variation
# in the data, which is the same as setting it to zero
m4 <- lmer(y ~ 1 + x + z + (1 + x + z | g),
  data = dat, REML = FALSE,
  memberships = list(g = weights_from_vector(dat$grps))
)
summary(m4)

# note that we have a random slope for which we didn't introduce any variation
# in the data, which is the same as setting it to zero
m4zc <- lmer(y ~ 1 + x + z + (1 + x + z || g),
  data = dat, REML = FALSE,
  memberships = list(g = weights_from_vector(dat$grps))
)
summary(m4zc)