Version: `r as.character(Sys.time())`
This document reproduces and extends the procedures originally invented by Doug Bates (currently part of a draft paper on GLMMs in `lme4`) to visualize the likelihood profiles of individual conditional modes (random-effects values for individual groups) in GLMMs. In particular, this visualization gives a useful way to understand the accuracy of the Laplace and Gauss-Hermite quadrature approximations.
library("lme4") library("lattice") library("ggplot2"); theme_set(theme_bw()) library("plyr") ## for reshaping library("abind") ## ditto library("reshape2") ## for melt() generic library("bbmle") ## for AICtab
## Build a closure that evaluates, for each level of the (single) grouping
## factor of a fitted glmer model, the unscaled conditional density of the
## random effects on the deviance scale, as a function of a standardized
## displacement z away from the conditional modes.
##
## m:    fitted object of class "glmerMod" with exactly one grouping factor
## grps: integer indices of the groups to evaluate (default: all groups)
##
## Returns a function dc(z): given a displacement z (a scalar, or -- for
## vector-valued random effects -- a short vector recycled across groups),
## it returns one deviance value per requested group: the group's summed
## deviance residuals plus its quadratic penalty u'u.
##
## NOTE: dc() mutates the model's response module (via rr$updateMu), so the
## model object is left in whatever state the last dc() call produced;
## callers restore it by evaluating dc(0) afterwards (see zeta() below).
zetaDevfun <- function(m,grps=NULL) {
    stopifnot (is(m, "glmerMod"),
               length(m@flist) == 1L) # single grouping factor
    nvar <- length(m@cnms[[1]])       # number of RE terms per group (1 or 2)
    ff <- getME(m,"flist")[[1]]       # the grouping factor itself
    if (is.null(grps)) grps <- seq(length(levels(ff))) ## DRY ...
    ngrps <- length(grps)
    rr <- m@resp ## extract response module
    u0 <- getME(m,"u") ## conditional modes
    L <- getME(m,"L")  ## sparse Cholesky factor of the RE precision
    ## sd <- 1/getME(pp,"L")@x ## filled elements of L matrix==diag for simple case
    ## for more general case need the following -- still efficient
    sd <- sqrt(diag(chol2inv(L))) ## conditional std. devs. of the modes
    ## fixed-effects contribution to linear predictor
    fc <- getME(m,"X") %*% getME(m,"beta")
    ZL <- t(getME(m,"Lambdat") %*% getME(m,"Zt"))
    ## evaluate the unscaled conditional density on the deviance scale
    dc <- function(z) {
        uu <- u0 + z * sd ## displace conditional modes
        ## should still work if z is a vector (by recycling, because u values
        ## applying to each group are stored adjacent to each other)
        rr$updateMu(fc + ZL %*% uu) ## update linear predictor
        ## per-group sums of deviance residuals
        drc <- unname(as.vector(tapply(rr$devResid(), ff, sum)))
        ## per-group penalty terms u'u (columns = groups)
        uuc <- colSums(matrix(uu * uu,nrow=nvar))
        (drc + uuc)[grps]
    }
    return(dc)
}

## Evaluate the conditional-mode deviance profiles ("zeta") of a glmer fit
## on a grid of standardized displacements, for scalar or 2-dimensional
## vector-valued random effects.
##
## m:      fitted "glmerMod" object (single grouping factor)
## zmin,
## zmax:   range of the default displacement grid
## npts:   number of grid points (default: 301 for scalar RE, 31 for vector
##         RE, filled in below)
## grps:   integer indices of groups to profile (default: all)
## center: if TRUE, subtract the deviance at the conditional modes (z = 0)
## zvals:  the displacement grid itself; defaults to a grid built from npts.
##         Because R evaluates default arguments lazily, this works even
##         though npts may still be NULL here -- npts is assigned in the
##         body before zvals is first used.
##
## Returns an object of class "laplaceDiag": a list with elements zvals and
## devarr (an n.z x n.id matrix for scalar RE, or an n.z x n.z x n.id array
## for vector RE).
zeta <- function(m, zmin=-3, zmax=3, npts=NULL, grps=NULL, center=TRUE,
                 zvals = seq(zmin, zmax, length.out = npts)) {
    ff <- getME(m,"flist")[[1]]
    if (is.null(grps)) grps <- seq(length(levels(ff))) ## DRY ...
    ngrps <- length(grps)
    nvar <- length(m@cnms[[1]])
    if (nvar>2) stop("can't handle vector RE with length >2")
    ## coarser default grid in the 2-D case (cost is quadratic in npts)
    if (is.null(npts)) npts <- if (nvar>1) 31L else 301L
    dc <- zetaDevfun(m,grps)
    if (nvar==1) { # scalar-valued random effects
        vv <- vapply(zvals,dc,numeric(ngrps), USE.NAMES=FALSE)
        vv <- t(vv) ## n.z * n.id
    } else { # vector-valued random effects
        nz <- length(zvals)
        ## evaluate on the full (z1, z2) grid, one column per grid point
        vv <- mapply(function(x,y) { dc(c(x,y)) },
                     rep(zvals,nz),rep(zvals,each=nz))
        ## result: nu*(nz^2) matrix; want nz*nz*nu array
        ## *with* each nu slice being a nz^2 matrix for one group
        ## I'm sure there's a clever way to do this with array/aperm,
        ## but I just couldn't figure it out. Instead,
        ## (1) take each row of vv and make into a matrix, return as list
        ##     of matrices
        ## (2) bind matrices into an array
        vv <- do.call(abind,c(alply(vv,1,matrix,nrow=nz),list(along=3)))
    }
    d0 <- dc(0) # restore the model to its incoming state
    devarr <- vv
    if (center) {
        ## sweep the z = 0 deviances out along the group dimension
        sweep.margin <- if (nvar==1) 2 else 3
        devarr <- sweep(devarr,sweep.margin,d0,"-")
    }
    ## computing deviance rather than signed sqrt, since we're not using it
    ## anyway and it's harder to generalize to >1 dimension ...
    rr <- list(zvals=zvals, devarr=devarr)
    ## signed square root
    ## array(ifelse(zvals < 0, -1, 1), c(npts, length(u0))))
    class(rr) <- "laplaceDiag"
    rr
}
## melt() method for "laplaceDiag" objects: converts the list form
## (a vector of z values plus an array of deviances) into a long-format
## data frame suitable for lattice/ggplot.
##
## data: a "laplaceDiag" object (list with elements zvals, devarr)
## ...:  ignored (present for compatibility with the melt() generic)
##
## Returns a data frame with columns id and dev, plus either zvals
## (scalar-RE case) or zval1/zval2 (vector-RE case).
melt.laplaceDiag <- function(data,...) {
    ## No require(reshape2) here: this method uses only base functions;
    ## the melt() generic it implements comes from reshape2, which the
    ## script attaches at the top.
    ## FIX: was data$zval, which matched the zvals element only through
    ## `$` partial matching -- fragile, so spell the name out.
    zvals <- data$zvals
    if (length(dim(data$devarr))==2) {
        ## scalar-RE case: devarr is n.z x n.id
        n.id <- ncol(data$devarr)
        n.z <- nrow(data$devarr)
        data.frame(id=gl(n.id,n.z), zvals, dev=c(data$devarr))
    } else {
        ## vector-RE case: devarr is n.z x n.z x n.id
        ## assume for now same z resolution for both dimensions
        n.z <- dim(data$devarr)[2]
        n.id <- dim(data$devarr)[3]
        data.frame(id=gl(n.id,n.z^2),
                   zval1=rep(zvals,n.z),      ## recycle
                   zval2=rep(zvals,each=n.z), ## recycle
                   dev=c(data$devarr))
    }
}

## Slice of the standard bivariate normal density along one axis,
## as a function of distance x from the origin
dnorm2d <- function(x) {
    dnorm(x)/sqrt(2*pi) ## == exp(-x^2/2)/(2*pi)
}

## Standard bivariate normal density at (z1, z2)
dnorm2d2 <- function(z1,z2) {
    exp(-(z1^2+z2^2)/2)/(2*pi)
}

## plot() method for "laplaceDiag" objects.
##
## x:      a "laplaceDiag" object
## scaled: if TRUE, divide the conditional density by the (uni- or
##         bivariate) standard normal density, so a Laplace-adequate
##         profile appears as a flat line/surface at 1
## id:     optional subset of group levels to display
## type, aspect, xlab, ylab, ...: passed to the lattice plot
##
## Draws an xyplot (scalar RE) or contourplot (vector RE) and invisibly
## returns the melted data frame used for plotting.
plot.laplaceDiag <- function(x,scaled=FALSE, id=NULL, type=c("g","l"),
                             aspect=0.6, xlab="z",ylab="density", ...) {
    nvar <- length(dim(x$devarr))-1
    mm <- melt(x)
    if (!is.null(id)) {
        mm <- mm[mm$id %in% as.character(id),]
    }
    ## convert deviances back to (possibly scaled) densities
    mm <- transform(mm,
                    y = if (nvar==1) {
                        if (!scaled) {
                            dnorm(sqrt(dev))
                        } else {
                            dnorm(sqrt(dev))/dnorm(zvals)
                        }
                    } else {
                        if (!scaled) {
                            dnorm2d(sqrt(dev))
                        } else {
                            dnorm2d(sqrt(dev))/dnorm2d2(zval1,zval2)
                        }
                    })
    if (nvar==1) {
        print(xyplot(y ~ zvals|id, data=mm, type=type,
                     aspect=aspect, xlab=xlab,ylab=ylab, ...,
                     panel=function(x,y,...){
                         ## reference curve: N(0,1) density (unscaled) or
                         ## the horizontal line y == 1 (scaled)
                         if (!scaled) {
                             panel.lines(x, dnorm(x), lty=2)
                         } else {
                             panel.abline(h=1, lty=2)
                         }
                         panel.xyplot(x,y,...)
                     }))
    } else {
        print(contourplot(y ~ zval1*zval2|id, data=mm, type=type,
                          aspect=aspect, labels=FALSE,
                          xlab=xlab,ylab=ylab,
                          scales=list(z=list(relation="free"))))
    }
    invisible(mm)
}
Replicate glmer paper Figs 2/3: these graphs show first the likelihood profiles for the individual conditional modes, then the likelihood profiles scaled by the standard Normal. If the likelihood profiles are proportional to $N(0,1)$, the Laplace approximation will be adequate; if the likelihood profiles are proportional to $f_n(x) \exp(-x^2)$, where $f_n$ is an $n^\textrm{th}$-order polynomial, then an AGHQ fit with $n-1$ quadrature points will be adequate.
The CBPP data are actually pretty boring (if you're looking for deviations):
## Fit the glmer-paper CBPP example: binomial GLMM with period fixed
## effects and a scalar random intercept per herd, then evaluate the
## conditional-mode deviance profiles for every herd.
## (Idiom fix: name the data/family arguments instead of relying on
## positional matching.)
m1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
            data = cbpp, family = binomial)
m1.z <- zeta(m1)
## likelihood profiles of the conditional modes, one panel per herd
plot(m1.z,layout=c(5,3))
The standardized plots make it easier to see the deviations:
## the same profiles scaled by the standard normal density; a flat
## profile at 1 indicates the Laplace approximation is adequate
plot(m1.z,scaled=TRUE,layout=c(5,3))
The toenail data are much more poorly behaved (presumably due to the smaller total numbers of observations per group?):
## Read the toenail data and fit a binomial GLMM with treatment-by-visit
## fixed effects and a random intercept per patient.
## (Idiom fix: name the data argument instead of passing it positionally.)
toenail <- read.csv("toenail.csv")
m2 <- glmer(outcome ~ treatment * visit + (1 | patient),
            data = toenail, family = binomial)
Since there are `r length(unique(toenail$patient))` groups, it will be awkward to plot them all -- we'll look at a subset (to do: sort groups by degree of non-Normality).
## Profile the conditional modes for the first 36 patients
m2.z <- zeta(m2, grps = 1:36)
## all 36 profiles at once
plot(m2.z)
## zoom in on the first nine patients, panels filled in reading order
plot(m2.z, id = 1:9, as.table = TRUE)
## scaled by the standard normal density
plot(m2.z, scaled = TRUE)
Try a random-slopes model:
## Random-slopes model: let the visit effect vary by patient.
## bobyqa is specified as the optimizer (presumably for convergence;
## kept as in the original).
## (Idiom fix: name the data argument instead of passing it positionally.)
m3 <- glmer(outcome ~ treatment * visit + (visit | patient),
            data = toenail, family = binomial,
            control = glmerControl(optimizer = "bobyqa"))
The data strongly support a random-slopes model:
## Compare the random-intercept (m2) and random-slopes (m3) fits by AIC
AICtab(m2, m3)
## Profile the conditional modes for the first 25 patients
m3.z <- zeta(m3, grps = 1:25)
This is a nice collection of mussels ...
## bivariate density contours of the conditional modes, one panel per patient
plot(m3.z)
Still trying to work out the analogue of Figure 3 (i.e., a version where we scale by the bivariate normal). I think this actually should work, but this example is very badly behaved in the corner ...
Just look at patient #1 to try to sort out what's going on here ...
## Perspective plots for patient #1 only.
## Since dnorm2d(x) == exp(-x^2/2)/(2*pi), multiplying by 2*pi turns
## dnorm2d(sqrt(dev)) into exp(-dev/2), i.e. the (relative) density on
## the original scale.
zz <- m3.z$zvals
mm <- 2*pi*dnorm2d(sqrt(m3.z$devarr[,,1]))       ## conditional density, patient 1
m0 <- 2*pi*dnorm2d(sqrt(outer(zz^2,zz^2,"+")))   ## reference bivariate normal
par(mfrow=c(2,2)) ## 2x2 panel layout
persp(zz,zz,mm,col="gray",main="conditional density")
persp(zz,zz,m0,col="lightblue",main="bivariate normal")
persp(zz,zz,mm/m0,col="pink",main="ratio")
persp(zz,zz,log(mm/m0),col="lightgreen",main="log ratio")
Does this really matter, or are we only in trouble if we put quadrature points there?
How do we actually put GHQ into practice based on this information? Can we come up with a sort of a score statistic for GHQ (i.e., what would the difference be in log-likelihood conditional on parameters for different numbers of quadrature points)?
Check that `GHrule` and `GQdk` are equivalent (they're based on the same underlying code, so they should be!)
## 5-point Gauss--Hermite rule from GHrule() (printed via the outer parens)
(gh5 <- GHrule(5))
## same rule from the d-dimensional generator GQdk(), with d = 1, 5 points
gh5B <- GQdk(1,5)
## reorder rows by the second column to match GHrule's ordering
## (NOTE(review): column layouts of GHrule/GQdk differ -- hence the [,2:1]
## swap and dropping GHrule's third column below; confirm against lme4 docs)
gh5B <- gh5B[order(gh5B[,2]),]
all.equal(gh5B[,2:1],unname(gh5[,-3]))
Can we do some simple calculations with `GHrule` and the `zeta` values to reconstruct the 1-D Gauss-Hermite results and convince ourselves we're getting the same results as from `glmerAGQ`?
## Cross-check zeta() against direct evaluation of the deviance function:
zd <- zetaDevfun(m1)
## evaluate zeta at the 5 GH nodes instead of a regular grid
z2A <- zeta(m1,zvals=gh5[,"z"])
## same thing by hand: one row per node, one column per group ...
z2 <- t(sapply(gh5[,"z"],zd))
## ... centered at the conditional modes (z = 0), as zeta() does by default
z2B <- sweep(z2,2,zd(0),"-")
all.equal(unname(z2A$devarr),z2B)

## Hand-rolled 1-D Gauss--Hermite deviance approximation: at each node,
## sum the centered deviance profile over groups, then take the weighted
## sum over nodes using the GH weights. theta, beta, and the conditional
## modes are all held fixed at the fitted values.
GH1d <- function(m,nAGQ=2) {
    gh <- GHrule(nAGQ)
    z <- zeta(m,zvals=gh[,"z"])
    sum(rowSums(z$devarr)*gh[,"w"])
}
## evaluate for 1..25 quadrature points ...
GH1vals <- sapply(1:25,GH1d,m=m1)
## ... and plot, dropping the trivial 1-point value
plot(-GH1vals[-1])
## Deviances of m1 refitted with nAGQ = 2..25 adaptive GH quadrature points.
## (Idiom fix: vapply with an explicit numeric(1) template instead of
## sapply, so the return type is guaranteed.)
m1GHvec <- vapply(2:25, function(q) {
    deviance(update(m1, nAGQ = q))
}, numeric(1))
## Same quadrature sequence, but without re-optimizing: build the nAGQ = q
## deviance function and evaluate it at m1's fitted (theta, beta) values.
m1GHvec2 <- vapply(2:25, function(q) {
    dd <- update(m1, devFunOnly = TRUE, nAGQ = q)
    dd(unlist(getME(m1, c("theta", "fixef"))))
}, numeric(1))
## horizontal axis labels, L-shaped plot frame
par(las=1,bty="l")
## compare successive differences of the two deviance sequences:
## refitted (m1GHvec2, held-parameter version) vs. hand-rolled (GH1vals)
matplot(cbind(diff(m1GHvec2),-diff(GH1vals[-1])), type="b",pch=16,lty=1)
These patterns don't match ... but we don't necessarily expect them to match, as the hand-rolled comparison keeps theta, beta, and the conditional modes fixed, while using the deviance function updates the conditional modes (I think ...)
Toenail data from here.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.