Laplace diagnostics

Version: `r as.character(Sys.time())`

This document reproduces and extends procedures originally developed by Doug Bates (currently part of a draft paper on GLMMs in lme4) to visualize the likelihood profiles of individual conditional modes (random-effects values for individual groups) in GLMMs. In particular, this visualization gives a useful way to understand the accuracy of the Laplace and Gauss-Hermite quadrature approximations.

library("lme4")
library("lattice")
library("ggplot2"); theme_set(theme_bw())
library("plyr")      ## for reshaping
library("abind")     ## ditto
library("reshape2")  ## for melt() generic
library("bbmle")     ## for AICtab
zetaDevfun <- function(m,grps=NULL) {    
    stopifnot (is(m, "glmerMod"),
               length(m@flist) == 1L)    # single grouping factor
    nvar <- length(m@cnms[[1]])
    ff <- getME(m,"flist")[[1]]
    if (is.null(grps)) grps <- seq(length(levels(ff)))  ## DRY ...
    ngrps <- length(grps)
    rr <- m@resp                         ## extract response module
    u0 <- getME(m,"u")                  ## conditional modes
    L <- getME(m,"L")
    ## sd <- 1/getME(pp,"L")@x
    ## filled elements of L matrix==diag for simple case
    ## for more general case need the following -- still efficient
    sd <- sqrt(diag(chol2inv(L)))
    ## fixed-effects contribution to linear predictor
    fc <- getME(m,"X") %*% getME(m,"beta") 
    ZL <- t(getME(m,"Lambdat") %*% getME(m,"Zt"))
    ## evaluate the unscaled conditional density on the deviance scale
    dc <- function(z) {
        uu <- u0 + z * sd    ## displace conditional modes
        ##  should still work if z is a vector (by recycling, because u values
        ##  applying to each group are stored adjacent to each other)
        rr$updateMu(fc + ZL %*% uu)     ## update linear predictor
        drc <- unname(as.vector(tapply(rr$devResid(), ff, sum)))
        uuc <- colSums(matrix(uu * uu,nrow=nvar))
        (drc + uuc)[grps]
    }
    return(dc)
}
zeta <- function(m, zmin=-3, zmax=3, npts=NULL,
                 grps=NULL, center=TRUE,
                 zvals = seq(zmin, zmax, length.out = npts)) {
    ff <- getME(m,"flist")[[1]]
    if (is.null(grps)) grps <- seq(length(levels(ff)))  ## DRY ...
    ngrps <- length(grps)
    nvar <- length(m@cnms[[1]])
    if (nvar>2) stop("can't handle vector RE with length >2")
    if (is.null(npts)) npts <- if (nvar>1) 31L else 301L
    dc <- zetaDevfun(m,grps)
    if (nvar==1) { # scalar-valued random effects
        vv <- vapply(zvals,dc,numeric(ngrps), USE.NAMES=FALSE)
        vv <- t(vv)  ## n.z * n.id
    } else { # vector-valued random effects
        nz <- length(zvals)
        vv <- mapply(function(x,y) { dc(c(x,y)) },
                         rep(zvals,nz),rep(zvals,each=nz))
        ## result: nu*(nz^2) matrix; want nz*nz*nu array
        ## *with* each nu slice being a nz^2 matrix for one group
        ## I'm sure there's a clever way to do this with array/aperm,
        ## but I just couldn't figure it out.  Instead,
        ## (1) take each row of vv and make into a matrix, return as list
        ##     of matrices
        ## (2) bind matrices into an array
        vv <- do.call(abind,c(alply(vv,1,matrix,nrow=nz),list(along=3)))
    }
    d0 <- dc(0) # restore the model to its incoming state
    devarr <- vv
    if (center) {
        sweep.margin <- if (nvar==1) 2 else 3 
        devarr <- sweep(devarr,sweep.margin,d0,"-")
    }
    ## computing deviance rather than signed sqrt, since we're not using it
    ## anyway and it's harder to generalize to >1 dimension ...
    rr <- list(zvals=zvals,
               devarr=devarr)
    ## signed square root
    ## array(ifelse(zvals < 0, -1, 1), c(npts, length(u0))))
    class(rr) <- "laplaceDiag"
    rr
}
## converts zeta objects from a list ($z$ value vector plus array of deviances) to a data frame ...
melt.laplaceDiag <- function(data,...) {
    require(reshape2)
    zvals <- data$zvals
    if (length(dim(data$devarr))==2) {
        n.id <- ncol(data$devarr)
        n.z <- nrow(data$devarr)
        data.frame(id=gl(n.id,n.z),
                   zvals,
                   dev=c(data$devarr))
    } else {
        ## assume for now same z resolution for both dimensions
        n.z <- dim(data$devarr)[2]
        n.id <- dim(data$devarr)[3]
        data.frame(id=gl(n.id,n.z^2),
                   zval1=rep(zvals,n.z),  ## recycle
                   zval2=rep(zvals,each=n.z), ## recycle
                   dev=c(data$devarr))
    }
}

## bivariate standard normal density as a function of the radius r:
## dnorm(r)/sqrt(2*pi) == exp(-r^2/2)/(2*pi)
dnorm2d <- function(r) {
    dnorm(r)/sqrt(2*pi)
}

## bivariate standard normal density as a function of the two coordinates
dnorm2d2 <- function(z1,z2) {
    exp(-(z1^2+z2^2)/2)/(2*pi)
}

plot.laplaceDiag <- function(x,scaled=FALSE,
                             id=NULL,
                             type=c("g","l"),
                             aspect=0.6,
                             xlab="z",ylab="density",
                             ...) {
    nvar <- length(dim(x$devarr))-1
    mm <- melt(x)
    if (!is.null(id)) {
        mm <- mm[mm$id %in% as.character(id),]
    }
    mm <- transform(mm,
                    y = if (nvar==1) {
                        if (!scaled) {
                            dnorm(sqrt(dev))
                        } else {
                            dnorm(sqrt(dev))/dnorm(zvals)
                        }
                    } else {
                        if (!scaled) {
                            dnorm2d(sqrt(dev))
                        } else {
                            dnorm2d(sqrt(dev))/dnorm2d2(zval1,zval2)
                        }
                    })
    if (nvar==1) {
        print(xyplot(y ~ zvals|id, data=mm,
               type=type, aspect=aspect,
               xlab=xlab,ylab=ylab,
               ...,
               panel=function(x,y,...){
                   if (!scaled) {
                       panel.lines(x, dnorm(x), lty=2)
                   } else {
                       panel.abline(h=1, lty=2)
                   }
                   panel.xyplot(x,y,...)
               }))
    } else {
        print(contourplot(y ~ zval1*zval2|id, data=mm,
                    type=type, aspect=aspect,
                    labels=FALSE,
                    xlab=xlab,ylab=ylab,
                    scales=list(z=list(relation="free"))))
    }
    invisible(mm)
}

Replicate glmer paper Figs. 2 and 3: these graphs show first the likelihood profiles for the individual conditional modes, then the likelihood profiles scaled by the standard Normal density. If the likelihood profiles are proportional to $N(0,1)$, the Laplace approximation will be adequate; if they are proportional to $f_n(x) \exp(-x^2)$, where $f_n$ is an $n^\textrm{th}$-order polynomial, then an AGHQ fit with $n-1$ quadrature points will be adequate.
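As a quick numeric illustration of why polynomial-times-Gaussian profiles need only a few quadrature points: an $n$-point Gauss-Hermite rule integrates polynomials of degree up to $2n-1$ exactly against the Gaussian kernel. A minimal sanity check (this assumes, per the lme4 documentation, that GHrule weights are normalized for the standard normal kernel):

gh3 <- GHrule(3)
## a 3-point rule is exact up to degree 2*3-1 = 5, so it recovers the
## 4th moment of the standard normal (== 3) exactly
sum(gh3[,"w"] * gh3[,"z"]^4)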

The CBPP data are actually pretty boring (if you're looking for deviations):

m1 <- glmer(cbind(incidence, size-incidence) ~ period + (1|herd),
                  cbpp, binomial)
m1.z <- zeta(m1)
plot(m1.z,layout=c(5,3))

The standardized plots make it easier to see the deviations:

plot(m1.z,scaled=TRUE,layout=c(5,3))

Toenail data example

The toenail data are much more poorly behaved (presumably due to the smaller total number of observations per group):

toenail <- read.csv("toenail.csv")
m2 <- glmer(outcome~treatment*visit+(1|patient),toenail,
            family=binomial)
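
To put a rough number on "observations per group": each cbpp row is a binomial count with denominator size, so the effective number of Bernoulli observations per herd is much larger than the handful of binary visits per patient (a quick check, assuming the column names used above):

## binary observations per patient in the toenail data ...
summary(as.vector(table(toenail$patient)))
## ... versus effective Bernoulli observations per herd in cbpp
summary(as.vector(tapply(cbpp$size, cbpp$herd, sum)))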

Since there are `r length(unique(toenail$patient))` groups, it will be awkward to plot them all -- we'll look at a subset (to do: sort groups by degree of non-Normality; a rough sketch follows below)

m2.z <- zeta(m2,grps=1:36)
plot(m2.z)
plot(m2.z,id=1:9,as.table=TRUE)
plot(m2.z,scaled=TRUE)
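
Returning to the to-do item: one possible way to rank groups by their departure from Normality is to compare each group's implied conditional density with the standard normal and take the worst-case absolute log-ratio. rankNonNormal below is a hypothetical helper (not part of lme4), assuming the scalar-RE layout of devarr (rows = z values, columns = groups):

rankNonNormal <- function(zobj) {
    dev  <- pmax(zobj$devarr, 0)   ## guard against tiny negative deviances
    dens <- dnorm(sqrt(dev))       ## implied conditional density
    ref  <- dnorm(zobj$zvals)      ## standard normal reference (recycled by column)
    dep  <- apply(abs(log(dens/ref)), 2, max)  ## worst-case log-ratio per group
    order(dep, decreasing=TRUE)    ## most non-Normal groups first
}
head(rankNonNormal(m2.z))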

Example with vector-valued RE

Try a random-slopes model:

m3 <- glmer(outcome~treatment*visit+(visit|patient),toenail,
            family=binomial,
            control=glmerControl(optimizer="bobyqa"))

The data strongly support a random-slopes model:

AICtab(m2,m3)
m3.z <- zeta(m3,grps=1:25)

This is a nice collection of mussels ...

plot(m3.z)

Still trying to work out the analogue of Figure 3 (i.e., a version where we scale by the bivariate normal). I think this actually should work, but this example is very badly behaved in the corner ...
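The plot method above does already implement a bivariate-scaled version (the dnorm2d2 branch), so we can at least look at it for a few patients, corners and all:

plot(m3.z, scaled=TRUE, id=1:9)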

Just look at patient #1 to try to sort out what's going on here ...

zz <- m3.z$zvals
## multiplying by 2*pi undoes the bivariate normalizing constant,
## putting both surfaces on the exp(-dev/2) scale
mm <- 2*pi*dnorm2d(sqrt(m3.z$devarr[,,1]))
m0 <- 2*pi*dnorm2d(sqrt(outer(zz^2,zz^2,"+")))
par(mfrow=c(2,2))
persp(zz,zz,mm,col="gray",main="conditional density")
persp(zz,zz,m0,col="lightblue",main="bivariate normal")
persp(zz,zz,mm/m0,col="pink",main="ratio")
persp(zz,zz,log(mm/m0),col="lightgreen",main="log ratio")

Does this really matter, or are we only in trouble if we put quadrature points there?
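As a sketch of how to check: overlay the 2-D quadrature nodes on the log-ratio surface for patient #1. This assumes (as verified for the 1-D case below) that GQdk returns weights in column 1 and node coordinates in the remaining columns:

par(mfrow=c(1,1))
## trouble only matters if nodes land where the ratio is badly behaved
gq25 <- GQdk(2, 5)
image(zz, zz, log(mm/m0), xlab="z1", ylab="z2",
      main="log ratio with 2-D quadrature nodes")
points(gq25[,2], gq25[,3], pch=16)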

Further thoughts

How do we actually put GHQ into practice based on this information? Can we come up with a sort of score statistic for GHQ (i.e., what would the difference in log-likelihood, conditional on the parameters, be for different numbers of quadrature points)?

Check that GHrule and GQdk are equivalent (they're based on the same underlying code, so they should be!)

(gh5 <- GHrule(5))
gh5B <- GQdk(1,5)
gh5B <- gh5B[order(gh5B[,2]),]
all.equal(gh5B[,2:1],unname(gh5[,-3]))

Can we do some simple calculations with GHrule and the zeta values to reconstruct the 1-D Gauss-Hermite results and convince ourselves we're getting the same results as from glmer's AGQ implementation?

zd <- zetaDevfun(m1)
z2A <- zeta(m1,zvals=gh5[,"z"])
z2 <- t(sapply(gh5[,"z"],zd))
z2B <- sweep(z2,2,zd(0),"-")
all.equal(unname(z2A$devarr),z2B)
GH1d <- function(m,nAGQ=2) {
    gh <- GHrule(nAGQ)
    ## evaluate the centered per-group deviances at the quadrature nodes
    z <- zeta(m,zvals=gh[,"z"])
    ## sum across groups at each node, then take the weighted sum over nodes
    sum(rowSums(z$devarr)*gh[,"w"])
}
GH1vals <- sapply(1:25,GH1d,m=m1)
plot(-GH1vals[-1])
m1GHvec <- sapply(2:25,
                  function(q) {
                      deviance(update(m1,nAGQ=q))
                  })
m1GHvec2 <- sapply(2:25,
                  function(q) {
                      dd <- update(m1,devFunOnly=TRUE,nAGQ=q)
                      dd(unlist(getME(m1,c("theta","fixef"))))
                  })
par(las=1,bty="l")
matplot(cbind(diff(m1GHvec2),-diff(GH1vals[-1])),
        type="b",pch=16,lty=1)

These patterns don't match ... but we don't necessarily expect them to match, as the hand-rolled comparison keeps theta, beta, and the conditional modes fixed, while using the deviance function updates the conditional modes (I think ...)
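
A refinement worth trying (a sketch, not a verified reconstruction): do the quadrature on the likelihood scale rather than as a weighted sum of deviances, using the ldnorm column of GHrule to undo the Gaussian-kernel normalization of the weights. Constant Jacobian terms (the per-group sd factors) are ignored here, so only differences across nAGQ values are comparable:

GH1dL <- function(m, nAGQ=2) {
    gh <- GHrule(nAGQ)
    z <- zeta(m, zvals=gh[,"z"], center=FALSE)
    ## per-group integral approximation: sum_k w_k exp(-dev_ik/2)/dnorm(z_k),
    ## then combine groups on the deviance (-2 log-likelihood) scale
    -2*sum(log(colSums(gh[,"w"]*exp(-z$devarr/2 - gh[,"ldnorm"]))))
}
GH1Lvec <- sapply(2:25, GH1dL, m=m1)
matplot(cbind(diff(m1GHvec2), diff(GH1Lvec)),
        type="b", pch=16, lty=1)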

Toenail data from here.


