inst/doc/covstruct.R

params <-
list(EVAL = FALSE)

## ----setup, include=FALSE, message=FALSE--------------------------------------
library(knitr)
library(glmmTMB)
library(MASS)    ## for mvrnorm()
library(TMB)     ## for tmbprofile()
library(mvabund) ## for spider data
## devtools::install_github("kaskr/adcomp/TMB")  ## get development version
knitr::opts_chunk$set(echo = TRUE, eval=if (exists("params")) params$EVAL else FALSE)
do_image <- exists("params") && params$EVAL
## want to *store* images within package
save_vig_dir <- file.path("inst","vignette_data")
pkg_dir <- "glmmTMB"
## guess where we are ...
if (grepl("/vignettes$",getwd())) {  ## in vignettes dir
  save_vig_dir <- file.path("../",save_vig_dir)
} else if (grepl(paste0("/",pkg_dir,"$"),getwd())) { ## in repo head
  save_vig_dir <- file.path(pkg_dir,save_vig_dir)
}
## want to *retrieve* images from system files
use_vig_dir <- system.file("vignette_data",package="glmmTMB")
mkfig <- function(expr,fn) {
  png(normalizePath(file.path(save_vig_dir,fn)))
  eval(substitute(expr))
  invisible(dev.off())
}
usefig <- function(fn) {
  knitr::include_graphics(file.path(use_vig_dir,fn))
}
## turned off caching for now: got error in chunk 'fit.us.2'
## Error in retape() : 
##   Error when reading the variable: 'thetaf'. Please check data and parameters.
## In addition: Warning message:
## In retape() : Expected object. Got NULL.
set.seed(1)
## run this in interactive session if you actually want to evaluate chunks ...
## Sys.setenv(NOT_CRAN="true")

## ----covstruct-table, echo  = FALSE, eval = TRUE------------------------------
ctab <- read.delim(sep = "#", comment = "",
                   header = TRUE,
                   check.names = FALSE,
           text = "
 Covariance                       # Notation      # no. parameters # Requirement  # Parameters
 Unstructured (general positive definite)      # `us`          #  $n(n+1)/2$     # # See [Mappings]
 Heterogeneous Toeplitz           # `toep`        #  $2n-1$         #     # log-SDs ($\\theta_1-\\theta_n$); correlations $\\rho_k = \\theta_{n+k}/\\sqrt{1+\\theta_{n+k}^2}$, $k = \\textrm{abs}(i-j)$
 Het. compound symmetry  # `cs`          #  $n+1$    #      # log-SDs ($\\theta_1-\\theta_n$); correlation $\\rho = \\theta_{n+1}/\\sqrt{1+\\theta_{n+1}^2}$
 Het. diagonal           # `diag`        #  $n$            #  # log-SDs
 AR(1)                            # `ar1`         #  $2$            # Unit spaced levels # log-SD; $\\rho = \\left(\\theta_2/\\sqrt{1+\\theta_2^2}\\right)^{d_{ij}}$
 Ornstein-Uhlenbeck               # `ou`          #  $2$            # Coordinates  # log-SD; log-OU rate ($\\rho = \\exp(-\\exp(\\theta_2) d_{ij})$)
 Spatial exponential              # `exp`         #  $2$            # Coordinates # log-SD; log-scale ($\\rho = \\exp(-\\exp(-\\theta_2) d_{ij})$)
 Spatial Gaussian                 # `gau`         #  $2$            # Coordinates # log-SD; log-scale ($\\rho = \\exp(-\\exp(-2\\theta_2) d_{ij}^2)$)
 Spatial Matérn                   # `mat`         #  $3$            # Coordinates # log-SD, log-range, log-shape (power)
 Reduced rank                     # `rr`          #  $nd-d(d-1)/2$  # rank (d)    #
"
)
knitr::kable(ctab)
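
## A minimal, hypothetical illustration (an addition, not from the vignette)
## of the correlation mapping used in the table above: an unconstrained
## parameter theta is mapped to a correlation via theta/sqrt(1 + theta^2),
## which always lies in (-1, 1).
theta <- c(-5, -1, 0, 1, 5)          ## arbitrary unconstrained values
rho <- theta / sqrt(1 + theta^2)     ## mapped correlations, all in (-1, 1)
rbind(theta, rho)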

## ----sim1, eval=TRUE----------------------------------------------------------
n <- 25                                              ## Number of time points
x <- MASS::mvrnorm(mu = rep(0,n),
             Sigma = .7 ^ as.matrix(dist(1:n)) )    ## Simulate the process using the MASS package
y <- x + rnorm(n)                                   ## Add measurement noise
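
## Illustrative check (an addition, not part of the original script): the
## construction `phi ^ as.matrix(dist(1:n))` yields an AR(1) covariance
## matrix whose entries decay as phi^|i-j|; shown here for a small n.
Sigma_small <- 0.7 ^ as.matrix(dist(1:4))
round(Sigma_small, 3)   ## 1 on the diagonal; 0.7, 0.49, 0.343 off the diagonal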

## ----simtimes-----------------------------------------------------------------
#  times <- factor(1:n, levels=1:n)
#  head(levels(times))

## ----simgroup-----------------------------------------------------------------
#  group <- factor(rep(1,n))

## ----simcomb------------------------------------------------------------------
#  dat0 <- data.frame(y, times, group)

## ----fitar1, eval=FALSE-------------------------------------------------------
#  glmmTMB(y ~ ar1(times + 0 | group), data=dat0)

## ----ar0fit,echo=FALSE--------------------------------------------------------
#  glmmTMB(y ~ ar1(times + 0 | group), data=dat0)

## ----simGroup-----------------------------------------------------------------
#  simGroup <- function(g, n=6, phi=0.7) {
#      x <- MASS::mvrnorm(mu = rep(0,n),
#               Sigma = phi ^ as.matrix(dist(1:n)) )   ## Simulate the process
#      y <- x + rnorm(n)                               ## Add measurement noise
#      times <- factor(1:n)
#      group <- factor(rep(g,n))
#      data.frame(y, times, group)
#  }
#  simGroup(1)

## ----simGroup2----------------------------------------------------------------
#  dat1 <- do.call("rbind", lapply(1:1000, simGroup) )

## ----fit.ar1------------------------------------------------------------------
#  (fit.ar1 <- glmmTMB(y ~ ar1(times + 0 | group), data=dat1))
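
## Hedged follow-up (not in the original script): VarCorr() shows the
## estimated standard deviation and AR(1) correlation for this fit; with 1000
## simulated groups the correlation should be close to the simulated value 0.7.
#  VarCorr(fit.ar1)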

## ----fit.us-------------------------------------------------------------------
#  fit.us <- glmmTMB(y ~ us(times + 0 | group), data=dat1, dispformula=~0)
#  fit.us$sdr$pdHess ## Converged ?

## ----fit.us.vc----------------------------------------------------------------
#  VarCorr(fit.us)

## ----fit.toep-----------------------------------------------------------------
#  fit.toep <- glmmTMB(y ~ toep(times + 0 | group), data=dat1,
#                      dispformula=~0)
#  fit.toep$sdr$pdHess ## Converged ?

## ----fit.toep.vc--------------------------------------------------------------
#  (vc.toep <- VarCorr(fit.toep))

## ----fit.toep.vc.diag---------------------------------------------------------
#  vc1 <- vc.toep$cond[[1]] ## first term of var-cov for RE of conditional model
#  summary(diag(vc1))
#  summary(vc1[row(vc1)!=col(vc1)])

## ----fit.toep.reml------------------------------------------------------------
#  fit.toep.reml <- update(fit.toep, REML=TRUE)
#  vc1R <- VarCorr(fit.toep.reml)$cond[[1]]
#  summary(diag(vc1R))
#  summary(vc1R[row(vc1R)!=col(vc1R)])

## ----fit.cs-------------------------------------------------------------------
#  fit.cs <- glmmTMB(y ~ cs(times + 0 | group), data=dat1, dispformula=~0)
#  fit.cs$sdr$pdHess ## Converged ?

## ----fit.cs.vc----------------------------------------------------------------
#  VarCorr(fit.cs)

## ----anova1-------------------------------------------------------------------
#  anova(fit.ar1, fit.toep, fit.us)

## ----anova2-------------------------------------------------------------------
#  anova(fit.cs, fit.toep)

## ----sample2------------------------------------------------------------------
#  x <- sample(1:2, 10, replace=TRUE)
#  y <- sample(1:2, 10, replace=TRUE)

## ----numFactor----------------------------------------------------------------
#  (pos <- numFactor(x,y))

## ----parseNumLevels-----------------------------------------------------------
#  parseNumLevels(levels(pos))
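
## Hedged sketch (an addition): the coordinates recovered by parseNumLevels()
## are what the spatial structures use to build the distance matrix d_ij that
## enters the correlation functions listed in the table above; for example, an
## exponential correlation with unit scale would be exp(-d_ij).
#  coords <- parseNumLevels(levels(pos))   ## numeric coordinates, one row per level
#  d_ij <- as.matrix(dist(coords))         ## pairwise Euclidean distances
#  round(exp(-d_ij), 2)                    ## exponential correlation, scale fixed at 1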

## ----numFactor2---------------------------------------------------------------
#  dat1$times <- numFactor(dat1$times)
#  levels(dat1$times)

## ----fit.ou-------------------------------------------------------------------
#  fit.ou <- glmmTMB(y ~ ou(times + 0 | group), data=dat1)
#  fit.ou$sdr$pdHess ## Converged ?

## ----fit.ou.vc----------------------------------------------------------------
#  VarCorr(fit.ou)

## ----fit.mat------------------------------------------------------------------
#  fit.mat <- glmmTMB(y ~ mat(times + 0 | group), data=dat1, dispformula=~0)
#  fit.mat$sdr$pdHess ## Converged ?

## ----fit.mat.vc---------------------------------------------------------------
#  VarCorr(fit.mat)

## ----fit.gau------------------------------------------------------------------
#  fit.gau <- glmmTMB(y ~ gau(times + 0 | group), data=dat1, dispformula=~0)
#  fit.gau$sdr$pdHess ## Converged ?

## ----fit.gau.vc---------------------------------------------------------------
#  VarCorr(fit.gau)

## ----fit.exp------------------------------------------------------------------
#  fit.exp <- glmmTMB(y ~ exp(times + 0 | group), data=dat1)
#  fit.exp$sdr$pdHess ## Converged ?

## ----fit.exp.vc---------------------------------------------------------------
#  VarCorr(fit.exp)

## ----spatial_data-------------------------------------------------------------
#  d <- data.frame(z = as.vector(volcano),
#                  x = as.vector(row(volcano)),
#                  y = as.vector(col(volcano)))

## ----spatial_sub_sample-------------------------------------------------------
#  set.seed(1)
#  d$z <- d$z + rnorm(length(volcano), sd=15)
#  d <- d[sample(nrow(d), 100), ]

## ----volcano_data_image_fake,eval=FALSE---------------------------------------
#  volcano.data <- array(NA, dim(volcano))
#  volcano.data[cbind(d$x, d$y)] <- d$z
#  image(volcano.data, main="Spatial data", useRaster=TRUE)

## ----volcano_data_image_real,echo=FALSE---------------------------------------
#  if (do_image) {
#    volcano.data <- array(NA, dim(volcano))
#    volcano.data[cbind(d$x, d$y)] <- d$z
#    mkfig(image(volcano.data, main="Spatial data"),"volcano_data.png")
#  }

## ----volcano_image,eval=TRUE,echo=FALSE---------------------------------------
usefig("volcano_data.png")

## ----spatial_add_pos_and_group------------------------------------------------
#  d$pos <- numFactor(d$x, d$y)
#  d$group <- factor(rep(1, nrow(d)))

## ----fit_spatial_model, cache=TRUE--------------------------------------------
#  f <- glmmTMB(z ~ 1 + exp(pos + 0 | group), data=d)

## ----confint_sigma------------------------------------------------------------
#  confint(f, "sigma")

## ----newdata_corner-----------------------------------------------------------
#  newdata <- data.frame( pos=numFactor(expand.grid(x=1:3,y=1:3)) )
#  newdata$group <- factor(rep(1, nrow(newdata)))
#  newdata

## ----predict_corner-----------------------------------------------------------
#  predict(f, newdata, type="response", allow.new.levels=TRUE)

## ----predict_column-----------------------------------------------------------
#  predict_col <- function(i) {
#      newdata <- data.frame( pos = numFactor(expand.grid(1:87,i)))
#      newdata$group <- factor(rep(1,nrow(newdata)))
#      predict(f, newdata=newdata, type="response", allow.new.levels=TRUE)
#  }

## ----predict_all--------------------------------------------------------------
#  pred <- sapply(1:61, predict_col)

## ----image_results_fake,eval=FALSE--------------------------------------------
#  image(pred, main="Reconstruction")

## ----image_results_real,echo=FALSE--------------------------------------------
#  if (do_image) {
#    mkfig(image(pred, main="Reconstruction", useRaster=TRUE),
#          "volcano_results.png")
#  }

## ----results_image,eval=TRUE,echo=FALSE---------------------------------------
usefig("volcano_results.png")

## ----fit.us.2-----------------------------------------------------------------
#  vv0 <- VarCorr(fit.us)
#  vv1 <- vv0$cond$group          ## extract 'naked' V-C matrix
#  n <- nrow(vv1)
#  rpars <- getME(fit.us,"theta") ## extract V-C parameters
#  ## first n parameters are log-std devs:
#  all.equal(unname(diag(vv1)),exp(rpars[1:n])^2)
#  ## now try correlation parameters:
#  cpars <- rpars[-(1:n)]
#  length(cpars)==n*(n-1)/2      ## the expected number
#  cc <- diag(n)
#  cc[upper.tri(cc)] <- cpars
#  L <- crossprod(cc)
#  D <- diag(1/sqrt(diag(L)))
#  round(D %*% L %*% D,3)
#  round(unname(attr(vv1,"correlation")),3)
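
## Self-contained toy version of the same construction (assumed values, not
## from the fit): any unconstrained vector of length n*(n-1)/2 maps to a valid
## correlation matrix via the upper-triangular/crossprod construction above.
#  n_toy <- 3
#  cpars_toy <- c(0.5, -1, 2)              ## arbitrary values, length n*(n-1)/2
#  cc_toy <- diag(n_toy)
#  cc_toy[upper.tri(cc_toy)] <- cpars_toy
#  L_toy <- crossprod(cc_toy)              ## positive definite by construction
#  D_toy <- diag(1/sqrt(diag(L_toy)))
#  round(D_toy %*% L_toy %*% D_toy, 3)     ## unit diagonal, off-diagonals in (-1, 1)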

## ----other_check--------------------------------------------------------------
#  all.equal(c(cov2cor(vv1)),c(fit.us$obj$env$report(fit.us$fit$parfull)$corr[[1]]))

## ----fit.us.profile,cache=TRUE------------------------------------------------
#  ## want $par, not $parfull: do NOT include conditional modes/'b' parameters
#  ppar <- fit.us$fit$par
#  length(ppar)
#  range(which(names(ppar)=="theta")) ## the last n*(n+1)/2 parameters
#  ## only 1 fixed effect parameter
#  tt <- tmbprofile(fit.us$obj,2,trace=FALSE)

## ----fit.us.profile.plot_fake,eval=FALSE--------------------------------------
#  confint(tt)
#  plot(tt)

## ----fit.us.profile.plot_real,echo=FALSE--------------------------------------
#  mkfig(plot(tt),"us_profile_plot.png")

## ----us_profile_image,eval=TRUE,echo=FALSE------------------------------------
usefig("us_profile_plot.png")

## ----fit.cs.profile,cache=TRUE------------------------------------------------
#  ppar <- fit.cs$fit$par
#  length(ppar)
#  range(which(names(ppar)=="theta")) ## the last n+1 parameters
#  ## only 1 fixed effect parameter, 1 dispersion parameter
#  tt2 <- tmbprofile(fit.cs$obj,3,trace=FALSE)

## ----fit.cs.profile.plot_fake,eval=FALSE--------------------------------------
#  plot(tt2)

## ----fit.cs.profile.plot_real,echo=FALSE--------------------------------------
#  mkfig(plot(tt2),"cs_profile_plot.png")

## ----fit.cs.profile_image,echo=FALSE,eval=TRUE--------------------------------
usefig("cs_profile_plot.png")

## ----rr_ex, eval = FALSE------------------------------------------------------
#  if (require(mvabund)) {
#      data(spider)
#      ## organize data into long format
#      sppTot <- sort(colSums(spider$abund), decreasing = TRUE)
#      tmp <- cbind(spider$abund, spider$x)
#      tmp$id <- 1:nrow(tmp)
#      spiderDat <- reshape(tmp,
#                           idvar = "id",
#                           timevar = "Species",
#                           times =  colnames(spider$abund),
#                           varying = list(colnames(spider$abund)),
#                           v.names = "abund",
#                           direction = "long")
#      ## fit rank-reduced models with varying dimension
#      fit_list <- lapply(2:10,
#                         function(d) {
#                             fit.rr <- glmmTMB(abund ~ Species + rr(Species + 0|id, d = d),
#                                               data = spiderDat)
#                         })
#      ## compare fits via AIC
#      aic_vec <- sapply(fit_list, AIC)
#      aic_vec - min(aic_vec, na.rm = TRUE)
#  }
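
## Small hedged illustration (toy calculation, not from the vignette): the
## number of covariance parameters for the reduced-rank structure follows the
## nd - d(d-1)/2 formula in the table above; here assuming the 12 spider
## species as n and the ranks fitted in the loop as d.
#  rr_npar <- function(n, d) n * d - d * (d - 1) / 2
#  sapply(2:10, rr_npar, n = 12)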

## ----mm_int, eval = TRUE------------------------------------------------------
model.matrix(~f, data.frame(f=factor(c("c", "s", "v"))))

## ----mm_noint, eval = TRUE----------------------------------------------------
model.matrix(~0+f, data.frame(f=factor(c("c", "s", "v"))))
