inst/doc/mixed-effects-additive-models.R

## ----setup, echo=FALSE, message=FALSE, results="hide"-------------------------
knitr::opts_chunk$set(size = "small", prompt = TRUE, comment = NA,
                      out.width=".9\\linewidth")
knitr::knit_hooks$set(
  document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}',
                              x, fixed = TRUE)}
)
oldpar <- par(no.readonly = TRUE) ## NOTE: for setting back at the end
oldopt <- options()
options(prompt = "R> ", continue = "+  ")
options(width = 80, digits = 3)

## Dependencies
library("tramME")
library("survival")
library("mgcv")
library("glmmTMB")
library("xtable")
library("gamm4")

## ----tramME-exmpl1, eval=FALSE, echo=TRUE-------------------------------------
#  ## tramME is available from CRAN:
#  ## install.packages("tramME")
#  library("tramME")
#  mc1 <- CoxphME(                            ### conditional proportional hazards
#      Time ~                                 ### response time intervals
#             Insects +                       ### fixed effects
#             Habitat +
#             Landscape +
#             s(Temperature, k = 20) +        ### non-linear terms, as in mgcv
#             s(Elevation100, k = 20) +
#             (1 | PlotID),                   ### random intercept, as in lme4
#      data = carrion,                        ### data
#      log_first = TRUE,                      ### log(Time) before modeling
#      order = 6                              ### order of Bernstein
#  )

## ----load-carrion, include=FALSE----------------------------------------------
carrion <- read.csv("carrion.csv")
carrion$Time <- with(carrion, Surv(time1, time2, type = "interval2"))
carrion$Time_rc <- with(carrion,
  Surv(ifelse(is.finite(time2), time2, time1), event = is.finite(time2)))
carrion$Insects <- factor(carrion$Insects, labels = c("no", "yes"))
carrion$Habitat <- factor(carrion$Habitat)
carrion$Habitat <- relevel(carrion$Habitat, ref = "forest")
carrion$Landscape <- factor(carrion$Landscape)
carrion$Landscape <- relevel(carrion$Landscape, ref = "seminatural")
carrion$PlotID <- factor(carrion$PlotID)

## ----fig-carrion-cens, echo=FALSE, fig.width=8, fig.height=4, out.width="0.9\\textwidth"----
par(mfrow = c(1, 2))
## -- Plot 1
par(cex = 0.9, mar = c(5, 4, 2, 1))
idx <- 140:155
dpl <- carrion$Time[idx]
plot(0, type = "n", ylim = range(idx) + c(-0.5, 0.5), xlim = c(0, 120),
     ylab = "ID", xlab = "Follow-up time (days)", yaxt = "n")
abline(h = idx, v = seq(0, 120, by = 20), col = "lightgrey", lty = 3)
axis(2, at = idx, las = 2)
ic <- dpl[, 3] > 0
segments(y0 = idx[ic], x0 = dpl[ic, 1], x1 = dpl[ic, 2], lwd = 2)
arrows(y0 = idx[!ic], x0 = dpl[!ic, 1], x1 = 123, length = 0.1, lwd = 2)
## -- Plot 2
par(cex = 0.9, mar = c(5, 4, 2, 1))
dpl <- carrion$Time[carrion$Time[, 3] > 0] ## exclude right censored
plot(ecdf(dpl[, 2] - dpl[, 1]), xlab = "Interval length (days)", main = NULL,
     verticals = TRUE, las = 1, ylab = "Probability")
grid()

## ----fig-carrion-data, echo=FALSE, fig.width=4, fig.height=3.5, out.width=".5\\textwidth"----
sv <- survfit(Time ~ Insects, data = carrion)
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
plot(sv, lty = c(1, 2), lwd = 2, xlab = "", ylab = "P(T>t)")
title(xlab = "Follow-up time (days)", line = 2.3)
grid()
legend("topright", c("Insect = no", "Insect = yes"), lty = c(1, 2),
       lwd = 2, bty = "n")

## ----carrion-outome, echo=TRUE------------------------------------------------
head(carrion$Time, 10)

## ----carrion-model------------------------------------------------------------
dcmp <- CoxphME(Time ~ Insects + Habitat + Landscape
                + s(Temperature, k = 20) + s(Elevation100, k = 20)
                + (1 | PlotID), data = carrion,
                log_first = TRUE, order = 6)
summary(dcmp)

## ----plot-carrion-smooth, eval=FALSE------------------------------------------
#  plot(smooth_terms(dcmp), panel.first = grid())

## ----plot-carrion-smooth2, echo=FALSE, fig.width=8, fig.height=4--------------
par(mar = c(4, 4, 1, 1))
plot(smooth_terms(dcmp), panel.first = grid())

## ----resid, echo=FALSE--------------------------------------------------------
## From Surv object to a matrix with left and right censoring values
get_bounds <- function(x) {
  stopifnot(is.Surv(x))
  lb <- x[, 1]
  ub <- x[, 2]
  ty <- x[, 3]
  ub[ty == 2] <- lb[ty == 2]
  lb[ty == 2] <- -Inf
  ub[ty == 0] <- Inf
  ub[ty == 1] <- lb[ty == 1]
  cbind(lb, ub)
}

## Evaluate the marginal survivor function
marginalize <- function(model, data, type = "survivor",
  lower = -Inf, upper = Inf) {
  ## fun(y|x,g) * density(g)
  joint <- function(re, nd, mod, type) {
    nd <- nd[rep(1, length(re)), ]
    nd[[variable.names(mod, "grouping")]] <- seq(nrow(nd)) ## to take vector-valued REs
    pr <- predict(mod, newdata = nd, ranef = re, type = type) *
      dnorm(re, 0, sd = sqrt(varcov(mod)[[1]][1, 1]))
    c(pr)
  }
  ## integrate out the random effects row by row
  res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) {
    out <- integrate(joint, lower = lower, upper = upper, nd = nd, mod = model,
                     type = type)
    if (out$message == "OK") return(out$value)
    return(NA)
  }, mc.cores = 8)
  unlist(res)
}

## Cox-Snell residuals
## type:
##   ic: Evaluate the cumulative hazard at the left and right censoring values
##   and return residuals as interval-censored.
##   adj: Adjusted CS resid (Farrington,2000 <doi:10.1111/j.0006-341X.2000.00473.x>),
##   replace the intervals with the expected values under unit exponential on these
##   intervals (under the assumption of correct model fit)
CSresid <- function(model, data = model.frame(model), type = c("both", "adj", "ic")) {
  type <- match.arg(type)
  rv <- variable.names(model, "response")
  if (!is.Surv(data[[rv]])) data[[rv]] <- Surv(data[[rv]])
  bns <- get_bounds(data[[rv]])
  stopifnot(all(is.finite(bns[,1])))
  data[[rv]] <- bns[, 1]
  Sl <- marginalize(model, data, type = "survivor")
  Su <- numeric(length = nrow(bns))
  ie <- bns[,1] == bns[,2]
  Su[ie] <- Sl[ie]
  Su[ir <- is.infinite(bns[,2])] <- 0
  ii <- !(ie | ir)
  data[[rv]] <- bns[, 2]
  Su[ii] <- marginalize(model, data[ii, ], type = "survivor")
  res_ic <- Surv(-log(Sl), -log(Su), type = "interval2")
  if (type == "ic") return(res_ic)
  res <- numeric(length = nrow(bns))
  res[ie] <- -log(Sl[ie])
  res[ir] <- 1 - log(Sl[ir])
  res[ii] <- (Sl[ii] * (1 - log(Sl[ii])) - Su[ii] * (1 - log(Su[ii]))) /
    (Sl[ii] - Su[ii])
  if (type == "adj") return(res)
  data.frame(IC = res_ic, adjusted = res)
}

if (!file.exists("CS-resid.rda")) {
  resids <- CSresid(dcmp, type = "both")
  save(resids, file = "CS-resid.rda")
} else {
  load("CS-resid.rda")
}

## ----resid-plot, echo=FALSE, fig.width=7, fig.height=3.5----------------------
rf_cens <- survfit(IC ~ 1, data = resids) ## Turnbull NPMLE for interval-censored
rf_adj <- survfit(Surv(adjusted) ~ 1, data = resids) ## KM for adjusted

par(mfrow = c(1, 2), cex = 0.9, mar = c(4, 4, 2, 1))
plot(rf_cens$time, -log(rf_cens$surv), type = "s", lwd = 1, las = 1,
     xlab = "Cox-Snell residuals", ylab = "Cumulative hazard")
abline(0, 1, lwd = 2, lty = 2)
grid()
blx <- grconvertX(0.10, from = "nfc", to = "user")
bly <- grconvertY(0.98, from = "nfc", to = "user")
text(blx, bly, labels = "A", xpd = TRUE, cex = 1.2)

plot(rf_adj$time, rf_adj$cumhaz, type = "s", lwd = 1, las = 1,
     xlab = "Cox-Snell residuals", ylab = "Cumulative hazard")
abline(0, 1, lwd = 2, lty = 2)
grid()
blx <- grconvertX(0.10, from = "nfc", to = "user")
bly <- grconvertY(0.98, from = "nfc", to = "user")
text(blx, bly, labels = "B", xpd = TRUE, cex = 1.2)

## ----carrion-model2-----------------------------------------------------------
dcmp2 <- CoxphME(Time | Insects ~ Habitat + Landscape
                 + s(Temperature, k = 20) + s(Elevation100, k = 20)
                 + (1 | PlotID), data = carrion,
                 log_first = TRUE, order = 6)

## ----fig-carrion-tve, echo=FALSE, fig.width=4, fig.height=3.5, warning=FALSE, out.width=".5\\textwidth"----
cf <- coef(dcmp2, with_baseline = TRUE)
vc <- vcov(dcmp2, pargroup = "baseline")
cf <- cf[grep("Insectsyes", names(cf), fixed = TRUE)]
idx <- grep("Insectsyes", colnames(vc), fixed = TRUE)
vc <- vc[idx, idx]

ns <- 200
nd <- model.frame(dcmp2)[rep(1, ns), ]
nd[[variable.names(dcmp2, "response")]] <- seq(1, 100, length.out = ns)
X <- model.matrix(dcmp2, data = nd, type = "Y", simplify = TRUE)$Ye
idx <- grep("Insectsyes", colnames(X), fixed = TRUE)
X <- X[, idx]
ci <- confint(multcomp::glht(multcomp::parm(cf, vc), linfct = X),
              calpha = multcomp::univariate_calpha(), level = 0.95)$confint
## use multcomp::adjusted_calpha() for multiplicity adjustments

par(cex = 0.8, mar = c(4, 4, 2, 1), las = 1)
plot(nd$Time, ci[, 1], type = "l", col = 1, lwd = 2, ylim = c(-1, 3),
     panel.first = {grid(); abline(h = 0, lwd = 2, col = "lightgrey")},
     ylab = "Log-hazard ratio", xlab = "")
title(xlab = "Time (days)", line = 2.3)
polygon(c(nd$Time, rev(nd$Time)), c(ci[, 2], rev(ci[, 3])), border = NA,
        col = grey(0.5, 0.2))
ci2 <- confint(dcmp, parm = "Insectsyes", estimate = TRUE)
ci2 <- ci2[rep(1, ns), ]
matlines(nd$Time, ci2, col = 1, lwd = c(1, 1, 2), lty = c(3, 3, 2))
legend("bottomright", c("Time-varying effect", "Proportional effect"),
       lty = 1:2, lwd = 2, bty = "n", cex = 0.9)

## ----ecoli-data, echo=FALSE---------------------------------------------------
ecoli <- read.csv("Hulvey2021.csv")
fs <- c("treatment", "stream", "pasture", "cattle", "rotation")
ecoli[fs] <- lapply(ecoli[fs], factor)

## ----ecoli-est, echo=TRUE-----------------------------------------------------
## specifications w/o random effects
mf <- c(log10(ecoli_MPN) ~ treatment + cattle +
          s(DOY, bs = "cr", by = treatment),
        log10(ecoli_MPN) ~ treatment + cattle + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ treatment + s(DOY, bs = "cr", by = treatment),
        log10(ecoli_MPN) ~ cattle + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ treatment + s(DOY, bs = "cr"),
        log10(ecoli_MPN) ~ s(DOY, bs = "cr"))
names(mf) <- paste("Model", c(1:5, "Null"))
ecoli_res <- data.frame(matrix(NA, nrow = length(mf), ncol = 3))
colnames(ecoli_res) <- c("gamm", "LmME", "BoxCoxME")
rownames(ecoli_res) <- names(mf)
for (i in seq_along(mf)) {
  m_gamm <- gamm4(mf[[i]], data = ecoli,
                  random = ~ (1 | year:stream:pasture) + (1 | stream),
                  REML = FALSE)
  ecoli_res$gamm[i] <- logLik(m_gamm$mer)
  mf2 <- update(mf[[i]], . ~ . + (1 | year:stream:pasture) + (1 | stream))
  m_LmME <- LmME(mf2, data = ecoli)
  if (m_LmME$opt$convergence == 0) ecoli_res$LmME[i] <- logLik(m_LmME)
  m_BCME <- BoxCoxME(mf2, data = ecoli)
  if (m_BCME$opt$convergence == 0) ecoli_res$BoxCoxME[i] <- logLik(m_BCME)
}

## ----ecoli-tbl, echo=FALSE, results="asis"------------------------------------
names(ecoli_res) <- c("\\multicolumn{1}{m{2cm}}{\\centering GAMM}",
 "\\multicolumn{1}{m{4cm}}{\\centering Additive normal transformation model}",
 "\\multicolumn{1}{m{4cm}}{\\centering Additive non-normal transformation model}")
print(xtable(ecoli_res, align = c("l", "R{2cm}", rep("R{4cm}", 2))),
      floating = FALSE, booktabs = TRUE,
      sanitize.colnames.function = function(x){x})

## ----ecoli-show-m1------------------------------------------------------------
update(mf[[1]], . ~ . + (1 | year:stream:pasture) + (1 | stream))

## ----ecoli-plot-fun, echo=FALSE-----------------------------------------------
plot2cis <- function(x, y, col = c(1, 2),
                     fill = c(grey(0.1, 0.25), rgb(1, 0, 0, 0.25)),
                     xlabs = NULL, ylabs = NULL, mains = NULL,
                     ...) {
  stopifnot(length(x) == length(y))
  for (ii in seq_along(x)) {
    plot(0, type = "n",
         xlab = if (is.null(xlabs)) colnames(x[[ii]])[1] else xlabs[ii],
         ylab = if (is.null(ylabs)) colnames(x[[ii]])[2] else ylabs[ii],
         main = if (is.null(mains)) NULL else mains[ii],
         xlim = range(x[[ii]][, 1], y[[ii]][, 1]),
         ylim = range(x[[ii]][, 2:4], y[[ii]][, 2:4]),
         panel.first = grid(), ...)
    lines(x[[ii]][, 1], x[[ii]][, 2], col = col[1])
    lines(y[[ii]][, 1], y[[ii]][, 2], col = col[2])
    polygon(c(x[[ii]][, 1], rev(x[[ii]][, 1])),
            c(x[[ii]][, 3], rev(x[[ii]][, 4])),
            border = NA, col = fill[1])
    polygon(c(y[[ii]][, 1], rev(y[[ii]][, 1])),
            c(y[[ii]][, 3], rev(y[[ii]][, 4])),
            border = NA, col = fill[2])
  }
}

## NOTE: currently only w/ pnorm inverse link
smooth2ci <- function(sm, PI = FALSE, ilink = "pnorm") {
  PIfun <- switch(ilink, pnorm = function(x) pnorm(x / sqrt(2)),
                  stop("No other inverse links are available atm."))
  lapply(sm, function(x) {
    out <- matrix(0, nrow = nrow(x), ncol = 4)
    colnames(out) <- c(colnames(x)[c(1, ncol(x)-1)], "lwr", "upr")
    out[, 1] <- x[, 1]
    se <- rev(x)[, 1]
    yy <- rev(x)[, 2]
    out[, 2] <- yy
    out[, 3:4] <- yy + qnorm(0.975) * se %o% c(-1, 1)
    if (PI) out[, 2:4] <- PIfun(out[, 2:4])
    out
  })
}

## NOTE: this function assumes the smooth term: s(DOY, by = "treatment")
diff_smooth <- function(mod, DOY = NULL, n = 100) {
  XZ_sm <- function(XZ) {
    X <- XZ$X
    X[, attr(XZ$X, "type") != "sm"] <- 0
    Z_ <- Matrix::t(XZ$Zt)[, attr(XZ$Zt, "type") == "sm", drop = FALSE]
    Z <- tramME:::nullTMatrix(nrow = nrow(Z_), ncol = length(mod$param$gamma))
    Z[, attr(mod$param$gamma, "type") == "sm"] <- Z_
    list(X = X, Z = Z)
  }

  mf <- model.frame(mod, drop.unused.levels = TRUE)
  if (is.null(DOY)) DOY <- seq(range(mf$DOY)[1], range(mf$DOY)[2], length.out = n)
  nd_ <- expand.grid(DOY = DOY,
    treatment = factor(levels(mf$treatment), levels = levels(mf$treatment)))
  nd <- mf[rep(1, nrow(nd_)), ]
  nd[colnames(nd_)] <- nd_
  XZ <- model.matrix(mod, data = nd, type = c("X", "Zt"), keep_sign = FALSE)
  XZ1 <- XZ_sm(XZ)
  nd$DOY <- nd$DOY + 1
  XZ <- model.matrix(mod, data = nd, type = c("X", "Zt"), keep_sign = FALSE)
  XZ2 <- XZ_sm(XZ)
  XZ <- list(X = XZ2$X - XZ1$X, Z = tramME:::as_dgTMatrix(XZ2$Z - XZ1$Z))
  pr <- predict(mod$tmb_obj, newdata = XZ, scale = "lp")
  split(data.frame(DOY = nd_$DOY, df = pr$pred, se = pr$se), nd$treatment)
}

## ----ecoli-m1, echo=FALSE-----------------------------------------------------
fm1 <- log10(ecoli_MPN) ~ treatment + cattle +
  s(DOY, bs = "cr", by = treatment) +
  (1 | year:stream:pasture) + (1 | stream)
ecoli_m1 <- LmME(fm1, data = ecoli)
ecoli_m1_bc <- BoxCoxME(fm1, data = ecoli)

## ----plot-ecoli-m1, echo=FALSE, fig.width=9, fig.height=3---------------------
par(mfrow = c(1, 3), cex = 0.8, mar = c(4, 4, 2, 1), las = 1)
plot2cis(smooth2ci(diff_smooth(ecoli_m1), PI = TRUE),
         smooth2ci(diff_smooth(ecoli_m1_bc), PI = TRUE),
         mains = paste("treatment =", levels(ecoli$treatment)),
         ylabs = rep("PI", 3))
legend("topright", c("normal", "non-normal"), col = c(1, 2),
       lty = 1, bty = "n", cex = 0.9)

## ----plot-ecoli-trafo, echo=FALSE, fig.width=5.5, fig.height=4, out.width="0.5\\linewidth"----
nd <- model.frame(ecoli_m1_bc)[rep(1, 100), ]
nd[[1]] <- do.call(seq,
  c(as.list(range(log10(ecoli$ecoli_MPN))), length.out = 100))

Y <- model.matrix(ecoli_m1_bc, data = nd, type = "Y")$Ye
b <- coef(ecoli_m1_bc, with_baseline = TRUE)[1:7]
vc <- vcov(ecoli_m1_bc, pargroup = "baseline")
ci <- confint(multcomp::glht(multcomp::parm(b, vc), linfct = Y),
              calpha = multcomp::univariate_calpha())$confint

par(mar = c(4, 4, 1, 1), cex = 0.8, las = 1)
plot(0, type = "n", xlim = range(nd[[1]]), ylim = range(ci),
     xlab = variable.names(ecoli_m1_bc, "response"), ylab = "h(y)",
     panel.first = grid(), xaxs = "i", yaxs = "i")
lines(nd[[1]], ci[, 1], lwd = 2)
polygon(c(nd[[1]], rev(nd[[1]])), c(ci[, 2], rev(ci[, 3])), col = grey(0.2, 0.2),
        border = FALSE)
b2 <- coef(ecoli_m1, with_baseline = TRUE)
abline(b2[1:2], lwd = 2, lty = 2)
legend("topleft", c("Non-normal", "Normal"), lwd = 2, lty = 1:2, bty = "n")

## ----ecoli-cens---------------------------------------------------------------
fm1c <- update(fm1, Surv(log10(ecoli_MPN), event = ecoli_MPN < 2419.6) ~ .)
ecoli_m1_cens <- BoxCoxME(fm1c, data = ecoli)
summary(ecoli_m1_cens)

## ----ecoli-fe-tbl, results="asis", echo=FALSE---------------------------------
cis <- lapply(list(ecoli_m1, ecoli_m1_bc, ecoli_m1_cens), function(x) {
  ci <- confint(x, pargroup = "shift", estimate = TRUE)
  ci <- pnorm(ci[, c(3, 1, 2)] / sqrt(2))
  ci <- formatC(ci, format = "f", digits = 2)
  cbind(ci[, 1],
        apply(ci[, 2:3], 1, function(x) paste(x, collapse = "---")))
})

tbl <- do.call("cbind", cis)
rownames(tbl) <- c("treatment = medium", "treatment = short", "cattle = present")
add <- list()
add$pos <- list(0)
add$command <- paste(c("& \\multicolumn{2}{c}{Normal} &",
                       "\\multicolumn{2}{c}{Non-normal} &",
                       "\\multicolumn{2}{c}{Non-normal, censored} \\\\\n",
                       "\\cmidrule(lr){2-3} \\cmidrule(lr){4-5}",
                       "\\cmidrule(lr){6-7}",
                       rep("& PI & 95\\% CI ", 3),
                       "\\\\\n"), collapse = " ")
print(xtable(tbl, align = "lrrrrrr"), include.colnames = FALSE,
      floating = FALSE, add.to.row = add,
      sanitize.text.function = function(x) x, booktabs = TRUE)

## ----plot-ecoli-cens, echo=FALSE, fig.width=9, fig.height=3-------------------
par(mfrow = c(1, 3), cex = 0.8, mar = c(4, 4, 2, 1), las = 1)
plot2cis(smooth2ci(diff_smooth(ecoli_m1), PI = TRUE),
         smooth2ci(diff_smooth(ecoli_m1_cens), PI = TRUE),
         mains = paste("treatment =", levels(ecoli$treatment)),
         ylabs = rep("PI", 3))
legend("topright", c("normal", "non-normal, censored"), col = c(1, 2),
       lty = 1, bty = "n", cex = 0.9)

## ----ecoli-notr---------------------------------------------------------------
f_nontr <- update(fm1, Surv(ecoli_MPN, event = ecoli_MPN < 2419.6) ~ .)
ecoli_nontr <- BoxCoxME(f_nontr, data = ecoli, log_first = TRUE)
summary(ecoli_nontr)

## ----algae-data, echo=FALSE---------------------------------------------------
andrew <- read.csv("andrew.csv",
                   colClasses = c(QUAD = "factor", PATCH = "factor"))
andrew$TREAT <- factor(andrew$TREAT, labels = c("control", "removal", "0.33", "0.66"))
andrew$TREAT <- factor(andrew$TREAT, levels = c("control", "0.33", "0.66", "removal"))
andrew$pALGAE <- andrew$ALGAE / 100
## summary(andrew)
ecdfs <- lapply(split(andrew, andrew$TREAT), function(x) ecdf(x$pALGAE))

## ----plot-algae-treatment, echo=FALSE, fig.width=5.5, fig.height=4.5, out.width=".6\\textwidth"----
x <- seq(0, 1, length.out = 100)
plot(x, ecdfs[[1]](x), type = "s", xlab = "Algae cover proportion", ylab = "ECDF",
     lty = 1, lwd = 2, xlim = c(0, 1), ylim = c(0, 1), las = 1, panel.first = grid())
for (ii in 2:4) {
  lines(x, ecdfs[[ii]](x), type = "s", lty = ii, lwd = 2)
}
legend("bottomright", levels(andrew$TREAT), lty = 1:4, lwd = 2, bty = "n", cex = 0.9)

## ----algae-glmm, eval=FALSE---------------------------------------------------
#  urchin_zib <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH), ziformula = ~ TREAT,
#                        data = andrew, family = beta_family())

## ----algae-tram---------------------------------------------------------------
urchin_tram <- ColrME(
  Surv(pALGAE, pALGAE > 0, type = "left") ~ TREAT + (1 | PATCH),
  bounds = c(-0.1, 1), support = c(-0.1, 1), data = andrew,
  order = 6)
summary(urchin_tram)

## ----pointmass-plot, echo=FALSE, fig.width=7, fig.height=3.5, out.width=".8\\textwidth"----
par(mar = c(4, 4, 1, 1))
layout(mat = matrix(1:3, nrow = 1), widths = c(45, 10, 45))
nd <- model.frame(urchin_tram)[rep(21, 100), ]
nd[[1]] <- seq(-0.1, 1, length.out = 100)
ccdf_e <- predict(urchin_tram, newdata = nd, type = "distribution", ranef = "zero")
plot(nd[[1]], ccdf_e, type = "l", ylim = c(0, 1), xlim = c(-0.1, 1),
     panel.first = grid(), xlab = "y", ylab = "h(y)")
idx <- which(nd[[1]] < 0)
lines(nd[[1]][idx], ccdf_e[idx], col = 2, lwd = 3)
par(mar = c(1, 1, 1, 1))
plot(0:1, 0:1, type = "n", yaxt = "n", xaxt = "n", xlab = "", ylab = "", bty = "n")
arrows(x0 = 0, x1 = 1, y0 = 0.5, length = 0.1, lwd = 3)
nd[[1]] <- seq(0, 1, length.out = 100)
ccdf <- predict(urchin_tram, newdata = nd, type = "distribution", ranef = "zero")
par(mar = c(4, 4, 1, 1))
plot(nd[[1]], ccdf, type = "l", ylim = c(0, 1), xlim = c(-0.1, 1), lwd = 2,
     panel.first = grid(), xlab = "y", ylab = expression(F[Y] * "(y)"))
points(0, 0, pch = 21, cex = 1, lwd = 2)
points(0, ccdf[1], pch = 20, cex = 1, lwd = 2)
segments(x0 = -0.1, x1 = -0.01, y0 = 0, lwd = 2)

## ----algae-mCDF1, echo=FALSE--------------------------------------------------
## Numerical integration to get an estimate of the marginal distribution
marginalize.zib.glmmTMB <- function(model, data,
                                     type = c("distribution", "density"),
                                     lower = -Inf, upper = Inf) {
  type <- match.arg(type)
  ## for a single data point
  joint <- function(re, nd, mod, type) {
    mu <- plogis(predict(mod, newdata = nd, type = "link", re.form = NA) + re)
    mu <- ifelse(mu == 0, .Machine$double.eps, mu)
    mu <- ifelse(mu == 1, 1 - .Machine$double.eps, mu)
    sig <- predict(mod, newdata = nd, type = "disp")
    nu <- predict(mod, newdata = nd, type = "zprob")
    out <- sapply(mu, function(m) {
      switch(type,
             distribution = gamlss.dist::pBEZI(nd[[1]], mu = m, sigma = sig, nu = nu),
             density = gamlss.dist::dBEZI(nd[[1]], mu = m, sigma = sig, nu = nu),
             stop("Unknown function!"))
    })
    out * dnorm(re, mean = 0, sd = sqrt(VarCorr(mod)$cond[[1]][1]))
  }
  ## integrate out the random effects row by row
  res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) {
    out <- integrate(joint, lower = lower, upper = upper, nd = nd, mod = model,
                     type = type)
    if (out$message == "OK") return(out$value)
    return(NA)
  }, mc.cores = 8)
  unlist(res)
}

marginalize.tramME <- function(model, data, type = "survivor", add = 0,
  lower = -Inf, upper = Inf) {
  ## fun(y|x,g) * density(g)
  joint <- function(re, nd, mod, type) {
    nd <- nd[rep(1, length(re)), ]
    rv <- variable.names(mod, "response")
    nd[[rv]] <- nd[[rv]] + add
    nd[[variable.names(mod, "grouping")]] <- seq(nrow(nd)) ## to take vector-valued REs
    pr <- predict(mod, newdata = nd, ranef = re, type = type) *
      dnorm(re, 0, sd = sqrt(varcov(mod)[[1]][1, 1]))
    c(pr)
  }
  ## integrate out the random effects row by row
  res <- parallel::mclapply(split(data, seq(nrow(data))), function(nd) {
    out <- integrate(joint, lower = lower, upper = upper, nd = nd, mod = model,
                     type = type)
    if (out$message == "OK") return(out$value)
    return(NA)
  }, mc.cores = 8)
  unlist(res)
}


nd <- expand.grid(pALGAE = seq(0, 1, length.out = 100),
                  TREAT = factor(levels(andrew$TREAT),
                                 levels = c("control", "0.33", "0.66", "removal")),
                  PATCH = andrew$PATCH[1],
                  KEEP.OUT.ATTRS = FALSE)
colnames(nd)[1] <- variable.names(urchin_tram, "response")

if (!file.exists("urchin.rda")) {
  ## estimate the glmmTMB model
  urchin_zib <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH), ziformula = ~ TREAT,
                        data = andrew, family = beta_family())
  ## save results in a single df
  urchin_res <- nd
  colnames(urchin_res)[1] <- "pALGAE"
  urchin_res$tramME_shift <- marginalize.tramME(urchin_tram,
                                                data = nd, type = "distribution")
  urchin_res$zib <- marginalize.zib.glmmTMB(urchin_zib, data = nd,
                                            type = "distribution")
  urchin_zib_ll <- data.frame(ll = logLik(urchin_zib), np = length(urchin_zib$obj$par))
} else{
  load("urchin.rda")
}

## ----plot-algae-mCDF1, echo=FALSE, fig.width=9, fig.height=3.5----------------
multiplot <- function(dfs, yn, xn = colnames(dfs[[1]])[1], ecdfs = NULL,
                      cols = 1:(length(yn)+!is.null(ecdfs)),
                      ltys = rep(1, length(yn)+!is.null(ecdfs)),
                      lwds = rep(1, length(yn)+!is.null(ecdfs)),
                      mains = NULL, ...) {
  if (!is.null(ecdfs)) stopifnot(length(dfs) == length(ecdfs))
  for (i in seq_along(dfs)) {
    col <- if (is.null(ecdfs)) cols else cols[-length(cols)]
    lty <- if (is.null(ecdfs)) ltys else ltys[-length(ltys)]
    lwd <- if (is.null(ecdfs)) ltys else lwds[-length(lwds)]
    main <- if (is.null(mains)) names(dfs)[i] else mains[i]
    matplot(dfs[[i]][[xn]], dfs[[i]][yn], type = "l",
            col = col, lty = lty, lwd = lwd, panel.first = grid(), ...,
            main = main)
    if (!is.null(ecdfs)) {
      lines(dfs[[i]][[xn]], ecdfs[[i]](dfs[[i]][[xn]]), type = "s",
            lty = rev(ltys)[1], col = rev(cols)[1], lwd = rev(lwds)[1],
            ...)
    }
  }
}

cols <- c("#E16A86", "#00AD9A")
layout(mat = matrix(c(1:4, rep(5, 4)), nrow = 2, byrow = TRUE),
       heights = c(7, 1))
par(mar = c(4, 4, 3, 1), las = 1)
multiplot(split(urchin_res, urchin_res$TREAT),
          yn = c("tramME_shift", "zib"),
          ecdfs = ecdfs,
          mains = paste("treatment =", levels(urchin_res$TREAT)),
          cols = c(cols, 1), lwds = c(rep(2, 2), 1),
          xlab = "pALGAE", ylab = "prob",
          xlim = c(0, 1))
par(mar = c(0, 0, 0, 0))
plot.new()
legend("center",
       c("Linear transformation model", "Zero-inflated beta", "ECDF"),
       col = c(cols, 1), lty = 1, bty = "n", lwd = c(rep(2, 2), 1), horiz = TRUE)

## ----algae-glmm2, eval=FALSE--------------------------------------------------
#  urchin_zib_disp <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH),
#                             ziformula = ~ TREAT, dispformula = ~ TREAT,
#                             data = andrew, family = beta_family())

## ----algae-tram2--------------------------------------------------------------
urchin_tram_strat <- ColrME(
  Surv(pALGAE, pALGAE > 0, type = "left") | 0 + TREAT ~ 1 + (1 | PATCH),
  bounds = c(-0.1, 1), support = c(-0.1, 1), data = andrew,
  order = 6, control = optim_control(iter.max = 1e3, eval.max = 1e3,
                                     rel.tol = 1e-9))
summary(urchin_tram_strat)

## ----algae-mCDF2, echo=FALSE--------------------------------------------------
if (!("tramME_strat" %in% colnames(urchin_res))) {
  ## fit the glmm
  urchin_zib_disp <- glmmTMB(pALGAE ~ TREAT + (1 | PATCH),
                             ziformula = ~ TREAT, dispformula = ~ TREAT,
                             data = andrew, family = beta_family())
  ## marginalize
  urchin_res$tramME_strat <- marginalize.tramME(urchin_tram_strat,
    data = nd, type = "distribution")
  urchin_res$zib_disp <- marginalize.zib.glmmTMB(urchin_zib_disp,
    data = nd, type = "distribution")
}
if (nrow(urchin_zib_ll) == 1) {
  urchin_zib_ll <- rbind(urchin_zib_ll,
    c(logLik(urchin_zib_disp), length(urchin_zib_disp$obj$par)))
}
if (!file.exists("urchin.rda")) {
  save(urchin_res, urchin_zib_ll, file = "urchin.rda")
}

## ----plot-algae-mCDF2, echo=FALSE, fig.width=9, fig.height=3.5----------------
cols <- c("#E16A86", "#00AD9A")
layout(mat = matrix(c(1:4, rep(5, 4)), nrow = 2, byrow = TRUE),
       heights = c(7, 1))
par(mar = c(4, 4, 3, 1), las = 1)
multiplot(split(urchin_res, urchin_res$TREAT),
          yn = c("tramME_strat", "zib_disp"),
          ecdfs = ecdfs,
          mains = paste("treatment =", levels(urchin_res$TREAT)),
          cols = c(cols, 1), lwds = c(rep(2, 2), 1),
          xlab = "pALGAE", ylab = "prob",
          xlim = c(0, 1))
par(mar = c(0, 0, 0, 0))
plot.new()
legend("center",
       c("Stratified linear transformation model",
         "Zero-inflated beta with dispersion model",
         "ECDF"),
       col = c(cols, 1), lty = 1, bty = "n", lwd = c(rep(2, 2), 1), horiz = TRUE)

## ----algae-loglik, results="asis", echo=FALSE---------------------------------
lls <- data.frame(c(urchin_zib_ll$ll[1], urchin_zib_ll$ll[2],
                    logLik(urchin_tram), logLik(urchin_tram_strat)),
                  c(urchin_zib_ll$np[1], urchin_zib_ll$np[2],
                    length(urchin_tram$tmb_obj$par),
                    length(urchin_tram_strat$tmb_obj$par)))
rownames(lls) <- c("Zero-inflated beta without dispersion model",
                   "Zero-inflated beta with dispersion model",
                   "Linear transformation model",
                   "Stratified linear transformation model")
colnames(lls) <- c("$\\log\\mathcal{L}$", "Number of parameters")
print(xtable(lls, align = "lrr", digits = c(0, 2, 0)), sanitize.text.function = function(x) x,
      booktabs = TRUE, floating = FALSE)

## ----mosquito-data, echo=FALSE------------------------------------------------
AGO <- read.csv("Juarez2021.csv")
factors <- c("Community", "HouseID", "Year", "Income", "Placement")
AGO[factors] <- lapply(AGO[factors], factor)
AGO$AEAfemale <- as.integer(AGO$AEAfemale)

## ----cotram-model, echo=TRUE, eval=FALSE--------------------------------------
#  ## Additive count transformation model
#  ## See ?cotram::cotram for the documentation
#  CotramME <- function(formula, data,
#                       method = c("logit", "cloglog", "loglog", "probit"),
#                       log_first = TRUE, plus_one = log_first, prob = 0.9,
#                       ...) {
#    method <- match.arg(method)
#    rv <- all.vars(formula)[1]
#    stopifnot(is.integer(data[[rv]]), all(data[[rv]] >= 0))
#    data[[rv]] <- data[[rv]] + as.integer(plus_one)
#    sup <- c(-0.5 + log_first, quantile(data[[rv]], prob = prob))
#    bou <- c(-0.9 + log_first, Inf)
#    data[[rv]] <- as.Surv(R(data[[rv]], bounds = bou))
#    fc <- match.call()
#    fc[[1L]] <- switch(method, logit = quote(ColrME), cloglog = quote(CoxphME),
#                       loglog = quote(LehmannME), probit = quote(BoxCoxME))
#    fc$method <- NULL
#    fc$plus_one <- NULL
#    fc$prob <- NULL
#    fc$log_first <- log_first
#    fc$bounds <- bou
#    fc$support <- sup
#    fc$data <- data
#    out <- eval(fc, parent.frame())
#    out$call$data <- match.call()$data
#    class(out) <- c("CotramME", class(out))
#    out
#  }
#  mosquito_tram <- CotramME(AEAfemale ~ Year + Income*Placement
#    + s(Week) + s(CovRate200) + (1|HouseID)
#    + (1|Community), offset = -log(daystrapping), data = AGO,
#    method = "logit", order = 5, log_first = TRUE, prob = 0.9)

## ----mosquito-est, echo=FALSE-------------------------------------------------
if (file.exists("mosquito_models.rda")) {
  load("mosquito_models.rda")
} else {
  ## GAMMs
  mosquito_pois <- gamm4(AEAfemale ~ offset(log(daystrapping)) + Year + Income*Placement
                         + s(Week) + s(CovRate200), random =~ (1|HouseID)
                         + (1|Community), data = AGO, family=poisson)

  mosquito_nb <- gamm4(AEAfemale ~ offset(log(daystrapping)) + Year + Income*Placement
                       + s(Week) + s(CovRate200), random =~ (1|HouseID)
                       + (1|Community), data = AGO, family = negative.binomial(1))

  ## additive count transformation model
  CotramME <- function(formula, data,
                       method = c("logit", "cloglog", "loglog", "probit"),
                       log_first = TRUE, plus_one = log_first, prob = 0.9,
                       ...) {
    method <- match.arg(method)
    rv <- all.vars(formula)[1]
    stopifnot(is.integer(data[[rv]]), all(data[[rv]] >= 0))
    data[[rv]] <- data[[rv]] + as.integer(plus_one)
    sup <- c(-0.5 + log_first, quantile(data[[rv]], prob = prob))
    bou <- c(-0.9 + log_first, Inf)
    data[[rv]] <- as.Surv(R(data[[rv]], bounds = bou))
    fc <- match.call()
    fc[[1L]] <- switch(method, logit = quote(ColrME), cloglog = quote(CoxphME),
                       loglog = quote(LehmannME), probit = quote(BoxCoxME))
    fc$method <- NULL
    fc$plus_one <- NULL
    fc$prob <- NULL
    fc$log_first <- log_first
    fc$bounds <- bou
    fc$support <- sup
    fc$data <- data
    out <- eval(fc, parent.frame())
    out$call$data <- match.call()$data
    class(out) <- c("CotramME", class(out))
    out
  }

  mosquito_tram <- CotramME(AEAfemale ~ Year + Income*Placement
                            + s(Week) + s(CovRate200) + (1|HouseID)
                            + (1|Community), offset = -log(daystrapping), data = AGO,
                            method = "logit", order = 5, log_first = TRUE, prob = 0.9)
  stopifnot(mosquito_tram$opt$convergence == 0)
}

## ----mosquito-ll-tbl, echo=FALSE, results="asis"------------------------------
if (!file.exists("mosquito_models.rda")) {
  ll_mosquito <- c("Poisson GAMM" = logLik(mosquito_pois$mer),
                   "Negative binomial GAMM" = logLik(mosquito_nb$mer),
                   "Additive count transformation model" = logLik(mosquito_tram))
  ll_mosquito <- data.frame(ll_mosquito)
  names(ll_mosquito) <- "Log-likelihood"
}
print(xtable(ll_mosquito, align = "lr"), floating = FALSE, booktabs = TRUE)

## ----mosquito_summaries, echo=FALSE-------------------------------------------
if (!file.exists("mosquito_models.rda")) {
  sums_mosquito <- list(
    nb = summary(mosquito_nb$gam),
    tram = summary(mosquito_tram)
  )
}

## ----plot-mosquito-smooth, echo=FALSE, fig.width=9, fig.height=7--------------
if (!file.exists("mosquito_models.rda")) {
  nd <- AGO[rep(1, 100), ]
  nd$Week <- seq(min(AGO$Week), max(AGO$Week), length.out = 100)
  nd$CovRate200 <- seq(min(AGO$CovRate200), max(AGO$CovRate200), length.out = 100)
  pr <- predict(mosquito_nb$gam, type = "terms",
                terms = c("s(Week)", "s(CovRate200)"),
                newdata = nd, se.fit = TRUE)

  sm_mosquito_nb <- lapply(colnames(pr[[1]]), function(n) {
    ci <- pr$fit[, n] + qnorm(0.975) * pr$se.fit[, n] %o% c(-1, 1)
    out <- data.frame(cbind(pr$fit[, n], ci))
    colnames(out) <- c("fit", "lwr", "upr")
    out
  })
  names(sm_mosquito_nb) <- colnames(pr[[1]])
  sm_mosquito_nb[[1]]$x <- nd$Week
  sm_mosquito_nb[[2]]$x <- nd$CovRate200

  sm_mosquito_tram <- smooth_terms(mosquito_tram)
}

plotsm <- function(sm, xlab, ylab) {
  plot(sm$x, sm$fit, type = "l", xlim = range(sm$x),
       ylim = range(sm[, c("lwr", "upr")]),
       xlab = xlab, ylab = ylab, panel.first = grid())
  polygon(c(sm$x, rev(sm$x)), c(sm$lwr, rev(sm$upr)),
          border = NA, col = grey(0.5, 0.25))
}

par(mfrow = c(2, 2), mar = c(4, 4, 3, 1), cex = 0.8, las = 1)
plotsm(sm_mosquito_nb[[1]], "Week", "s(Week)")
plotsm(sm_mosquito_nb[[2]], "CovRate200", "s(CovRate200)")
mtext("Negative binomial model", side = 3, line = -2, outer = TRUE)
sm <- sm_mosquito_tram
for (i in seq_along(sm_mosquito_tram)) {
  sm[[i]][, 2] <- -sm[[i]][, 2]
  plot(sm[i], panel.first = grid())
}
mtext("Count transfromation model", side = 3, line = -23, outer = TRUE)

## ----mosquito-fe-tbl, echo=FALSE, results="asis"------------------------------
if (!file.exists("mosquito_models.rda")) {
  formatCI <- function(x, digits = 2) {
    fx <- formatC(x, format = "f", digits = digits)
    fx <- matrix(paste0("$",
                        ifelse(c(x) > 0, paste0("\\phantom{-}", fx), fx),
                        "$"), ncol = 2)
    apply(fx, 1, paste, collapse = " ---")
  }

  b_nb <- sums_mosquito$nb$p.table[-1, 1]
  se_nb <- sums_mosquito$nb$p.table[-1, 2]
  ci_nb <- b_nb + qnorm(0.975) * se_nb %o% c(-1, 1)
  ci_nb <- formatCI(ci_nb)
  b_tr <- -sums_mosquito$tram$coef[, 1]
  se_tr <- sums_mosquito$tram$coef[, 2]
  ci_tr <- b_tr + qnorm(0.975) * se_tr %o% c(-1, 1)
  ci_tr <- formatCI(ci_tr)
  ci_mosquito <- data.frame(paste0("$", formatC(b_nb, format = "f", digits = 2), "$"),
                            ci_nb,
                            paste0("$", formatC(b_tr, format = "f", digits = 2), "$"),
                            ci_tr)
  rownames(ci_mosquito) <- c("Year = 2018", "Income = middle", "Placement = out",
                             "Income = middle \\& Placement = out")

  save(sm_mosquito_nb, sm_mosquito_tram,
       ll_mosquito, ci_mosquito, sums_mosquito,
       file = "mosquito_models.rda")
}
add <- list()
add$pos <- list(0)
add$command <- paste(c("& \\multicolumn{2}{c}{Negative binomial} &",
                       "\\multicolumn{2}{c}{Count transformation} \\\\\n",
                       "\\cmidrule(lr){2-3} \\cmidrule(lr){4-5}",
                       rep("& $\\widehat\\beta$ & 95\\% CI ", 2),
                       "\\\\\n"), collapse = " ")
print(xtable(ci_mosquito, align = c("@{}l", rep("r", 3), "r@{}")),
      include.colnames = FALSE, floating = FALSE, add.to.row = add,
      sanitize.text.function = function(x) x, booktabs = TRUE)

## ----dgp-code-----------------------------------------------------------------
##' @param n Numeric vector, number of observations in each group
##' @param beta Numeric, effect size
##' @param sfun Function with a numeric argument, non-linear smooth shift term
##' @param sigma Numeric, SD of random intercepts
##' @param hinv Function with a numeric argument, inverse transformation function
##' @param link Function with a single numeric argument on [0, 1] link function
##' @param scale Numeric, optional scaling constant on the transformation scale
##' @param seed Seed for the random number generator
##' @param two_sets Logical; generate both an estimation and a test sample
gen_smpl <- function(n, beta, sfun, sigma, hinv, link, scale = 1,
                     seed = NULL, two_sets = TRUE) {
  ## -- setting up the seed
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else {
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  if (two_sets) n <- rep(n, 2)
  x1 <- runif(sum(n))
  x2 <- runif(sum(n))
  gr <- factor(rep(seq_along(n), n))
  re <- rep(rnorm(length(n), mean = 0, sd = sigma), n)
  lp <- x1 * beta + sfun(x2) + re
  y  <- hinv((link(runif(sum(n))) + lp) * scale)
  if (two_sets) {
    n <- sum(n) / 2
    out <- list(est  = data.frame(x1 = x1[1:n], x2 = x2[1:n], gr = gr[1:n],
                                  y = y[1:n]),
                test =  data.frame(x1 = tail(x1, n), x2 = tail(x2, n),
                                   gr = tail(gr, n), y = tail(y, n)))
  } else {
    out <- data.frame(x1 = x1, x2 = x2, gr = gr, y = y)
  }
  out
}

## ----sim-setup, echo=FALSE----------------------------------------------------
## ==== Simulation inputs
b   <- qnorm(0.7) * sqrt(2) ## PI = 0.7, SD of the parametric part is sqrt(1/12) * b = 0.2141 on the LP scale
sf  <- function(x) 2 * b * sqrt(1/12) * sin(x * pi) / sqrt(2 - 16 / pi^2) ## same SD as the parametric terms on the LP scale
sig <- sqrt(1/12) * b ## same RE SD on LP scale
hi1 <- function(x) x
hi2 <- function(x) qchisq(pnorm(x), df = 3)
lnk <- qnorm

## ==== Helper function to extract results
get_res <- function(res, what, which = NULL, simplify = TRUE) {
  out <- lapply(res, function(x) {
    if (!is.null(which)) x <- x[which]
    sapply(x, function(xx) {
      if (length(xx) == 1 && is.na(xx)) NA
      else {
        if (is.function(what)) return(what(xx))
        xx[[what]]
      }
    })
  })
  if (simplify) {
    out <- do.call("rbind", out)
    if (!is.function(what))
      colnames(out) <- paste0(names(res[[1]]), "_", what)
  }
  out
}

## ----intcens-dgp--------------------------------------------------------------
make_intcens <- function(x, length = 1) {
  Surv(floor(x / length) * length, ceiling(x / length) * length,
       type = "interval2")
}

## ----sim1, echo=FALSE, results="hide"-----------------------------------------
if (!file.exists("sim1.rda")) {
  seeds <- 301:800
  system.time({
    sims1 <- parallel::mclapply(seeds, function(s) {
      smpl <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                       hinv = hi1, link = lnk, seed = s, two_sets = TRUE)
      m1 <- LmME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m1$opt$convergence == 0) {
        lmm <- list(ll    = logLik(m1),
                    lloos = logLik(m1, newdata = smpl$test, type = "fix_smooth"),
                    beta  = coef(m1), sigma = sqrt(varcov(m1)$gr[1,1]),
                    theta = m1$param$beta[attr(m1$param$beta, "type") == "bl"],
                    beta_se = sqrt(vcov(m1, parm = "x1")[1,1]))
      } else {
        lmm <- NA
      }
      m2 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m2$opt$convergence == 0) {
        bcm <- list(ll    = logLik(m2),
                    lloos = logLik(m2, newdata = smpl$test, type = "fix_smooth"),
                    beta  = coef(m2), sigma = sqrt(varcov(m2)$gr[1,1]),
                    theta = m2$param$beta[attr(m2$param$beta, "type") == "bl"],
                    beta_se = sqrt(vcov(m2, parm = "x1")[1,1]),
                    intercept = mean(sf(smpl$est$x2)))
      } else {
        bcm <- NA
      }
      list(LmME = lmm, BoxCoxME = bcm)
    }, mc.cores = 6)
  })
  save(sims1, file = "sim1.rda")
} else {
  load("sim1.rda")
}

## -- non-convergence
rowSums(sapply(sims1, function(x)
  sapply(x, function(xx) c(length(xx) == 1 && is.na(xx))))
  )

## ----sim1-beta-plot, echo=FALSE, fig.width=4, fig.height=3.5, out.width=".5\\textwidth"----
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(pnorm(get_res(sims1, "beta") / sqrt(2)) - 0.7,
        ylab = "PI", names = c("Normal", "Non-normal"),
        boxwex = 0.5)
grid(nx = NA, ny = NULL)
abline(h = 0, lwd = 2, lty = 2, col = 2)

## ----sim1-beta-ci-------------------------------------------------------------
cvi <- get_res(sims1, what = function(x) {
  ci <- x$beta + x$beta_se * qnorm(0.975) * c(-1, 1)
  (ci[1] <= b) && (b < ci[2])
})
(cvr <- colMeans(cvi, na.rm = TRUE)) ## Coverage rate
sqrt(cvr * (1 - cvr) / nrow(cvi)) ## Monte Carlo SE

## ----sim1-pred-plot, echo=FALSE, fig.width=4, fig.height=3.5, out.width=".5\\textwidth"----
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(get_res(sims1, "lloos"), ylab = "Out-of-sample log-likelihood",
        names = c("Normal", "Non-normal"), boxwex = 0.5)
grid(nx = NA, ny = NULL)

## ----sim1-baseline-plot, echo=FALSE, fig.width=4, fig.height=3.5, out.width=".5\\textwidth"----
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
th <- get_res(sims1, "theta", "BoxCoxME", simplify = FALSE)
th <- do.call("cbind", th)
## NOTE: to compare with the true transformation function (identity) we need to
## adjust the fitted baseline transformation with the expectation of the smooth
## term
ic <- get_res(sims1, "intercept", "BoxCoxME", simplify = FALSE)
ic <- sapply(ic, `[`, 1)
dat <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                hinv = hi1, link = lnk, seed = 1, two_sets = FALSE)
bcm <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = dat, nofit = TRUE)
nd  <- dat[rep(1, 100), ]
nd[[variable.names(bcm, "response")]] <- seq(-1, 2, length.out = 100)
mm  <- model.matrix(bcm$model$ctm$bases$response, data = nd)
tr <- mm %*% th + matrix(rep(ic, each = 100), nrow = 100)
matplot(nd$y, tr, type = "l", col = grey(0.1,0.1), lty = 1,
        ylab = "h(y)", xlab = "y", panel.first = grid())
abline(0,1, lwd = 2, col = 2)

## ----intcens-example----------------------------------------------------------
dat <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                hinv = hi2, link = lnk, seed = 1, two_sets = FALSE)
head(dat$y) ## exact values
head(make_intcens(dat$y, length = 2)) ## interval-censored version

## ----sim2, echo=FALSE, results="hide"-----------------------------------------
nd <- data.frame(x2 = seq(0, 1, length.out = 100))
if (!file.exists("sim2.rda")) {
  seeds <- 1001:1500
  system.time({
    sims2 <- parallel::mclapply(seeds, function(s) {
      smpl <- gen_smpl(rep(4, 100), beta = b, sfun = sf, sigma = sig,
                       hinv = hi2, link = lnk, seed = s, two_sets = TRUE)
      m1 <- LmME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m1$opt$convergence == 0) {
        lmm <- list(ll    = logLik(m1),
                    lloos = logLik(m1, newdata = smpl$test, type = "fix_smooth"),
                    beta  = coef(m1), sigma = sqrt(varcov(m1)$gr[1,1]),
                    beta_se = sqrt(vcov(m1, parm = "x1")[1,1]))
      } else {
        lmm <- NA
      }
      m2 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m2$opt$convergence == 0) {
        sm <- smooth_terms(m2, newdata = nd)[[1]][,2] + mean(sf(smpl$est$x2))
        bcm <- list(ll    = logLik(m2),
                    lloos = logLik(m2, newdata = smpl$test, type = "fix_smooth"),
                    beta  = coef(m2), sigma = sqrt(varcov(m2)$gr[1,1]),
                    beta_se = sqrt(vcov(m2, parm = "x1")[1,1]),
                    smooth = sm)
      } else {
        bcm <- NA
      }
      smpl$est$y <- make_intcens(smpl$est$y, length = 2)
      m3 <- BoxCoxME(y ~ x1 + s(x2) + (1 | gr), data = smpl$est)
      if (m3$opt$convergence == 0) {
        sm <- smooth_terms(m3, newdata = nd)[[1]][, 2] + mean(sf(smpl$est$x2))
        icm <- list(ll    = logLik(m3),
                    lloos = logLik(m3, newdata = smpl$test, type = "fix_smooth"),
                    beta  = coef(m3), sigma = sqrt(varcov(m3)$gr[1,1]),
                    beta_se = sqrt(vcov(m3, parm = "x1")[1,1]),
                    smooth = sm)
      } else {
        icm <- NA
      }
      list(LmME = lmm, BoxCoxME = bcm, BoxCoxME_ic = icm)
    }, mc.cores = 8)
  })
  save(sims2, file = "sim2.rda")
} else {
  load("sim2.rda")
}

## -- non-convergence
rowSums(sapply(sims2, function(x)
  sapply(x, function(xx) c(length(xx) == 1 && is.na(xx))))
  )

## ----sim2-beta-ci-------------------------------------------------------------
cvi <- get_res(sims2, what = function(x) {
  ci <- x$beta + x$beta_se * qnorm(0.975) * c(-1, 1)
  (ci[1] <= b) && (b < ci[2])
})
(cvr <- colMeans(cvi, na.rm = TRUE)) ## Coverage rate
sqrt(cvr * (1 - cvr) / nrow(cvi)) ## Monte Carlo SE

## ----sim2-beta-plot, echo=FALSE, fig.width=4.5, fig.height=3.5, out.width=".5\\textwidth"----
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(pnorm(get_res(sims2, "beta") / sqrt(2)) - 0.7,
        ylab = "PI", names = c("Normal", "Non-normal", "Non-normal (IC)"),
        boxwex = 0.5)
grid(nx = NA, ny = NULL)
abline(h = 0, lwd = 2, lty = 2, col = 2)

## ----sim2-pred-plot, echo=FALSE, fig.width=4.5, fig.height=3.5, out.width=".5\\textwidth"----
par(cex = 0.8, mar = c(4, 4.2, 2, 1), las = 1)
boxplot(get_res(sims2, "lloos"), ylab = "Out-of-sample log-likelihood",
        names = c("Normal", "Non-normal", "Non-normal (IC)"),
        boxwex = 0.5)
grid(nx = NA, ny = NULL)

## ----sim2-smooth-plot, echo=FALSE, fig.width=6, fig.height=3.5, out.width=".8\\textwidth"----
par(mfrow = c(1, 2), las = 1, mar = c(4, 5, 1, 1), cex = 0.8)
sms <- get_res(sims2, what = "smooth", which = c("BoxCoxME", "BoxCoxME_ic"),
               simplify = FALSE)
sm1 <- sapply(sms, function(x) x[, 1])
matplot(nd$x2, sm1, type = "l", col = grey(0.1, 0.1), lty = 1,
        ylab = expression(hat(f) * "(x)"), xlab = "x",
        panel.first = grid())
lines(nd$x2, sf(nd$x2), col = 2, lwd = 2)
sm2 <- sapply(sms, function(x) x[, 2])
matplot(nd$x2, sm2, type = "l", col = grey(0.1, 0.1), lty = 1,
        ylab = expression(hat(f) * "(x)"), xlab = "x",
        panel.first = grid())
lines(nd$x2, sf(nd$x2), col = 2, lwd = 2)

## ----info---------------------------------------------------------------------
sessionInfo()

Try the tramME package in your browser

Any scripts or data that you put into this service are public.

tramME documentation built on July 9, 2023, 7:10 p.m.