```## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk\$set(
collapse = TRUE,
comment = "#>"
)

## ------------------------------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ 1, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF\$id, 1, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF\$id, 2, drop = FALSE]))
# we simulate negative binomial longitudinal data
DF\$y <- rnbinom(n * K, size = shape, mu = exp(eta_y))
# we set the extra zeros
DF\$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
fm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
family = zi.poisson(), zi_fixed = ~ sex)

fm1

## ------------------------------------------------------------------------
fm2 <- update(fm1, zi_random = ~ 1 | id)

fm2

## ------------------------------------------------------------------------
anova(fm1, fm2)

## ------------------------------------------------------------------------
gm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
family = zi.negative.binomial(), zi_fixed = ~ sex)

gm1

## ------------------------------------------------------------------------
anova(gm1, fm2, test = FALSE)

## ------------------------------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ time, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF\$id, 1:2, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF\$id, 3, drop = FALSE]))
# we simulate log-normal longitudinal data
DF\$y <- exp(rnorm(n * K, mean = eta_y, sd = sigma))
# we set the zeros from the logistic regression
DF\$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
hurdle.lognormal <- function () {
log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
sigma <- exp(phis)
# binary indicator for y > 0
ind <- y > 0
# non-zero part
eta <- as.matrix(eta)
eta_zi <- as.matrix(eta_zi)
out <- eta
out[ind, ] <- plogis(eta_zi[ind, ], lower.tail = FALSE, log.p = TRUE) +
dnorm(x = log(y[ind]), mean = eta[ind, ], sd = sigma, log = TRUE)
# zero part
out[!ind, ] <- plogis(eta_zi[!ind, ], log.p = TRUE)
attr(out, "mu_y") <- eta
out
}
score_eta_fun <- function (y, mu, phis, eta_zi) {
sigma <- exp(phis)
# binary indicator for y > 0
ind <- y > 0
# non-zero part
eta <- as.matrix(mu)
out <- eta
out[!ind, ] <- 0
out[ind, ] <- (log(y[ind]) - eta[ind, ]) / sigma^2
out
}
score_eta_zi_fun <- function (y, mu, phis, eta_zi) {
ind <- y > 0
probs <- plogis(as.matrix(eta_zi))
out <- 1 - probs
out[ind, ] <- - probs[ind, ]
out
}
score_phis_fun <- function (y, mu, phis, eta_zi) {
sigma <- exp(phis)
# binary indicator for y > 0
ind <- y > 0
# non-zero part
eta <- as.matrix(mu)
out <- eta
out[!ind, ] <- 0
out[ind, ] <- - 1 + (log(y[ind]) - eta[ind, ])^2 / sigma^2
out
}
simulate <- function (n, mu, phis, eta_zi) {
y <- rlnorm(n = n, meanlog = mu, sdlog = exp(phis))
y[as.logical(rbinom(n, 1, plogis(eta_zi)))] <- 0
y
}
structure(list(family = "two-part log-normal", link = stats\$name,
score_eta_fun = score_eta_fun, score_eta_zi_fun = score_eta_zi_fun,
score_phis_fun = score_phis_fun, simulate = simulate),
class = "family")
}

## ------------------------------------------------------------------------
km1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
family = hurdle.lognormal(), n_phis = 1,
zi_fixed = ~ sex)

km1

## ------------------------------------------------------------------------
km2 <- update(km1, random = ~ 1 || id, zi_random = ~ 1 | id)

km2

## ---- fig.align = "center", fig.width = 8.5, fig.height = 7.5------------
par(mar = c(2.5, 2.5, 0, 0), mgp = c(1.1, 0.5, 0), cex.axis = 0.7, cex.lab = 0.8)
y <- DF\$y
y[y > 0] <- log(y[y > 0])
x_vals <- seq(min(y), max(y), length.out = 500)
out <- simulate(km2, nsim = 30, acount_MLEs_var = TRUE)
ind <- out > sqrt(.Machine\$double.eps)
out[ind] <- log(out[ind])
rep_y <- apply(out, 2, function (x, x_vals) ecdf(x)(x_vals), x_vals = x_vals)
matplot(x_vals, rep_y, type = "l", lty = 1, col = "lightgrey",
xlab = "Response Variable", ylab = "Empirical CDF")
lines(x_vals, ecdf(y)(x_vals))
legend("bottomright", c("log replicated data", "log observed data"), lty = 1,
col = c("lightgrey", "black"), bty = "n", cex = 0.8)

## ------------------------------------------------------------------------
set.seed(123)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ time, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF\$id, 1:2, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF\$id, 3, drop = FALSE]))
# we simulate truncated at zero negative binomial longitudinal data
lower <- pnbinom(0, mu = exp(eta_y), size = shape)
u <- runif(n * K, min = lower, max = 1)
DF\$y <- qnbinom(u, mu = exp(eta_y), size = shape)
# we set the zeros from the logistic regression
DF\$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
dm1 <- mixed_model(y ~ sex * time, random = ~ time | id, data = DF,
family = hurdle.poisson(), zi_fixed = ~ sex)

dm1

## ---- eval = FALSE-------------------------------------------------------
#  dm2 <- update(dm1, zi_random = ~ 1 | id)
#
#  dm2
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.poisson(), zi_fixed = ~sex, zi_random = ~1 |
#  #>         id)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle poisson
#  #>
#  #> Random effects covariance matrix:
#  #>                 StdDev    Corr
#  #> (Intercept)     0.8227  (Intr)    time
#  #> time            0.3636 -0.3135
#  #> zi_(Intercept)  0.6601 -0.0089  0.0698
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.52752248     0.08381132     0.09213789    -0.08032932
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.5386156   0.5177395
#  #>
#  #> log-Lik: -2797.308

## ---- eval = FALSE-------------------------------------------------------
#  anova(dm1, dm2)
#  #>
#  #>         AIC     BIC  log.Lik  LRT df p.value
#  #> dm1 5622.60 5646.05 -2802.30
#  #> dm2 5618.62 5618.62 -2797.31 9.98  3  0.0187

## ---- eval = FALSE-------------------------------------------------------
#  hm1 <- mixed_model(y ~ sex * time, random = ~ time | id, data = DF,
#                    family = hurdle.negative.binomial(), zi_fixed = ~ sex)
#
#  hm1
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.negative.binomial(), zi_fixed = ~sex)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle negative binomial
#  #>
#  #> Random effects covariance matrix:
#  #>              StdDev    Corr
#  #> (Intercept)  0.7305
#  #> time         0.3249 -0.1170
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.48250218     0.07508276     0.10249953    -0.08978082
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.4178427   0.4857483
#  #>
#  #> phi parameters:
#  #>  0.6778918
#  #>
#  #> log-Lik: -2181.468

## ---- eval = FALSE-------------------------------------------------------
#  hm2 <- update(hm1, zi_random = ~ 1 | id)
#
#  hm2
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.negative.binomial(), zi_fixed = ~sex, zi_random = ~1 |
#  #>         id)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle negative binomial
#  #>
#  #> Random effects covariance matrix:
#  #>                 StdDev    Corr
#  #> (Intercept)     0.7313  (Intr)    time
#  #> time            0.3258 -0.1161
#  #> zi_(Intercept)  0.6610 -0.0522  0.1420
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.47626158     0.07041272     0.10539676    -0.08613159
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.5425483   0.5193066
#  #>
#  #> phi parameters:
#  #>  0.6759281
#  #>
#  #> log-Lik: -2176.313

## ---- eval = FALSE-------------------------------------------------------
#  anova(hm1, hm2)
#  #>
#  #>         AIC     BIC  log.Lik   LRT df p.value
#  #> hm1 4382.94 4408.99 -2181.47
#  #> hm2 4378.63 4378.63 -2176.31 10.31  3  0.0161
```