inst/doc/ZeroInflated_and_TwoPart_Models.R

## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library("GLMMadaptive")

## ------------------------------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design: 
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
                 time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                 sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ 1, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 2, drop = FALSE]))
# we simulate negative binomial longitudinal data
DF$y <- rnbinom(n * K, size = shape, mu = exp(eta_y))
# we set the extra zeros
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
fm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
                   family = zi.poisson(), zi_fixed = ~ sex)

fm1

## ------------------------------------------------------------------------
fm2 <- update(fm1, zi_random = ~ 1 | id)

fm2

## ------------------------------------------------------------------------
anova(fm1, fm2)

## ------------------------------------------------------------------------
gm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
                   family = zi.negative.binomial(), zi_fixed = ~ sex)

gm1

## ------------------------------------------------------------------------
anova(gm1, fm2, test = FALSE)

## ------------------------------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design: 
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
                 time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                 sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ time, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1:2, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 3, drop = FALSE]))
# we simulate log-normal longitudinal data
DF$y <- exp(rnorm(n * K, mean = eta_y, sd = sigma))
# we set the zeros from the logistic regression
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
hurdle.lognormal <- function () {
    stats <- make.link("identity")
    log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
        sigma <- exp(phis)
        # binary indicator for y > 0
        ind <- y > 0
        # non-zero part
        eta <- as.matrix(eta)
        eta_zi <- as.matrix(eta_zi)
        out <- eta
        out[ind, ] <- plogis(eta_zi[ind, ], lower.tail = FALSE, log.p = TRUE) + 
            dnorm(x = log(y[ind]), mean = eta[ind, ], sd = sigma, log = TRUE)
        # zero part
        out[!ind, ] <- plogis(eta_zi[!ind, ], log.p = TRUE)
        attr(out, "mu_y") <- eta
        out
    }
    score_eta_fun <- function (y, mu, phis, eta_zi) {
        sigma <- exp(phis)
        # binary indicator for y > 0
        ind <- y > 0
        # non-zero part
        eta <- as.matrix(mu)
        out <- eta
        out[!ind, ] <- 0
        out[ind, ] <- (log(y[ind]) - eta[ind, ]) / sigma^2
        out
    }
    score_eta_zi_fun <- function (y, mu, phis, eta_zi) {
        ind <- y > 0
        probs <- plogis(as.matrix(eta_zi))
        out <- 1 - probs
        out[ind, ] <- - probs[ind, ]
        out
    }
    score_phis_fun <- function (y, mu, phis, eta_zi) {
        sigma <- exp(phis)
        # binary indicator for y > 0
        ind <- y > 0
        # non-zero part
        eta <- as.matrix(mu)
        out <- eta
        out[!ind, ] <- 0
        out[ind, ] <- - 1 + (log(y[ind]) - eta[ind, ])^2 / sigma^2
        out
    }
    simulate <- function (n, mu, phis, eta_zi) {
        y <- rlnorm(n = n, meanlog = mu, sdlog = exp(phis))
        y[as.logical(rbinom(n, 1, plogis(eta_zi)))] <- 0
        y
    }
    structure(list(family = "two-part log-normal", link = stats$name, 
                   linkfun = stats$linkfun, linkinv = stats$linkinv, log_dens = log_dens,
                   score_eta_fun = score_eta_fun, score_eta_zi_fun = score_eta_zi_fun,
                   score_phis_fun = score_phis_fun, simulate = simulate),
              class = "family")
}

## ------------------------------------------------------------------------
km1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF, 
                  family = hurdle.lognormal(), n_phis = 1,
                  zi_fixed = ~ sex)

km1

## ------------------------------------------------------------------------
km2 <- update(km1, random = ~ 1 || id, zi_random = ~ 1 | id)

km2

## ---- fig.align = "center", fig.width = 8.5, fig.height = 7.5------------
par(mar = c(2.5, 2.5, 0, 0), mgp = c(1.1, 0.5, 0), cex.axis = 0.7, cex.lab = 0.8)
y <- DF$y
y[y > 0] <- log(y[y > 0])
x_vals <- seq(min(y), max(y), length.out = 500)
out <- simulate(km2, nsim = 30, acount_MLEs_var = TRUE)
ind <- out > sqrt(.Machine$double.eps)
out[ind] <- log(out[ind])
rep_y <- apply(out, 2, function (x, x_vals) ecdf(x)(x_vals), x_vals = x_vals)
matplot(x_vals, rep_y, type = "l", lty = 1, col = "lightgrey", 
        xlab = "Response Variable", ylab = "Empirical CDF")
lines(x_vals, ecdf(y)(x_vals))
legend("bottomright", c("log replicated data", "log observed data"), lty = 1, 
       col = c("lightgrey", "black"), bty = "n", cex = 0.8)

## ------------------------------------------------------------------------
set.seed(123)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design: 
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
                 time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                 sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ time, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1:2, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 3, drop = FALSE]))
# we simulate truncated at zero negative binomial longitudinal data
lower <- pnbinom(0, mu = exp(eta_y), size = shape)
u <- runif(n * K, min = lower, max = 1)
DF$y <- qnbinom(u, mu = exp(eta_y), size = shape)
# we set the zeros from the logistic regression
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

## ------------------------------------------------------------------------
dm1 <- mixed_model(y ~ sex * time, random = ~ time | id, data = DF, 
                  family = hurdle.poisson(), zi_fixed = ~ sex)

dm1

## ---- eval = FALSE-------------------------------------------------------
#  dm2 <- update(dm1, zi_random = ~ 1 | id)
#  
#  dm2
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.poisson(), zi_fixed = ~sex, zi_random = ~1 |
#  #>         id)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle poisson
#  #>  link: log
#  #>
#  #> Random effects covariance matrix:
#  #>                 StdDev    Corr
#  #> (Intercept)     0.8227  (Intr)    time
#  #> time            0.3636 -0.3135
#  #> zi_(Intercept)  0.6601 -0.0089  0.0698
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.52752248     0.08381132     0.09213789    -0.08032932
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.5386156   0.5177395
#  #>
#  #> log-Lik: -2797.308

## ---- eval = FALSE-------------------------------------------------------
#  anova(dm1, dm2)
#  #>
#  #>         AIC     BIC  log.Lik  LRT df p.value
#  #> dm1 5622.60 5646.05 -2802.30
#  #> dm2 5618.62 5618.62 -2797.31 9.98  3  0.0187

## ---- eval = FALSE-------------------------------------------------------
#  hm1 <- mixed_model(y ~ sex * time, random = ~ time | id, data = DF,
#                    family = hurdle.negative.binomial(), zi_fixed = ~ sex)
#  
#  hm1
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.negative.binomial(), zi_fixed = ~sex)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle negative binomial
#  #>  link: log
#  #>
#  #> Random effects covariance matrix:
#  #>              StdDev    Corr
#  #> (Intercept)  0.7305
#  #> time         0.3249 -0.1170
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.48250218     0.07508276     0.10249953    -0.08978082
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.4178427   0.4857483
#  #>
#  #> phi parameters:
#  #>  0.6778918
#  #>
#  #> log-Lik: -2181.468

## ---- eval = FALSE-------------------------------------------------------
#  hm2 <- update(hm1, zi_random = ~ 1 | id)
#  
#  hm2
#  #>
#  #> Call:
#  #> mixed_model(fixed = y ~ sex * time, random = ~time | id, data = DF,
#  #>     family = hurdle.negative.binomial(), zi_fixed = ~sex, zi_random = ~1 |
#  #>         id)
#  #>
#  #>
#  #> Model:
#  #>  family: hurdle negative binomial
#  #>  link: log
#  #>
#  #> Random effects covariance matrix:
#  #>                 StdDev    Corr
#  #> (Intercept)     0.7313  (Intr)    time
#  #> time            0.3258 -0.1161
#  #> zi_(Intercept)  0.6610 -0.0522  0.1420
#  #>
#  #> Fixed effects:
#  #>    (Intercept)      sexfemale           time sexfemale:time
#  #>     1.47626158     0.07041272     0.10539676    -0.08613159
#  #>
#  #> Zero-part coefficients:
#  #> (Intercept)   sexfemale
#  #>  -1.5425483   0.5193066
#  #>
#  #> phi parameters:
#  #>  0.6759281
#  #>
#  #> log-Lik: -2176.313

## ---- eval = FALSE-------------------------------------------------------
#  anova(hm1, hm2)
#  #>
#  #>         AIC     BIC  log.Lik   LRT df p.value
#  #> hm1 4382.94 4408.99 -2181.47
#  #> hm2 4378.63 4378.63 -2176.31 10.31  3  0.0161

Try the GLMMadaptive package in your browser

Any scripts or data that you put into this service are public.

GLMMadaptive documentation built on Jan. 29, 2019, 5:09 p.m.