# Doc header --------------------------------------------------------------
# Jan van den Brand
# jan.vandenbrand@kuleuven.be
# janvandenbrand@gmail.com
# Create custom model family object
# Aim is to develop a two part model that can accomodate continuous and binary outcomes
# See https://drizopoulos.github.io/GLMMadaptive/articles/ZeroInflated_and_TwoPart_Models.html
# Use the 'zi' formulation
# Preliminaries -----------------------------------------------------------
setwd("C:/Users/jajgv/Documents/repos/highdimjm")
pkg <- c("ggplot2","gridExtra", "dplyr", "optimParallel", "GLMMadaptive", "Matrix", "MASS")
lapply(pkg, library, character.only = TRUE)
cluster <- makeCluster(detectCores()); setDefaultCluster(cl = cluster)
# Simulate some data --------------------------------------------------
# From https://drizopoulos.github.io/GLMMadaptive/articles/ZeroInflated_and_TwoPart_Models.html#two-part-mixed-effects-model-for-semi-continuous-data-1
set.seed(1234)
n <- 10 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurement, and then measurements at random follow-up times
df <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = df)
Z <- model.matrix(~ time, data = df)
# design matrices for the fixed and random effects zero part (i.e. the zero inflated part)
X_zi <- model.matrix(~ sex, data = df)
Z_zi <- model.matrix(~ 1, data = df)
betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor continuous part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[df$id, 1:2, drop = FALSE]))
# linear predictor binomial part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[df$id, 3, drop = FALSE]))
# we simulate normal longitudinal data
df$y <- rnorm(n * K, mean = eta_y, sd = sigma)
# we simulate the binomial data
df$y2 <- as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))
# Determine the log density ---------------------------------------------------
y <- c(df$y, df$y2)
ind <- rep(0:1, each = length(y)/2)
(y[ind == 0])
(y[ind == 1])
# Covariate data just gets copied
eta <- as.matrix(eta_y)
eta_zi <- as.matrix(eta_zi)
out <- rbind(eta, eta_zi)
(out[ind == 1, ])
(out[ind == 0, ])
# linear mixed model (continuous part)
out[ind==0,] <- dnorm(y[ind == 0], eta, sigma, log = TRUE)
# logistic mixed model (binary part)
probs <- exp(eta_zi) / (1 + exp(eta_zi))
out[ind==1,] <- dbinom(y[ind == 1], 1, probs, log = TRUE)
out
# Cast into a function ----------------------------------------------------
binary.normal <- function () {
stats <- make.link("identity")
log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
### Determine the probability density for a combined binary and normal outcome
# Uses finite difference approximation for optimization of the loss function
# Args:
# y: the outcome vector holding the binary and normal outcome
# (optional) eta: the linear predictor
# (optional) mu_fun: optional inverse of the linear predictor used during optimization
# phis: the number of phi parameters, default = 1
# (optional) eta_zi Zero Inflated model part
###
sigma <- exp(phis)
ind <- rep(0:1, times = length(y)/2) # FIX let user assign id's
eta <- as.matrix(eta)
eta_zi <- as.matrix(eta_zi)
out <- rbind(eta, eta_zi)
# linear mixed model (continuous part)
out[ind==0,] <- dnorm(y[ind == 0], eta, sigma, log = TRUE)
# logistic mixed model (binary part)
probs <- exp(eta_zi) / (1 + exp(eta_zi))
out[ind==1,] <- dbinom(y[ind == 1], 1, probs, log = TRUE)
out
}
structure(list(family = "user binary normal",
link = stats$name,
linkfun = stats$linkfun,
linkinv = stats$linkinv,
log_dens = log_dens),
class = "family")
}
# Stack the data ----------------------------------------------------------
d <- df
# Append the data
pairs <- c("y", "y2")
# Note that id was stored as a factor in the pbc data. This may cause errors.
ids <- rep(d[["id"]], each = 2)
Y <- d %>% dplyr::select(y1 = paste(pairs[1]), y2 = paste(pairs[2]))
Y <- t(as.matrix(Y))
Y <- as.vector(Y)
Y <- data.frame(id = as.character(ids),
Y = Y)
# 2) create the design matrix
X <- d %>% dplyr::select(x1 = paste(pairs[2]), x2 = paste(pairs[1]))
X <- t(as.matrix(X))
X <- as.vector(X)
X <- data.frame(id = as.character(ids),
X = X)
# 3) add covariates
Z <- d %>% mutate(sex = as.integer(sex)) %>% dplyr::select(time, sex) # sex: 1 = male, 2 = female
Z <- as.matrix(Z)
# 4) Add dummies for X
dummy <- kronecker(rep(1, times = nrow(d)), diag(1, nrow = 2))
Zk <- kronecker(Z, diag(1, nrow = 2))
Z <- cbind(dummy, Zk)
# 5 Join the Y and design matrices
data <- cbind(Y, X[-1], Z)
names(data) <- c("id", "Y", "X", "intercept.Y1", "intercept.Y2","time.Y1", "time.Y2", "sex.Y1", "sex.Y2")
# test data consistency
checkY <- data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(Y)
all(d["y"] == checkY)
checkX <- data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(X)
all(d["y2"] == checkX)
# assert that y1 is NOT dichotomous
all(sort(unique(d$y)) != c(0,1))
# assert that y2 is dichotomous
all(sort(unique(d$y2)) == c(0,1))
# assert that y is numeric
class(d$y) == "numeric"
rm(list = setdiff(ls(), "data", "binary.normal"))
# Test the model -----------------------------------------------------------
data$id <- as.numeric(data$id)
model <- mixed_model(fixed = Y ~ -1 + intercept.Y1 + intercept.Y1:X + time.Y1,
random = ~ -1 + intercept.Y1 | id,
zi_fixed = ~ -1 + intercept.Y2 + intercept.Y2:X + time.Y2,
zi_random = ~ -1 + intercept.Y2 | id,
data = data,
family = binary.normal(),
n_phis = 1,
control = list(optimizer = "optim",
optim_method = "L-BFGS-B",
iter_EM = 1e3,
verbose = TRUE))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.