# Doc header --------------------------------------------------------------
# Create custom model family object
# Jan van den Brand
# jan.vandenbrand@kuleuven.be
# janvandenbrand@gmail.com
# Aim is to develop a two part model that can accomodate continuous and binary outcomes
# See https://drizopoulos.github.io/GLMMadaptive/articles/ZeroInflated_and_TwoPart_Models.html
# Preliminaries -----------------------------------------------------------
setwd("C:/Users/jajgv/Documents/repos/highdimjm")
pkg <- c("ggplot2","gridExtra", "dplyr", "optimParallel", "GLMMadaptive", "Matrix", "MASS")
lapply(pkg, library, character.only = TRUE)
cluster <- makeCluster(detectCores()); setDefaultCluster(cl = cluster)
# Data --------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
df <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = df)
Z <- model.matrix(~ time, data = df)
# design matrices for the fixed and random effects zero part (i.e. the zero inflated part)
X_zi <- model.matrix(~ sex, data = df)
Z_zi <- model.matrix(~ 1, data = df)
betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
#
# Xbeta <- X %*% betas
# Zb <- Z * b[df$id, 1:2] # why select rows on df$id? Short hand for a rep (in essence yes)
# length(unique(df$id))
# length(df$id)
# b[1:5,]
# b[rep(1:2, each = 2),]
# Zb <- rowSums(Zb)
# lp_y <- as.vector(Xbeta + Zb)
#
eta_y <- as.vector(X %*% betas + rowSums(Z * b[df$id, 1:2, drop = FALSE]))
# linear predictor zero part
#
# X_zigamma <- X_zi %*% gammas
# Z_zib <- Z_zi * b[df$id,3]
# Z_zib <- rowSums(Z_zib)
# lp_zi <- as.vector(X_zigamma + Z_zib)
#
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[df$id, 3, drop = FALSE]))
# we simulate log-normal longitudinal data
df$y <- exp(rnorm(n * K, mean = eta_y, sd = sigma))
# we set the zeros from the logistic regression
df$y2 <- as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))
# Stack the data ------------------------------------------------
d <- df
# Append the data
pairs <- c("y", "y2")
# Note that id was stored as a factor in the pbc data. This may cause errors.
ids <- rep(d[["id"]], each = 2)
Y <- d %>% dplyr::select(y1 = paste(pairs[1]), y2 = paste(pairs[2]))
Y <- t(as.matrix(Y))
Y <- as.vector(Y)
Y <- data.frame(id = as.character(ids),
Y = Y)
# 2) create the design matrix
X <- d %>% dplyr::select(x1 = paste(pairs[2]), x2 = paste(pairs[1]))
X <- t(as.matrix(X))
X <- as.vector(X)
X <- data.frame(id = as.character(ids),
X = X)
# 3) add covariates
Z <- d %>% mutate(sex = as.integer(sex)) %>% dplyr::select(time, sex) # sex: 1 = male, 2 = female
Z <- as.matrix(Z)
# 4) Add dummies for X
dummy <- kronecker(rep(1, times = nrow(d)), diag(1, nrow = 2))
Zk <- kronecker(Z, diag(1, nrow = 2))
Z <- cbind(dummy, Zk)
# 5 Join the Y and design matrices
data <- cbind(Y, X[-1], Z)
names(data) <- c("id", "Y", "X", "intercept.Y1", "intercept.Y2","time.Y1", "time.Y2", "sex.Y1", "sex.Y2")
# test data consistency
checkY <- data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(Y)
all(d["y"] == checkY)
checkX <- data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(X)
all(d["y2"] == checkX)
# assert that y1 is NOT dichotomous
all(sort(unique(d$y)) != c(0,1))
# assert that y2 is dichotomous
all(sort(unique(d$y2)) == c(0,1))
# assert that y is numeric
class(d$y) == "numeric"
# Code Dimitris ---------------
Y <- data$Y
# determine the log density function
(ind <- rep(0:1, times = length(Y)/2))
# check the selection
(Y[ind==0])
(Y[ind==1])
eta <- as.matrix(c(t(cbind(eta_y, eta_zi))))
out <- eta
# linear model
sigma <- exp(1)
out[ind == 0,] <- dnorm(Y[ind == 0], eta[ind == 0,], sigma, log = TRUE)
# binomial model
probs <- exp(eta[ind == 1,]) / (1 + exp(eta[ind == 1,])) # probability
out[ind == 1,] <- dbinom(Y[ind == 1], 1, probs, log = TRUE)
# Full function -----------------------------------------------------------
rm(list = setdiff(ls(), c("data", "binary.normal")))
# Create family object for the longitudinal data
binary.normal <- function () {
stats <- make.link("identity")
log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
### Determine the probability density for a combined binary and normal outcome
# Uses finite difference approximation for optimization of the loss function
# Args:
# y: the outcome vector holding the binary and normal outcome
# (optional) eta: the linear predictor
# (optional) mu_fun: optional inverse of the linear predictor used during optimization
# phis: the number of phi parameters, default = 1
# (optional) eta_zi Zero Inflated model part
###
# Set indicator
# Set indicator, assuming that the pairs have been unrolled using c(t(cbind(...))) procedure
ind <- rep(0:1, times = length(y)/2)
eta <- as.matrix(eta)
out <- eta
# linear mixed model
sigma <- exp(phis)
out[ind==0, ] <- dnorm(y[ind == 0], eta[ind == 0, ], sigma, log = TRUE)
# mixed logistic model
probs <- exp(eta[ind == 1, ]) / (1 + exp(eta[ind == 1, ]))
out[ind == 1,] <- dbinom(y[ind == 1], 1, probs, log = TRUE)
out
}
structure(list(family = "user binary normal", link = stats$name, linkfun = stats$linkfun,
linkinv = stats$linkinv, log_dens = log_dens),
class = "family")
}
# Fit the model -----------------------------------------------------------
data$id <- as.numeric(data$id)
model <- mixed_model(fixed = Y ~ -1 +
intercept.Y1 + intercept.Y1:X + time.Y1 +
intercept.Y2 + intercept.Y2:X + time.Y2,
random = ~ -1 + intercept.Y1 + intercept.Y2 | id,
data = data,
family = binary.normal(), n_phis = 1,
control = list(optimizer = "optim",
optim_method = "BFGS",
iter_EM = 30,
verbose = TRUE))
# Convergence on EM works after 39 iterations
# quasi-Newton works
# Function for gaussians ---------------------------------------------------
# from: https://drizopoulos.github.io/GLMMadaptive/articles/Custom_Models.html#student-s-t-mixed-effects-model
normal <- function() {
# env <- new.env(parent = .GlobalEnv)
stats <- make.link("identity")
log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
### Determine the probability density for two normally distributed outcomes
# Uses finite difference apporiximantion for optimization of the loss function
# Args:
# y: outcome vector with longitudinal outcomes
# (optional) eta: the linear predictor
# (optional) mu_fun: the inverse of the linear predictor used by optim
# phis: the number of phi parameters (for the standard deviation)
# (optional): the linear predictor for the Zero Inflated part of mixed_model
###
eta <- as.matrix(eta)
out <- eta
# linear mixed model
sigma <- exp(phis)
out <- dnorm(y , eta, sigma, log = TRUE)
out
}
# score_eta_fun <- function (y, mu, phis, eta_zi) {
# # the derivative of the log density w.r.t. mu
# sigma2 <- exp(phis)^2
# y_mu <- y - mu
# y_mu / sigma2
# }
# score_phis_fun <- function (y, mu, phis, eta_zi) {
# sigma <- exp(phis)
# y_mu <- y - mu
# sigma^{-2} / (1 + y_mu / sigma^2) - 1
# }
# environment(log_dens) <- environment(score_eta_fun) <- environment(score_phis_fun) <- env
structure(list(family = "user defined normal" , link = stats$name, linkfun = stats$linkfun,
linkinv = stats$linkinv, log_dens = log_dens),
class = "family")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.