highdimjm: High Dimensional Joint Models

# Doc header --------------------------------------------------------------

# Create custom model family object
# Jan van den Brand
# jan.vandenbrand@kuleuven.be
# janvandenbrand@gmail.com

# Aim is to develop a two part model that can accomodate continuous and binary outcomes
# See https://drizopoulos.github.io/GLMMadaptive/articles/ZeroInflated_and_TwoPart_Models.html 


# Preliminaries -----------------------------------------------------------

setwd("C:/Users/jajgv/Documents/repos/highdimjm")
pkg <- c("ggplot2","gridExtra", "dplyr", "optimParallel", "GLMMadaptive", "Matrix", "MASS")
lapply(pkg, library, character.only = TRUE)
cluster <- makeCluster(detectCores()); setDefaultCluster(cl = cluster)

# Data --------------------------------------------------

set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design: 
# everyone has a baseline measurment, and then measurements at random follow-up times
df <- data.frame(id = rep(seq_len(n), each = K),
                 time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                 sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = df)
Z <- model.matrix(~ time, data = df)
# design matrices for the fixed and random effects zero part (i.e. the zero inflated part)
X_zi <- model.matrix(~ sex, data = df)
Z_zi <- model.matrix(~ 1, data = df)

betas <- c(-2.13, -0.25, 0.24, -0.05) # fixed effects coefficients non-zero part
sigma <- 0.5 # standard deviation error terms non-zero part
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.1 # variance of random slopes non-zero part
D33 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)), rnorm(n, sd = sqrt(D33)))
# linear predictor non-zero part
#
# Xbeta <- X %*% betas 
# Zb <- Z * b[df$id, 1:2] # why select rows on df$id? Short hand for a rep (in essence yes)
# length(unique(df$id))
# length(df$id)
# b[1:5,]
# b[rep(1:2, each = 2),]
# Zb <- rowSums(Zb)
# lp_y <- as.vector(Xbeta + Zb)
#
eta_y <- as.vector(X %*% betas + rowSums(Z * b[df$id, 1:2, drop = FALSE])) 
# linear predictor zero part
#
# X_zigamma <- X_zi %*% gammas
# Z_zib <- Z_zi * b[df$id,3]
# Z_zib <- rowSums(Z_zib)
# lp_zi <- as.vector(X_zigamma + Z_zib)
#
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[df$id, 3, drop = FALSE]))
# we simulate log-normal longitudinal data
df$y <- exp(rnorm(n * K, mean = eta_y, sd = sigma))
# we set the zeros from the logistic regression
df$y2 <- as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))


# Stack the data ------------------------------------------------

d <- df
# Append the data
pairs <- c("y", "y2")
# Note that id was stored as a factor in the pbc data. This may cause errors.
ids <- rep(d[["id"]], each = 2)
Y <- d %>% dplyr::select(y1 = paste(pairs[1]), y2 = paste(pairs[2]))
Y <- t(as.matrix(Y))
Y <- as.vector(Y)
Y <- data.frame(id = as.character(ids), 
                Y = Y)
# 2) create the design matrix
X <- d %>% dplyr::select(x1 = paste(pairs[2]), x2 = paste(pairs[1]))
X <- t(as.matrix(X))
X <- as.vector(X)
X <- data.frame(id = as.character(ids), 
                X = X)
# 3) add covariates
Z <- d %>% mutate(sex = as.integer(sex)) %>% dplyr::select(time, sex) # sex: 1 = male, 2 = female
Z <- as.matrix(Z)
# 4) Add dummies for X
dummy <- kronecker(rep(1, times = nrow(d)), diag(1, nrow = 2))
Zk <- kronecker(Z, diag(1, nrow = 2))
Z <- cbind(dummy, Zk)
# 5 Join the Y and design matrices
data <- cbind(Y, X[-1], Z)
names(data) <- c("id", "Y", "X", "intercept.Y1", "intercept.Y2","time.Y1", "time.Y2", "sex.Y1", "sex.Y2")

# test data consistency
checkY <-  data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(Y)
all(d["y"] == checkY)
checkX <- data %>% dplyr::filter(intercept.Y1 == 1) %>% dplyr::select(X)
all(d["y2"] == checkX)

# assert that y1 is NOT dichotomous
all(sort(unique(d$y)) != c(0,1))
# assert that y2 is dichotomous
all(sort(unique(d$y2)) == c(0,1))
# assert that y is numeric
class(d$y) == "numeric"
 
#  Code Dimitris ---------------

Y <- data$Y
# determine the log density function
(ind <- rep(0:1, times = length(Y)/2))
# check the selection
(Y[ind==0])
(Y[ind==1])
eta <- as.matrix(c(t(cbind(eta_y, eta_zi))))
out <- eta
# linear model
sigma <- exp(1)
out[ind == 0,] <- dnorm(Y[ind == 0], eta[ind == 0,], sigma, log = TRUE)
# binomial model
probs <- exp(eta[ind == 1,]) / (1 + exp(eta[ind == 1,])) # probability
out[ind == 1,] <- dbinom(Y[ind == 1], 1, probs, log = TRUE)


# Full function -----------------------------------------------------------

rm(list = setdiff(ls(), c("data", "binary.normal")))

# Create family object for the longitudinal data
binary.normal <- function () {
  stats <- make.link("identity")
  log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
    ### Determine the probability density for a combined binary and normal outcome
    
      # Uses finite difference approximation for optimization of the loss function
    
      # Args:
        # y: the outcome vector holding the binary and normal outcome
        # (optional) eta: the linear predictor
        # (optional) mu_fun: optional inverse of the linear predictor used during optimization
        # phis: the number of phi parameters, default = 1 
        # (optional) eta_zi Zero Inflated model part
    ###
    # Set indicator
    # Set indicator, assuming that the pairs have been unrolled using c(t(cbind(...))) procedure
    ind <- rep(0:1, times = length(y)/2)
    eta <- as.matrix(eta)
    out <- eta
    # linear mixed model
    sigma <- exp(phis)
    out[ind==0, ] <- dnorm(y[ind == 0], eta[ind == 0, ], sigma, log = TRUE)
    # mixed logistic model
    probs <- exp(eta[ind == 1, ]) / (1 + exp(eta[ind == 1, ])) 
    out[ind == 1,] <- dbinom(y[ind == 1], 1, probs, log = TRUE)
    out
  }
  structure(list(family = "user binary normal", link = stats$name, linkfun = stats$linkfun,
                 linkinv = stats$linkinv, log_dens = log_dens),
            class = "family")
}

# Fit the model -----------------------------------------------------------

data$id <- as.numeric(data$id)
model <- mixed_model(fixed = Y ~ -1 + 
                       intercept.Y1 + intercept.Y1:X + time.Y1 +
                       intercept.Y2 + intercept.Y2:X + time.Y2,
                     random = ~ -1 + intercept.Y1 + intercept.Y2 | id, 
                     data = data, 
                     family = binary.normal(), n_phis = 1,
                     control = list(optimizer = "optim",
                                  optim_method = "BFGS",
                                  iter_EM = 30,
                                  verbose = TRUE))
# Convergence on EM works after 39 iterations
# quasi-Newton works


# Function for gaussians ---------------------------------------------------
# from: https://drizopoulos.github.io/GLMMadaptive/articles/Custom_Models.html#student-s-t-mixed-effects-model

normal <- function() {
  # env <- new.env(parent = .GlobalEnv)
  stats <- make.link("identity")
  log_dens <- function (y, eta, mu_fun, phis, eta_zi) {
    ### Determine the probability density for two normally distributed outcomes
    
    # Uses finite difference apporiximantion for optimization of the loss function
    
    # Args:
    # y: outcome vector with longitudinal outcomes
    # (optional) eta: the linear predictor
    # (optional) mu_fun: the inverse of the linear predictor used by optim
    # phis: the number of phi parameters (for the standard deviation)
    # (optional): the linear predictor for the Zero Inflated part of mixed_model
    ###
    
    eta <- as.matrix(eta)
    out <- eta
    # linear mixed model
    sigma <- exp(phis)
    out <- dnorm(y , eta, sigma, log = TRUE)
    out
  }
  # score_eta_fun <- function (y, mu, phis, eta_zi) {
  #   # the derivative of the log density w.r.t. mu
  #   sigma2 <- exp(phis)^2
  #   y_mu <- y - mu
  #   y_mu / sigma2 
  # }
  # score_phis_fun <- function (y, mu, phis, eta_zi) {
  #   sigma <- exp(phis)
  #   y_mu <- y - mu
  #   sigma^{-2} / (1 + y_mu / sigma^2) - 1
  # }
  # environment(log_dens) <- environment(score_eta_fun) <- environment(score_phis_fun) <- env
  structure(list(family = "user defined normal" , link = stats$name, linkfun = stats$linkfun,
                 linkinv = stats$linkinv, log_dens = log_dens),
            class = "family")
}

JanvandenBrand/highdimjm documentation built on Dec. 18, 2021, 12:32 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

JanvandenBrand/highdimjm
High Dimensional Joint Models

R/custom_model_families.R
In JanvandenBrand/highdimjm: High Dimensional Joint Models

Defines functions normal

R Package Documentation

Browse R Packages

We want your feedback!

JanvandenBrand/highdimjm High Dimensional Joint Models

R/custom_model_families.R In JanvandenBrand/highdimjm: High Dimensional Joint Models

Defines functions normal

R Package Documentation

Browse R Packages

We want your feedback!

JanvandenBrand/highdimjm
High Dimensional Joint Models

R/custom_model_families.R
In JanvandenBrand/highdimjm: High Dimensional Joint Models