# Doc header --------------------------------------------------------------
# Pairwise binomial mixed models
# Jan van den Brand, PhD
# jan.vandenbrand@kuleuven.be
# janvandenbrand@gmail.com
# Project: DKF19OK003
# The work here is based on The work is based on Fieuws S., Verbeke, G., Boen, F. and Delecluse, C. (2006)
# "High dimensional multivariate mixed models for binary questionnaire data", Applied Statistics, 55 (4), 449-460.
# For the orginal SAS macro, refer to https://ibiostat.be/online-resources/uploads/pairwiseMGLMM.zip
# 0: Preliminaries ------------------------------------------------------
packages <- c("optimx", "ggplot2","lattice","MASS", "Matrix", "gridExtra", "dplyr", "optimParallel", "GLMMadaptive")
lapply(packages, library, character.only = TRUE)
cluster <- makeCluster(detectCores()); setDefaultCluster(cl = cluster)
setwd("C:/Users/jajgv/Documents/NSN19OK003/Data")
# 1: The Data -------------------------------------------------------------
# For this example I will use the questions data provide by Fieuws.
df <- read.csv(file = "questiondata.txt", sep = " ")
names(df) <- c("id", "dc", "resp", "physical", "selfesteem", "barriers", "psychic", "selfperception", "selfeffic", "motivation")
df <- mutate(df,
id = as.character(id),
resp = factor(resp,
labels = c("negative", "positive"),
exclude = ".")
)
# 2: Data exploration -----------------------------------------------------
# Print overview of data
str(df)
head(df)
# 3 Data preparation ------------------------------------------------------
# Split the data set into data.frames with pairs of the longitudinal outcomes.
outcomes <- names(df)[-1:-3]
# outcomes <- outcomes[1:3] # TEst only the first 3 outcomes
N <- length(outcomes)
Npair <- N * (N-1)/2
pairs <- matrix(data = 0, nrow = Npair, ncol = 5)
h <- 1
for (i in 1:(N-1)) {
for (j in (i+1):N) {
pairs[h,1] <- outcomes[i]
pairs[h,2] <- outcomes[j]
pairs[h,3] <- paste0(i,j)
pairs[h,4] <- paste0(i)
pairs[h,5] <- paste0(j)
h <- h + 1
}
}
print(pairs)
pairs <- pairs[c(1,2,7),]
d <- df
# 4 Get starting values --------------------------------------------------
# NB mixed_model return the individual level gradients in score_vect_contributions.
# Fit the pairwise models
fits <- vector("list", length = nrow(pairs))
starts <- vector("list", length = nrow(pairs))
tstart <- Sys.time()
for (i in 1:nrow(pairs)) {
loopstart <- Sys.time()
fixed <- paste("resp ~ -1 +", # omit the fixed intercept
paste(pairs[i, 1]), # fixed intercept for pair==1
"+", paste(pairs[i, 2]), # fixed intercept for pair==2
"+", paste0(pairs[i,1],":dc +"), # fixed slope for pair==1 x dc
paste0(pairs[i,2],":dc")) # fixed slope for pair==2 x dc
random <- paste("~ -1 + ",
paste(pairs[i,1]),"+", # random slope for pair == 1
paste(pairs[i,2]), "| id" ) # random slope for pair == 2
ds <- filter(d, d[paste(pairs[i, 1])] == 1 | d[paste(pairs[i, 2])] == 1) # Filter the data for the relevant pair of outcomes
fits[[i]] <- mixed_model(fixed = as.formula(fixed),
random = as.formula(random),
data = ds,
family = binomial(),
nAGQ = 11,
control = list(optimizer = "optimParellel",
optim_method = "L-BFGS-B",
numeric_deriv = "fd",
iter_EM = 100,
verbose = TRUE)
)
starts[[i]] <- list(betas = fixef(fits[[i]]), D = fits[[i]]$D)
loopend <- Sys.time()
print(round(loopend - loopstart,1))
}
tend <- Sys.time()
print(round(tend - tstart,1))
# 5: fit pairwise models on subject level to calculate the first and second order derivatives ------------
# FEATURE: Get score_vect_contributions$score.betas directly from mixed_model output.
# FEATURE: do data split outside loop for parallelization
# Initialize the data.frames to store results
tstart <- Sys.time()
options(warn = -1)
df.hessians <- vector("list", length = nrow(pairs))
df.gradients <- vector("list", length = nrow(pairs))
# Outer loop over the pairs
for (i in 1:nrow(pairs)) {
loopstart <- Sys.time() # review run time
# create lists to store subject level results
hessians <- vector("list", length = nrow(unique(d["id"])))
gradients <- vector("list", length = nrow(unique(d["id"])))
# construct the formulas from pairs
fixed <- paste("resp ~ -1 +", # omit the fixed intercept
paste(pairs[i, 1]), # fixed intercept for pair==1
"+", paste(pairs[i, 2]), # fixed intercept for pair==2
"+", paste0(pairs[i,1],":dc +"), # fixed slope for pair==1 x dc
paste0(pairs[i,2],":dc")) # fixed slope for pair==2 x dc
random <- paste("~ -1 + ", # omit random intercepts
paste(pairs[i,1]),"+", # random slope for pair == 1
paste(pairs[i,2]), "| id" ) # random slope for pair == 2
# Inner loop over the subjects
for (j in 1:nrow(unique(d["id"]))) {
# get subject level data within the pairs data
ds <- d %>% filter((d[paste(pairs[i, 1])] == 1 | d[paste(pairs[i, 2])] == 1) & id == j)
# Copying the subject is needed to hack the optimization algorithm
ds[,"id"] <- paste0(j,"a")
ds <- rbind(d %>% filter((d[paste(pairs[i, 1])] == 1 | d[paste(pairs[i, 2])] == 1) & id == j), ds)
# Fit the model for the subject
fit <- mixed_model(fixed = as.formula(fixed),
random = as.formula(random),
data = ds,
family = binomial(),
initial_values = starts[[i]], # input start values obtained at step 4.
nAGQ = 11,
control = list(optimizer = "optimParallel",
optim_method = "L-BFGS-B",
numeric_deriv = "fd",
iter_EM = 0,
iter_qN_outer = 0) # this avoids iterating from the start values
)
# Store the Hessian
h <- cbind("id" = j, fit$Hessian/2) # divide by 2 to undo the hack
# Store the subject level gradients
g <- c("id" = j,
colSums(fit$score_vect_contributions$score.betas/2), #divide by two to undo the hack
colSums(fit$score_vect_contributions$score.D/2)) # ==> these do not match SAS
names(g) <- c("id",colnames(fit$Hessian))
# List the individual level results
hessians[[j]] <- h
gradients[[j]] <- g
}
# Append the lists with individual level results into data.frames
df.hessians[[i]] <- do.call(rbind, hessians)
df.gradients[[i]] <- do.call(rbind, gradients)
loopend <- Sys.time()
print(round(loopend - loopstart,1))
}
tend <- Sys.time()
print(round(tend - tstart, 1))
options(warn = 0)
# 6.1: Combine the pairs ----------------------------------------------------
# Stack the parameters
parameters <- vector("list", length = nrow(pairs))
for (i in 1:nrow(pairs)) {
parameters[[i]] <- data.frame(
"parameter" = c(paste0("alpha", pairs[i,4]),
paste0("alpha", pairs[i,5]),
paste0("beta", pairs[i,4]),
paste0("beta", pairs[i,5]),
paste0("su",pairs[i,4]),
paste0("su",pairs[i,4],pairs[i,5]),
paste0("su",pairs[i,5])),
"estimates" = c(fixef(fits[[i]]),
c(fits[[i]]$D[lower.tri(fits[[i]]$D, diag = TRUE)])
)
)
}
parameters <- do.call(rbind, parameters)
# Stack the gradients
gradients <- vector("list", length = nrow(pairs))
for (i in 1:nrow(pairs)) {
g <- vector("list", length = nrow(unique(d["id"])))
for (j in 1:nrow(unique(d["id"]))) {
g[[j]] <- data.frame(
"id" = df.gradients[[i]][j,"id"],
"parameter" = c(paste0("alpha", pairs[i,4]),
paste0("alpha", pairs[i,5]),
paste0("beta", pairs[i,4]),
paste0("beta", pairs[i,5]),
paste0("su",pairs[i,4]),
paste0("su",pairs[i,4],pairs[i,5]),
paste0("su",pairs[i,5])),
"gradient" = df.gradients[[i]][j,-1]
)
}
gradients[[i]] <- do.call(rbind, g)
}
gradients <- do.call(rbind, gradients)
gradients <- gradients[order(gradients$id),]
# Hessians have already been stacked into df.hessians
hessians <- vector("list", length = nrow(pairs))
for (i in 1:nrow(pairs)) {
hessians[[i]] <- as.data.frame(df.hessians[[i]])
}
# 6.2: Create the A matrix -------------------------------------------------
# The A matrix is of p rows, equal to the number of unique parameters in the MMM, which is npar.# and q columns equal to the total number of parameters estimated in the pairwise modeling step: npar times npairs.
A <- matrix(0, nrow = length(unique(parameters[,"parameter"])), ncol = nrow(parameters))
dimnames(A) <- list(
unique(parameters[,"parameter"]),
parameters[,"parameter"]
)
# The values of A are 1 if a parameter was estimated, or 0 if it was not estimated in the pairwise step.
for (i in 1:nrow(A)) {
for (j in 1:ncol(A)) {
if (rownames(A)[i] == colnames(A)[j]) {
A[i,j] <- 1
}
}
}
# 6.3: Calculate the covariance matrix for fixed effects ----------------------
npar <- 7
# Get the cross product of the subject specific gradient vector and input into CPgradient.
CPgrad <- matrix(0, nrow(pairs)*npar, nrow(pairs)*npar,
dimnames = list(
parameters[,"parameter"],
parameters[,"parameter"]
)
)
# The loop sums all the subject level contributions to the gradient.
for (i in 1:nrow(unique(d["id"]))){
gsub <- gradients[gradients$id == i,]$gradient # subject level contribution to the gradients
cpgsub <- tcrossprod(gsub) # takes the cross product to get a block matrix
CPgrad <- CPgrad + cpgsub # Add to the total
}
# Average the gradient
CPgrad <- CPgrad * (1/nrow(unique(d["id"])))
# Average the Hessians
Hessian <- matrix(0, nrow(pairs)*npar, nrow(pairs)*npar)
for (j in 1:nrow(pairs)) {
hess_pair <- matrix(0, npar, npar)
# Inner loop: subject level contributions to the pairwise Hessian
for (i in 1:nrow(unique(d["id"]))) {
hsub <- as.matrix(hessians[[j]][hessians[[j]]$id == i,-1])
hess_pair <- hess_pair + hsub
}
# Outer loop: create block Sparse matrix of the multivariate mixed model Hessian
if (j == 1) {
Hessian <- hess_pair
} else {
Hessian <- as.matrix(bdiag(Hessian, hess_pair))
}
}
# Average the Hessian
Hessian <- Hessian * (1/nrow(unique(d["id"])))
dimnames(Hessian) <- list(
colnames(A),
colnames(A)
)
# Covariance of sqrt(n) * (theta - theta_0) Fieuws 2006 eq(4) p453
JKJ <- ginv(Hessian) %*% CPgrad %*% ginv(Hessian)
dimnames(JKJ) <- list(
colnames(A),
colnames(A)
)
# Covariance of (theta - theta_0)
cov <- JKJ * (1/nrow(unique(d["id"])))
# Average of the parameters
A <- A * (1/rowSums(A))
avg <- A %*% parameters[,"estimates"]
covrobust <- A %*% cov %*% t(A)
serobust <- sqrt(diag(covrobust))
B <- data.frame(
"parameter" = rownames(avg),
"estimate_pair" = avg ,
"se_pair" = serobust
)
# 6.4: Create the covariance and correlation matrix of RE -----------------
# The parameters and standard errors random effects are reported on the variance and not std scale.
# Fixed effects
FE <- B %>% filter(grepl("^su", parameter) == FALSE)
FE <- FE[order(FE$parameter),]
# Random effects
RE <- B %>% filter(grepl("^su", parameter) == TRUE)
RE <- RE[order(RE$parameter),]
# Select the covariance parameters
est_D <- matrix(0, nrow = N,ncol = N)
est_D[lower.tri(est_D, diag = TRUE)] <- RE$estimate_pair
est_D[upper.tri(est_D)] <- t(est_D)[upper.tri(est_D)]
# Calculate the correlation matrix
est_Dcorr <- cov2cor(est_D)
# outcome table
estimates <- rbind(FE, RE)
print(estimates)
print(est_D)
print(est_Dcorr)
save.image(file = paste0("mglmm_",format(Sys.Date(),"%Y%m%d"),".RData"))
write.csv2(estimates , file = "C:/Users/jajgv/Downloads/estimates.csv", row.names = FALSE)
write.csv2(round(est_Dcorr,2), file = "C:/Users/jajgv/Downloads/correlations.csv", row.names = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.