# Doc header ----------------------------------------------------------------------------------
# Create simulated data for multivariate pairwise MM
# Initialize ----------------------------------------------------------------------------------
setwd("C:/Users/jajgv/Documents/repos/highdimjm")
pkg <- c("optimx", "ggplot2","lattice","MASS", "Matrix", "gridExtra", "tidyverse",
"GLMMadaptive", "stringr", "flexsurv", "future.apply", "scales", "microbenchmark")
lapply(pkg, library, character.only = TRUE)
source("R/mmm_functions.R")
# Data --------------------------------------------------
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
df <- data.frame(id = as.character(rep(seq_len(n), each = K)),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed effects
X1 <- model.matrix(~ sex + time, data = df)
X2 <- model.matrix(~ time, data = df)
X3 <- model.matrix(~ sex + time, data = df)
X4 <- model.matrix(~ time, data = df)
# Random intercepts model
Z1 <-
Z2 <-
Z3 <-
Z4 <- model.matrix(~ 1, data = df)
# fixed effects coefficients
betas1 <- c(-0.67, -0.25, 0.24)
betas2 <- c(-1.15, 0.3)
betas3 <- c(1.34, -0.13, -0.04)
betas4 <- c(0.25, -0.17)
# random effects matrix
D <- c(0.8, 0.7, 0.6, 0.5)
resid <- c(0.5, 0.87) # standard deviation error terms
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D[1])), rnorm(n, sd = sqrt(D[2])), rnorm(n, sd = sqrt(D[3])), rnorm(n, D[4]))
eta_y1 <- as.vector(X1 %*% betas1 + rowSums(Z1 * b[as.integer(df$id), 1, drop = FALSE]))
eta_y2 <- as.vector(X2 %*% betas2 + rowSums(Z2 * b[as.integer(df$id), 2, drop = FALSE]))
eta_y3 <- as.vector(X3 %*% betas3 + rowSums(Z3 * b[as.integer(df$id), 3, drop = FALSE]))
eta_y4 <- as.vector(X4 %*% betas4 + rowSums(Z4 * b[as.integer(df$id), 4, drop = FALSE]))
# simulate normal longitudinal data
df$y1 <- rnorm(n * K, mean = eta_y1, sd = resid[1])
df$y2 <- rnorm(n * K, mean = eta_y2, sd = resid[2])
# we set binary outcome from the logistic regression
df$y3 <- as.numeric(as.logical(rbinom(n * K, size = 1, prob = plogis(eta_y3))))
df$y4 <- as.numeric(as.logical(rbinom(n * K, size = 1, prob = plogis(eta_y4))))
# Set failures
df$failure <- rnorm(n * K, mean = -0.5*eta_y1+0.8*eta_y2-0.33*eta_y3+0.15*eta_y4, sd = 1.5*mean(resid))
df$failure <- exp(df$failure) / (1 + exp(df$failure))
df$failure <- ifelse(df$failure >= 0.6 & df$time >0, 1, 0)
# Set failure times
subjects <- split(df, df$id)
for (i in seq_along(subjects)) {
if ( summarise(subjects[[i]], max(failure)) == 0 ) {
subjects[[i]]$time_failure <- unlist(summarise(subjects[[i]], max(time)))["max(time)"]
} else {
subjects[[i]]$time_failure <- unlist(summarise(subjects[[i]] %>%
group_by(failure) %>%
filter(failure == 1), min(time)))["min(time)"]
}
subjects[[i]]$failure <- ifelse(max(subjects[[i]]$failure) == 1, 1, 0)
subjects[[i]] <- subjects[[i]] %>% filter(time <= time_failure)
}
df <- do.call(rbind, subjects)
df <- df %>% mutate(sex = as.numeric(sex))
summary(df)
#scale the parameters
df <- df %>% mutate(y1 = scale(y1),
y2 = scale(y2))
# Fit the mmm ---------------------------------------------------------------------------------
pairs <- make_pairs(outcomes = c("y1", "y2", "y3", "y4"))
model_info <- test_input_datatypes(data = df, pairs = pairs)
df_fail <- df %>% filter(failure == 1)
df_fail_stacked <- stack_data(data = df_fail, id = "id", pairs = pairs, covars = c("time", "sex", "time_failure"))
df_nofail <- df %>% filter(failure == 0)
df_nofail_stacked <- stack_data(data = df_nofail, id = "id", pairs = pairs, covars = c("time", "sex"))
fixed_nofail <- make_fixed_formula(covars = c("time", "sex"))
fixed_fail <- make_fixed_formula(covars = c("time", "sex", "time_failure"))
random <- make_random_formula(id="id")
model_nofail <- mmm_model(fixed = fixed_nofail,
random = random,
id = "id",
data = df_nofail,
stacked_data = df_nofail_stacked,
pairs = pairs,
model_families = model_info,
iter_EM = 100,
iter_qN_outer = 30,
nAGQ = 11)
model_fail <- mmm_model(fixed = fixed_fail,
random = random,
id = "id",
data = df_fail,
stacked_data = df_fail_stacked,
pairs = pairs,
model_families = model_info,
iter_EM = 100,
iter_qN_outer = 30,
nAGQ = 11,
update_GH_every = 5,
parallel_plan = "multisession")
# Get the predictions -----------------------------------------------------
# Simulate random effects
outcomes <- paste0("y",1:4)
mu <- setNames(rep(0, length(outcomes)), outcomes)
re_samples_nofail <- mvrnorm(1e3,mu = mu, Sigma = model_nofail$vcov)
re_samples_fail <- mvrnorm(1e3,mu = mu, Sigma = model_fail$vcov)
prior <- get_priors(data = df,
time_failure = "time_failure",
failure = "failure",
horizon = 5,
interval = 1/12)
outcomes <- get_outcome_type(data = df,
outcomes)
df_pred <- df %>% filter(id == 10)
# FIX: check argument validity
predictions <- mmm_predictions(data = df_pred,
outcomes = outcomes,
fixed_formula_nofail = "~ time + sex + y1 + y2 + y3 + y4",
random_formula_nofail = "~ 1 | id",
random_effects_nofail = re_samples_nofail,
parameters_nofail = model_nofail$estimates,
fixed_formula_fail = "~ time + sex + time_failure + y1 + y2 + y3 + y4",
random_formula_fail = "~ 1 | id",
random_effects_fail = re_samples_fail,
parameters_fail = model_fail$estimates,
time = "time",
failure = "failure",
failure_time = "time_failure",
prior = prior,
id = "id",
horizon = 5,
interval = 1/12)
# Visualize the predictions
get_plots(newdata = df_pred,
predictions = predictions,
outcomes = outcomes,
interval = 1/12)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.