Joint models have two components:
Modelling those two jointly means that the marker values are imputed between measurements according to the longitudinal model, but information also flows in the other direction. We use the implemention from the rstanarm
package [@http://zotero.org/users/5567156/items/KHJGLQS5] - consult the reference for an introduction to this class of models.
We use those models only for association between markers (IL-6 and D-dimer) and disease progression. Those analyses ended up not being reported in the main paper as the results are inconclusive.
We evaluate the associations directly via the coefficient describing the association between the longitudinal and time-to-event component. We focus solely on the "Death" event.
knitr::opts_chunk$set(cache = TRUE, echo=FALSE) devtools::load_all() library(tidyverse) library(survival) library(rstanarm) library(tidybayes) theme_set(cowplot::theme_cowplot()) data <- read_data_for_analysis() wide <- data$marker_data_wide options(mc.cores = parallel::detectCores()) jm_hypothesis_res_list <- list() cache_dir <- here::here("local_temp_data") if(!dir.exists(cache_dir)) { dir.create(cache_dir) }
data_surv <- wide %>% filter(day > 0) %>% mutate(daym1 = day - 1, event_death = !is.na(breathing) & breathing == "Death", event_discharged = !is.na(breathing) & breathing == "Discharged", ) %>% inner_join(data$patient_data, by = c("hospital_id", "patient_id")) %>% arrange("patient_id", "day") #TODO: impute the competing risk as censored long in the future
The longitudinal formula is log_d_dimer ~ day + (1 | patient_id)
and the time-to-event formula is Surv(daym1, day, event_death) ~ age + sex
. The fitted coefficients are:
data_d_dimer <- data_surv %>% group_by(patient_id) %>% filter(any(!is.na(d_dimer))) %>% ungroup() %>% select(event_death, event_discharged, day, daym1, patient_id, hospital_id, age, sex, took_hcq, d_dimer) %>% mutate(log_d_dimer = log(d_dimer + 1) - 4) fit_d_dimer_age_sex <- stan_jm_with_cache(formulaLong = log_d_dimer ~ day + (1 | patient_id), dataLong = data_d_dimer %>% filter(!is.na(d_dimer)), formulaEvent = Surv(daym1, day, event_death) ~ age + sex, dataEvent = data_d_dimer, id_var = "patient_id", time_var = "day", cache_file = paste0(cache_dir, "/jm_d_dimer_age_sex.rds")) fit_d_dimer_age_sex
jm_hypothesis_res_list[["d_dimer_age_sex"]] <- bayesian_hypothesis_res_from_jm( hypotheses$d_dimer_death, fit_d_dimer_age_sex, coefficient_name = "Assoc|Long1|etavalue", adjusted = "age, sex", model_check = "Suspicious") #jm_hypothesis_res_list[["d_dimer_age_sex"]]
The time-to-event formula is expanded to Surv(daym1, day, event_death) ~ age + sex + took_hcq
. The fitted coefficients are:
fit_d_dimer_age_sex_hcq <- stan_jm_with_cache(formulaLong = log_d_dimer ~ day + (1 | patient_id), dataLong = data_d_dimer %>% filter(!is.na(d_dimer)), formulaEvent = Surv(daym1, day, event_death) ~ age + sex + took_hcq, dataEvent = data_d_dimer, id_var = "patient_id", time_var = "day", cache_file = paste0(cache_dir, "/jm_d_dimer_age_sex_hcq.rds")) fit_d_dimer_age_sex_hcq
jm_hypothesis_res_list[["d_dimer_age_sex_hcq"]] <- bayesian_hypothesis_res_from_jm( hypotheses$d_dimer_death, fit_d_dimer_age_sex_hcq, coefficient_name = "Assoc|Long1|etavalue", adjusted = "age, sex, hcq", model_check = "Problematic") #jm_hypothesis_res_list[["d_dimer_age_sex_hcq"]]
The problems in the model are visible when visualising the fitted longitudinal trajectories for some patients: there are very few measurements and thus huge uncertainties:
p1 <- posterior_traj(fit_d_dimer_age_sex_hcq, ids = unique(data_d_dimer$patient_id)[23:28]) plot(p1, plot_observed = TRUE)
The longitudinal formula is log_IL_6 ~ day + (1 + day | patient_id)
and the time-to-event formula is Surv(daym1, day, event_death) ~ age + sex
. The fitted coefficients are:
data_IL_6 <- data_surv %>% group_by(patient_id) %>% filter(any(!is.na(IL_6))) %>% ungroup() %>% select(event_death, event_discharged, day, daym1, patient_id, hospital_id, age, sex, took_hcq, IL_6) %>% mutate(log_IL_6 = log(IL_6)) fit_IL_6_age_sex <- stan_jm_with_cache(formulaLong = log_IL_6 ~ day + (1 + day | patient_id), dataLong = data_IL_6 %>% filter(!is.na(IL_6)), formulaEvent = Surv(daym1, day, event_death) ~ age + sex, dataEvent = data_IL_6, id_var = "patient_id", time_var = "day", cache_file = paste0(cache_dir, "/jm_IL_6_age_sex.rds")) fit_IL_6_age_sex
jm_hypothesis_res_list[["IL_6_age_sex"]] <- bayesian_hypothesis_res_from_jm( hypotheses$IL_6_death, fit_IL_6_age_sex, coefficient_name = "Assoc|Long1|etavalue", adjusted = "age, sex", model_check = "Suspicious") #jm_hypothesis_res_list[["IL_6_age_sex"]]
The time-to-event formula becomes Surv(daym1, day, event_death) ~ age + sex + took_hcq
. The fitted model is:
fit_IL_6_age_sex_hcq <- stan_jm_with_cache(formulaLong = log_IL_6 ~ day + (1 + day | patient_id), dataLong = data_IL_6 %>% filter(!is.na(IL_6)), formulaEvent = Surv(daym1, day, event_death) ~ age + sex + took_hcq, dataEvent = data_IL_6, id_var = "patient_id", time_var = "day", cache_file = paste0(cache_dir, "/jm_IL_6_age_sex_hcq.rds")) fit_IL_6_age_sex_hcq
jm_hypothesis_res_list[["IL_6_age_sex_hcq"]] <- bayesian_hypothesis_res_from_jm( hypotheses$IL_6_death, fit_IL_6_age_sex_hcq, coefficient_name = "Assoc|Long1|etavalue", adjusted = "age, sex, hcq", model_check = "Suspicious") #jm_hypothesis_res_list[["IL_6_age_sex_hcq"]]
Similarly, we can explore the posterior trajectories, showing similar issues as before: too wide uncertainty.
p1 <- posterior_traj(fit_IL_6_age_sex_hcq, ids = unique(data_IL_6$patient_id)[c(8,14,23,13,6,1)]) plot(p1, plot_observed = TRUE)
jm_hypothesis_res_all <- do.call(rbind, jm_hypothesis_res_list) write_csv(jm_hypothesis_res_all, path = here::here("manuscript", "jm_res.csv"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.