Joint longitudinal and time-to-event models

Joint models have two components:

Modelling those two jointly means that the marker values are imputed between measurements according to the longitudinal model, but information also flows in the other direction. We use the implemention from the rstanarm package [@http://zotero.org/users/5567156/items/KHJGLQS5] - consult the reference for an introduction to this class of models.

We use those models only for association between markers (IL-6 and D-dimer) and disease progression. Those analyses ended up not being reported in the main paper as the results are inconclusive.

We evaluate the associations directly via the coefficient describing the association between the longitudinal and time-to-event component. We focus solely on the "Death" event.

knitr::opts_chunk$set(cache = TRUE, echo=FALSE)

devtools::load_all()
library(tidyverse)
library(survival)
library(rstanarm)
library(tidybayes)

theme_set(cowplot::theme_cowplot())
data <- read_data_for_analysis()
wide <- data$marker_data_wide

options(mc.cores = parallel::detectCores())

jm_hypothesis_res_list <- list()


cache_dir <- here::here("local_temp_data")
if(!dir.exists(cache_dir)) {
  dir.create(cache_dir)
}
data_surv <- wide %>%  
  filter(day > 0) %>%
  mutate(daym1 = day - 1,
    event_death = !is.na(breathing) & breathing == "Death",
    event_discharged = !is.na(breathing) & breathing == "Discharged",
  ) %>%
  inner_join(data$patient_data, by = c("hospital_id", "patient_id"))  %>%
  arrange("patient_id", "day")

#TODO: impute the competing risk as censored long in the future

D-dimer

Age + sex

The longitudinal formula is log_d_dimer ~ day + (1 | patient_id) and the time-to-event formula is Surv(daym1, day, event_death) ~ age + sex. The fitted coefficients are:

data_d_dimer <- data_surv %>%
  group_by(patient_id) %>%
  filter(any(!is.na(d_dimer))) %>%
  ungroup() %>%
    select(event_death, event_discharged, 
         day, daym1, patient_id, hospital_id, 
         age, sex, took_hcq,
         d_dimer) %>%
  mutate(log_d_dimer = log(d_dimer + 1) - 4)

fit_d_dimer_age_sex <- stan_jm_with_cache(formulaLong = log_d_dimer ~ day + (1  | patient_id),
              dataLong = data_d_dimer %>% filter(!is.na(d_dimer)),
              formulaEvent = Surv(daym1, day, event_death) ~ age + sex,
              dataEvent = data_d_dimer,
              id_var = "patient_id",
              time_var = "day",
              cache_file = paste0(cache_dir, "/jm_d_dimer_age_sex.rds"))

fit_d_dimer_age_sex
jm_hypothesis_res_list[["d_dimer_age_sex"]] <-
  bayesian_hypothesis_res_from_jm(
    hypotheses$d_dimer_death,
    fit_d_dimer_age_sex,
    coefficient_name = "Assoc|Long1|etavalue",
    adjusted  = "age, sex",
    model_check = "Suspicious")

#jm_hypothesis_res_list[["d_dimer_age_sex"]]

Age + sex + hydroxychloroquine

The time-to-event formula is expanded to Surv(daym1, day, event_death) ~ age + sex + took_hcq. The fitted coefficients are:

fit_d_dimer_age_sex_hcq <- stan_jm_with_cache(formulaLong = log_d_dimer ~ day + (1  | patient_id),
              dataLong = data_d_dimer %>% filter(!is.na(d_dimer)),
              formulaEvent = Surv(daym1, day, event_death) ~ age + sex + took_hcq,
              dataEvent = data_d_dimer,
              id_var = "patient_id",
              time_var = "day",
              cache_file = paste0(cache_dir, "/jm_d_dimer_age_sex_hcq.rds"))

fit_d_dimer_age_sex_hcq
jm_hypothesis_res_list[["d_dimer_age_sex_hcq"]] <-
  bayesian_hypothesis_res_from_jm(
    hypotheses$d_dimer_death,
    fit_d_dimer_age_sex_hcq,
    coefficient_name = "Assoc|Long1|etavalue",
    adjusted  = "age, sex, hcq",
    model_check = "Problematic")

#jm_hypothesis_res_list[["d_dimer_age_sex_hcq"]]

The problems in the model are visible when visualising the fitted longitudinal trajectories for some patients: there are very few measurements and thus huge uncertainties:

 p1 <- posterior_traj(fit_d_dimer_age_sex_hcq, ids = unique(data_d_dimer$patient_id)[23:28])
 plot(p1, plot_observed = TRUE)                     

IL-6

Age + sex

The longitudinal formula is log_IL_6 ~ day + (1 + day | patient_id) and the time-to-event formula is Surv(daym1, day, event_death) ~ age + sex. The fitted coefficients are:

data_IL_6 <- data_surv %>%
  group_by(patient_id) %>%
  filter(any(!is.na(IL_6))) %>%
  ungroup() %>% 
  select(event_death, event_discharged, 
         day, daym1, patient_id, hospital_id, 
         age, sex, took_hcq,
         IL_6) %>%
  mutate(log_IL_6 = log(IL_6))

fit_IL_6_age_sex <- stan_jm_with_cache(formulaLong = log_IL_6 ~ day + (1 + day | patient_id),
              dataLong = data_IL_6 %>% filter(!is.na(IL_6)),
              formulaEvent = Surv(daym1, day, event_death) ~ age + sex,
              dataEvent = data_IL_6,
              id_var = "patient_id",
              time_var = "day",
              cache_file = paste0(cache_dir, "/jm_IL_6_age_sex.rds"))

fit_IL_6_age_sex
jm_hypothesis_res_list[["IL_6_age_sex"]] <-
  bayesian_hypothesis_res_from_jm(
    hypotheses$IL_6_death,
    fit_IL_6_age_sex,
    coefficient_name = "Assoc|Long1|etavalue",
    adjusted  = "age, sex",
    model_check = "Suspicious")

#jm_hypothesis_res_list[["IL_6_age_sex"]]

Age + sex + hydroxychloroquine

The time-to-event formula becomes Surv(daym1, day, event_death) ~ age + sex + took_hcq. The fitted model is:

fit_IL_6_age_sex_hcq <- stan_jm_with_cache(formulaLong = log_IL_6 ~ day + (1 + day | patient_id),
              dataLong = data_IL_6 %>% filter(!is.na(IL_6)),
              formulaEvent = Surv(daym1, day, event_death) ~ age + sex + took_hcq,
              dataEvent = data_IL_6,
              id_var = "patient_id",
              time_var = "day",
              cache_file = paste0(cache_dir, "/jm_IL_6_age_sex_hcq.rds"))

fit_IL_6_age_sex_hcq
jm_hypothesis_res_list[["IL_6_age_sex_hcq"]] <-
  bayesian_hypothesis_res_from_jm(
    hypotheses$IL_6_death,
    fit_IL_6_age_sex_hcq,
    coefficient_name = "Assoc|Long1|etavalue",
    adjusted  = "age, sex, hcq",
    model_check = "Suspicious")

#jm_hypothesis_res_list[["IL_6_age_sex_hcq"]]

Similarly, we can explore the posterior trajectories, showing similar issues as before: too wide uncertainty.

 p1 <- posterior_traj(fit_IL_6_age_sex_hcq, ids = unique(data_IL_6$patient_id)[c(8,14,23,13,6,1)])
 plot(p1, plot_observed = TRUE)                     
jm_hypothesis_res_all <- do.call(rbind, jm_hypothesis_res_list)
write_csv(jm_hypothesis_res_all, path = here::here("manuscript", "jm_res.csv"))


cas-bioinf/covid19retrospective documentation built on Sept. 7, 2021, 6:19 p.m.