R/landmark_analysis.R

Defines functions landmark_dataset

# landmarking model

# Doc header --------------------------------------------------------------

# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation
# Topic: Data exploration

# Preliminaries  ------------------------------------------------------

reqlib <- c("foreign", "ggplot2", "lattice", "MASS", "gridExtra", "tidyr", "caret",
            "mice", "miceadds", "reshape2", "visdat", "tableone", "lubridate", 
            "corrplot", "timeROC", "ggthemes")
lapply(reqlib, library, character.only=TRUE)
setwd("C:/Users/jajgv/Documents/repos/highdimjm")
source("R/mmm_functions.R")
# Set handlers for progress bars
handlers(global=TRUE)
handlers("progress")
# set seed for reproducibility
set.seed(20201013)
load("output/model_bx.RData")

d_gf_train <- df_fail_train %>% 
  bind_rows(df_nofail_train) %>% mutate(
    failure=as.numeric(event == "graft failure"))


# Data set-up ----
## Create a stack of datasets for every landmark between 0 and 3 years in 3 month intervals

#' Stack data for Cox regression with landmarking
#' 
#' Selects subset of data at a given landmark. Slices the data, then appends
#' all of the landmark datasets into a single stacked data set (long format)
#' with an indicator for each landmark 
#' 
#' @param data the original data (long format).
#' @param id a character string with the subject identifier column name.
#' @param time a character string with the column name of observation time to base the landmarks on.
#' @param failure_time a character string with the column name of the time to failure.
#' @param landmark a (vector) of landmark times.
#' @param horizon the prediction horizon for the window
#' 
#' @details The calculation of the prediction window is as follows:
#' failure_time  = min(horizon, failure_time - landmark). For example, if a subject
#' survived to T = 8 years and we create a window with landmark at T = 2 and horizon 
#' at 5 years. The landmarked survival time would be T=5 years (i.e. the horizon)
#' If the horizon would be at 10 years instead the landmarked survival time would be
#' T= 8 - 2 = 6 years.
#' 
#' @import tidyverse
#' 
#' @export
#' 
#' @returns a stacked data set with data a selected landmark times and an indicator for the landmark: `landmark`.

landmark_dataset <- function(data, id, time, failure_time, landmark, horizon) {
  out <- lapply(landmark, function(lm) {
    data %>% 
      group_by(get(id)) %>%
      filter(all_of(time) <= lm) %>%
      summarize(across(everything(),last)) %>%
      mutate(landmark = lm,
             failure_time = pmin(horizon, get(failure_time) - lm)) %>%
      ungroup()
  })
  do.call(rbind, out)
}
landmark <- seq(1, 5, 0.25)
horizon <- 10
d_bx_lm <- landmark_dataset(data = d_gf_train,
                            id = "transnr",
                            time = "time",
                            failure_time="stime",
                            landmark = landmark,
                            horizon = horizon)

d_bx_lm <- d_bx_lm %>% 
  mutate(failure = as.numeric(event == "graft failure"),
         failure = ifelse(failure_time == horizon, 0, failure))

ggplot(data=d_bx_lm, aes(x=landmark)) +
  geom_bar() + 
  theme_bw() +
  theme(axis.title=element_text(size=10),
        axis.text=element_text(size=8)) 
ggsave("plots/landmarksets.png", width=7, height=5.6, units="cm", dpi=300)

# Model fitting ----
# Fit a Cox model with stratification on the landmark and interactions between the landmark indicator and parameters.
outcomes <- c("gfr", "nf_procr", "ci", "cv", "t", "i",  "g", "ptcitis", "c4d_ptc")
f1 <- as.formula(paste("Surv(stime, failure) ~ (",paste(outcomes, collapse=" + "),"):strata(landmark)"))
cox_lm_1 <- coxph(f1, data = d_bx_lm, cluster=transnr)

# Plot the parameter estimates by landmark time
plots <- lapply(outcomes, function(v) {
  plt_data <-  data.frame(
    y = coef(cox_lm_1)[str_detect(names(coef(cox_lm_1)),v)],
    x = landmark
  )
  plot <- ggplot(plt_data, aes(x=x, y=y)) +
    geom_line() + 
    theme_bw() + 
    xlab(str_to_title(paste(v))) + 
    ylab("") + 
    theme(
      axis.title=element_text(size=10),
      axis.text=element_text(size=8)
    )
  })
png("plots/landmark-model.png",width=15, height=12, units="cm", res=300)
do.call("grid.arrange", c(plots, ncol = 3))
dev.off()

# Fit a model with the landmark indicator as a covariate
f2 <- as.formula(paste("Surv(stime, failure) ~ (",paste(outcomes, collapse=" + "),"):landmark + landmark"))
cox_lm_2 <- coxph(f2, data = d_bx_lm, cluster=transnr)                      

# Prune the model 
f3 <- as.formula("Surv(stime, failure) ~ (gfr + nf_procr + t + g):landmark")
cox_lm_3 <- coxph(f3, data = d_bx_lm, cluster=transnr)
cox_table <- round(exp(cbind(coef(cox_lm_3), confint(cox_lm_3))),3)

# Predictions ----
# The survival probability for a subject is equal to exp(-expected).
d_bx_lm <- d_bx_lm %>% 
  mutate(predictions = exp(-predict(cox_lm_3, type="expected"))) %>%
  arrange(transnr, landmark)

# Model performance -------------------------------------------------------

# Predict survival in a sliding window up to 5 years from landmark
rocs <- lapply(landmark, function(l) {
  roc <- timeROC(T = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(failure_time) %>% unlist(),
                 delta = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(failure) %>% unlist(),
                 marker = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(predictions) %>% unlist(),
                 cause = 1, 
                 weighting = "marginal",
                 times = horizon - l - 0.001, 
                 iid = TRUE) 
  roc <- cbind(roc$Stats, roc$AUC, roc$inference$vect_sd_1, roc$CumulativeIncidence)
  colnames(roc) <- c("failures", "survivors", "censored", "auc", "auc_se", "cuminc")
  roc <- as.data.frame(roc, row.names = FALSE) %>% mutate(landmark = l)
  roc[2, ]
})
roc_auc_lm <- do.call(rbind,rocs) %>% 
  filter(!is.na(auc)) %>%
  mutate(horizon = horizon) %>%
  relocate(landmark, horizon, everything())
plot_auc_lm <- ggplot(roc_auc_lm, aes(x=landmark, y=auc)) + 
  geom_ribbon(aes(ymin=auc - qnorm(0.975) * auc_se,
                  ymax=auc + qnorm(0.975) * auc_se), 
              fill="grey85") +
  geom_line(color="black") +
  geom_hline(aes(yintercept=0.5), linetype = "dotted") +
  scale_y_continuous(limits=c(0, 1),
                     breaks=seq(0, 1, 0.2)) + 
  theme_few() + 
  labs(title=paste("Discrimination performance at",  horizon,"years follow-up in landmark data"),
       y="Area under the ROC curve",
       x="Landmark time [years]")
ggsave(plot_auc_lm, filename="plots/rocauc_lm.png", device =png,
       width=16, height=12, units="cm", res=300)
write.csv(roc_auc_lm, file="output/roc_auc_lm.csv")

plot_calibration_lm <- lapply(which(lm %in% c(1:3, 5)), function(l) {
  predict <- d_bx_lm %>%
  group_by(transnr) %>%
  dplyr::select(failure, stime, predictions, landmark) %>%
  dplyr::filter(landmark == lm[l],
                horizon == 10) %>%
  ungroup()
  score <- riskRegression::Score(
    list( "p_fail"=predict$predictions ),
    formula=Surv(stime, failure) ~ 1,
    data=predict, 
    conf.inf=FALSE,
    times=10,
    metrics=NULL, 
    plots="Calibration")
  plt_calibrate <- riskRegression::plotCalibration(
    score,
    cens.method="local", 
    round=TRUE,
    rug=TRUE,
    plot=FALSE)
  plt_calibrate$plotFrames$p_fail %>% 
    dplyr::mutate(landmark = lm[l])
})
plot_calibration_lm <- do.call(rbind, plot_calibration_lm)
plot_calibration_lm <- ggplot(
    plot_calibration_lm, 
    aes(x=Pred, y=Obs)) + 
    stat_smooth(
      aes(color=factor(landmark)),
      method="loess",
      span=0.3,
      alpha=0.2) + 
    geom_abline(intercept=0, slope=1, alpha=0.5) +
    geom_rug() + 
    scale_y_continuous(limits=c(0, 1), breaks=seq(0, 1, 0.2)) + 
    scale_x_continuous(limits=c(0, 1), breaks=seq(0, 1, 0.2)) +
    labs(title="", 
         caption=paste("Calibration plots for kidney graft failure at", horizon ,"years follow-up in landmark data"),
         y="Predicted Risks",
         x="Observed risks") +
    ggthemes::theme_tufte(ticks=TRUE) +
  labs(color="Landmark")
  

pdf("plots/calibration_gf_lm.pdf",
    width=7, 
    height=5.6)
plot_calibration_lm
dev.off()
JanvandenBrand/highdimjm documentation built on Dec. 18, 2021, 12:32 a.m.