# landmarking model
# Doc header --------------------------------------------------------------
# author: "Jan van den Brand, PhD"
# email: jan.vandenbrand@kuleuven.be
# project: NSN19OK003
# funding: Dutch Kidney Foundation
# Topic: Data exploration
# Preliminaries ------------------------------------------------------
reqlib <- c("foreign", "ggplot2", "lattice", "MASS", "gridExtra", "tidyr", "caret",
"mice", "miceadds", "reshape2", "visdat", "tableone", "lubridate",
"corrplot", "timeROC", "ggthemes")
lapply(reqlib, library, character.only=TRUE)
setwd("C:/Users/jajgv/Documents/repos/highdimjm")
source("R/mmm_functions.R")
# Set handlers for progress bars
handlers(global=TRUE)
handlers("progress")
# set seed for reproducibility
set.seed(20201013)
load("output/model_bx.RData")
d_gf_train <- df_fail_train %>%
bind_rows(df_nofail_train) %>% mutate(
failure=as.numeric(event == "graft failure"))
# Data set-up ----
## Create a stack of datasets for every landmark between 0 and 3 years in 3 month intervals
#' Stack data for Cox regression with landmarking
#'
#' Selects subset of data at a given landmark. Slices the data, then appends
#' all of the landmark datasets into a single stacked data set (long format)
#' with an indicator for each landmark
#'
#' @param data the original data (long format).
#' @param id a character string with the subject identifier column name.
#' @param time a character string with the column name of observation time to base the landmarks on.
#' @param failure_time a character string with the column name of the time to failure.
#' @param landmark a (vector) of landmark times.
#' @param horizon the prediction horizon for the window
#'
#' @details The calculation of the prediction window is as follows:
#' failure_time = min(horizon, failure_time - landmark). For example, if a subject
#' survived to T = 8 years and we create a window with landmark at T = 2 and horizon
#' at 5 years. The landmarked survival time would be T=5 years (i.e. the horizon)
#' If the horizon would be at 10 years instead the landmarked survival time would be
#' T= 8 - 2 = 6 years.
#'
#' @import tidyverse
#'
#' @export
#'
#' @returns a stacked data set with data a selected landmark times and an indicator for the landmark: `landmark`.
landmark_dataset <- function(data, id, time, failure_time, landmark, horizon) {
out <- lapply(landmark, function(lm) {
data %>%
group_by(get(id)) %>%
filter(all_of(time) <= lm) %>%
summarize(across(everything(),last)) %>%
mutate(landmark = lm,
failure_time = pmin(horizon, get(failure_time) - lm)) %>%
ungroup()
})
do.call(rbind, out)
}
landmark <- seq(1, 5, 0.25)
horizon <- 10
d_bx_lm <- landmark_dataset(data = d_gf_train,
id = "transnr",
time = "time",
failure_time="stime",
landmark = landmark,
horizon = horizon)
d_bx_lm <- d_bx_lm %>%
mutate(failure = as.numeric(event == "graft failure"),
failure = ifelse(failure_time == horizon, 0, failure))
ggplot(data=d_bx_lm, aes(x=landmark)) +
geom_bar() +
theme_bw() +
theme(axis.title=element_text(size=10),
axis.text=element_text(size=8))
ggsave("plots/landmarksets.png", width=7, height=5.6, units="cm", dpi=300)
# Model fitting ----
# Fit a Cox model with stratification on the landmark and interactions between the landmark indicator and parameters.
outcomes <- c("gfr", "nf_procr", "ci", "cv", "t", "i", "g", "ptcitis", "c4d_ptc")
f1 <- as.formula(paste("Surv(stime, failure) ~ (",paste(outcomes, collapse=" + "),"):strata(landmark)"))
cox_lm_1 <- coxph(f1, data = d_bx_lm, cluster=transnr)
# Plot the parameter estimates by landmark time
plots <- lapply(outcomes, function(v) {
plt_data <- data.frame(
y = coef(cox_lm_1)[str_detect(names(coef(cox_lm_1)),v)],
x = landmark
)
plot <- ggplot(plt_data, aes(x=x, y=y)) +
geom_line() +
theme_bw() +
xlab(str_to_title(paste(v))) +
ylab("") +
theme(
axis.title=element_text(size=10),
axis.text=element_text(size=8)
)
})
png("plots/landmark-model.png",width=15, height=12, units="cm", res=300)
do.call("grid.arrange", c(plots, ncol = 3))
dev.off()
# Fit a model with the landmark indicator as a covariate
f2 <- as.formula(paste("Surv(stime, failure) ~ (",paste(outcomes, collapse=" + "),"):landmark + landmark"))
cox_lm_2 <- coxph(f2, data = d_bx_lm, cluster=transnr)
# Prune the model
f3 <- as.formula("Surv(stime, failure) ~ (gfr + nf_procr + t + g):landmark")
cox_lm_3 <- coxph(f3, data = d_bx_lm, cluster=transnr)
cox_table <- round(exp(cbind(coef(cox_lm_3), confint(cox_lm_3))),3)
# Predictions ----
# The survival probability for a subject is equal to exp(-expected).
d_bx_lm <- d_bx_lm %>%
mutate(predictions = exp(-predict(cox_lm_3, type="expected"))) %>%
arrange(transnr, landmark)
# Model performance -------------------------------------------------------
# Predict survival in a sliding window up to 5 years from landmark
rocs <- lapply(landmark, function(l) {
roc <- timeROC(T = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(failure_time) %>% unlist(),
delta = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(failure) %>% unlist(),
marker = d_bx_lm %>% filter(landmark == l) %>% dplyr::select(predictions) %>% unlist(),
cause = 1,
weighting = "marginal",
times = horizon - l - 0.001,
iid = TRUE)
roc <- cbind(roc$Stats, roc$AUC, roc$inference$vect_sd_1, roc$CumulativeIncidence)
colnames(roc) <- c("failures", "survivors", "censored", "auc", "auc_se", "cuminc")
roc <- as.data.frame(roc, row.names = FALSE) %>% mutate(landmark = l)
roc[2, ]
})
roc_auc_lm <- do.call(rbind,rocs) %>%
filter(!is.na(auc)) %>%
mutate(horizon = horizon) %>%
relocate(landmark, horizon, everything())
plot_auc_lm <- ggplot(roc_auc_lm, aes(x=landmark, y=auc)) +
geom_ribbon(aes(ymin=auc - qnorm(0.975) * auc_se,
ymax=auc + qnorm(0.975) * auc_se),
fill="grey85") +
geom_line(color="black") +
geom_hline(aes(yintercept=0.5), linetype = "dotted") +
scale_y_continuous(limits=c(0, 1),
breaks=seq(0, 1, 0.2)) +
theme_few() +
labs(title=paste("Discrimination performance at", horizon,"years follow-up in landmark data"),
y="Area under the ROC curve",
x="Landmark time [years]")
ggsave(plot_auc_lm, filename="plots/rocauc_lm.png", device =png,
width=16, height=12, units="cm", res=300)
write.csv(roc_auc_lm, file="output/roc_auc_lm.csv")
plot_calibration_lm <- lapply(which(lm %in% c(1:3, 5)), function(l) {
predict <- d_bx_lm %>%
group_by(transnr) %>%
dplyr::select(failure, stime, predictions, landmark) %>%
dplyr::filter(landmark == lm[l],
horizon == 10) %>%
ungroup()
score <- riskRegression::Score(
list( "p_fail"=predict$predictions ),
formula=Surv(stime, failure) ~ 1,
data=predict,
conf.inf=FALSE,
times=10,
metrics=NULL,
plots="Calibration")
plt_calibrate <- riskRegression::plotCalibration(
score,
cens.method="local",
round=TRUE,
rug=TRUE,
plot=FALSE)
plt_calibrate$plotFrames$p_fail %>%
dplyr::mutate(landmark = lm[l])
})
plot_calibration_lm <- do.call(rbind, plot_calibration_lm)
plot_calibration_lm <- ggplot(
plot_calibration_lm,
aes(x=Pred, y=Obs)) +
stat_smooth(
aes(color=factor(landmark)),
method="loess",
span=0.3,
alpha=0.2) +
geom_abline(intercept=0, slope=1, alpha=0.5) +
geom_rug() +
scale_y_continuous(limits=c(0, 1), breaks=seq(0, 1, 0.2)) +
scale_x_continuous(limits=c(0, 1), breaks=seq(0, 1, 0.2)) +
labs(title="",
caption=paste("Calibration plots for kidney graft failure at", horizon ,"years follow-up in landmark data"),
y="Predicted Risks",
x="Observed risks") +
ggthemes::theme_tufte(ticks=TRUE) +
labs(color="Landmark")
pdf("plots/calibration_gf_lm.pdf",
width=7,
height=5.6)
plot_calibration_lm
dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.