#' Estimate Restricted Mean Time Lost (RMTL) and its Difference
#'
#' The method of Connor and Trinquart (Stat Med 2021) estimates the RMTL in the
#' presence of competing risks. This function is a modification of their R code.
#' Estimation functions are identical; input handling and output have been
#' adapted.
#'
#' @param data Data frame (tibble).
#' @param exposure Optional. Name of exposure variable within the \code{data}.
#' If omitted, will return unstratified RMTL.
#' @param time Name of time variable within the \code{data}. By default, this
#' is the event time. If \code{time2} is also given, this is the entry time,
#' as per \code{\link[survival]{Surv}} standard.
#' @param time2 Optional. Name of the variable within the \code{data}
#' indicating the exit (event) time. If omitted, \code{time} is the event
#' time, as per \code{\link[survival]{Surv}} standard.
#' @param event Name of event variable within the \code{data}. Can be integer,
#' factor, character, or logical. The reference level indicates censoring and
#' is taken as the first level of the factor, the lowest numeric value
#' (usually \code{0}), or \code{FALSE} for a logical variables. Other levels
#' are events of different types. If no competing risks, the alternative
#' level, e.g. \code{1}, indicates the event (e.g., death).
#' @param event_of_interest Optional. Indicator for which of the non-censoring
#' events is of main interest. Others are treated as competing. Defaults to
#' \code{1}, i.e., the first non-censoring event.
#' @param weight Optional. Weights, such as inverse-probability weights or
#' survey weights. The default (\code{NULL}) uses equal weights for all
#' observations.
#' @param tau Optional. Time horizon to restrict event times. By default, the
#' latest time in the group with the shortest follow-up. Prefer a
#' user-defined, interpretable time horizon.
#' @param reach_tau Optional. How to handle provided
#' \code{tau} values that are not reached in one of the exposure categories.
#'
#' * \code{"warn"} Default. Display a warning and estimate RMTL and its
#' difference in those exposure categories where \code{tau} is reached.
#' * \code{"stop"} Stop with an error if \code{tau} not reached in one or more
#' of the exposure categories.
#' * \code{"ignore"} Ignore exposure categories where \code{tau} is not
#' reached; estimate RMTL and its difference in those exposure categories
#' where \code{tau} is reached.
#'
#' If \code{tau} is not reached in any of the exposure categories, the
#' function always stops with an error.
#' @param conf.level Optional. Confidence level. Defaults to \code{0.95}.
#'
#' @details
#' Differences to the original function rmtl::rmtl():
#' * Convert \code{\link{print}} for errors to \code{\link{stop}} or
#' \code{\link{warning}}.
#' * Pass data as a data frame/tibble and select variables from it.
#' * Allow for different variable types in the \code{event} variable.
#' * Use \code{\link[survival]{Surv}} conventions for entry/exit times
#' (\code{time}, \code{time2}).
#' * Allow for exposure groups where \code{tau} is not reached.
#' * Return contrasts in restricted mean time lost as comparisons to the
#' reference level instead of all pairwise comparisons.
#' * Add origin (time 0, cumulative incidence 0) to the returned cumulative
#' incidence function for proper plotting.
#' * Return results as a list of tibbles. Do not print or plot.
#'
#' @return List:
#' * \code{tau} Time horizon.
#' * \code{rmtl} Tibble with absolute estimates of RMTL, per exposure group if
#' given.
#' * \code{rmtdiff} Tibble with contrasts of RMTL between exposure groups,
#' compared to a common reference (the first level). Empty tibble if no
#' exposure variable given.
#' * \code{cif} Tibble with cumulative incidence function for the event of
#' interest:
#' * \code{exposure} Exposure group.
#' * \code{time} Event time.
#' * \code{estimate} Aalen-Johansen estimate of cumulative incidence function
#' with \code{se}, \code{conf.low}, and \code{conf.high}.
#'
#' @references Conner SC, Trinquart L. Estimation and modeling of the restricted
#' mean time lost in the presence of competing risks. Stat Med 2021;40:2177–96.
#' [https://doi.org/10.1002/sim.8896](https://doi.org/10.1002/sim.8896).
#' @export
#'
#' @examples
#' data(cancer, package = "survival")
#' cancer <- cancer %>% dplyr::mutate(
#' status = status - 1, # make 0/1
#' sex = factor(sex, levels = 1:2, labels = c("Men", "Women")))
#'
#' result <- estimate_rmtl(
#' data = cancer,
#' exposure = sex,
#' time = time,
#' event = status,
#' tau = 365.25) # time horizon: one year
#' result
#'
#' # Make simple plot
#' library(ggplot2)
#' library(dplyr)
#' library(tidyr)
#'
#' result$cif %>%
#' ggplot(mapping = aes(x = time, y = estimate, color = exposure)) +
#' geom_step() +
#' scale_x_continuous(breaks = seq(from = 0, to = 365, by = 60)) +
#' labs(x = "Time since start of follow-up in days",
#' y = "Cumulative incidence")
#'
#' # Make fancier plot with a shaded area for the RMTL difference
#' df_ribbon <- result$cif %>%
#' select(exposure, time, estimate) %>%
#' pivot_wider(names_from = exposure,
#' values_from = estimate,
#' names_repair = ~c("time", "surv", "surv2")) %>%
#' filter(time < 365.25) %>% # tau for RMST
#' arrange(time) %>% # carry forward survival values per stratum
#' fill(surv) %>%
#' fill(surv2)
#'
#' result$cif %>%
#' ggplot() +
#' geom_step(mapping = aes(x = time, y = estimate, color = exposure)) +
#' scale_x_continuous(breaks = seq(from = 0, to = 365, by = 60)) +
#' scale_y_continuous(expand = expansion()) +
#' labs(x = "Time since start of follow-up in days",
#' y = "Cumulative incidence") +
#' cowplot::theme_minimal_hgrid() +
#' geom_stepribbon(data = df_ribbon,
#' mapping = aes(x = time, ymin = surv, ymax = surv2),
#' fill = "gray80", alpha = 0.5)
estimate_rmtl <- function(data,
exposure = NULL,
time,
time2 = NULL,
event,
event_of_interest = 1,
weight = NULL,
tau = NULL,
reach_tau = c("warn", "stop", "ignore"),
conf.level = 0.95) {
alldat <- data %>%
tibble::as_tibble() %>%
dplyr::select(time = {{ time }},
time2 = {{ time2 }},
event = {{ event }},
group = {{ exposure }},
weight = {{ weight }})
# set to default values if missing in data
added_cols <- tibble(time2 = NA_real_, group = "Overall", weight = 1)
alldat <- alldat %>%
tibble::add_column(added_cols %>%
select(-dplyr::any_of(names(alldat)))) %>%
filter(!is.na(.data$group) & !is.na(.data$time)) %>%
arrange(.data$group) %>%
dplyr::mutate(
group = factor(.data$group),
entry = dplyr::if_else(is.na(.data$time2),
true = 0, false = as.double(.data$time)),
times = dplyr::if_else(is.na(.data$time2),
true = .data$time, false = .data$time2),
event = as.numeric(factor(as.numeric(.data$event))) - 1) %>%
dplyr::select(-.data$time, -.data$time2)
if (any(alldat$times < 0)) {
stop("Event/censoring times must be positive.")
}
z <- stats::qnorm(1 - (1 - conf.level) / 2)
gg <- length(levels(alldat$group))
if (is.null(tau)) {
tau <- alldat %>%
dplyr::group_by(.data$group) %>%
dplyr::summarize(maxfu = max(.data$times), .groups = "drop") %>%
dplyr::summarize(tau = min(.data$maxfu)) %>%
pull(.data$tau)
} else {
tau.error <- rep(0, gg)
for (i in (1:gg)) {
groupval <- (levels(alldat$group)[i])
dat_group <- alldat[which(alldat$group == (groupval)), ]
tau.error[i] <- ifelse(max(dat_group$times) < tau, 1, 0)
}
if(all(tau.error == 1)) {
stop("Observed event/censoring times do not reach tau in ANY ",
"exposure group. Choose a different tau or leave ",
"NULL for automatic selection of latest possible time.")
}
if (sum(tau.error) > 0) {
tau_message <- paste0("Observed event/censoring times do not reach tau ",
"in these exposure groups: ",
paste(levels(alldat$group)[tau.error == 1],
sep = ", ", collapse = ", "),
". Choose a different tau or leave NULL for ",
"automatic selection of latest possible time.")
if(reach_tau[1] == "stop")
stop(tau_message)
if(reach_tau[1] == "warn")
warning(tau_message)
}
}
alldat$event[alldat$times > tau] <- 0
alldat$times[alldat$times > tau] <- tau
rmtl <- rep(NA, length(1:gg))
groupval <- rep(NA, length(1:gg))
rmtl.se <- rep(NA, length(1:gg))
res.cif <- list()
for (g in 1:gg) {
groupval1 <- (levels(alldat$group)[g])
dat_group1 <- alldat[which(alldat$group == (groupval1)), ]
if(max(dat_group1$times) >= tau) {
groupval[g] <- levels(alldat$group)[g]
data <- alldat[which(alldat$group == (groupval[g])), ]
tj <- data$times[data$event != 0]
tj <- unique(tj[order(tj)])
num.tj <- length(tj)
num.atrisk <-
sapply(tj, function(x)
sum(data$weight[data$entry < x & data$times >= x]))
num.ev1 <-
sapply(tj, function(x)
sum(data$weight[data$event == event_of_interest &
data$times == x]))
num.ev2 <-
sapply(tj, function(x)
sum(data$weight[data$event != event_of_interest &
data$event != 0 & data$times == x]))
num.ev <- num.ev1 + num.ev2
m <- sapply(tj, function(x) {
sum((data$weight[data$entry < x & data$times >= x]) ^ 2)
})
mg <- ((num.atrisk ^ 2) / m)
h1 <- num.ev1 / num.atrisk
h <- num.ev / num.atrisk
s <- cumprod(c(1, 1 - h))
s <- s[1:length(s) - 1]
theta <- s * h1
cif1 <- cumsum(theta)
a <- c(0, cumsum(num.ev / (mg * (num.atrisk - num.ev))))
a <- a[1:num.tj]
var.theta <- ((theta) ^ 2) *
(((num.atrisk - num.ev1) / (mg * num.ev1)) + a)
var.theta[is.nan(var.theta)] <- 0
cov.theta <- matrix(NA, nrow = num.tj, ncol = num.tj)
b <- c(0, cumsum(num.ev / (mg * (num.atrisk - num.ev))))
for (j in 1:(num.tj - 1)) {
for (k in (j + 1):num.tj) {
cov.theta[k, j] <- cov.theta[j, k] <- (theta[j]) *
(theta[k]) * (-1 / mg[j] + b[j])
}
}
diag(cov.theta) <- var.theta
cov.f10 <- apply(cov.theta, 2, function(x) {
x[is.na(x)] <- 0
cumsum(x)
})
cov.f1 <- apply(cov.f10, 1, function(x) {
x[is.na(x)] <- 0
cumsum(x)
})
var.f1 <- diag(cov.f1)
areas <- c(tj[2:num.tj], tau) - tj
rmtl[g] <- sum(areas * cif1)
cov.weights <- outer(areas, areas)
cov.f1.weight <- cov.weights * cov.f1
rmtl.var <- sum(cov.f1.weight)
rmtl.se[g] <- sqrt(rmtl.var)
res.cif.g <- list()
res.cif.g[[length(res.cif.g) + 1]] <- c(cif1, cif1[num.tj])
res.cif.g[[length(res.cif.g) + 1]] <- c(sqrt(var.f1), sqrt(var.f1[num.tj]))
res.cif.g[[length(res.cif.g) + 1]] <- c(tj, tau)
res.cif.g[[length(res.cif.g) + 1]] <- c(
cif1 - z * sqrt(var.f1),
cif1[num.tj] - z * sqrt(var.f1[num.tj]))
res.cif.g[[length(res.cif.g) + 1]] <- c(
cif1 + z * sqrt(var.f1),
cif1[num.tj] + z * sqrt(var.f1[num.tj]))
names(res.cif.g) <- c("cif", "se.cif", "tj", "cif.cil", "cif.ciu")
} else {
res.cif.g <- list(cif = NA, se.cif = NA, tj = NA,
cif.cil = NA, cif.ciu = NA)
}
res.cif[[length(res.cif) + 1]] <- res.cif.g
names(res.cif)[g] <- groupval[g] # assign name here
}
# if(gg > 1) { # moved from here
res <- data.frame(
groupval,
rmtl,
rmtl.se,
cil = rmtl - (z * rmtl.se),
ciu = rmtl + (z * rmtl.se))
#names(res.cif) <- groupval # assign name above to allow for missing groups
pwc <- gg # all pairwise: ((gg ^ 2) - gg) / 2
# to here
if (gg > 1) {
if (pwc > 0) {
label.diff <- rep(NA, pwc)
rmtl.diff <- rep(NA, pwc)
rmtl.diff.se <- rep(NA, pwc)
rmtl.diff.cil <- rep(NA, pwc)
rmtl.diff.ciu <- rep(NA, pwc)
rmtl.diff.p <- rep(NA, pwc)
res.diff <- data.frame(
label.diff,
rmtl.diff,
rmtl.diff.se,
rmtl.diff.cil,
rmtl.diff.ciu,
rmtl.diff.p)
l <- 1
i <- 1 # make contrasts only to reference level (1st group)
for (ii in i:gg) { # (ii in (i + 1):gg) {
res.diff[l, ]$label.diff <- res[ii, ]$groupval
res.diff[l, ]$rmtl.diff <- (res[ii, ]$rmtl - res[i, ]$rmtl)
res.diff[l, ]$rmtl.diff.se <- sqrt(res[ii, ]$rmtl.se ^ 2 +
res[i, ]$rmtl.se ^2)
res.diff[l, ]$rmtl.diff.cil <- res.diff[l, ]$rmtl.diff -
z * res.diff[l, ]$rmtl.diff.se
res.diff[l, ]$rmtl.diff.ciu <- res.diff[l, ]$rmtl.diff +
z * res.diff[l, ]$rmtl.diff.se
res.diff[l, ]$rmtl.diff.p <- 2 * (1 - stats::pnorm(
abs(res.diff[l, ]$rmtl.diff) / res.diff[l, ]$rmtl.diff.se))
l <- l + 1
}
res.diff[1, c("rmtl.diff.se", "rmtl.diff.cil", "rmtl.diff.ciu",
"rmtl.diff.p")] <- NA
res.diff <- tibble::as_tibble(res.diff) %>%
dplyr::rename(exposure = .data$label.diff,
estimate = .data$rmtl.diff,
se = .data$rmtl.diff.se,
conf.low = .data$rmtl.diff.cil,
conf.high = .data$rmtl.diff.ciu,
p.value = .data$rmtl.diff.p)
}
} else {
res.diff <- tibble::tibble()
}
list(tau = tau,
rmtl = tibble::as_tibble(res) %>%
dplyr::rename(exposure = .data$groupval,
estimate = .data$rmtl,
se = .data$rmtl.se,
conf.low = .data$cil,
conf.high = .data$ciu),
rmtdiff = res.diff,
# add origin at (time = 0, cuminc = 0) for each exposure group
# (even if no estimation was possible there because tau was not reached)
cif = dplyr::bind_rows(tibble::tibble(exposure =
levels(alldat$group)) %>%
dplyr::mutate(tj = 0, cif = 0,
se.cif = NA,
cif.cil = NA, cif.ciu = NA),
res.cif %>%
purrr::transpose() %>%
tibble::as_tibble() %>%
dplyr::mutate(exposure = names(.data$cif)) %>%
tidyr::unnest(cols = c(-.data$exposure))) %>%
dplyr::select(.data$exposure,
time = .data$tj,
estimate = .data$cif,
se = .data$se.cif,
conf.low = .data$cif.cil,
conf.high = .data$cif.ciu))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.