inst/doc/timeSplitter.R

## ----message=FALSE, warning=FALSE, echo=FALSE---------------------------------
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

## ----Coxph_tt_example, eval=FALSE---------------------------------------------
#  library(survival)
#  coxph(Surv(time, event) ~ age + sex +
#          type_of_surgery + tt(type_of_surgery) +
#          tt(surgery_length),
#        data = my_surgical_data,
#        tt = list(
#          function(type_of_surgery, time, ...){
#            # As type_of_surgery is a categorical variable
#            # we must transform it into a design matrix that
#            # we can use for multiplication with time
#            # Note: the [,-1] is for dropping the intercept
#            mtrx <- model.matrix(~type_of_surgery)[,-1]
#            mtrx * time
#          },
#          function(surgery_length, time, ...){
#            # Note that the surgery_length and the time
#            # should probably have similar ranges. If you
#            # report operating time in minutes, follow-up
#            # in years the t will be dwarfed by the
#            pspline(surgery_length + time)
#          }
#        ))

## ----eval=FALSE---------------------------------------------------------------
#  library(plyr)
#  models <-
#    timeSplitter(your_data,
#                 by = 4,
#                 event_var = "status",
#                 time_var = "years",
#                 event_start_status = "alive",
#                 time_related_vars = c("age", "date")) |>
#    dlply("Start_time",
#          function(data){
#            coxph(Surv(Start_time, End_time, status) ~ age + sex + treatment, data = data)
#          })

## -----------------------------------------------------------------------------
library(tidyverse)
library(Greg)

test_data <- tibble(id = 1:4,
                    time = c(4, 3.5, 1, 5),
                    event = c("censored", "dead", "alive", "dead"),
                    age = c(62.2, 55.3, 73.7, 46.3),
                    date = as.Date(
                      c("2003-01-01",
                        "2010-04-01",
                        "2013-09-20",
                        "2002-02-23")),
                    stringsAsFactors = TRUE) |>
  mutate(time_str = sprintf("0 to %.1f", time))

## ----Display_data, echo=FALSE, fig.height=4, fig.width=7----------------------
library(grid)
getMaxWidth <- function(vars){
  sapply(vars,
         USE.NAMES = FALSE,
         function(v){
           grobWidth(x = textGrob(v)) |>
             convertX(unitTo = "mm")
         }) |>
    max() |>
    unit("mm")
}
plotTitleAndPushVPs <- function(title_txt){
  pushViewport(viewport(width = unit(.9, "npc"),
                        height = unit(.9, "npc")))

  title <- textGrob(title_txt, gp = gpar(cex = 2))
  title_height <- grobHeight(title) |>
    convertY(unitTo = "mm", valueOnly = TRUE) * 2 |>
    unit("mm")
  viewport(layout = grid.layout(nrow = 3,
                                heights = unit.c(title_height,
                                                 unit(.1, "npc"),
                                                 unit(1, "npc") -
                                                   title_height -
                                                   unit(.1, "npc") -
                                                   unit(2, "line"),
                                                 unit(2, "line")))) |>
    pushViewport()
  viewport(layout.pos.row = 1) |>
    pushViewport()
  grid.draw(title)
  upViewport()

  viewport(layout.pos.row = 3) |>
    pushViewport()
}

plotLine <- function(row_no,
                     start_time,
                     stop_time,
                     event,
                     data_range = c(0, max(test_data$time)),
                     print_axis = FALSE) {
  viewport(layout.pos.row = row_no,
           layout.pos.col = 6,
           xscale = data_range) |>
    pushViewport()
  on.exit(upViewport())

  if (event) {
    grid.lines(x = unit(c(start_time,
                          stop_time), "native"),
               y = rep(0.5, 2))
    grid.points(x = unit(stop_time, "native"), y = 0.5,
                pch = "*",
                gp = gpar(cex = 2))
  }else{
    grid.lines(x = unit(c(start_time,
                          stop_time), "native"),
               y = rep(0.5, 2),
               arrow = arrow(length = unit(3, "mm"),
                             type = "closed"),
               gp = gpar(fill = "#000000"))
  }
  grid.points(x = unit(start_time, "native"), y = 0.5, pch = 20)
  if (print_axis)
    grid.xaxis()
}

plotIDcell <- function(row_no, id){
  viewport(layout.pos.row = row_no,
           layout.pos.col = 2) |>
    pushViewport()
  grid.text(id)
  upViewport()
}
plotTimeStrcell <- function(row_no, time_str){
  viewport(layout.pos.row = row_no,
           layout.pos.col = 4) |>
    pushViewport()
  grid.text(time_str)
  upViewport()
}

plotRowColor <- function(row_no, clr = "#F6F6FF"){
  viewport(layout.pos.row = row_no) |>
    pushViewport()
  grid.rect(gp = gpar(col = clr, fill = clr))
  upViewport()
}


# Do the actual plot
grid.newpage()
plotTitleAndPushVPs("Time spans")
widths <-
  unit.c(unit(.1, "npc"),
         getMaxWidth(test_data$id),
         unit(.1, "npc"),
         getMaxWidth(test_data$time_str),
         unit(.1, "npc")) |>
  (\(x) unit.c(x,
               unit(1, "npc") - sum(x) - unit(.1, "npc"),
               unit(.1, "npc")))()

viewport(layout = grid.layout(nrow = nrow(test_data),
                              ncol = length(widths),
                              widths = widths)) |>
  pushViewport()


for (i in 1:nrow(test_data)) {
  if (i %% 2 == 0)
    plotRowColor(i)
  plotIDcell(i, test_data$id[i])
  plotTimeStrcell(i, test_data$time_str[i])

  plotLine(row_no = i,
           start_time = 0,
           stop_time = test_data$time[i],
           event = test_data$event[i] == "dead",
           print_axis = i == nrow(test_data))
}
upViewport(2)

## ----Split_data---------------------------------------------------------------
library(dplyr)
split_data <- test_data |>
  select(id, event, time, age, date) |>
  timeSplitter(by = 2, # The time that we want to split by
               event_var = "event",
               time_var = "time",
               event_start_status = "alive",
               time_related_vars = c("age", "date"))

knitr::kable(head(split_data, 10))

## ----Complex_split_plot, fig.height=6, fig.width=7, echo=FALSE----------------
# Do the actual plot
plotTitleAndPushVPs("Time spans with split")

viewport(layout = grid.layout(nrow = nrow(test_data) + nrow(split_data),
                              ncol = length(widths),
                              widths = widths)) |>
  pushViewport()

current_id <- NULL
no_ids <- 0
for (i in 1:nrow(split_data)) {
  if (is.null(current_id) ||
      split_data$id[i] != current_id) {
    current_id <- split_data$id[i]
    subjects_splits <- subset(split_data, id == current_id)
    rowspan <- (i + no_ids):(i + no_ids + nrow(subjects_splits))
    if (no_ids %% 2 == 1)
      plotRowColor(rowspan)
    plotIDcell(row_no = rowspan, id = current_id)
    plotTimeStrcell(row_no = rowspan, time_str = subset(test_data,
                                                        id == current_id,
                                                        "time_str"))
    with(subset(test_data,
                id == current_id),
         plotLine(row_no = i + no_ids,
                  start_time = 0,
                  stop_time = time,
                  event = event == "dead"))
    no_ids = no_ids + 1
  }

  plotLine(row_no = i + no_ids,
           start_time = split_data$Start_time[i],
           stop_time = split_data$Stop_time[i],
           event = split_data$event[i] == "dead",
           print_axis = i == nrow(split_data))
}
upViewport(2)


## -----------------------------------------------------------------------------
# First we start with loading the dataset
data("melanoma", package = "boot")

# Then we munge it according to ?boot::melanoma
melanoma <- mutate(melanoma,
                   status = factor(status,
                                   levels = 1:3,
                                   labels = c("Died from melanoma",
                                              "Alive",
                                              "Died from other causes")),
                   ulcer = factor(ulcer,
                                  levels = 0:1,
                                  labels = c("Absent", "Present")),
                   time = time/365.25, # All variables should be in the same time unit
                   sex = factor(sex,
                                levels = 0:1,
                                labels = c("Female", "Male")))

## -----------------------------------------------------------------------------
library(survival)
regular_model <- coxph(Surv(time, status == "Died from melanoma") ~
                         age + sex + year + thickness + ulcer,
                       data = melanoma,
                       x = TRUE, y = TRUE)
summary(regular_model)

## -----------------------------------------------------------------------------
spl_melanoma <-
  melanoma |>
  timeSplitter(by = .5,
               event_var = "status",
               event_start_status = "Alive",
               time_var = "time",
               time_related_vars = c("age", "year"))

interval_model <-
  update(regular_model,
         Surv(Start_time, Stop_time, status == "Died from melanoma") ~ .,
         data = spl_melanoma)

summary(interval_model)

## -----------------------------------------------------------------------------
library(htmlTable)
cbind(Regular = coef(regular_model),
      Interval = coef(interval_model),
      Difference = coef(regular_model) - coef(interval_model)) |>
  txtRound(digits = 5) |>
  knitr::kable(align = "r")

## -----------------------------------------------------------------------------
cox.zph(regular_model) |>
  purrr::pluck("table") |>
  txtRound(digits = 2) |>
  knitr::kable(align = "r")

## -----------------------------------------------------------------------------
time_int_model <-
  update(interval_model,
         .~.+thickness:Start_time)
summary(time_int_model)

## -----------------------------------------------------------------------------
# First we need to manually add an interaction term
spl_melanoma <- mutate(spl_melanoma,
                       thickness_start = thickness * Start_time)
anova(time_int_model,
      update(time_int_model, .~.+I(thickness_start^2)))

## -----------------------------------------------------------------------------
update(time_int_model, .~.-thickness:Start_time+pspline(thickness_start))

## -----------------------------------------------------------------------------
# Lets create an evenly distributed categorical thickness variable
# and interactions
spl_melanoma <- mutate(spl_melanoma,
                       thickness_cat = cut(thickness,
                                           breaks = c(0, 1, 5, Inf),
                                           labels = c("less than 1.0",
                                                      "1.0 to 4.9",
                                                      "at least 5.0")))
# Now create interaction variables
for (l in levels(spl_melanoma$thickness_cat)[-1]) {
  spl_melanoma[[sprintf("thickness_%s_time", gsub(" ", "_", l))]] <-
    (spl_melanoma$thickness_cat == l)*spl_melanoma$Start_time
}

# Now for the model specification where we use a
# pspline for the two interaction variables
adv_int_model <-
  coxph(Surv(Start_time, Stop_time, status == "Died from melanoma") ~
          age + sex + year + ulcer +
          thickness_cat + pspline(thickness_1.0_to_4.9_time) + pspline(thickness_at_least_5.0_time),
        data = spl_melanoma,
        x = TRUE, y = TRUE,
        iter.max = 1000)

# To get the estimates we use the predict function
new_data <- data.frame(thickness_cat = rep(levels(spl_melanoma$thickness_cat)[-1],
                                           each = 100),
                       Start_time = 2^seq(-3, 3, length.out = 100),
                       stringsAsFactors = FALSE) |>
  mutate(thickness_1.0_to_4.9_time = (thickness_cat == levels(spl_melanoma$thickness_cat)[2]) *
           Start_time,
         thickness_at_least_5.0_time = (thickness_cat == levels(spl_melanoma$thickness_cat)[3]) *
           Start_time)
new_data$sex = "Female"
new_data$age = median(melanoma$age)
new_data$year = median(melanoma$year)
new_data$ulcer = "Absent"

adv_pred <- predict(adv_int_model,
                    newdata = new_data,
                    type = "terms",
                    terms = c("thickness_cat",
                              "pspline(thickness_1.0_to_4.9_time)",
                              "pspline(thickness_at_least_5.0_time)"),
                    se.fit = TRUE)

new_data$fit <- rowSums(adv_pred$fit)
new_data$se.fit <- apply(adv_pred$se.fit, 1, function(x) x^2) |>
  colSums() |>
  sqrt()
new_data <- mutate(new_data,
                   risk = exp(fit),
                   upper = exp(fit + 1.96*se.fit),
                   lower = exp(fit - 1.96*se.fit))


## ----fig.width=8, fig.height=6------------------------------------------------
library(ggplot2)
new_data |>
  mutate(adapted_risk = sapply(risk, function(x) min(max(2^-4, x), 2^5)),
         adapted_upper = sapply(upper, function(x) min(max(2^-4, x), 2^5)),
         adapted_lower = sapply(lower, function(x) min(max(2^-4, x), 2^5))) |>
  ggplot(aes(y = adapted_risk,
             x = new_data$Start_time,
             group = thickness_cat,
             col = thickness_cat,
             fill = thickness_cat)) +
  # The confidence intervals are too wide to display in this case
  # geom_ribbon(aes(ymax = adapted_upper, ymin = adapted_lower), fill = "red") +
  geom_line() +
  scale_x_log10(breaks = 2^(-3:4),
                limits = c(2^-3, 8),
                expand = c(0, 0)) +
  scale_y_log10(breaks = 2^(-4:5),
                labels = txtRound(2^(-4:5), 2),
                expand = c(0,0)) +
  scale_color_brewer(type = "qual", guide = guide_legend(title = "Thickness (mm)")) +
  ylab("Hazard ratio") +
  xlab("Time (years)") +
  theme_bw()

Try the Greg package in your browser

Any scripts or data that you put into this service are public.

Greg documentation built on Nov. 16, 2022, 5:06 p.m.