In keaven/gsDesign2: Group Sequential Design with Non-Constant Effect

knitr::opts_chunk$set(fig.width = 5, fig.height = 4, fig.align = 'center')

library(gt)
library(tibble)
library(dplyr)
library(testthat)
#library(gsDesign2)
devtools::load_all()

Introduction of `eEvents_df`

eEvents_df() computes expected number of events at a given analysis time by strata under the assumption of piecewise model:

piecewise constant enrollment rates
piecewise exponential failure rates
piecewise censoring rates.

The above piecewise exponential distribution allows a simple method to specify a distribution and enrollment pattern where the enrollment, failure and dropout rates changes over time.

Here the df in eEvents_df() is short for data frame, since its output is a data frame.

Use Cases

Example 1: Single Enroll + Single Fail Period

enrollRates <- tibble(duration = 10, rate = 10)
failRates <- tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01)
totalDuration <- 22

eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)

Example 2: Multiple Enroll + Single Fail Period

enrollRates <- tibble(duration = c(5, 5), rate = c(10, 20))
failRates <- tibble(duration = 100, failRate = log(2)/6, dropoutRate = .01)
totalDuration <- 22

eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)

Example 3: Signle Enroll + Multiple Fail Period

enrollRates <- tibble(duration = 10, rate = 10)
failRates <- tibble(duration = c(20, 80), failRate = c(log(2)/6, log(2)/4), dropoutRate = .01)
totalDuration <- 22

eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)

Example 4: Multiple Duration

enrollRates <- tibble(duration = 10, rate = 10)
failRates <- tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01)
totalDuration <- c(2, 22)

try(eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE))

Inner Logic of `eEvents_df()`

Step 1: set the analysis time.

totalDuration <- 50

Step 2: set the enrollment rates.

enrollRates <- tibble(duration = c(5, 5), rate = c(10, 20))

# create a step function (sf) to define enrollment rates over time
sf.enrollRate <- stepfun(c(0, cumsum(enrollRates$duration)),
                         c(0, enrollRates$rate, 0),
                         right = FALSE)

plot(sf.enrollRate, 
     xlab = "duration", ylab = "enrollment rates", 
     main = "Piecewise enrollment rate over time", xlim = c(-0.01, 21))

Step 3: set the failure rates and dropout rates.

failRates <- tibble(duration = c(20, 80), failRate = c(0.1, 0.2), dropoutRate = .01)

# get the time points where the failure rates change
startFail <- c(0, cumsum(failRates$duration))

# plot the piecewise failure rates
sf.failRate <- stepfun(startFail,
                       c(0, failRates$failRate, last(failRates$failRate)),
                       right = FALSE)
plot(sf.failRate, 
     xlab = "duration", ylab = "failure rates", 
     main = "Piecewise failure rate over time", xlim = c(-0.01, 101))

# plot the piecewise dropout rate
sf.dropoutRate <- stepfun(startFail,
                          c(0, failRates$dropoutRate, last(failRates$dropoutRate)),
                          right = FALSE)
plot(sf.dropoutRate, 
     xlab = "duration", ylab = "dropout rates", 
     main = "Piecewise dropout rate over time", xlim = c(-0.01, 101))

Given the above piecewise enrollment rates, failure rates, dropout rates, the time line is divided into several parts:

$(0, 5]$ (5 is the change point of the enrollment rates);
$(5, 10]$ (10 is another change point of the enrollment rates);
$(10, 20]$ (20 is the change point of the failure rates);
$(20, 50]$ (50 is the analysis time);
$(50, \infty]$ (after the analysis time).

plot(sf.enrollRate, 
     xlab = "time", ylab = "enrollment rates", 
     lty = 1, col = "red", pch = 1,
     main = "Piecewise enrollment, failure, dropout rate over time", xlim = c(-0.01, 101))

plot(sf.failRate, 
     xlab = "time", ylab = "failure rates", 
     lty = 2, col = "blue", pch = 2,
     xlim = c(-0.01, 101), add = TRUE)

plot(sf.dropoutRate, 
     xlab = "time", ylab = "dropout rates", 
     lty = 3, col = "green", pch = 3,
     xlim = c(-0.01, 101), add = TRUE)

legend(60, 20, c("enrollment rate", "failure rate", "dropout rate"), col = c("red", "blue", "green"),
       lty = c(1, 2, 3), pch = c(1, 2, 3),
       merge = TRUE, bg = "gray90")

Given the above sub-intervals, our objective is to calculate the expected number of events in each sub-intervals.

Step 4: divide the time line for enrollments

df_1 <- tibble(startEnroll = c(0, cumsum(enrollRates$duration)),
               endFail = totalDuration - startEnroll,
               rate = c(enrollRates$rate, 0))

df_1 %>% 
  gt() %>%
  tab_header(title = "df_1") %>% 
  tab_footnote(footnote = "The time when the enrollment starts.", locations = cells_column_labels("startEnroll")) %>%
  tab_footnote(footnote = "The time from startEnroll to the analysis time.", locations = cells_column_labels("endFail")) %>% 
  tab_footnote(footnote = "The enrollment rates", locations = cells_column_labels("rate"))

Step 5: divide the time line for failure \& dropout rates

df_2 <- tibble(endFail = cumsum(failRates$duration),
               startEnroll = totalDuration - endFail,
               failRate = failRates$failRate,
               dropoutRate = failRates$dropoutRate)

df_2 %>% 
  gt() %>%
  tab_header(title = "df_2") %>% 
  tab_footnote(footnote = "The time when the failure changes.", locations = cells_column_labels("endFail")) %>%
  tab_footnote(footnote = "The time from endFail to the analysis time.", locations = cells_column_labels("startEnroll")) %>% 
  tab_footnote(footnote = "The failure rates", locations = cells_column_labels("failRate")) %>% 
  tab_footnote(footnote = "The dropout rates", locations = cells_column_labels("dropoutRate"))

For the above df_2, one needs to discriminate if the analysis time (totalDuration = 50) is beyond the total failure rate duration.

# if the analysis time is after the total failure rate duration
if(sum(failRates$duration) < totalDuration){
  df_2 <- df_2[-nrow(df_2), ]
}else{
  df_2 <- df_2 %>% filter(startEnroll > 0)
}

df_2 %>% 
  gt() %>%
  tab_header(title = "df_2", subtitle = "Updated by adjusting the analysis time and failRates duration") %>% 
  tab_footnote(footnote = "The time when the failure changes.", locations = cells_column_labels("endFail")) %>%
  tab_footnote(footnote = "The time from endFail to the analysis time.", locations = cells_column_labels("startEnroll")) %>% 
  tab_footnote(footnote = "The failure rates", locations = cells_column_labels("failRate")) %>% 
  tab_footnote(footnote = "The dropout rates", locations = cells_column_labels("dropoutRate"))

Step 6: divide the time line considering both the change points in enrollment, failure, dropout rates.

df <- full_join(df_1, df_2, by = c("startEnroll", "endFail")) %>% arrange(endFail)

df %>% 
  gt() %>% 
  tab_header(title = "df") %>% 
  tab_footnote(footnote = "The time when the enrollment rate starts.", locations = cells_column_labels("startEnroll")) %>% 
  tab_footnote(footnote = "The time when the failure rate ends. And startEnroll + endFail = 50", locations = cells_column_labels("endFail")) %>% 
  tab_footnote(footnote = "The enrollment rates.", locations = cells_column_labels("rate"))

We find there are lots of NA, which can be imputed by the piecewise model.

df <- df %>% mutate(endEnroll = lag(startEnroll, default = as.numeric(totalDuration)),
                    startFail = lag(endFail, default = 0),
                    duration = endEnroll - startEnroll,
                    failRate = sf.failRate(startFail),
                    dropoutRate = sf.dropoutRate(startFail),
                    enrollRate = sf.enrollRate(startEnroll)) %>% 
             select(-rate)

df %>% 
  select(startEnroll, endEnroll, startFail, endFail, enrollRate, failRate, dropoutRate, duration) %>% 
  arrange(startEnroll) %>% 
  gt() %>% 
  tab_footnote(footnote = "The time when the enrollment rate starts.", 
               locations = cells_column_labels("startEnroll")) %>% 
  tab_footnote(footnote = "The (startEnroll, endEnroll] forms the piecewise model of the enrollment rates",
               locations = cells_column_labels("endEnroll")) %>% 
  tab_footnote(footnote = "The time when the failure rate starts.", 
               locations = cells_column_labels("startFail")) %>% 
  tab_footnote(footnote = "The time when the failure rate ends. And startEnroll + endFail = 50. Besides, (startFail, endFail  ] forms the piecewise model of the enrollment rates.", 
               locations = cells_column_labels("endFail")) %>% 
  tab_footnote(footnote = "endEnroll - startEnroll",
               locations = cells_column_labels("duration"))

Step 7: compute the expected number of events in sub-intervals following the technical details in the vignette ``computing expected events by interval at risk''

             # create 2 auxiliary variable for failure & dropout rate
             # q: number of expected events in a sub-interval
             # Q: cumulative product of q (pool all sub-intervals)
df <- df %>% mutate(q = exp(-duration * (failRate + dropoutRate)),
                    Q = lag(cumprod(q), default = 1)) %>%
             arrange(desc(startFail)) %>%
             # create another 2 auxiliary variable for enroll rate
             # g: number of expected subjects in a sub-interval
             # G: cumulative sum of g (pool all sub-intervals)
             mutate(g = enrollRate * duration,
                    G = lag(cumsum(g), default = 0)) %>%
             arrange(startFail) %>%
             # compute expected events as nbar in a sub-interval
             mutate(d = ifelse(failRate == 0, 0, Q * (1 - q) * failRate / (failRate + dropoutRate)),
                    nbar = ifelse(failRate == 0, 0, G * d + (failRate * Q * enrollRate) / (failRate + dropoutRate) * (duration - (1 - q) / (failRate + dropoutRate))))

Step 8: output results

sf.startFail <- stepfun(startFail, c(0, startFail), right = FALSE)
df <- df %>% 
  transmute(t = endFail, failRate = failRate, Events = nbar, startFail = sf.startFail(startFail)) %>% 
  group_by(startFail) %>%
  summarize(failRate = first(failRate), Events = sum(Events)) %>%
  mutate(t = startFail) %>% 
  select("t", "failRate", "Events")

df %>% gt()