knitr::opts_chunk$set(fig.width = 5, fig.height = 4, fig.align = 'center')
library(gt) library(tibble) library(dplyr) library(testthat) #library(gsDesign2) devtools::load_all()
eEvents_df
eEvents_df()
computes expected number of events at a given analysis time by strata under the assumption of piecewise model:
The above piecewise exponential distribution allows a simple method to specify a distribution and enrollment pattern where the enrollment, failure and dropout rates changes over time.
Here the df
in eEvents_df()
is short for data frame, since its output is a data frame.
enrollRates <- tibble(duration = 10, rate = 10) failRates <- tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01) totalDuration <- 22 eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)
enrollRates <- tibble(duration = c(5, 5), rate = c(10, 20)) failRates <- tibble(duration = 100, failRate = log(2)/6, dropoutRate = .01) totalDuration <- 22 eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)
enrollRates <- tibble(duration = 10, rate = 10) failRates <- tibble(duration = c(20, 80), failRate = c(log(2)/6, log(2)/4), dropoutRate = .01) totalDuration <- 22 eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE)
enrollRates <- tibble(duration = 10, rate = 10) failRates <- tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01) totalDuration <- c(2, 22) try(eEvents_df(enrollRates = enrollRates, failRates = failRates, totalDuration = totalDuration, simple = FALSE))
eEvents_df()
Step 1: set the analysis time.
totalDuration <- 50
Step 2: set the enrollment rates.
enrollRates <- tibble(duration = c(5, 5), rate = c(10, 20)) # create a step function (sf) to define enrollment rates over time sf.enrollRate <- stepfun(c(0, cumsum(enrollRates$duration)), c(0, enrollRates$rate, 0), right = FALSE) plot(sf.enrollRate, xlab = "duration", ylab = "enrollment rates", main = "Piecewise enrollment rate over time", xlim = c(-0.01, 21))
Step 3: set the failure rates and dropout rates.
failRates <- tibble(duration = c(20, 80), failRate = c(0.1, 0.2), dropoutRate = .01) # get the time points where the failure rates change startFail <- c(0, cumsum(failRates$duration)) # plot the piecewise failure rates sf.failRate <- stepfun(startFail, c(0, failRates$failRate, last(failRates$failRate)), right = FALSE) plot(sf.failRate, xlab = "duration", ylab = "failure rates", main = "Piecewise failure rate over time", xlim = c(-0.01, 101)) # plot the piecewise dropout rate sf.dropoutRate <- stepfun(startFail, c(0, failRates$dropoutRate, last(failRates$dropoutRate)), right = FALSE) plot(sf.dropoutRate, xlab = "duration", ylab = "dropout rates", main = "Piecewise dropout rate over time", xlim = c(-0.01, 101))
Given the above piecewise enrollment rates, failure rates, dropout rates, the time line is divided into several parts:
plot(sf.enrollRate, xlab = "time", ylab = "enrollment rates", lty = 1, col = "red", pch = 1, main = "Piecewise enrollment, failure, dropout rate over time", xlim = c(-0.01, 101)) plot(sf.failRate, xlab = "time", ylab = "failure rates", lty = 2, col = "blue", pch = 2, xlim = c(-0.01, 101), add = TRUE) plot(sf.dropoutRate, xlab = "time", ylab = "dropout rates", lty = 3, col = "green", pch = 3, xlim = c(-0.01, 101), add = TRUE) legend(60, 20, c("enrollment rate", "failure rate", "dropout rate"), col = c("red", "blue", "green"), lty = c(1, 2, 3), pch = c(1, 2, 3), merge = TRUE, bg = "gray90")
Given the above sub-intervals, our objective is to calculate the expected number of events in each sub-intervals.
Step 4: divide the time line for enrollments
df_1 <- tibble(startEnroll = c(0, cumsum(enrollRates$duration)), endFail = totalDuration - startEnroll, rate = c(enrollRates$rate, 0))
df_1 %>% gt() %>% tab_header(title = "df_1") %>% tab_footnote(footnote = "The time when the enrollment starts.", locations = cells_column_labels("startEnroll")) %>% tab_footnote(footnote = "The time from startEnroll to the analysis time.", locations = cells_column_labels("endFail")) %>% tab_footnote(footnote = "The enrollment rates", locations = cells_column_labels("rate"))
Step 5: divide the time line for failure \& dropout rates
df_2 <- tibble(endFail = cumsum(failRates$duration), startEnroll = totalDuration - endFail, failRate = failRates$failRate, dropoutRate = failRates$dropoutRate)
df_2 %>% gt() %>% tab_header(title = "df_2") %>% tab_footnote(footnote = "The time when the failure changes.", locations = cells_column_labels("endFail")) %>% tab_footnote(footnote = "The time from endFail to the analysis time.", locations = cells_column_labels("startEnroll")) %>% tab_footnote(footnote = "The failure rates", locations = cells_column_labels("failRate")) %>% tab_footnote(footnote = "The dropout rates", locations = cells_column_labels("dropoutRate"))
For the above df_2
, one needs to discriminate if the analysis time (totalDuration = 50
) is beyond the total failure rate duration.
# if the analysis time is after the total failure rate duration if(sum(failRates$duration) < totalDuration){ df_2 <- df_2[-nrow(df_2), ] }else{ df_2 <- df_2 %>% filter(startEnroll > 0) }
df_2 %>% gt() %>% tab_header(title = "df_2", subtitle = "Updated by adjusting the analysis time and failRates duration") %>% tab_footnote(footnote = "The time when the failure changes.", locations = cells_column_labels("endFail")) %>% tab_footnote(footnote = "The time from endFail to the analysis time.", locations = cells_column_labels("startEnroll")) %>% tab_footnote(footnote = "The failure rates", locations = cells_column_labels("failRate")) %>% tab_footnote(footnote = "The dropout rates", locations = cells_column_labels("dropoutRate"))
Step 6: divide the time line considering both the change points in enrollment, failure, dropout rates.
df <- full_join(df_1, df_2, by = c("startEnroll", "endFail")) %>% arrange(endFail)
df %>% gt() %>% tab_header(title = "df") %>% tab_footnote(footnote = "The time when the enrollment rate starts.", locations = cells_column_labels("startEnroll")) %>% tab_footnote(footnote = "The time when the failure rate ends. And startEnroll + endFail = 50", locations = cells_column_labels("endFail")) %>% tab_footnote(footnote = "The enrollment rates.", locations = cells_column_labels("rate"))
We find there are lots of NA
, which can be imputed by the piecewise model.
df <- df %>% mutate(endEnroll = lag(startEnroll, default = as.numeric(totalDuration)), startFail = lag(endFail, default = 0), duration = endEnroll - startEnroll, failRate = sf.failRate(startFail), dropoutRate = sf.dropoutRate(startFail), enrollRate = sf.enrollRate(startEnroll)) %>% select(-rate)
df %>% select(startEnroll, endEnroll, startFail, endFail, enrollRate, failRate, dropoutRate, duration) %>% arrange(startEnroll) %>% gt() %>% tab_footnote(footnote = "The time when the enrollment rate starts.", locations = cells_column_labels("startEnroll")) %>% tab_footnote(footnote = "The (startEnroll, endEnroll] forms the piecewise model of the enrollment rates", locations = cells_column_labels("endEnroll")) %>% tab_footnote(footnote = "The time when the failure rate starts.", locations = cells_column_labels("startFail")) %>% tab_footnote(footnote = "The time when the failure rate ends. And startEnroll + endFail = 50. Besides, (startFail, endFail ] forms the piecewise model of the enrollment rates.", locations = cells_column_labels("endFail")) %>% tab_footnote(footnote = "endEnroll - startEnroll", locations = cells_column_labels("duration"))
Step 7: compute the expected number of events in sub-intervals following the technical details in the vignette ``computing expected events by interval at risk''
# create 2 auxiliary variable for failure & dropout rate # q: number of expected events in a sub-interval # Q: cumulative product of q (pool all sub-intervals) df <- df %>% mutate(q = exp(-duration * (failRate + dropoutRate)), Q = lag(cumprod(q), default = 1)) %>% arrange(desc(startFail)) %>% # create another 2 auxiliary variable for enroll rate # g: number of expected subjects in a sub-interval # G: cumulative sum of g (pool all sub-intervals) mutate(g = enrollRate * duration, G = lag(cumsum(g), default = 0)) %>% arrange(startFail) %>% # compute expected events as nbar in a sub-interval mutate(d = ifelse(failRate == 0, 0, Q * (1 - q) * failRate / (failRate + dropoutRate)), nbar = ifelse(failRate == 0, 0, G * d + (failRate * Q * enrollRate) / (failRate + dropoutRate) * (duration - (1 - q) / (failRate + dropoutRate))))
Step 8: output results
sf.startFail <- stepfun(startFail, c(0, startFail), right = FALSE) df <- df %>% transmute(t = endFail, failRate = failRate, Events = nbar, startFail = sf.startFail(startFail)) %>% group_by(startFail) %>% summarize(failRate = first(failRate), Events = sum(Events)) %>% mutate(t = startFail) %>% select("t", "failRate", "Events") df %>% gt()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.