tests/testthat/helper-old_version-eEvents_df_.R

#  Copyright (c) 2022 Merck Sharp & Dohme Corp., a subsidiary of
#  Merck & Co., Inc., Rahway, NJ, USA.
#
#  This file is part of the gsDesign2 program.
#
#  gsDesign2 is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' @importFrom dplyr filter select full_join mutate transmute group_by ungroup summarize arrange desc lag last lead "%>%"
#' @importFrom tibble tibble
#' @importFrom stats stepfun
NULL

#' Expected events observed under piecewise exponential model
#'
#' \code{eEvents_df} computes expected events over time and by strata
#' under the assumption of piecewise constant enrollment rates and piecewise
#' exponential failure and censoring rates.
#' The piecewise exponential distribution allows a simple method to specify a distribution
#' and enrollment pattern
#' where the enrollment, failure and dropout rates changes over time.
#' While the main purpose may be to generate a trial that can be analyzed at a single point in time or
#' using group sequential methods, the routine can also be used to simulate an adaptive trial design.
#' The intent is to enable sample size calculations under non-proportional hazards assumptions
#' for stratified populations.
#'
#' @param enrollRates Enrollment rates; see details and examples
#' @param failRates Failure rates and dropout rates by period
#' @param totalDuration Total follow-up from start of enrollment to data cutoff
#' @param simple If default (TRUE), return numeric expected number of events, otherwise
#' a \code{tibble} as described below.
#' @section Specification:
#' \if{latex}{
#'  \itemize{
#'    \item Validate if input enrollment rate contains total duration column.
#'    \item Validate if input enrollment rate contains rate column.
#'    \item Validate if input failure rate contains duration column.
#'    \item Validate if input failure rate contains failure rate column.
#'    \item Validate if input failure rate contains dropout rate column.
#'    \item Validate if input trial total follow-up (total duration) is a non-empty vector of positive integers.
#'    \item Validate if input simple is logical.
#'    \item Define a tibble with the start opening for enrollment at zero and cumulative duration.
#'    Add the event (or failure) time corresponding to the start of the enrollment. Finally, add the enrollment rate to the tibble
#'    corresponding to the start and end (failure) time. This will be recursively used to calculate the expected
#'    number of events later. For details, see vignette/eEventsTheory.Rmd
#'    \item Define a tibble including the cumulative duration of failure rates, the corresponding start time of
#'    the enrollment, failure rate and dropout rates.  For details, see vignette/eEventsTheory.Rmd
#'    \item Only consider the failure rates in the interval of the end failure rate and total duration.
#'    \item Compute the failure rates over time using \code{stepfun} which is used
#'     to group rows by periods defined by failRates.
#'    \item Compute the dropout rate over time using \code{stepfun}.
#'    \item Compute the enrollment rate over time using \code{stepfun}. Details are
#'    available in vignette/eEventsTheory.Rmd.
#'    \item Compute expected events by interval at risk using the notations and descriptions in
#'    vignette/eEventsTheory.Rmd.
#'    \item Return \code{eEvents_df}
#'  }
#' }
#' @return
#' The default when \code{simple=TRUE} is to return the total expected number of events as a real number.
#' Otherwise, when \code{simple=FALSE} a \code{tibble} is returned with the following variables for each period specified in 'failRates':
#' \code{t} start of period,
#' \code{failRate} failure rate during the period
#' \code{Events} expected events during the period,
#'
#' The records in the returned \code{tibble} correspond to the input \code{tibble} \code{failRates}.
#' @details
#' More periods will generally be supplied in output than those that are input.
#' The intent is to enable expected event calculations in a tidy format to
#' maximize flexibility for a variety of purposes.
#' @examples
#' library(tibble)
#' library(gsDesign2)
#'
#' # Default arguments, simple output (total event count only)
#' gsDesign2:::eEvents_df_()
#' # Event count by time period
#' gsDesign2:::eEvents_df_(simple = FALSE)
#' # Early cutoff
#' gsDesign2:::eEvents_df_(totalDuration = .5)
#' # Single time period example
#' gsDesign2:::eEvents_df_(
#'   enrollRates = tibble::tibble(duration = 10, rate = 10),
#'   failRates = tibble::tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01),
#'   totalDuration = 22,
#'   simple = FALSE
#' )
#' # Single time period example, multiple enrolment periods
#' gsDesign2:::eEvents_df_(
#'   enrollRates = tibble::tibble(duration = c(5, 5), rate = c(10, 20)),
#'   failRates = tibble::tibble(duration = 100, failRate = log(2) / 6, dropoutRate = .01),
#'   totalDuration = 22,
#'   simple = FALSE
#' )
#'
#' @noRd
#'
eEvents_df_ <- function(enrollRates = tibble::tibble(
                          duration = c(2, 2, 10),
                          rate = c(3, 6, 9)
                        ),
                        failRates = tibble::tibble(
                          duration = c(3, 100),
                          failRate = log(2) / c(9, 18),
                          dropoutRate = rep(.001, 2)
                        ),
                        totalDuration = 25,
                        simple = TRUE) {
  # check input values
  # check input enrollment rate assumptions
  if (max(names(enrollRates) == "duration") != 1) {
    stop("gsDesign2: enrollRates column names in `eEvents()` must contain duration")
  }
  if (max(names(enrollRates) == "rate") != 1) {
    stop("gsDesign2: enrollRates column names in `eEvents()` must contain rate")
  }

  # check input failure rate assumptions
  if (max(names(failRates) == "duration") != 1) {
    stop("gsDesign2: failRates column names in `eEvents()` must contain duration")
  }
  if (max(names(failRates) == "failRate") != 1) {
    stop("gsDesign2: failRates column names in `eEvents()` must contain failRate")
  }
  if (max(names(failRates) == "dropoutRate") != 1) {
    stop("gsDesign2: failRates column names in `eEvents()` must contain dropoutRate")
  }

  # check input trial durations
  if (!is.numeric(totalDuration)) {
    stop("gsDesign2: totalDuration in `eEvents()` must be a non-empty vector of positive numbers")
  }
  if (!is.vector(totalDuration) > 0) {
    stop("gsDesign2: totalDuration in `eEvents()` must be a non-empty vector of positive numbers")
  }
  if (!min(totalDuration) > 0) {
    stop("gsDesign2: totalDuration in `eEvents()` must be greater than zero")
  }

  # check input simple is logical
  if (!is.logical(simple)) {
    stop("gsDesign2: simple in `eEvents()` must be logical")
  }

  df_1 <- tibble::tibble(
    startEnroll = c(0, cumsum(enrollRates$duration)),
    endFail = totalDuration - startEnroll,
    rate = c(enrollRates$rate, 0)
  )
  df_1 <- df_1[df_1$endFail > 0, ]

  df_2 <- tibble::tibble(
    endFail = cumsum(failRates$duration),
    startEnroll = totalDuration - endFail,
    failRate = failRates$failRate,
    dropoutRate = failRates$dropoutRate
  )
  df_2 <- if (dplyr::last(cumsum(failRates$duration)) < totalDuration) df_2[-nrow(df_2), ] else df_2[df_2$startEnroll > 0, ] # we will use start of failure rate periods repeatedly below
  startFail <- c(0, cumsum(failRates$duration))
  # Step function to define failure rates over time
  sf.failRate <- stepfun(startFail,
    c(0, failRates$failRate, dplyr::last(failRates$failRate)),
    right = FALSE
  )
  # Step function to define dropout rates over time
  sf.dropoutRate <- stepfun(startFail,
    c(
      0, failRates$dropoutRate,
      dplyr::last(failRates$dropoutRate)
    ),
    right = FALSE
  )
  # sf.startFail is used later to group rows by periods defined by failRates
  # # If only a single failure rate period, always 0
  # if(nrow(failRates)==1){x <- 0
  #                        y <- c(0,0)}else{
  #   # if more than 1 failure rate period
  #   x <- startFail
  #   y <- c(0,startFail)
  # }
  sf.startFail <- stepfun(startFail, c(0, startFail), right = FALSE)

  # Step function to define enrollment rates over time
  sf.enrollRate <- stepfun(c(0, cumsum(enrollRates$duration)),
    c(0, enrollRates$rate, 0),
    right = FALSE
  )
  # Put everything together as laid out in vignette
  # "Computing expected events by interval at risk"
  df_join <- dplyr::full_join(df_1, df_2, by = c("startEnroll", "endFail")) %>%
    dplyr::arrange(endFail) %>%
    dplyr::mutate(
      endEnroll = dplyr::lag(startEnroll, default = as.numeric(totalDuration)),
      startFail = dplyr::lag(endFail, default = 0),
      duration = endEnroll - startEnroll,
      failRate = sf.failRate(startFail),
      dropoutRate = sf.dropoutRate(startFail),
      enrollRate = sf.enrollRate(startEnroll),
      q = exp(-duration * (failRate + dropoutRate)),
      Q = dplyr::lag(cumprod(q), default = 1)
    ) %>%
    dplyr::arrange(dplyr::desc(startFail)) %>%
    dplyr::mutate(
      g = enrollRate * duration,
      G = dplyr::lag(cumsum(g), default = 0)
    ) %>%
    dplyr::arrange(startFail) %>%
    dplyr::mutate(
      d = ifelse(failRate == 0, 0, Q * (1 - q) * failRate / (failRate + dropoutRate)),
      nbar = ifelse(failRate == 0, 0,
        G * d + (failRate * Q * enrollRate) / (failRate + dropoutRate) * (duration - (1 - q) / (failRate + dropoutRate))
      )
    )
  if (simple) {
    return(as.numeric(sum(df_join$nbar)))
  }
  df_join %>%
    dplyr::transmute(
      t = endFail, failRate = failRate, Events = nbar,
      startFail = sf.startFail(startFail)
    ) %>%
    dplyr::group_by(startFail) %>%
    dplyr::summarize(failRate = dplyr::first(failRate), Events = sum(Events)) %>%
    dplyr::mutate(t = startFail) %>%
    dplyr::select("t", "failRate", "Events")
}

Try the gsDesign2 package in your browser

Any scripts or data that you put into this service are public.

gsDesign2 documentation built on April 3, 2025, 9:39 p.m.