simtrial: Clinical Trial Simulation

Documented in sim_pw_surv

#  Copyright (c) 2022 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved.
#
#  This file is part of the simtrial program.
#
#  simtrial is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' @importFrom dplyr group_by mutate %>%
#' @importFrom tibble tibble
NULL

#' Simulate a stratified time-to-event outcome randomized trial
#'
#' \code{sim_pw_surv} enables simulation of a clinical trial with essentially arbitrary
#' patterns of enrollment, failure rates and censoring.
#' The piecewise exponential distribution allows a simple method to specify a distribtuion
#' and enrollment pattern
#' where the enrollment, failure and dropout rate changes over time.
#' While the main purpose may be to generate a trial that can be analyzed at a single point in time or
#' using group sequential methods, the routine can also be used to simulate an adaptive trial design.
#' Enrollment, failure and dropout rates are specified by treatment group, stratum and time period.
#' Fixed block randomization is used; blocks must include treatments provided in failure and dropout
#' specification.
#' Default arguments are set up to allow very simple implementation of a non-proportional hazards assumption
#' for an unstratified design.
#'
#' @param n Number of observations.
#' If length(n) > 1, the length is taken to be the number required.
#' @param strata A tibble with strata specified in `Stratum`, probability (incidence) of each stratum
#' in `p`
#' @param block Vector of treatments to be included in each  block
#' @param enroll_rate Enrollment rates; see details and examples
#' @param fail_rate Failure rates; see details and examples; note that treatments need
#' to be the same as input in block
#' @param dropoutRates Dropout rates; see details and examples; note that treatments need
#' to be the same as input in block
#'
#' @return a \code{tibble} with the following variables for each observation
#' \code{Stratum},
#' \code{enroll_time} (enrollment time for the observation),
#' \code{Treatment} (treatment group; this will be one of the values in the input \code{block}),
#' \code{fail_time} (failure time generated using \code{rpwexp()}),
#' \code{dropoutTime} (dropout time generated using \code{rpwexp()}),
#' \code{cte} (calendar time of enrollment plot the minimum of failure time and dropout time),
#' \code{fail} (indicator that \code{cte} was set using failure time; i.e., 1 is a failure, 0 is a dropout).
#'
#' @examples
#' library(dplyr)
#' library(tibble)
#'
#' # example 1
#' sim_pw_surv(n = 20)
#'
#' # example 2
#' # 3:1 randomization
#' sim_pw_surv(n = 20,
#'           block = c(rep("Experimental",3), "Control"))
#'
#' # example 3
#' # Simulate 2 strata; will use defaults for blocking and enrollRates
#' sim_pw_surv(n = 20,
#'           # 2 strata,30% and 70% prevalence
#'           strata = tibble(Stratum = c("Low","High"), p = c(.3, .7)),
#'           fail_rate = tibble(Stratum = c(rep("Low", 4), rep("High", 4)),
#'                              period = rep(1:2, 4),
#'                              Treatment = rep(c(rep("Control", 2),
#'                                          rep("Experimental", 2)), 2),
#'                              duration = rep(c(3,1), 4),
#'                              rate = c(.03, .05, .03, .03, .05, .08, .07, .04)),
#'           dropoutRates = tibble(Stratum = c(rep("Low", 2), rep("High", 2)),
#'                                 period = rep(1, 4),
#'                                 Treatment = rep(c("Control", "Experimental"), 2),
#'                                 duration = rep(1, 4),
#'                                 rate = rep(.001, 4)))
#' # example 4
#' # If you want a more rectangular entry for a tibble
#' fail_rate <- bind_rows(
#'   tibble(Stratum = "Low" , period = 1, Treatment = "Control"     , duration = 3, rate = .03),
#'   tibble(Stratum = "Low" , period = 1, Treatment = "Experimental", duration = 3, rate = .03),
#'   tibble(Stratum = "Low" , period = 2, Treatment = "Experimental", duration = 3, rate = .02),
#'   tibble(Stratum = "High", period = 1, Treatment = "Control"     , duration = 3, rate = .05),
#'   tibble(Stratum = "High", period = 1, Treatment = "Experimental", duration = 3, rate = .06),
#'   tibble(Stratum = "High", period = 2, Treatment = "Experimental", duration = 3, rate = .03))
#'
#' dropoutRates <- bind_rows(
#'   tibble(Stratum = "Low" , period=1, Treatment = "Control"     , duration = 3, rate = .001),
#'   tibble(Stratum = "Low" , period=1, Treatment = "Experimental", duration = 3, rate = .001),
#'   tibble(Stratum = "High", period=1, Treatment = "Control"     , duration = 3, rate = .001),
#'   tibble(Stratum = "High", period=1, Treatment = "Experimental", duration = 3, rate = .001))
#'
#'sim_pw_surv(n = 12,
#'          strata = tibble(Stratum = c("Low","High"), p = c(.3, .7)),
#'          fail_rate = fail_rate,
#'          dropoutRates = dropoutRates)
#' @export
sim_pw_surv <- function(
    n = 100,
    strata = tibble(Stratum = "All", p = 1),
    block = c(rep("Control", 2), rep("Experimental", 2)),
    enroll_rate = tibble(rate = 9, duration = 1),
    fail_rate = tibble(Stratum = rep("All", 4),
                       period = rep(1:2,2),
                       Treatment = c(rep("Control", 2), rep("Experimental", 2)),
                       duration = rep(c(3, 1), 2),
                       rate = log(2) / c(9, 9, 9, 18)),
    dropoutRates = tibble(Stratum = rep("All", 2),
                          period = rep(1, 2),
                          Treatment = c("Control", "Experimental"),
                          duration = rep(100, 2),
                          rate = rep(.001, 2))){
  # start tibble by generating strata and enrollment times

  x <- tibble(Stratum = sample(x = strata$Stratum,
                               size = n,
                               replace = TRUE,
                               prob = strata$p)) %>%
    mutate(enroll_time = rpw_enroll(n, enroll_rate)) %>%
    group_by(Stratum) %>%
    # assign treatment
    mutate(Treatment = randomize_by_fixed_block(n = n(), block = block)) %>%
    # generate time to failure and time to dropout
    group_by(Stratum, Treatment)

    unique_stratum <- unique(x$Stratum)
    unique_treatment <- unique(x$Treatment)
    x$fail_time <- 0
    x$dropoutTime <- 0

    for(sr in unique_stratum){
      for(tr in unique_treatment){
      indx <- x$Stratum ==sr & x$Treatment == tr
      x$fail_time[indx] <- rpwexpinvRcpp(n = sum(indx),
                                        fail_rate = fail_rate[fail_rate$Stratum == sr & fail_rate$Treatment == tr, , drop = FALSE])
      x$dropoutTime[indx] <- rpwexpinvRcpp(n = sum(indx),
                                           fail_rate = dropoutRates[dropoutRates$Stratum == sr & dropoutRates$Treatment == tr, ,drop = FALSE])
      }
    }

    # set calendar time-to-event and failure indicator
    ans <- x %>%
      mutate(cte = pmin(dropoutTime, fail_time) + enroll_time,
             fail = (fail_time <= dropoutTime) * 1)
    return(ans)
}