# Copyright (c) 2022 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved.
#
# This file is part of the simtrial program.
#
# simtrial is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' @import dplyr
#' @importFrom tibble tibble
#' @import survival
#' @import foreach
NULL
#' Simulation of fixed sample size design for time-to-event endpoint
#'
#' `sim_fixed_n()` provide simulations of a single endpoint two-arm trial
#' where the enrollment, hazard ratio, and failure and dropout rates change over time.
#' @param nsim Number of simulations to perform.
#' @param sampleSize Total sample size per simulation.
#' @param target_event Targeted event count for analysis.
#' @param strata A tibble with strata specified in `Stratum`, probability (incidence) of each stratum in `p`.
#' @param enroll_rate Piecewise constant enrollment rates by time period.
#' Note that these are overall population enrollment rates and the `strata` argument controls the
#' random distribution between strata.
#' @param fail_rate Piecewise constant control group failure rates, hazard ratio for experimental vs control,
#' and dropout rates by stratum and time period.
#' @param totalDuration Total follow-up from start of enrollment to data cutoff.
#' @param block As in `simtrial::sim_pw_surv()`. Vector of treatments to be included in each block.
#' @param timingType A numeric vector determining data cutoffs used; see details.
#' Default is to include all available cutoff methods.
#' @param rg As in `simtrial::tenFHCorr()`.
#' A \code{tibble} with variables \code{rho} and \code{gamma}, both greater than equal
#' to zero, to specify one Fleming-Harrington weighted logrank test per row.
#' @param seed Optional. Initial seed for simulations
#'
#' @details \code{timingType} has up to 5 elements indicating different options for data cutoff.
#' 1 uses the planned study duration, 2 the time the targeted event count is achieved,
#' 3 the planned minimum follow-up after enrollment is complete,
#' 4 the maximum of planned study duration and targeted event count cuts (1 and 2),
#' 5 the maximum of targeted event count and minimum follow-up cuts (2 and 3).
#'
#' @return A \code{tibble} including columns \code{Events} (event count), \code{lnhr} (log-hazard ratio),
#' \code{Z} (normal test statistic; < 0 favors experimental) cut (text describing cutoff used),
#' \code{Duration} (duration of trial at cutoff for analysis) and \code{sim} (sequential simulation id).
#' One row per simulated dataset per cutoff specified in \code{timingType}, per test statistic specified.
#' If multiple Fleming-Harrington tests are specified in \code{rg}, then columns {rho,gamma}
#' are also included.
#'
#' @examples
#' library(tidyr)
#' library(dplyr)
#' library(doParallel)
#' library(tibble)
#'
#' # example 1
#' # Show output structure
#' sim_fixed_n(nsim = 3)
#'
#' # example 2
#' # Example with 2 tests: logrank and FH(0,1)
#' sim_fixed_n(nsim = 1,rg = tibble(rho = 0, gamma = c(0, 1)))
#'
#' # example 3
#' # Power by test
#' # Only use cuts for events, events + min follow-up
#' xx <- sim_fixed_n(nsim = 100,
#' timingType = c(2, 5),
#' rg = tibble(rho = 0, gamma = c(0, 1)))
#' # Get power approximation for FH, data cutoff combination
#' xx %>%
#' group_by(cut, rho, gamma) %>%
#' summarise(mean(Z <= qnorm(.025)))
#'
#' # MaxCombo power estimate for cutoff at max of targeted events, minimum follow-up
#' p <- xx %>%
#' filter(cut != "Targeted events") %>%
#' group_by(Sim) %>%
#' group_map(pvalue_maxcombo) %>%
#' unlist()
#'
#' mean(p < .025)
#'
#' # MaxCombo estimate for targeted events cutoff
#' p <- xx %>%
#' filter(cut == "Targeted events") %>%
#' group_by(Sim) %>%
#' group_map(pvalue_maxcombo) %>%
#' unlist()
#'
#' mean(p < .025)
#'
#' # example 3
#' # Use two cores
#' registerDoParallel(2)
#' sim_fixed_n(nsim = 10, seed = 2022)
#' stopImplicitCluster()
#' registerDoSEQ()
#'
#' @export
#'
sim_fixed_n <- function(nsim = 1000,
sampleSize = 500, # sample size
target_event = 350, # targeted total event count
# multinomial probability distribution for strata enrollment
strata = tibble(Stratum = "All", p = 1),
# enrollment rates as in AHR()
enroll_rate = tibble(duration = c(2, 2, 10), rate = c(3, 6, 9)),
# failure rates as in AHR()
fail_rate = tibble(Stratum = "All",
duration = c(3, 100),
fail_rate = log(2) / c(9, 18),
hr = c(.9, .6),
dropoutRate = rep(.001, 2)),
# total planned trial duration; single value
totalDuration = 30,
# Fixed block randomization specification
block = rep(c("Experimental", "Control"), 2),
# select desired cutoffs for analysis (default is all types)
timingType = 1:5,
# default is to to logrank testing, but one or more Fleming-Harrington tests
# can be specified
rg = tibble(rho = 0, gamma = 0),
seed = NULL
){
# check input values
# check input enrollment rate assumptions
if(!("duration" %in% names(enroll_rate)) ){
stop("sim_fixed_n: enrollRates column names in `sim_fixed_n()` must contain duration!")
}
if(!("rate" %in% names(enroll_rate))){
stop("sim_fixed_n: enrollRates column names in `sim_fixed_n()` must contain rate!")
}
# check input failure rate assumptions
if(!("Stratum" %in% names(fail_rate))){
stop("sim_fixed_n: fail_rate column names in `sim_fixed_n()` must contain Stratum!")
}
if(!("duration" %in% names(fail_rate))){
stop("sim_fixed_n: fail_rate column names in `sim_fixed_n()` must contain duration!")
}
if(!("fail_rate" %in% names(fail_rate))){
stop("sim_fixed_n: fail_rate column names in `sim_fixed_n()` must contain fail_rate!")
}
if(!("hr" %in% names(fail_rate))){
stop("sim_fixed_n: fail_rate column names in `sim_fixed_n()` must contain hr")
}
if(!("dropoutRate" %in% names(fail_rate))){
stop("sim_fixed_n: fail_rate column names in `sim_fixed_n()` must contain dropoutRate")
}
# check input trial duration
if(!is.numeric(totalDuration)){
stop("sim_fixed_n: totalDuration in `sim_fixed_n()` must be a single positive number!")
}
if(!is.vector(totalDuration)){
stop("sim_fixed_n: totalDuration in `sim_fixed_n()` must be a single positive number!")
}
if(length(totalDuration) != 1){
stop("sim_fixed_n: totalDuration in `sim_fixed_n()` must be a single positive number!")
}
if(!min(totalDuration) > 0){
stop("sim_fixed_n: totalDuration in `sim_fixed_n()` must be a single positive number!")
}
# check stratum
strata2 <- names(table(fail_rate$Stratum))
if(nrow(strata) != length(strata2)){
stop("sim_fixed_n: Stratum in `sim_fixed_n()` must be the same in strata and fail_rate!")
}
if(any(is.na(match(strata$Stratum, strata2))) | any(is.na(match(strata2, strata$Stratum)))){
stop("sim_fixed_n: Stratum in `sim_fixed_n()` must be the same in strata and fail_rate!")
}
# check nsim
if(nsim <= 0){
stop("sim_fixed_n: nsim in `sim_fixed_n()` must be positive integer!")
}
if(length(nsim) != 1){
stop("sim_fixed_n: nsim in `sim_fixed_n()` must be positive integer!")
}
if(nsim != ceiling(nsim)){
stop("sim_fixed_n: nsim in `sim_fixed_n()` must be positive integer!")
}
# check target_event
if(target_event <= 0){
stop("sim_fixed_n: target_event in `sim_fixed_n()` must be positive!")
}
if(length(target_event) != 1){
stop(("sim_fixed_n: target_event in `sim_fixed_n()` must be positive!"))
}
# check sampleSize
if(sampleSize <= 0){
stop("sim_fixed_n: sampleSize in `sim_fixed_n()` must be positive!")
}
if(length(sampleSize) != 1){
stop("sim_fixed_n: sampleSize in `sim_fixed_n()` must be positive")
}
# check seed
if(is.null(seed)) {
setSeed <- FALSE
} else {
if (!is.numeric(seed)){stop("sim_fixed_n: seed in `sim_fixed_n()` must be a number")}
setSeed <- TRUE
}
n_stratum <- nrow(strata)
# build a function to calculate Z and log-hr
doAnalysis <- function(d, rg, n_stratum){
if (nrow(rg) == 1){
Z <- tibble(Z = (d %>% counting_process(arm = "Experimental") %>% wlr(rg = rg))$Z)
} else{
Z <- d %>% counting_process(arm = "Experimental") %>% tenFHcorr(rg = rg, corr = TRUE)
}
ans <- tibble(
Events = sum(d$event),
lnhr = ifelse(n_stratum > 1,
survival::coxph(survival::Surv(tte, event) ~ (Treatment == "Experimental") + survival::strata(Stratum), data = d)$coefficients,
survival::coxph(survival::Surv(tte, event) ~ (Treatment == "Experimental"), data = d)$coefficients
) %>% as.numeric()
)
ans <- cbind(ans, Z)
return(ans)
}
# compute minimum planned follow-up time
minFollow <- max(0, totalDuration - sum(enroll_rate$duration))
# put failure rates into sim_pw_surv format
temp <- simfix2simPWSurv(fail_rate)
fr <- temp$fail_rate
dr <- temp$dropoutRates
results <- NULL
# parallel
`%op%` <- get_operator()
results <- foreach::foreach(
i = seq_len(nsim),
.combine = "rbind",
.errorhandling = "pass"
) %op% {
# set random seed
if (setSeed){
set.seed(seed + i - 1)
}
# generate piecewise data
sim <- sim_pw_surv(n = sampleSize,
strata = strata,
enroll_rate = enroll_rate,
fail_rate = fr,
dropoutRates = dr,
block = block)
# study date that targeted event rate achieved
tedate <- sim %>% get_cut_date_by_event(target_event)
# study data that targeted minimum follow-up achieved
tmfdate <- max(sim$enroll_time) + minFollow
# Compute tests for all specified cutoff options
r1 <- NULL
r2 <- NULL
r3 <- NULL
tests <- rep(FALSE, 3)
## Total planned trial duration or max of this and targeted events
if (1 %in% timingType){
tests[1] <- TRUE # planned duration cutoff
}
if (2 %in% timingType){
tests[2] <- TRUE # targeted events cutoff
}
if (3 %in% timingType){
tests[3] <- TRUE # minimum follow-up duration target
}
if (4 %in% timingType){ # max of planned duration, targeted events
if (tedate > totalDuration){
tests[2] <- TRUE
}else{
tests[1] <- TRUE
}
}
if (5 %in% timingType){ # max of minimum follow-up, targeted events
if (tedate > tmfdate){
tests[2] <- TRUE
}else{
tests[3] <- TRUE
}
}
# Total duration cutoff
if (tests[1]){
d <- sim %>% cut_data_by_date(totalDuration)
r1 <- d %>% doAnalysis(rg, n_stratum)
}
# targeted events cutoff
if (tests[2]){
d <- sim %>% cut_data_by_date(tedate)
r2 <- d %>% doAnalysis(rg, n_stratum)
}
# minimum follow-up cutoff
if (tests[3]){
d <- sim %>% cut_data_by_date(tmfdate)
r3 <- d %>% doAnalysis(rg, n_stratum)
}
addit <- NULL
# planned duration cutoff
if (1 %in% timingType){
addit <- rbind(addit,
r1 %>% mutate(cut = "Planned duration",
Duration = totalDuration))
}
# targeted events cutoff
if (2 %in% timingType){
addit <- rbind(addit,
r2 %>% mutate(cut = "Targeted events",
Duration = tedate))
}
# minimum follow-up duration target
if (3 %in% timingType){
addit <- rbind(addit,
r3 %>% mutate(cut = "Minimum follow-up",
Duration = tmfdate))
}
# max of planned duration, targeted events
if (4 %in% timingType){
if (tedate > totalDuration){
addit <- rbind(addit,
r2 %>% mutate(cut = "Max(planned duration, event cut)",
Duration = tedate))
}else{
addit <- rbind(addit,
r1 %>% mutate(cut = "Max(planned duration, event cut)",
Duration = totalDuration))
}
}
# max of minimum follow-up, targeted events
if (5 %in% timingType){
if (tedate > tmfdate){
addit <- rbind(addit,
r2 %>% mutate(cut = "Max(min follow-up, event cut)",
Duration = tedate))
}else{
addit <- rbind(addit,
r3 %>% mutate(cut = "Max(min follow-up, event cut)",
Duration = tmfdate))
}
}
addit %>% mutate(Sim = i)
}
return(results)
}
# Get operator (parallel or serial)
get_operator <- function() {
is_par <- foreach::getDoParWorkers() > 1
if (is_par) {
message("Using ", foreach::getDoParWorkers(), " cores with backend ", foreach::getDoParName())
res <- foreach::`%dopar%`
} else {
res <- foreach::`%do%`
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.