R/postMonitoring.R

Defines functions decisionTimes crossBoundCumProb VEpowerPP buildBounds headTestVE testVE rankTrial censTrial

Documented in censTrial crossBoundCumProb decisionTimes rankTrial VEpowerPP

# Functions to post-process data after running monitorTrial.
# Included Functions:
#   censTrial, rankTrial, testVE, headTestVE, buildBounds, VEpowerPP, crossBoundCumProb


#' Generation of Pre-Unblinded Follow-Up Data-Sets by Applying the Monitoring Outcomes
#' 
#' \code{censTrial} `correctly censors' treatment arms in data-sets generated by \code{simTrial} by including pre-unblinded follow-up data only according to the monitoring conclusions as reported by \code{monitorTrial}.
#' 
#' @param dataFile if \code{saveDir = NULL}, a list returned by \code{simTrial}; otherwise a name (character string) of an \code{.RData} file created by \code{simTrial}
#' @param monitorFile if \code{saveDir = NULL}, a list returned by \code{monitorTrial}; otherwise a name (character string) of an \code{.RData} file created by \code{monitorTrial}
#' @param stage1 the final week of stage 1 in a two-stage trial
#' @param stage2 the final week of stage 2 in a two-stage trial, i.e., the maximum follow-up time 
#' @param saveFile a character string specifying the name of the output \code{.RData} file. If \code{NULL} (default), a default file name will be used.
#' @param saveDir a character string specifying a path for both \code{dataFile} and \code{monitorFile}. If supplied, the output is also saved as an \code{.RData} file in this directory; otherwise the output is returned as a list.
#' @param verbose a logical value indicating whether information on the output directory and file name should be printed out (default is \code{TRUE})
#' 
#' @details All time variables use week as the unit of time. Month is defined as 52/12 weeks.
#' 
#' The following censoring rules are applied to each data-set generated by \code{simTrial}:
#' \itemize{
#'   \item If no vaccine arm registers efficacy or high efficacy in Stage 1, the placebo arm is censored on the date when the last vaccine arm hits the harm or non-efficacy boundary.
#'   \item If a vaccine arm hits the harm boundary, censor the arm immediately.
#'   \item If a vaccine arm hits the non-efficacy boundary, censor the arm on the earliest date of the two events: (1) the last vaccine arm hits the harm or non-efficacy boundary (if applicable); and (2) all subjects in the vaccine arm have completed the final \code{stage1} visit.
#' }
#' 
#' @return If \code{saveDir} is specified, the output list (named \code{trialListCensor}) is saved as an \code{.RData} file in \code{saveDir} (the path to \code{saveDir} is printed); otherwise it is returned. 
#' The output object is a list of length equal to the number of simulated trials, each of which is a \code{data.frame} with at least the variables \code{trt}, \code{entry}, \code{exit}, and \code{event} 
#' storing the treatment assignments, enrollment times, correctly censored study exit times, and event indicators, respectively. If available, indicators belonging to the per-protocol cohort 
#' (named \code{pp1}, \code{pp2}, etc.) are copied from the uncensored data-sets.
#' 
#' @examples
#' simData <- simTrial(N=c(1000, rep(700, 2)), aveVE=seq(0, 0.4, by=0.2), 
#'                     VEmodel="half", vePeriods=c(1, 27, 79), enrollPeriod=78, 
#'                     enrollPartial=13, enrollPartialRelRate=0.5, dropoutRate=0.05, 
#'                     infecRate=0.04, fuTime=156, 
#'                     visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)),
#'                     missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=5, 
#'                     stage1=78, randomSeed=300)
#' 
#' monitorData <- monitorTrial(dataFile=simData, stage1=78, stage2=156, 
#'                             harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#'                             nonEffStartMethod="FKG", nonEffInterval=20, 
#'                             lowerVEnoneff=0, upperVEnoneff=0.4, highVE=0.7, 
#'                             stage1VE=0, lowerVEuncPower=0, alphaNoneff=0.05, 
#'                             alphaHigh=0.05, alphaStage1=0.05, 
#'                             alphaUncPower=0.05, estimand="cuminc", lagTime=26)
#'
#' censData <- censTrial(dataFile=simData, monitorFile=monitorData, stage1=78, stage2=156)
#' 
#' ### alternatively, to save the .RData output file (no '<-' needed):
#' ###
#' ### simTrial(N=c(1400, rep(1000, 2)), aveVE=seq(0, 0.4, by=0.2), VEmodel="half", 
#' ###          vePeriods=c(1, 27, 79), enrollPeriod=78, enrollPartial=13, 
#' ###          enrollPartialRelRate=0.5, dropoutRate=0.05, infecRate=0.04, fuTime=156, 
#' ###          visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)), 
#' ###          missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=30, 
#' ###          stage1=78, saveDir="./", randomSeed=300)
#' ###
#' ### monitorTrial(dataFile=
#' ###              "simTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04.RData", 
#' ###              stage1=78, stage2=156, harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#' ###              nonEffStartMethod="FKG", nonEffInterval=20, lowerVEnoneff=0, 
#' ###              upperVEnoneff=0.4, highVE=0.7, stage1VE=0, lowerVEuncPower=0, 
#' ###              alphaNoneff=0.05, alphaHigh=0.05, alphaStage1=0.05, alphaUncPower=0.05, 
#' ###              estimand="cuminc", lagTime=26, saveDir="./")
#' ###
#' ### censTrial(dataFile=
#' ###          "simTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04.RData",
#' ###          monitorFile=
#' ###          "monitorTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04_cuminc.RData",
#' ###          stage1=78, stage2=156, saveDir="./")
#'  
#' @seealso \code{\link{simTrial}}, \code{\link{monitorTrial}}, and \code{\link{rankTrial}}
#' 
#' @export
censTrial <- function(dataFile,
                      monitorFile,
                      stage1,
                      stage2,
                      saveFile = NULL,
                      saveDir  = NULL,
                      verbose = TRUE){
                     
  if ( is.list(dataFile) ) {
    trialObj <- dataFile
  } else {
    if ( !is.null(saveDir) ){
      ## load in RData object (a list named 'trialObj' )
      load(file.path(saveDir, dataFile))
    } else {
      load(dataFile)
    }
  }
  
  nTrials = length(trialObj[["trialData"]])  
  nTrtArms <- as.integer( trialObj$nArm - 1 )   ## one placebo arm
  
  if ( is.list(monitorFile) ) {
    out <- monitorFile
  } else {
    if ( !is.null(saveDir) ){
      ## load in RData object (a list named 'trialObj' )
      load(file.path(saveDir, monitorFile))
    } else {
      load(monitorFile)
    }
  }
  
  ## a matrix to store bounds results from monitoring each single arm
  boundsRes = matrix(NA, ncol=nTrtArms, nrow = nTrials)
  
  ## an object to store the arm stop time when the arm stops (since trial starts), 
  stopTime = matrix(NA, ncol=nTrtArms, nrow = nTrials)
  
  ## create a list to store the trial data that all subjects have been correctly
  ## censored based on the monitoring results from all arms
  trialListCensor <- vector("list", nTrials)
  
  for (i in 1:nTrtArms) {  
    ## get the bounds 
    boundsRes [, i]= unlist( lapply( out, function(x) x[[i]]$boundHit) )
    stopTime [, i]= unlist( lapply( out, function(x) x[[i]]$stopTime) )
  }
  
  ## go through each trial
  for (i in 1:nTrials ) {
    
    ## create a list to store censored data for ith trial
    #trialCensorI = vector("list",nTrtArms+1 )
    trialCensorI = NULL
    
    ## extract data for the i-th trial
    datI <- trialObj[["trialData"]][[ i ]]
    datI$futime <- datI$exit - datI$entry
    
    ## maximum possible trial duration  
    trialStop = max(datI$exit)
    
    ## get the placebo arm
    datI.0 = subset(datI, trt == 0)  
    
    ## if none of the arms are efficacy or highefficacy (i.e. all arms are either harm or noneff),
    ## the entire trial stops when the last arm hits noneff/harm
    if (!any(boundsRes[i,]%in%c("Eff", "HighEff"))) {
      
      ## get the time when the trial stops
      trialStop = max(stopTime[i,])  
      
      ## censor the placebo arm        
      datI.0$event = datI.0$event == 1 & (datI.0$exit <= trialStop)
      datI.0$exit = pmin( datI.0$exit, trialStop)
    }
    ## if at least one arm reaches efficacy, placebo arm will continue follow up until stage 2
    ## no need of extra action, store the censored placebo
    trialCensorI = rbind(trialCensorI, datI.0)
    
    ## Now we move to censor the trt arms
    for (j in 1:nTrtArms) {
      
      ## subset jth trt arm
      datI.j <- subset(datI, trt==j )
      
      ## censor the subjects based on bounds results for all arms
      ## first, get the stop time for trial i arm j
      t.j = stopTime[i, j]
      
      ## second, check if hit harm
      if (boundsRes[i, j] %in% "Harm") {  ## if "Harm", stop
        ## censor the trt arm        
        datI.j$event = datI.j$event == 1 & (datI.j$exit <= t.j)
        datI.j$exit = pmin( datI.j$exit, t.j)
        
      } else { 
        
        if (boundsRes[i, j] %in% c("NonEffInterim", "NonEffFinal")) {  ## if hit the non efficacy bound
          ## get the stop time for this arm, 
          ## which is the end of stage 1 or when the last arm hits noneff/harm if no eff. trt arms
          endStage1 = max(datI.j$entry + stage1)
          trialStop = min (c(trialStop, endStage1))
          
          ## censor the trt arm at 'trialStop'      
          datI.j$event = datI.j$event == 1 & (datI.j$exit <= trialStop)
          datI.j$exit = pmin( datI.j$exit, trialStop)                   
          
        } else { ## hit high eff or efficacy, i will remove this later
          ## the arm will follow up to stage 2, no need of action
        }      
      }
      
      ## store the censored trt arm
      #trialCensorI [[j+1]] = datI.j
      trialCensorI = rbind(trialCensorI, datI.j)                
    }      
    trialListCensor [[i]] = trialCensorI      
  }

  ## save monitoring output
  if ( !is.null(saveDir) ) {
    if ( is.null(saveFile) ) {
        if ( is.list(monitorFile) )
          warning(
              "The output of 'censTrial' cannot be saved to a file\n",
              "You have not specified the argument 'saveFile', and a default\n",
              "filename cannot be constructed when argument 'monitorFile' is ",
              "a list.\n\n", immediate.=TRUE)
        saveFile <- paste0("trialDataCens", 
                           substr(monitorFile, 13, nchar(monitorFile)) ) 
    }
    #save(trialListCensor, file = file.path(saveDir, saveFile), compress="xz" )
    save(trialListCensor, file = file.path(saveDir, saveFile) )

    if (verbose) { 
        cat("Trial data with correct censoring saved in:\n", 
             file.path(saveDir, saveFile), "\n\n") 
    }
  } 

  ## it should not be an "either/or" decision whether you save or output
  ## the results
  return( invisible( trialListCensor ) )
}


#' Ranking and Selection, and Head-to-Head Comparison of Individual and Pooled Treatment Arms
#' 
#' \code{rankTrial} assesses the probability of correctly selecting the winning most efficacious (individual and/or pooled) treatment arm, and assesses power to detect relative treatment efficacy in head-to-head comparisons of (individual and/or pooled) treatment arms.
#'
#' @param censFile if \code{saveDir = NULL}, a list returned by \code{censTrial}; otherwise a name (character string) of an \code{.RData} file created by \code{censTrial}
#' @param idxHighestVE an integer value identifying the treatment (vaccine) arm with the true highest VE(0--\code{stage2})
#' @param headHead a matrix (\code{ncol = 2}) of treatment arm indices for head-to-head comparisons, where the treatment with higher efficacy is listed first in each row
#' @param poolHead a matrix (\code{ncol} equals 3 or 4) of treatment arm indices for pooled-arm comparisons, where the pooled treatment with higher efficacy pooled over the first two columns is compared with the (pooled) treatment defined by columns 3 and onward. Ranking and selection of pooled arms is performed separately for each row of \code{poolHead}.
#' @param lowerVE a numeric value defining a `winning' treatment arm as one with sufficient evidence for rejecting the null hypothesis H0: VE(0--\code{stage1}) \eqn{\le} \code{lowerVE} x 100\% (typically set equal to 0)
#' @param stage1 the final week of stage 1 in a two-stage trial
#' @param stage2 the final week of stage 2 in a two-stage trial, i.e., the maximum follow-up time 
#' @param alpha one minus the nominal confidence level of the two-sided confidence interval used for testing a null hypothesis H0: VE(0--\code{stage1}) \eqn{\le} \eqn{b} x 100\% against an alternative hypothesis H1: VE(0--\code{stage1}) \eqn{>} \eqn{b} x 100\%
#' @param saveDir a character string specifying a path for \code{censFile}. If supplied, the output is also saved as an \code{.RData} file in this directory; otherwise the output is returned as a list.
#' @param verbose a logical value indicating whether information on the output directory and file name should be printed out (default is \code{TRUE})
#' 
#' @details All time variables use week as the unit of time. Month is defined as 52/12 weeks.
#' 
#' The probability of correct treatment selection is defined as the probability that the treatment arm with the highest estimated VE(0--\code{stage2}) is the one with the true highest VE(0--\code{stage2}) and, for this treatment arm, the null hypothesis H0: VE(0--\code{stage1}) \eqn{\le} \code{lowerVE} x 100\% is rejected. If \code{poolHead} is specified, the probability of correct pooled treatment selection is assessed for each set of two pooled treatment arms.
#' 
#' VE(0--\eqn{t}) is estimated as one minus the ratio of Nelson-Aalen-based cumulative incidence functions. The null hypothesis H0: VE(0--\eqn{t}) \eqn{\le} \eqn{b} x 100\% is rejected if the lower bound of the two-sided (1-\code{alpha}) x 100\% confidence interval for VE(0--\eqn{t}) lies above \eqn{b}.
#' 
#' For head-to-head individual and pooled treatment comparisons, powers to reject the null hypotheses that relative VE(0--\code{stage1}) \eqn{\le} 0\% and relative VE(0--\code{stage2}) \eqn{\le} 0\% are assessed using the aforementioned testing rule.
#' 
#' @return If \code{saveDir} is specified, the output list (named \code{out}) is saved as an \code{.RData} file in \code{saveDir} (the path to \code{saveDir} is printed); otherwise it is returned. The output object is a list with the following components:
#' \itemize{
#'   \item \code{rankSelectPw}: the probability of correct selection of the winning most efficacious individual treatment
#'   \item \code{headHeadPw}: if \code{headHead} is specified, a matrix of powers to detect relative VE(0--\code{stage1}) (column 1) and relative VE(0--\code{stage2}) (column 2) in head-to-head comparisons of individual treatment arms
#'   \item \code{poolRankSelectPw}: if \code{poolHead} is specified, a numeric vector of the probabilities of correct selection of the winning most efficacious pooled treatment for each set of pooled treatments
#'   \item \code{poolHeadPw}: if \code{poolHead} is specified, a matrix of powers to detect relative VE(0--\code{stage1}) (column 1) and relative VE(0--\code{stage2}) (column 2) in head-to-head comparisons of pooled treatment arms
#' }
#' 
#' @examples 
#' 
#' simData <- simTrial(N=c(1000, rep(700, 2)), aveVE=seq(0, 0.4, by=0.2), 
#'                     VEmodel="half", vePeriods=c(1, 27, 79), enrollPeriod=78, 
#'                     enrollPartial=13, enrollPartialRelRate=0.5, dropoutRate=0.05, 
#'                     infecRate=0.04, fuTime=156, 
#'                     visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)),
#'                     missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=5, 
#'                     stage1=78, randomSeed=300)
#'
#' monitorData <- monitorTrial(dataFile=simData, stage1=78, stage2=156, 
#'                             harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#'                             nonEffStartMethod="FKG", nonEffInterval=20, 
#'                             lowerVEnoneff=0, upperVEnoneff=0.4, 
#'                             highVE=0.7, stage1VE=0, lowerVEuncPower=0, 
#'                             alphaNoneff=0.05, alphaHigh=0.05, alphaStage1=0.05, 
#'                             alphaUncPower=0.05, estimand="cuminc", lagTime=26)
#'
#' censData <- censTrial(dataFile=simData, monitorFile=monitorData, stage1=78, stage2=156)
#'                        
#' rankData <- rankTrial(censFile=censData, idxHighestVE=2, 
#'                       headHead=matrix(2:1, nrow=1, ncol=2), lowerVE=0, stage1=78, 
#'                       stage2=156, alpha=0.05)
#'
#' ### alternatively, to save the .RData output file (no '<-' needed):
#' ###
#' ### simTrial(N=c(1400, rep(1000, 2)), aveVE=seq(0, 0.4, by=0.2), VEmodel="half", 
#' ###          vePeriods=c(1, 27, 79), enrollPeriod=78, enrollPartial=13, 
#' ###          enrollPartialRelRate=0.5, dropoutRate=0.05, infecRate=0.04, fuTime=156, 
#' ###          visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)), 
#' ###          missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=30, 
#' ###          stage1=78, saveDir="./", randomSeed=300)
#' ###
#' ### monitorTrial(dataFile=
#' ###          "simTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04.RData", 
#' ###          stage1=78, stage2=156, harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#' ###          nonEffStartMethod="FKG", nonEffInterval=20, 
#' ###          lowerVEnoneff=0, upperVEnoneff=0.4, highVE=0.7, stage1VE=0, 
#' ###          lowerVEuncPower=0, alphaNoneff=0.05, alphaHigh=0.05, alphaStage1=0.05, 
#' ###          alphaUncPower=0.05, estimand="cuminc", lagTime=26, saveDir="./")
#' ###
#' ### censTrial(dataFile=
#' ###  "simTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04.RData",
#' ###  monitorFile=
#' ###  "monitorTrial_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04_cuminc.RData",
#' ###  stage1=78, stage2=156, saveDir="./")
#' ###
#' ### rankTrial(censFile=
#' ###  "trialDataCens_nPlac=1400_nVacc=1000_1000_aveVE=0.2_0.4_infRate=0.04_cuminc.RData",
#' ###  idxHighestVE=2, headHead=matrix(2:1, nrow=1, ncol=2), lowerVE=0, stage1=78, 
#' ###  stage2=156, alpha=0.05, saveDir="./")
#' 
#' @seealso \code{\link{simTrial}}, \code{\link{monitorTrial}}, and \code{\link{censTrial}}
#' 
#' @export
rankTrial <- function(censFile,
                      idxHighestVE,
                      headHead=NULL,
                      poolHead=NULL,
                      lowerVE,
                      stage1,
                      stage2,
                      alpha,
                      saveDir = NULL,
                      verbose = TRUE){                 
  
  if (!is.null(saveDir)){
    ## load the trial data in RData object (a list named 'trialObj' )
    load(file.path(saveDir, censFile))
  } else {
    trialListCensor <- censFile
    rm(censFile)
  }  
  
  nTrials = length(trialListCensor)  
  nTrtArms <- length(unique(trialListCensor[[1]]$trt)) - 1    ## one placebo arm
  
  if (!is.null(headHead)){
    if (NCOL(headHead)!=2){ stop("Number of columns in headHead must equal to 2.'\n") }
    
    ## a matrix to store power for head to head comparison of single arm
    ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
    headHeadPw = matrix(0, nrow=NROW(headHead), ncol=2)    
  }  
  
  if (!is.null(poolHead)){
    if (!(NCOL(poolHead) %in% 3:4)){ stop("Number of columns in poolHead must equal to 3 or 4.'\n") }
    
    ## a matrix to store power for comparison of pooled arms
    ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
    poolHeadPw = matrix(0, nrow=NROW(poolHead), ncol=2)    
    
    # power to correctly identify the best pooled vaccine arm
    poolRankSelectPw <- numeric(NROW(poolHead))
  }  
  
  # power to correctly identify the best vaccine arm
  rankSelectPw <- 0
  
  ## go through each trial
  for (i in 1:nTrials ) {
    ## extract data for the i-th trial
    datI <- trialListCensor[[i]] 
    
    ## a matrix to store estimated VE for each trt arm 
    ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
    VE.I = matrix(0, nrow=nTrtArms, ncol=2)
    
    ## cum hazard-based Wald test results for each trt arm
    ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
    test.I = matrix(0, nrow=nTrtArms, ncol=2)
    
    ## now calculate power for rank and select
    for (j in 1:nTrtArms){
      ## extract data for the j-th trt arm & placebo
      testData = subset(datI, trt %in% c(0, j))
      
      ## power for stage 1
      bnd = testVE(datI=testData, lowerVE=lowerVE, stage =stage1, alpha=alpha)
      VE.I[j, 1] = bnd$VE
      test.I[j, 1] = bnd$bound
      
      ## power for stage 2
      bnd = testVE(datI=testData, lowerVE=lowerVE, stage =stage2, alpha=alpha)
      VE.I[j, 2] = bnd$VE
      test.I[j, 2] = bnd$bound
    }
    
    # identify the vaccine arm with the highest estimated VE(0-36)
    bestInd = which(VE.I[,2] == max(VE.I[,2]))
      
    # to estimate the probability that the vaccine arm with the highest estimated VE(0-36) is the one with the
    # true highest VE(0-36), AND for that regimen the hypothesis H0: VE(0-18)<=0% was rejected    
    if (bestInd==idxHighestVE && test.I[bestInd, 1]=="Eff"){ rankSelectPw = rankSelectPw + 1 }    
    
    if (!is.null(headHead)){  
      # head-to-head comparison of individual vaccine arms at 18 and 36 months
      pw = headTestVE(datI, headHead, stage1, stage2, alpha)
      headHeadPw = headHeadPw + pw  # counts of detected relative efficacy for each comparison            
    }    
    
    if (!is.null(poolHead)) {
      # head-to-head comparison of pooled vaccine arms at 18 and 36 months
      pw = headTestVE(datI, poolHead, stage1, stage2, alpha)
      poolHeadPw = poolHeadPw + pw   
      
      # ranking and selection for pooled vaccine arms
      for (hi in 1:NROW(poolHead)) {
        arm1 = poolHead[hi,1:2]
        arm2 = poolHead[hi,3:NCOL(poolHead)]
        
        ## change trt index to combine arms listed in "arm1" and "arm2"
        ## and store the new data in datI.p
        datI$trt[datI$trt %in% arm1] = arm1[1]
        
        ## if arm2 is placebo, then no changes
        datI$trt[datI$trt %in% arm2] = arm2[1]
        
        ## now update 'bestVE' to arm1[1]
        bestVE = arm1[1]
        
        ## a matrix to store estimated VE for each trt arm 
        ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
        
        ## make it very small and impossible to be selected
        VE.I = matrix(-10, nrow=nTrtArms, ncol=2)
        test.I = matrix("empty", nrow=nTrtArms, ncol=2)
        
        ## get index for combined arms
        indTrt = sort(unique(datI$trt))
        indTrt = indTrt[-1]    ## remove placebo
        
        for (j in indTrt) {
          ## extract data for the j-th trt arm & placebo
          testData = subset(datI, trt %in% c(0, j)) 
          
          ## power for stage 1
          bnd = testVE(datI=testData, lowerVE=lowerVE, stage=stage1, alpha=alpha)
          VE.I[j, 1] = bnd$VE
          test.I[j, 1] = bnd$bound
          
          ## power for stage 2
          bnd = testVE(datI=testData, lowerVE=lowerVE, stage=stage2, alpha=alpha)
          VE.I[j, 2] = bnd$VE
          test.I[j, 2] = bnd$bound
        }
        
        # identify the pooled vaccine arm with the highest estimated VE(0-36)
        bestInd = which(VE.I[,2]==max(VE.I[,2]))
        
        # to estimate the probability that the pooled vaccine arm with the highest estimated VE(0-36) is the one 
        # with the true highest VE(0-36), AND for that regimen the hypothesis H0: VE(0-18)<=0% was rejected    
        if (bestInd==bestVE && test.I[bestInd, 1]=="Eff"){ poolRankSelectPw[hi] = poolRankSelectPw[hi] + 1 }
      }
    }
  }
  
  out <- list(rankSelectPw = rankSelectPw/nTrials)
  if (!is.null(headHead)){ out$headHeadPw <- headHeadPw/nTrials } 
  if (!is.null(poolHead)){ 
    out$poolRankSelectPw <- poolRankSelectPw/nTrials
    out$poolHeadPw <- poolHeadPw/nTrials    
  }
  
  if (!is.null(saveDir)){
    saveFile <- paste0("rankSelectPwr",substr(censFile, 14, nchar(censFile)))
    #save(out, file = file.path(saveDir, saveFile), compress="xz" )
    save(out, file = file.path(saveDir, saveFile) )
    if (verbose){ cat("Output saved in:\n", file.path(saveDir, saveFile), "\n\n") }
  } else {
    return(out)
  }  
}

testVE <- function(datI, lowerVE, stage, alpha){
  upperFR <- 1-lowerVE
  ## variables for cumulative hazard-based Wald test, censor at 'stage' 
  ## which can be stage 1 or 2 
  datI$futime <- datI$exit - datI$entry
  datI$event <- datI$event == 1 & ( datI$futime <= stage)
  datI$exit  <- pmin( datI$exit, datI$entry + stage )
  datI$futime <- datI$exit - datI$entry
  
  ## convert 'trt' to indicator variable before calculating VE 
  ## (i.e. convert the non-zero values to 1)
  datI$trt <- as.integer(datI$trt > 0 )
  
  # if a vaccine regimen is highly efficacious, there will be no events observed in this arm and the Nelson-Aalen estimator will be incalculable
  if (sum(datI[datI$trt==1,"event"])==0){
    FR <- 0
    bound <- "Eff"
  } else {
    KM <- survfit(Surv(futime, event) ~ trt, data=datI, error="greenwood")
    KM.sum <- summary(KM)
    # Nelson-Aalen estimates
    na.0 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=0"]/KM.sum$n.risk[KM.sum$strata=="trt=0"])
    varna.0 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=0"]/KM.sum$n.risk[KM.sum$strata=="trt=0"]^2)
    na.0 <- na.0[length(na.0)]
    varna.0 <- varna.0[length(varna.0)]        
    na.1 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=1"]/KM.sum$n.risk[KM.sum$strata=="trt=1"])
    varna.1 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=1"]/KM.sum$n.risk[KM.sum$strata=="trt=1"]^2)
    na.1 <- na.1[length(na.1)]
    varna.1 <- varna.1[length(varna.1)]
    
    # survival estimates
    S.0 <- exp(-na.0)
    varS.0 <- ifelse(is.na(varna.0), NA, exp(-2*na.0) * varna.0)
    S.1 <- exp(-na.1)
    varS.1 <- ifelse(is.na(varna.1), NA, exp(-2*na.1) * varna.1)
    
    # cumulative incidence ratio
    F.0 <- 1 - S.0
    F.1 <- 1 - S.1
    FR <- F.1/F.0
    varlogFR <- ifelse(is.na(varS.0) | is.na(varS.1), NA, varS.1/(F.1^2) + varS.0/(F.0^2))
    
    FRci.up <- ifelse(is.na(varlogFR), NA, exp(log(FR)+qnorm(1-alpha/2)*sqrt(varlogFR)))
    bound <- ifelse(FRci.up < upperFR, "Eff", "nonEff")  
  }
  
  return(list(bound=bound, VE=1-FR))
}

headTestVE <- function(datI, headHeadInd, stage1, stage2, alpha){
  ## a matrix to store power for head to head comparison 
  ## column 1 = power for VE(0-18), column 2 = power for VE(0-36)
  headHeadPw = matrix(0, nrow=NROW(headHeadInd), ncol=2)
  
  for (h in 1:NROW(headHeadInd)) {
    if(NCOL(headHeadInd)==2) {  ## 2 columns
      arm1 = headHeadInd[h,1]
      arm2 = headHeadInd[h,2]
    } else {                    ## 3 or 4 column
      arm1 = headHeadInd[h,1:2]
      arm2 = headHeadInd[h,3:NCOL(headHeadInd)]
    }
    
    testDataRel <- subset(datI, trt %in% c(arm1, arm2))
    ## need to change the index in arm2 to 0
    testDataRel$trt [testDataRel$trt %in% arm2] = 0    
    
    testData <- subset(datI, trt %in% c(0, arm1))
        
    # head-to-head power = the 1-sided head-to-head test rejects; AND the superior arm passes 
    # Stage 1 with VE(0-stage1) > 0%
    ## relative VE(0-stage1)    
    bndRel <- testVE(datI=testDataRel, lowerVE=0, stage=stage1, alpha=alpha)
    ## VE(0-stage1) of the superior treatment
    bnd <- testVE(datI=testData, lowerVE=0, stage=stage1, alpha=alpha)    
    if (bndRel$bound =="Eff" & bnd$bound=="Eff"){ headHeadPw[h, 1] = 1 }
    
    ## power for stage 2
    bndRel <- testVE(datI=testDataRel, lowerVE=0, stage=stage2, alpha=alpha)
    bnd <- testVE(datI=testData, lowerVE=0, stage=stage2, alpha=alpha)    
    if (bndRel$bound =="Eff" & bnd$bound=="Eff"){ headHeadPw[h, 2] = 1 }
  }
  return(headHeadPw)
}


#########
buildBounds = function(nInfec, highEffBounds) {
   
   ## high efficacy : approxiamtion 
   idx = which(highEffBounds$N==nInfec)
   
   ## get number of row
   nBounds = nrow (highEffBounds)
   if (length(idx)==0) {  ## no bound for nInfec, add it
      highEffBounds  = rbind(highEffBounds, c(nInfec, highEffBounds[nBounds,2]))   ## approximation  
      
      ## order the bound by infections
      highEffBounds = highEffBounds[order(highEffBounds$N), ]
      
      idx = which(highEffBounds$N==nInfec)
      if(idx>1 && (idx+1)<=nrow (highEffBounds))       ## approximate the bound by averaging over 2 adjacent twos
         highEffBounds[idx,2] = mean(c(highEffBounds[idx-1,2], highEffBounds[idx+1,2]))
    }
    highEffBounds = subset(highEffBounds, N<=nInfec)
 
   list(highEffBounds=highEffBounds)            
}


#' Unconditional Power to Detect Positive Treatment Efficacy in a Per-Protocol Cohort
#' 
#' \code{VEpowerPP} computes unconditional power to detect positive treatment (vaccine) efficacy in per-protocol cohorts identified in \code{simTrial}-generated data-sets.
#'                  
#' @param dataList if \code{saveDir = NULL}, a list of objects (lists) returned by \code{censTrial}; otherwise a list of \code{.RData} file names (character strings) generated by \code{censTrial}
#' @param lowerVEuncPower a numeric value specifying a one-sided null hypothesis H0: VE(\code{VEcutoffWeek}--\code{stage1}) \eqn{\le} \code{lowerVEuncPower} x 100\%. Unconditional power (i.e., accounting for sequential monitoring) to reject H0 in the per-protocol cohort is calculated, where the rejection region is defined by the lower bound of the two-sided (1-\code{alphaUncPower}) x 100\% confidence interval for VE(\code{VEcutoffWeek}--\code{stage1}) being above \code{lowerVEuncPower} (typically a number in the 0--0.5 range).
#' @param alphaUncPower one minus the nominal confidence level of the two-sided confidence interval used to test the one-sided null hypothesis H0: VE(\code{VEcutoffWeek}--\code{stage1}) \eqn{\le} \code{lowerVEuncPower} x 100\% against the alternative hypothesis H1: VE(\code{VEcutoffWeek}--\code{stage1}) \eqn{>} \code{lowerVEuncPower} x 100\%.
#' @param VEcutoffWeek a cut-off time (in weeks). Only subjects with the follow-up time exceeding \code{VEcutoffWeek} are included in the per-protocol cohort.
#' @param stage1 the final week of stage 1 in a two-stage trial
#' @param outName a character string specifying the output \code{.RData} file name. If \code{outName = NULL} but \code{saveDir} is specified, the output file is named \code{VEpwPP.RData}.
#' @param saveDir a character string specifying a path for the output directory. If supplied, the output is saved as an \code{.RData} file named \code{outName} in the directory; otherwise the output is returned as a list.
#' @param verbose a logical value indicating whether information on the output directory and file name should be printed out (default is \code{TRUE})
#' 
#' @details All time variables use week as the unit of time. Month is defined as 52/12 weeks.
#' 
#' A per-protocol cohort indicator is assumed to be included in the \code{simTrial}-generated data-sets, which is ensured by specifying the \code{missVaccProb} argument in \code{simTrial}.
#' 
#' VE(\code{VEcutoffWeek}--\code{stage1}) is estimated as one minus the ratio of Nelson-Aalen-based cumulative incidence functions. \code{VEpowerPP} computes power to reject the null hypothesis H0: VE(\code{VEcutoffWeek}--\code{stage1}) \eqn{\le} \code{lowerVEuncPower} x 100\%. H0 is rejected if the lower bound of the two-sided (1-\code{alphaUncPower}) x 100\% confidence interval for VE(\code{VEcutoffWeek}--\code{stage1}) lies above \code{lowerVEuncPower}.
#'
#' @return If \code{saveDir} is specified, the output list (named \code{pwList}) is saved as an \code{.RData} file named \code{outName} (or \code{VEpwPP.RData} if left unspecified); otherwise the output list is returned. The output object is a list (of equal length as \code{dataList}) of lists with the following components:
#' \itemize{
#'   \item \code{VE}: a numeric vector of VE(\code{VEcutoffWeek}--\code{stage1}) estimates for each missing vaccination probability in \code{missVaccProb} of \code{simTrial}
#'   \item \code{VEpwPP}: a numeric vector of powers to reject the null hypothesis H0: VE(\code{VEcutoffWeek}--\code{stage1}) \eqn{\le} \code{lowerVEuncPower} x 100\% for each missing vaccination probability in \code{missVaccProb} of \code{simTrial}
#' }
#' 
#' @examples 
#' simData <- simTrial(N=rep(1000, 2), aveVE=c(0, 0.4), VEmodel="half", 
#'                     vePeriods=c(1, 27, 79), enrollPeriod=78, 
#'                     enrollPartial=13, enrollPartialRelRate=0.5, dropoutRate=0.05, 
#'                     infecRate=0.04, fuTime=156, 
#'                     visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)),
#'                     missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=5, 
#'                     stage1=78, randomSeed=300)
#'
#' monitorData <- monitorTrial(dataFile=simData, stage1=78, stage2=156, 
#'                             harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#'                             nonEffStartMethod="FKG", nonEffInterval=20, 
#'                             lowerVEnoneff=0, upperVEnoneff=0.4, 
#'                             highVE=0.7, stage1VE=0, lowerVEuncPower=0, 
#'                             alphaNoneff=0.05, alphaHigh=0.05, alphaStage1=0.05, 
#'                             alphaUncPower=0.05, estimand="cuminc", lagTime=26)
#'
#' censData <- censTrial(dataFile=simData, monitorFile=monitorData, stage1=78, stage2=156)
#'
#' VEpwPP <- VEpowerPP(dataList=list(censData), lowerVEuncPower=0, alphaUncPower=0.05,
#'                     VEcutoffWeek=26, stage1=78)
#'
#' ### alternatively, to save the .RData output file (no '<-' needed):
#' ###
#' ### simTrial(N=rep(1000, 2), aveVE=c(0, 0.4), VEmodel="half", 
#' ###          vePeriods=c(1, 27, 79), enrollPeriod=78, enrollPartial=13, 
#' ###          enrollPartialRelRate=0.5, dropoutRate=0.05, infecRate=0.04, fuTime=156, 
#' ###          visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)), 
#' ###          missVaccProb=c(0,0.05,0.1,0.15), VEcutoffWeek=26, nTrials=5, 
#' ###          stage1=78, saveDir="./", randomSeed=300)
#' ###
#' ### monitorTrial(dataFile=
#' ###          "simTrial_nPlac=1000_nVacc=1000_aveVE=0.4_infRate=0.04.RData", 
#' ###          stage1=78, stage2=156, harmMonitorRange=c(10,100), alphaPerTest=NULL, 
#' ###          nonEffStartMethod="FKG", nonEffInterval=20, 
#' ###          lowerVEnoneff=0, upperVEnoneff=0.4, highVE=0.7, stage1VE=0, 
#' ###          lowerVEuncPower=0, alphaNoneff=0.05, alphaHigh=0.05, alphaStage1=0.05, 
#' ###          alphaUncPower=0.05, estimand="cuminc", lagTime=26, saveDir="./")
#' ###
#' ### censTrial(dataFile=
#' ###  "simTrial_nPlac=1000_nVacc=1000_aveVE=0.4_infRate=0.04.RData",
#' ###  monitorFile=
#' ###  "monitorTrial_nPlac=1000_nVacc=1000_aveVE=0.4_infRate=0.04_cuminc.RData",
#' ###  stage1=78, stage2=156, saveDir="./")
#' ###
#' ### VEpowerPP(dataList=
#' ###  list("trialDataCens_nPlac=1000_nVacc=1000_aveVE=0.4_infRate=0.04_cuminc.RData"),
#' ###  lowerVEuncPower=0, alphaUncPower=0.05, VEcutoffWeek=26, stage1=78, saveDir="./")
#' 
#' @seealso \code{\link{simTrial}}
#' 
#' @export
VEpowerPP <- function( dataList, 
                       lowerVEuncPower, 
                       alphaUncPower, 
                       VEcutoffWeek,
                       stage1, 
                       outName = NULL, 
                       saveDir = NULL, 
                       verbose = TRUE){

  upperFRuncPower <- 1-lowerVEuncPower
  # output list (for each censTrial output object) of lists with components 'VE' and 'VEpwPP'
  pwList <- as.list(NULL)
  for (k in 1:length(dataList)){
    if (!is.null(saveDir)){
      # assumes 'dataList[[k]]' is a character string
      load(file.path(saveDir, dataList[[k]]))
      dataList[[k]] <- trialListCensor
      rm(trialListCensor)
    }
    
    nTrials <- length(dataList[[k]])
    nPPcohorts <- length(grep("pp", colnames(dataList[[k]][[1]])))
    if (nPPcohorts==0){ stop("Missing per-protocol cohort indicator in the data-set.") }
    ppnames <- paste0("pp", 1:nPPcohorts)
    
    VE <- matrix(NA, nrow=nTrials, ncol=nPPcohorts)
    VEpwPP <- matrix(NA, nrow=nTrials, ncol=nPPcohorts)
    for (i in 1:nTrials){
      dataI <- dataList[[k]][[i]]
      dataI$futime <- dataI$exit - dataI$entry
      # count only infections between VEcutoffWeek and the end of Stage 1
      dataI$event <- dataI$event==1 & dataI$futime>VEcutoffWeek & dataI$futime<=stage1
      # censor everyone at the end of Stage 1
      dataI$futime <- pmin(dataI$futime, stage1)

      # AN ALTERNATIVE WAY TO COMPUTE PER-PROTOCOL VE      
#       # restrict to subjects with follow-up time exceeding 'VEcutoffWeek' weeks (per-protocol criterion 1)
#       dataI <- subset(dataI, futime > VEcutoffWeek)
#       # censor all subjects at the Week 'stage1' visit
#       dataI$event <- dataI$event == 1 & (dataI$futime <= stage1)
#       dataI$futime <- pmin(dataI$futime, stage1)
#       # shift the time origin to the Week 'VEcutoffWeek' visit
#       dataI$futime <- dataI$futime - VEcutoffWeek      
      
      for (j in 1:nPPcohorts){
        # restrict to subjects with non-missing vaccinations (per-protocol criterion 2)
        dataIj <- dataI[dataI[[ppnames[j]]]==1,]
        # now we have the final per-protocol data-set at the end of 'stage 1'
        # get the group sizes in the PP cohort
        nAll <- table(dataIj$trt)
        if (length(nAll)<2){
          next
        } else {
          # get the infection counts in the PP cohort
          nInf <- table(dataIj$trt[dataIj$event==1])
          if (length(nInf)<2){
            next
          } else {
            KM <- survfit(Surv(futime, event) ~ trt, data=dataIj, error="greenwood")
            KM.sum <- summary(KM)
            
            # Nelson-Aalen estimates
            na.0 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=0"]/KM.sum$n.risk[KM.sum$strata=="trt=0"])
            varna.0 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=0"]/KM.sum$n.risk[KM.sum$strata=="trt=0"]^2)
            na.0 <- na.0[length(na.0)]
            varna.0 <- varna.0[length(varna.0)]
            
            na.1 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=1"]/KM.sum$n.risk[KM.sum$strata=="trt=1"])
            varna.1 <- cumsum(KM.sum$n.event[KM.sum$strata=="trt=1"]/KM.sum$n.risk[KM.sum$strata=="trt=1"]^2)
            na.1 <- na.1[length(na.1)]
            varna.1 <- varna.1[length(varna.1)]
            
            # survival estimates
            S.0 <- exp(-na.0)
            varS.0 <- ifelse(is.na(varna.0), NA, exp(-2*na.0) * varna.0)
            S.1 <- exp(-na.1)
            varS.1 <- ifelse(is.na(varna.1), NA, exp(-2*na.1) * varna.1)
            
            # cumulative incidence ratio
            F.0 <- 1 - S.0
            F.1 <- 1 - S.1
            FR.i <- F.1/F.0
            varlogFR <- ifelse(is.na(varS.0) | is.na(varS.1), NA, varS.1/(F.1^2) + varS.0/(F.0^2))
            
            # VE(6.5-stage1) estimate
            VE[i,j] <- 1 - FR.i
            
            # 1-sided test of null hypothesis VE(6.5-stage1)<=lowerVEuncPower
            FRci.up <- ifelse(is.na(varlogFR), NA, exp(log(FR.i)+qnorm(1-alphaUncPower/2)*sqrt(varlogFR)))
            # if the upper bound of the CI for the cumulative incidence ratio lies below 'upperFRuncPower', reject H0
            VEpwPP[i,j] <- ifelse(FRci.up < upperFRuncPower, 1, 0)            
          }          
        }        
      }
    }
    VE <- apply(VE, 2, mean, na.rm=TRUE)
    VEpwPP <- apply(VEpwPP, 2, mean, na.rm=TRUE)
    pwList[[k]] <- list(VE=VE, VEpwPP=VEpwPP)    
  }
  if (!is.null(saveDir)){
    saveFile <- ifelse(is.null(outName), "VEpwPP.RData", outName)
    #save(pwList, file=file.path(saveDir, saveFile), compress="xz")
    save(pwList, file=file.path(saveDir, saveFile))
    if (verbose){ cat("Output saved in:\n", file.path(saveDir, saveFile), "\n\n") }
  } else {
    return(pwList)
  }  
}

#' Estimate cumulative probabilities of crossing an efficacy or non-efficacy boundary in an event-driven 2-arm trial design
#' 
#' Computes proportions of simulated trials that crossed either an efficacy or a non-efficacy stopping boundary by analysis \eqn{1,\ldots,\code{nAnalyses}} using an \code{.RData} output file from \code{\link{monitorTrial}}. An event-driven 2-arm trial design is assumed.
#' 
#' @param boundType a character string specifying if the one-sided null hypothesis is of the form \eqn{H_0: \theta \geq \theta_0} (\code{"eff"}, default) or \eqn{H_0: \theta \leq \theta_0} (\code{"nonEff"}), where \eqn{\theta} and \eqn{\theta_0} are the true hazard ratio and its value specifying the null hypothesis, respectively
#' @param nAnalyses a numeric value specifying the number of analyses
#' @param monitorTrialFile either a character string specifying an \code{.RData} file or a list outputted by \code{\link{monitorTrial}}
#' @param monitorTrialDir a character string specifying a path to \code{monitorTrialFile} if \code{monitorTrialFile} specifies a file name
#' 
#' @return A numeric vector of estimated cumulative probabilities of crossing the specified boundary by analysis \eqn{1,\ldots,\code{nAnalyses}}.
#' 
#' @examples
#' simData <- simTrial(N=c(1000, 1000), aveVE=c(0, 0.4),
#'                     VEmodel="half", vePeriods=c(1, 27, 79), enrollPeriod=78,
#'                     enrollPartial=13, enrollPartialRelRate=0.5, dropoutRate=0.05,
#'                     infecRate=0.06, fuTime=156, visitSchedule=seq(0, 156, by=4),
#'                     missVaccProb=0.05, VEcutoffWeek=26, nTrials=5,
#'                     stage1=78, randomSeed=300)
#' 
#' monitorData <- monitorTrial(dataFile=simData, stage1=78, stage2=156,
#'                             harmMonitorRange=c(10,75), harmMonitorAlpha=0.05,
#'                             effCohort=list(timingCohort=list(lagTime=0),
#'                                            times=c(75, 150),
#'                                            timeUnit="counts",
#'                                            lagTime=0,
#'                                            estimand="cox",
#'                                            nullVE=0,
#'                                            nominalAlphas=c(0.001525, 0.024501)),
#'                             nonEffCohorts=list(timingCohort=list(lagTime=0),
#'                                                times=c(75, 150),
#'                                                timeUnit="counts",
#'                                                cohort1=list(lagTime=0,
#'                                                             estimand="cox",
#'                                                             nullVE=0.4,
#'                                                             nominalAlphas=c(0.001525, 0.024501))),
#'                             lowerVEnoneff=0, highVE=1, lowerVEuncPower=0,
#'                             alphaHigh=0.05, alphaUncPower=0.05,
#'                             verbose=FALSE)
#' 
#' crossBoundCumProb(boundType="eff", nAnalyses=2, monitorTrialFile=monitorData)
#' crossBoundCumProb(boundType="nonEff", nAnalyses=2, monitorTrialFile=monitorData)
#'
#' @export
crossBoundCumProb <- function(boundType=c("eff", "nonEff"), nAnalyses, monitorTrialFile, monitorTrialDir=NULL){
  boundType <- match.arg(boundType)
  
  if (is.character(monitorTrialFile)){
    if (is.character(monitorTrialDir)){
      if (file.exists(file.path(monitorTrialDir, monitorTrialFile))){
        # load an RData file (a list named 'out')
        load(file.path(monitorTrialDir, monitorTrialFile))    
      } else {
        stop("The file\n", file.path(monitorTrialDir, monitorTrialFile), "\ndoes not exist.")
      }
      
    } else {
      # load an RData file (a list named 'out')
      load(monitorTrialFile)  
    }
  } else if (is.list(monitorTrialFile)){
    out <- monitorTrialFile
  }
  
  if (boundType=="eff"){
    # identify the efficacy outcomes and return the number of the test at which the
    # efficacy boundary was crossed
    effCross <- sapply(out, function(x){
      ifelse(x[[1]]$boundHit=="Eff", NROW(x[[1]]$summEff), NA)
    })
    effCross <- factor(effCross, levels=1:nAnalyses)
    
    return(cumsum(as.vector(table(effCross)) / length(out)))
  } else {
    # identify the non-efficacy outcomes and return the number of the test at which the
    # efficacy boundary was crossed
    noneffCross <- sapply(out, function(x){
      ifelse(x[[1]]$boundHit=="NonEffInterim", if (is.data.frame(x[[1]]$summObj)) NROW(x[[1]]$summObj) else NROW(x[[1]]$summObj[[1]]), NA)
    })
    noneffCross <- factor(noneffCross, levels=1:nAnalyses)
    
    return(cumsum(as.vector(table(noneffCross)) / length(out)))
  }
}

#' Extract the time since trial start to crossing a stopping boundary or reaching the target number of events if no stopping boundary crossed in an event-driven 2-arm trial design
#' 
#' Obtains times (in weeks) since trial initiation to crossing a harm, non-efficacy or efficacy boundary, or reaching the target number of events if no stopping boundary is crossed in an event-driven 2-arm trial design. The times are extracted from \code{.RData} files outputted by \code{\link{monitorTrial}}.
#' 
#' @param monitorTrialFile either a character vector specifying (multiple) \code{.RData} file(s) or a list of lists outputted by \code{\link{monitorTrial}}
#' @param monitorTrialDir a character string specifying a path to the file(s) in \code{monitorTrialFile} if \code{monitorTrialFile} specifies file name(s)
#' @param saveFile a character string optionally specifying an \code{.RData} file name storing the output (\code{NULL} by default)
#' @param saveDir a character string optionally specifying the file path for \code{saveFile} (set to \code{monitorTrialDir} by default)
#' 
#' @return A list (of the same length as \code{monitorTrialFile}) of numeric vectors of times. The order of the vectors matches the order of components in \code{monitorTrialFile}. The length of each vector equals the number of simulated trials in the corresponding output list from \code{\link{monitorTrial}}.
#' 
#' @examples
#' simData <- simTrial(N=c(1000, 1000), aveVE=c(0, 0.4),
#'                     VEmodel="half", vePeriods=c(1, 27, 79), enrollPeriod=78,
#'                     enrollPartial=13, enrollPartialRelRate=0.5, dropoutRate=0.05,
#'                     infecRate=0.6, fuTime=156,
#'                     visitSchedule=c(0, (13/3)*(1:4), seq(13*6/3, 156, by=13*2/3)),
#'                     missVaccProb=0.05, VEcutoffWeek=26, nTrials=5,
#'                     stage1=78, randomSeed=300)
#' 
#' monitorData <- monitorTrial(dataFile=simData, stage1=78, stage2=156,
#'                             harmMonitorRange=c(10,75), harmMonitorAlpha=0.05,
#'                             effCohort=list(timingCohort=list(lagTime=0),
#'                                            times=c(75, 150),
#'                                            timeUnit="counts",
#'                                            lagTime=0,
#'                                            estimand="cox",
#'                                            nullVE=0,
#'                                            nominalAlphas=c(0.001525, 0.024501)),
#'                             nonEffCohorts=list(timingCohort=list(lagTime=0),
#'                                                times=c(75, 150),
#'                                                timeUnit="counts",
#'                                                cohort1=list(lagTime=0,
#'                                                             estimand="cox",
#'                                                             nullVE=0.4,
#'                                                             nominalAlphas=c(0.001525, 0.024501))),
#'                             lowerVEnoneff=0, highVE=1, lowerVEuncPower=0,
#'                             alphaHigh=0.05, alphaUncPower=0.05,
#'                             verbose=FALSE)
#' 
#' times <- decisionTimes(list(monitorData))
#' 
#' ## alternatively, to save the .RData output file (no '<-' needed):
#' ## decisionTimes(list(monitorData), saveFile="decisionTimes.RData")
#' 
#' @export
decisionTimes <- function(monitorTrialFile, monitorTrialDir=NULL, saveFile=NULL, saveDir=monitorTrialDir){
  times <- vector("list", length(monitorTrialFile))
  
  for (i in 1:length(monitorTrialFile)){
    
    if (is.character(monitorTrialFile)){
      if (is.character(monitorTrialDir)){
        # load an RData file (a list named 'out')
        load(file.path(monitorTrialDir, monitorTrialFile[i]))  
      } else {
        # load an RData file (a list named 'out')
        load(monitorTrialFile[i])  
      }
    } else if (is.list(monitorTrialFile)){
      out <- monitorTrialFile[[i]]
    }
    
    out <- lapply(out, "[[", 1)
    
    times[[i]] <- sapply(out, function(x){
      if (is.na(x$boundHit) || x$boundHit=="Eff"){
        return(max(x$summEff$testTimes))
      } else {
        return(x$stopTime) 
      }
    })
  }
  
  if (is.character(saveFile)){
    if (is.character(saveDir)){
      save(times, file=file.path(saveDir, saveFile))
    } else {
      save(times, file=saveFile)
    }
  }
  
  return(invisible(times))
}
mjuraska/seqDesign documentation built on Dec. 14, 2022, 4:26 p.m.