R/monitoring_utils.R

Defines functions estHRbound getEstHR eqEstHR censorTrial VEci_binom finalStage1Test getInfecCntFirstNonEff getHarmBound getAlphaPerTest semiConstSpending pNS do_harm_monitoring2 do_harm_monitoring getFirstNonEffCnt

Documented in estHRbound

## Functions used by monitorTrial 


## Function to determine the number (i.e. count) of the first event that
## meets all the following criteria:
##
## The arguments to the function are: 
## ----------------------------------
##   'd':
##
getFirstNonEffCnt <- 
    function(d, minCnt=75, maxCnt=Inf, lagTimes=c(26,52), lagMinCnts=c(10,2) )
{ 
  ## extract events
  events <- d[d$event==1, c("exit","entry")]

  ## order by calendar time of event (i.e. 'exit')
  events <- events[order(events$exit), ]

  ## add on a count index
  events$cnt <- 1:nrow(events)

  ## vector of infection totals at which each lag condition is met
  lagCondCnts <- mapply(FUN= function(lagtime, mincnt, dat) {
                            wLag <- ( (dat$exit - dat$entry) > lagtime)
                            dat$cnt[ wLag ][ mincnt ] },
                        lagtime=lagTimes, mincnt=lagMinCnts, 
                        MoreArgs=list(dat=events ) )

   ## The minimum of 'maxCnt' and largest of minCnt and the values that satisfy
   ## the lagMinCnts conditions
   min(maxCnt, max( minCnt, lagCondCnts ) )
}


## Function to implement "harm" monitoring on data.frame 'd' using boundaries
## specified in data.frame 'bounds'.
##
## The 'bounds' data frame must contain the columns specified by the arguments
## 'totInfecVar' and 'vaccInfecVar'.  These columns should contain a
## running total of infections in both trt groups (column 'totInfecVar'), and a 
## running total of infections in the vaccinee group ('vaccInfecVar').
## Other columns are allowed, but will be ignored.
##
## Data.frame 'd' will contain one row per infection, a 'trt' variable and a
## variable 'exit' containing the study time that the infection is observed
## (i.e. diagnosed) - the infected ppt.s "exit time" from un-infected Follow-up
do_harm_monitoring <- function(d, bounds, totInfecVar="N", vaccInfecVar="V") {
  
  ## sanity checks on input
  if ( length(d)==0 )
    stop("Argument 'd' to function 'do_harm_monitoring' has length 0.\n")
  
  if ( length(bounds)==0 )
    stop("Argument 'bounds' to function 'do_harm_monitoring' has length 0.\n")
  
  ## check that 'd' is ordered - else order it
  if ( nrow(d) > 1 & !all( diff(d$exit) >= 0 ) ) {
    d <- d[ order(d$exit), ]
  }
  
  ## get cumulative count of vaccinee infections
  vaccInfecCnt <- cumsum( d$trt > 0 ) # cumulatively counts vaccinee infections
  
  ## get count of number of infections
  totInfec <- length( vaccInfecCnt )
  
  ## restrict 'bounds' to apply to only infection totals we have
  bounds <- bounds[ bounds[, totInfecVar] <= totInfec , ]
  
  ## subset out the components of 'vaccInfecCnt' that correspond to the 
  ## infections totals at which we can stop for harm (the values in 
  ## bounds[[ totInfecVar ]]).   Compare the subsetted values to the 
  ## values in: bounds[[ vaccInfecVar ]].  If any are equal we stop.
  vaccInfecSub <- vaccInfecCnt[ bounds[[totInfecVar ]] ] # matching 'vaccInfecCnt' to 'bounds'
  harmBoundsHit <- ( vaccInfecSub >= bounds[[ vaccInfecVar ]])
  
  ## remove NA (added by Yu on 12/28/2011)
  if ( any( harmBoundsHit, na.rm=TRUE ) ) {
    
    w.hit <- which( harmBoundsHit )[1]
    
    ## 'N' is the infection count at the harm bound hit, 
    ## 'V' is the vaccine total at the harm bound hit
    N <- bounds[[totInfecVar]][ w.hit ]
    V <- bounds[[vaccInfecVar]][ w.hit ]
    
    list( isHarm = TRUE,
          stopTime = d$exit[ N ], # 'd' is ordered by 'exit' time
          stopInfectCnt = N,
          stopInfectSplit = c(Vacc = V, Plac = N - V))
    
  } else {
    list( isHarm = FALSE )
  }
}


## Function to implement "harm" monitoring on data.frame 'd' using boundaries
## specified in data.frame 'bounds'.
##
## The 'bounds' data frame must contain the columns specified by the arguments
## 'totInfecVar' and 'vaccInfecVar'.  These should columns should contain a
## running total of infections in both trt groups (column 'totInfecVar'), and a 
## running total of infections in the vaccinee group ('vaccInfecVar').
## Other columns are allowed, but will be ignored.
##
## Data.frame 'd' will contain one row per infection, a 'trt' variable and a
## variable 'exit' containing the study time that the infection is observed
## (i.e. diagnosed) - the infected ppt.s "exit time" from un-infected Follow-up
do_harm_monitoring2 <- function(d, bounds, d2, stage1 =78,  totInfecVar="N", vaccInfecVar="V") {
  
  ## sanity checks on input
  if ( length(d)==0 )
    stop("Argument 'd' to function 'do_harm_monitoring' has length 0.\n")
  
  if ( length(bounds)==0 )
    stop("Argument 'bounds' to function 'do_harm_monitoring' has length 0.\n")
  
  
  ## check that 'd' is ordered - else order it
  if ( nrow(d) > 1 & !all( diff(d$exit) >= 0 ) ) {
    d <- d[ order(d$exit), ]
  }
  
  ## get cumulative count of vaccinee infections
  vaccInfecCnt <- cumsum( d$trt > 0 )
  
  ## get count of number of infections
  totInfec <- length( vaccInfecCnt )
  
  ## restrict 'bounds' to apply to only infection totals we have
  bounds <- bounds[ bounds[, totInfecVar] <= totInfec , ]
  
  ## subset out the components of 'vaccInfecCnt' that correspond to the 
  ## infections totals at which we can stop for harm (the values in 
  ## bounds[[ totInfecVar ]]).   Compare the subsetted values to the 
  ## values in: bounds[[ vaccInfecVar ]].  If any are equal we stop.
  vaccInfecSub <- vaccInfecCnt[ bounds[[totInfecVar ]] ]
  harmBoundsHit <- ( vaccInfecSub == bounds[[ vaccInfecVar ]])
  
  ## remove NA (added by Yu on 12/28/2011)
  if ( any( harmBoundsHit, na.rm=TRUE ) ) {
    
    w.hit <- which( harmBoundsHit )[1]
    
    ## 'N' is the infection count at the harm bound hit, 
    ## 'V' is the vaccine total at the harm bound hit
    N <- bounds[[totInfecVar]][ w.hit ]
    V <- bounds[[vaccInfecVar]][ w.hit ]
    
    ## calculate the stop time at stage 1, i.e., 
    ## follow all the subjects enrolled before 'stopTime' until 18 month
    t.i = d$exit[ N ]
    D <- subset(d2, entry < t.i )  
    
    stopTimeStg1 = max(D$exit)
    
    list( isHarm = TRUE,
          stopTime = d$exit[ N ],
          stopTimeStg1 = stopTimeStg1,
          stopInfectCnt = N,
          stopInfectSplit = c(Vacc = V, Plac = N - V))
    
  } else {
    list( isHarm = FALSE )
  }
}

## Functions for harm boundary
## ------------------------------------------------------------
## Functions for continuous monitoring harm boundary:

##  Input:
##    Bounds: Vector of stopping bounds which correspond to the infection totals
##           1,2,3,... For infection totals where stopping is not permitted (for
##           small infection totals, e.g. 1-7) the 'Bound' should be set to NA.
##
##    p: The randomization probability for the active treatment, when only
##       considering it and the placebo group (in cases when other groups exist).
##       If there are 3 active groups and 1 placebo group, and the randomization
##       prob.s are: 2/9, 2/9, 2/9, 3/9, (the last  being for the placebo group),
##       then the null.p value for generating bounds for one active vs. placebo
##       would be:   2/9 /(2/9 + 3/9) = 2/5
##
##  Function pNS creates an object containing the values of the function P(n,s)
##  defined by Breslow (1970, JASA) as, for 0 <= s <= n <= N,  the probability
##  of the binomial random walk S_n reaching S_n = s without "absorption" into
##  the rejection region (i.e. without hitting any of the stopping bounds).
##
##  Notation:
##    N   = max number of samples
##    S_n = sum{ x_i, i=1,..,n }, and the {x_i} are iid bernoulli(p)
##
##  To calculate P(n,s), we need to specify the probability 'p' the value 'N',
##  and a vector 'B' of length N that specifies the stopping boundaries
##  associated with the values of 'n' from 1:N.  Hence, B[1] will be the stopping
##  value for n=1, B[2] the stopping value for n=2, ..., and B[N] the stopping
##  value for n=N.
##
##  The vector of boundaries should contain NAs for the initial components
##  up until you want to allow stopping (e.g. for n=1,2,3,...,8(?)
##  how far you go will depend on the value of 'p' used.
##
##  The object is a list with components:
##    Bounds - data.frame with columns 'n' = 1:N and
##              'StoppingBound'= contents of input argument 'Bound'
##    p - value of 'p' passed to function
##    N - value of 'N' passed to function
##    totalStopProb - the cumulative probability of stopping by the N-th infection
##   'Stop' - numeric vector of length N, with Stop[i] giving the prob.
##            of hitting the stopping bound (for first time) at n=i
##
##   If argument 'returnPns' is TRUE, then 'Pns' is included in the output obj.
##       Pns is not normally needed, which is why it's been made optional.
##   Pns =  list of length N, each sublist containing a numeric vector.
##            Pns[[ i ]] is a numeric vector of length i+1 containing values
##            of P(n,s), for n=i and s in 0,..,i+1
##
##          Note that values of s >= Bound[i] are zero as they are part of
##          the rejection region (hence there is zero probability of reaching
##          them without hitting the boundary).
##
pNS <- function(Bounds, p=.5, returnPns=FALSE)
{
  N <- length(Bounds)

  ## 08Dec2022 Doug Grove - Modified so that if all elements of vector 'Bounds'
  ##   are NA, then throw a warning (rather than an error) and return an object
  ## immediately.  This is being done as there are cases where we're trying to
  ## circumvent harm monitoring in which this happens.  
  if ( all(is.na(Bounds)) ){
    warning("The vector provided for argument 'Bounds' contains only NAs.\n",
            "This may be expected if, e.g., one is trying to circumvent harm monitoring.\n",
            "Otherwise, you should investigate.\n\n")
    return( list(
      totalStopProb = 0,
      Stop = numeric(N),
      Bounds  = data.frame(n=1:N, StoppingBound=Bounds),
      N = N,
      p = p ))
  }
  
  if ( any(Bounds > (1:N), na.rm=TRUE) )
      stop("The bounds provided do not appear to correspond to the infection",
           " totals (1,2,...,N).\n", "One of more of the bounds are larger",
           "than their corresponding infection total.  Exiting...\n\n")

  ## create 'Pns' and 'Stop'. Note that Stop has initial values of 0.
  Stop <- numeric(N)
  Pns  <- vector("list", length=N)

  ## 'first.bound' is the point at which stopping is first allowed
  ## We checked above to make sure that not all values of Bounds are NAs
  ## so we shouldn't have any problem with this...
  first.bound <- which( !is.na( Bounds ) )[1]

  if (first.bound == 1)
    stop("Cannot stop at the first test. Why would you want to?\n\n")


  ## Initialize the base level (i==1). 'i' indexes the total number of infections
  Pns[[ 1 ]] <- c("0" = (1-p), "1" = p)

  ## loop over remaining infection totals, building up the prob.s as we go
  for (i in 2:N) {

    ## initialize vector to store prob.s of the i+1 possible outcome of the
    ## binomial(i, p) variable (the values are: 0,...,i)
    Pns[[ i ]] <- structure( numeric(i+1), names= as.character(0:i) )

    ## Only need to compute for up to the smaller of i and Bounds[i]-1
    ## And since Bounds[i] has to be <= i, Bounds[i] is always < i, so long
    ## as Bounds[i] exists (i.e. as long as it's not NA)
    max.S <- ifelse( is.na(Bounds[i]), i, Bounds[i]-1)

    Pns[[i]]["0"] <- (1-p)*Pns[[i-1]]["0"]
    for (s in 1:max.S )
    {
      ## 'S' is character version of 's'. Used for indexing.
      S         <- as.character( s )
      S.minus.1 <- as.character( s-1 )

      if (s < i )
        Pns[[i]][ S ] <- p*Pns[[i-1]][S.minus.1] + (1-p)*Pns[[i-1]][S]
      else
        ## done when s == i (only happens if Bounds[i] is NA)
        Pns[[i]][ S ] <- p*Pns[[i-1]][S.minus.1]
    }

    ## 'Stop' is the prob. that we encounter a stopping bound for the first
    ## time at infection count 'i'.  Only computed if stopping is possible,
    ## else remains at initialized value of 0.
    if ( !is.na(Bounds[i]) )

      ## If this is the first infection count at which we allow stopping, then
      ## the stop value must be computed allowing for people using non-standard
      ## bounds (i.e. ones that that *don't* begin with bound[n] = n).
      if (i == first.bound && Bounds[i] < i ) {

        ## stop prob. is same as in the 'normal' case (below) *plus* the prob.
        ## that we were already at/above the number of "success" needed to stop
        ## at the previous infection total (i.e. i-1) (for which we should have
        ## been given a stopping bound, but weren't).
        Stop[i] <- p*Pns[[ i-1 ]][ as.character(Bounds[i]-1) ] +
                     sum( Pns[[ i-1 ]][as.character(Bounds[i]:(i-1))] )
      } else {
        ## if 'i' is not the the first total at which stopping is allowed, or
        ## if it is, but we have a proper bound (i.e. Bounds[i] = i) then we do
        ## we compute as this:
        Stop[ i ] <- p*Pns[[ i-1 ]][ as.character(Bounds[i]-1) ]
      }
  }
  outObj <- list(
              totalStopProb = sum(Stop),
              Stop = Stop,
              Bounds  = data.frame(n=1:N, StoppingBound=Bounds),
              N = N,
              p = p )

  ## If 'returnPns' was set to TRUE then add Pns to the output object
  if ( isTRUE(returnPns) )
      outObj$Pns <- Pns

  return( outObj )
}
####################### end of function 'pNS'#############################


### THIS USES 'nonConstBounds' framework to do constant-bounds.
####################### end of function 'pNS'#############################
### Actually, it's constant from 5 to 45, it's zero before that, so
### technically it is non-constant...


## 'x' is the total number of infections (vacc + placebo)
## 'alphaVals' is a vector of nominal (un-adjusted) p-value thresholds
##    to use in establishing cutoffs for harm-monitoring.  This vector
##    must have the same length as 'startHarmMonitor'.  The i-th value
##    of 'alphaVals' applies to the i-th interval defined by 'startHarmMonitor'
## 'startHarmMonitor' gives the endpoints of all intervals.
##    The starting point of the first interval is 1, and of the the i-th interval
##    (for i>1) is 1 + startHarmMonitor[i-1]
semiConstSpending <- function(x, alphaVals, startHarmMonitor )
{
  which.interval <- findInterval(x, c(1, startHarmMonitor),
                                 rightmost.closed=TRUE)
  alphaVals[ which.interval ]
}

getAlphaPerTest <- function(harmMonitorRange, null.p, totalAlpha=0.05) 
{
    getCumAlpha <- function( alphaPerTest, harmMonitorRange, null.p) { 
        harmBounds <- getHarmBound(
                          N = harmMonitorRange[2], 
                          per.test = alphaPerTest, 
                          harmBoundRange = harmMonitorRange,
                          null.p = null.p)
        return( harmBounds$cumStopProb[ nrow(harmBounds) ] - totalAlpha )
    }
    return( uniroot(getCumAlpha, interval = c(0.000001, 0.05), tol=1e-7,
                    harmMonitorRange = harmMonitorRange, 
                    null.p=null.p)$root )
}

getHarmBound <- function(N,  ##Total number of infections desired for harm monitoring
                         per.test, ## value for per-test alpha level
                         harmBoundRange,
                         null.p,
                         dataDir = NULL,
                         verbose = TRUE){
  ## Note: 
  ##   'null.p' = the probability that an infection occurs in a vaccinee, under the null 
  ##              hypothesis that infection is equally likely in vaccinees and placebo
  ##              recipients.  Hence 'null.p' equals the fraction of the populations that
  ##              has received vaccine.  This would be 0.5 under a 1:1 randomization, or
  ##              11/29 under a 11:18 randomization (V:P).
  
  ## We consider the total number of infected participants to be 'N' and
  ## assume apriori and equal likelihood of infection for vaccinees as for
  ## placebos.  So the number of infected vaccinees should follow a binomial
  ## distribution with size N and probability p=null.p (where 'null.p' is 
  ## the proportion of vaccinees in the trial).  We wish to have a total
  ## probability of a "type I error" (stopping the trial for 'harm' when there
  ## is no real difference between vaccine and placebo) of .05.  Our approach
  ## will be to test for harm after each new infection, and we wish to find a
  ## fixed 'alpha' value to use for each test, so that the overall prob. of a
  ## false positive over the course of the trial is .05.
  ##
  ## To do this, we will iteratively choose a value alpha, generate a set of
  ## 'stopping bounds' corresponding to that alpha, and then estimate the
  ## overall type I error rate for those bounds.  We then go back and adjust
  ## our alpha value (up or down) depending on whether our estimated type I
  ## error is too high or too low.  Repeat until desired accuracy is obtained.
  
  bound <- NULL
  
  ## create data frame to store results in
  bounds <- data.frame(totInfec=1:N, vaccInfecBound=NA, alphaLevelBound=NA,
                       nextHigherAlphaLevel=NA, cutoff=NA )
  
  for (j in 1:nrow(bounds))
  {
    totInfec <- bounds$totInfec[j]
    
    alphaVal <- semiConstSpending( totInfec, alphaVals=c(0, per.test),
                                   startHarmMonitor = harmBoundRange)
    
    ## we don't need to do the next few steps unless alphaVal is > 0
    if (alphaVal <= 0) next
    
    ## choose the lowerBound for searching for the next cutoff value.
    ## Under our framework it will always be the same as, or higher than
    ## the cutoff from the previous (smaller) value of 'totInfec'
    if ( is.null(bound) ) {
      lowerBnd <- ceiling( null.p * totInfec )
    } else lowerBnd <- bound
    
    # startVal <- min(totInfec, bound)
    # lowerBnd <- startVal - 2
    valSeq <- totInfec:lowerBnd
    
    upperTailProbs <- cumsum( dbinom(valSeq, totInfec, null.p) )
    signif <- ( upperTailProbs <= alphaVal )
    
    ## if we have at least one significant value then do...
    if ( isTRUE(signif[1]) )
    {
      ## get "largest" (last) index for which signif == TRUE
      largest.index <- max( which( signif ) )
      
      ## define 'bound' to be the infection count corresponding to
      ## the 'largest.index' (i.e. the smallest infection count for
      ## which we have significance at per-test-level 'alphaVal'
      bound <- valSeq[ largest.index ]
      
      bounds$vaccInfecBound[ j ] <- bound
      bounds$alphaLevelBound[ j ] <- upperTailProbs[ largest.index ]
      bounds$cutoff[ j ] <- alphaVal
    }
    
  }
  out <- pNS(Bound=bounds$vaccInfecBound, p=null.p)
  
  names(out$Bounds)[ names(out$Bounds)=="StoppingBound" ] <- "Nvacc"
  boundOut <- within(out$Bounds, {
                       Nplac <- n-Nvacc
                       RR <- round( Nvacc*(1-null.p)/(Nplac*null.p), digits=2)
                     })
  boundOut <- cbind( boundOut,
                     stopProb=round(out$Stop,4),
                     cumStopProb=round(cumsum(out$Stop),4),
                     alphaVal = bounds$cutoff )

  harmBounds <- boundOut
  #names(harmBounds)[1:3] <- c("N", "V", "P") 
   
  # Rename columns from 'old.names' to 'new.names'
  old.names <- c("n","Nvacc","Nplac")
  new.names <- c("N","V","P")
  names(harmBounds)[ match(old.names, names(harmBounds)) ] <- new.names

  ## reorder columns
  ord.columns <- c("N","V","P","RR","stopProb","cumStopProb","alphaVal")
  harmBounds <- harmBounds[, ord.columns]

  if (!is.null(dataDir)) {
      fileName <- sprintf("harmBounds_N=%d_alphaPerTest=%6.4f_pVacc=%4.2f.csv",
                          N, round(per.test, 4), round(null.p, 2) )

      write.csv(harmBounds, file.path(dataDir, fileName), row.names=FALSE)

      if (verbose) {
          cat("Potential-harm stopping boundaries saved in:\n", 
              file.path(dataDir, fileName), "\n\n") 
      }
  }
  return(harmBounds)  
}



## Function to determine the number (i.e. count) of the first event that
## meets all the following criteria:
##   (a) the cumulate percentage of events after "week1" having some property meets or
##       exceeds the value given in 'minPercent',  and
##   (b) the number/count of the event is at least as large as 'minCount'
##   (c) at least 2 infections after month 12 visit
##
## The arguments to the function are: 
## ----------------------------------
##   'x': an indicator vector (0/1 or FALSE/TRUE) reporting whether each
##        event has some property.  The vector should only contain info
##        on *EVENTS*. 
##
##   'minPercent': the threshold that must be met for the cumulative 
##                 percentage of "positive" entries (1s or TRUEs) in 'x'
##
##   'minCount':   the minimum event total that must be attained along
##                 with the minimum percentage threshold
##
##    week1 = 26:  first time cutoff for 'minPercent'          
##    nInfecAfterwk = 2:   number of infections required after week2
##    week2 = 52:          second time cutoff for 'nInfecAfterwk'
## 
## Example:  
##   We have defined the infection total at which we wish to perform the first
##   non-efficacy analysis for a vaccine as the smallest infection count for
##   which the percentage of infections occurring at least 6-months
##   post-enrollment reaches/exceeds 25% of infections, with at least 2 infections
##   after week 52 and the additional
##   criteria that the total must also be at least 50 (to be of sufficient 
##   size that the "large sample" normal-approximations used in the monitoring
##   criteria are reasonable).
##

getInfecCntFirstNonEff <- 
  function(  x,                 ## 'stage1' placebo and j-vaccine infections ordered by 'exit' time
             minPercent = 0,    ## min. fraction of infections that have occurred after 'week1'
             minCount = 0,      ## min. number of total infection required
             week1 = 26,        ## 
             nInfecAfterwk = 2, ## number of infections required after week2
             week2 = 52)        ## the first interim analysis can only occur after 'week2' 
  {
    ## number of infections at which first non-efficacy monitoring starts
    N1 <- NA
    
    ## time that reach 'minCnt' infections 
    nInfec <- nrow(x)
 
    futime <- x$exit - x$entry
    
    ## number of infections after 'week2'
    nPostWk2 <- sum( futime > week2 )
    
    if ( (nInfec >= minCount) && (nPostWk2 >= nInfecAfterwk) ) {
      
      ## create column containing fraction of infections post-week1
      postWk1_fract <- cumsum(futime>week1)/(1:nInfec) 
      
      ## the value of N1 is the smallest 'N' that satisfies all three conditions
      ## SIMULTANEOUSLY
      N1 <- which( (x$nInf >= minCount) & (postWk1_fract >= minPercent) &
                     ( cumsum(futime > week2) >= 2 ) )[ 1 ]
      
      ## if nothing meets all three criteria then 'N1' will have length 0
      if ( length(N1) == 0 )
        N1 <- NA
    } 
    return( N1 ) 
  }



## This function implements a "final" analysis, just based on stage 1 data, 
## which will be used for two purposes:
##
##   (1) to perform an "End-of-Stage-1" test, when the final ppt. completes
##       their stage 1 follow-up (if that timepoint is reached without
##       encountering a stopping boundary first).  This test will determine
##       whether the trial continues to stage2, and also whether there is 
##       efficacy at the level of the design alternative (finalVE)
##
##   (2) to perform the "last-stage1-analysis" in cases when a stopping boundary
##       is hit before the end-of-stage 1 is reached.  Because this test is to
##       determine if there is evidence of efficacy, there is no need to do the
##       test if stopping was for harm or nonEfficacy.  So we do this test only
##       if we stop for highEff during stage 1.
finalStage1Test <- function(dat, 
                       analysisType=c("final","stopTime"), 
                       stage1VE=NULL,
                       lowerVE,
                       alphaLevel, 
                       estimand=c("cox","cuminc","combined"),
                       lagTime=NULL,
                       time=NULL, 
                       boundLabels=c("Eff", "NonEffFinal"),
                       randFraction = null.p )

## Arguments:
##   dat            a trial or list of trials
##
##   analysisType   Which type of analysis should be done
##
##   stage1VE       Used to determine whether the trial should move to stage 2.
##                  Only applicable if analysisType == "Final".
##                  The lower bound of the VE confidence interval must exceed
##                  this value.  (Minimal VE threshhold for further study)
##
##   lowerVE        vector of VE values to test (using stage 1 data only) when
##                  the trial stops, either by hitting a bound of by reaching 
##                  the end of stage 1.
##
##   alphaLevel     alpha level to use in constructing the confidence interval
##                  for the VE estimate that will be compared to 'lowerVE' and
##                  (if applicable) 'stage1VE'.
##
##   estimand       which estimand(s) to use to estimate VE
##
##   lagTime        specifies the time, in weeks (since enrollemnt), that events must 
##                  occur after in order to be included in the analysis.
##
##   time           (Only used if analysisType == "stopTime") 
##                  The calendar time at which to censor the stage1 data prior
##                  to estimating VE and comparing to 'finalVE'
##
##   boundLabels    (Used only if analysisType == "final").  
##                  These are values to return for the result component 
##                  "boundHit", when the comparison to 'lowerVE' is TRUE,
##                  or FALSE respectively.  
##                  Call the lower bound for the CI of the VE estimates 'loCI':
##                     if loCI > lowerVE for *all* estimands specified,                
##                          then boundHit <- boundLabels[1]
##                     if loCI <= lowerVE, for *any* estimand specified
##                          then  boundHit <-  boundLabels[2]
##
##  randFraction    Randomization fraction for current treatment group relative
##                  to that of the placebo group.  This information is used only
##                  in the case of having 0 infections in one of the groups
##                  (active or placebo), so that a proper confidence interval 
##                  cannot be constructed.  In that case, we pretend that our
##                  VE estimate is:  1 - (Nvacc/Nplac)*(Rp/Rv)
##                  and that Nvacc ~ binomial(Nvacc+Nplac, p).
##                  We compute an exact CI for 'p' based on the Clopper-Pearson
##                  method and use it to compute a CI for VE as just defined.
{
    force( randFraction )

    analysisType  <- match.arg(analysisType)
    estimand      <- match.arg(estimand)

    ## Set indicator of end of stage 1 analysis
    EndStg1 <- (analysisType == "final")

    ## force evaluation of a few arguments
    force(alphaLevel)
    force(lowerVE)
    if ( !missing(stage1VE) ) 
        force(stage1VE)
    if ( !missing(time) ) 
        force(time)

    ## TEMP CODE - need to modify monitorTrials to specify a specific estimand
    ## (not "combined") to use for the final stage 1/stop-time  analysis
    if (estimand == "combined")
       estimand <- "cuminc"

    ## set indicators for estimation types to use
    cox  <- (estimand %in% c("cox",   "combined"))
    cir  <- (estimand %in% c("cuminc","combined"))

    ## if lagTime has been specified, then left censor the data of follow-up time
    if ( !is.null(lagTime) && lagTime > 0.01 ) {
      dat <- censorTrial(dat, times=lagTime, timeScale="follow-up", type="left")
    }

    if (analysisType=="stopTime") {
        if (is.null(time)) {
            stop("Argument 'time' must be specified when analysisType='stopTime'\n")
        }
        ## censor data to 'time'
        dat <- censorTrial(dat, times=time, timeScale="calendar")
    }

    if (cir) {
        cumIncOut <- cumIncRatio( dat, 
                         times="last", 
                         alphaLevel = alphaLevel,
                         randFraction = randFraction )

        CIRobj <- transform( cumIncOut,
                      infectTotal = nEvents,
                      infectSplit = paste0("Pl:Vx =", nEvents.2, ":", nEvents.1),
                      nPlac       = nEvents.2,
                      nVacc       = nEvents.1,
                      VE_CIR      = VE,
                      VE_CIR_loCI = VE_loCI,
                      VE_CIR_upCI = VE_upCI )

        if (EndStg1) {
            ## set efficacy result based on lower bound of CI, unless we have no
            ## CI because we have no infections in vacc. group, then set to TRUE
            effResult <- ifelse(CIRobj$VE_loCI > stage1VE, TRUE, FALSE )
        }

        altDetected_CIR <- ifelse(CIRobj$VE_loCI > lowerVE, TRUE, FALSE)

             
        ## subset to retain only the var.s created above
        CIRobj <- CIRobj[,c("infectTotal", "infectSplit", "nPlac", "nVacc",
                           "VE_CIR", "VE_CIR_loCI", "VE_CIR_upCI") ]
    }

    if (cox) {
        coxHRout <- coxHR( dat, 
                           alphaLevel = alphaLevel,
                           randFraction = randFraction )

        CoxObj <- transform( coxHRout,
                      infectTotal = nEvents,
                      infectSplit = paste0("Pl:Vx =", nEvents.2, ":", nEvents.1),
                      nPlac       = nEvents.2,
                      nVacc       = nEvents.1,
                      VE_Cox      = VE,
                      VE_Cox_loCI = VE_loCI,
                      VE_Cox_upCI = VE_upCI)

        if (EndStg1) {
            ## set efficacy result based on lower bound of CI, unless we have no
            ## CI because we have no infections in vacc. group, then set to TRUE
            effResult <- ifelse(CoxObj$VE_loCI > stage1VE, TRUE, FALSE )
        }

        altDetected_Cox <- ifelse( CoxObj$VE_loCI > lowerVE, TRUE, FALSE )


        ## subset to retain only the var.s created above
        CoxObj <- CoxObj[,c("infectTotal", "infectSplit", "nPlac", "nVacc",
                           "VE_Cox",     "VE_Cox_loCI", "VE_Cox_upCI") ]
    }

    if (cox && cir) {
        altDetected <- altDetected_CIR & altDetected_Cox
        ## cbind the output together, dropping duplicated columns
        summObj <- cbind( CIRobj, 
                          CoxObj[, !names(CoxObj) %in% names(CIRobj) ] ) 
    } else {
        if (cox) {
            altDetected <- altDetected_Cox
            summObj     <- CoxObj
        } else {
            altDetected <- altDetected_CIR
            summObj     <- CIRobj
        }
    } 

    names( altDetected ) <- as.character( lowerVE )

    if ( EndStg1 ) {
        
        ## using 'as.vector' to strip the names off of 'altDetected' so they don't
        ## transfer onto 'boundHit;
        boundHit <- ifelse(effResult, boundLabels[1], boundLabels[2])

        outList <- 
            list( boundWasHit = TRUE,
                  boundHit = boundHit,
                  boundType = "FinalStage1",
                  ## only assign stopTime for nonefficacy result
                  stopTime = ifelse(effResult, NA, max( dat$exit)),
                  stage1summ = summObj, 
                  altDetected = altDetected )
    } else {
        ## this analysis comes after a bound was already hit, so we're just 
        ## reporting out on the last analysis.  So we won't output things
        ## like 'boundHit', and stopTime, as those have already been defined
        ## We just output the summaryObject with estimates and CIs and 
        outList <- list( stage1summ = summObj,
                         altDetected = altDetected )
    }
    return( outList )
}


## This function is used to create a confidence interval for the VE when
## we have 0 infections in either the active or placebo groups. It is
## *ONLY* for that case - no other.  It is used because we cannot construct
## a 'usual' CI in this setting.
##
## The approach is based off estimating VE as:   
##   VE = 1 - (Nv/Np)*(Rp/Rv)  
##      with Nv, Np being infection counts in the vacc. and plac. groups
##      and Rp, Rv the randomization fractions of those grps. 
##
## We treat Nvacc as ~ binomial(N, p), where N = (Nv+Np),  and compute an
## exact confidence interval for p using the Clopper-Pearson approach.
## We then use that CI to derive a CI for VE, based on the equation given
## a few lines above here.
##
##  N         = total infections 
##  Nv        = number of infections in vaccinee group
##  alpha     = the 2-sided confidence level
##  randFraction = randomization fraction of the current active trt group relative
##              to active + placebo.
## 
VEci_binom <- function(N, Nv, randFraction=0.5, alpha=0.05) {

    if ( any( (Nv != 0  &  Nv != N) ) )
        stop("This function is only usable when Nv equals 0 or N \n\n")
    
    L <- length( N )
    lowerBound <- numeric( L )
    upperBound <- numeric( L )

    ## logical indicators of 0 and N infections
    ind0 <- (Nv == 0) 
    indN <- (Nv == N)

    ## Bounds first for binomial 'p'
    lowerBound[ ind0 ] <- 0
    upperBound[ ind0 ] <- 1 - (alpha/2)^(1/N)

    lowerBound[ indN ] <- (alpha/2)^(1/N)
    upperBound[ indN ] <- 1 
 
    ## This is the reversed CI for binomial 'p', the make
    ## next set of computations simpler.
    revCI <- cbind(upperBound, lowerBound)

    ## Then transform it to get CI for VE
    ## First re-write VE equation as:  
    ##   VE = 1 - (Nv/N)/((N-Nv)/N)*Rp/Rv =   1 - [p/(1-p)] * Rp/Rv
    ## 
    ## then let (pLL, pUL) = lower-limit and upper-limit of the CI for 'p'
    ## and note:
    ##  since - p/(1-p) is decreasing in 'p', the lower/upper CI limits 
    ##    for the CI of -p/(1-p) will be: (-pUL/(1-pUL), -pLL/(1-pLL))  
    ##    and the CI for VE comes by direct manipulation of this
   
    ci <- cbind(lowerBound, upperBound)

    VE_ci <- 1 - revCI/(1-revCI) * (1-randFraction)/randFraction

    return( VE_ci )
}




## Censors trial data.frames to specified times:
##
##   d:  either a trial data.frame (i.e. data from a single trial) or a list
##       of such data.frames (each representing a trial)
##
##  times: gives the time(s) to use for censoring.  
## 
##         If 'times' is a vector, then the following behavior will be obtained:
##            every trial will be censored using every value of 'times'.
##
##         If 'times' is a list, then it must have length equal to length(d),
##         and the resultant behavior will be to apply the times from times[[j]]
##         to trial d[[j]] - i.e. each component of 'times' specifies a set of
##         times to use in censoring a particular trial.
## 
##  arms:  UNDER CONSTRUCTION - NOT CURRENTLY IN USE - WILL BE IGNORED.
##         vector of codes of the treatment arms that should be censored.
##         Default is to censor all arms.
##
##  timeScale:  Specifies whether the censoring should be done on calendar time 
##         or follow-up time - i.e. whether the values in 'times' should be taken
##         as time since the study began (calendar time), or the time since each
##         participant enrolled (follow-up time).
##
##  type:  Type of censoring to perform. Possible values are 'right' and 'left'
##         The default is 'right'.  Right censoring discards data accrued after
##         a given time, whereas left censoring discards data accrued *before*
##         a given time.  Interval censoring can be achieved by employing the
##         function twice, once with right censoring and once with left. 
##
##   ...:  Not currently used
##
##  SIMPLIFY:  Indicates whether a data.frame should be returned instead of a 
##             list of length 1, in the case where a single data.frame (or list 
##             length 1) and single time are provided by the user.
##             Or, in the case of mulitple trials and a single time per trial,
##             whether the nested list structure should be simplified to a 
##             un-nested list (i.e. the inner list be "unlist()ed")

censorTrial <- function(d, times, arms=NULL, timeScale=c("calendar","follow-up"),
                        type=c("right","left"), ..., SIMPLIFY=TRUE)
{
    ## To handle the case when 'd' is a list of trials, we force it to always
    ## be a list (i.e. we coerce to list when needed)
    if ( is.data.frame(d) )
        d <- list(d)

    ## determine how many trials we're censoring
    nTrials <- length(d)
    nTimes  <- length(times) 

    if (is.list(times) && nTimes != nTrials)
      stop("Argument 'times' is a list and has length different than 'd' - ",
           "this is not permitted\n")

    ## if 'arms' has been specified, subset data using it
    #if (!is.null(arms)) {
    #  d <- lapply(d, function(x) x[x$trt %in% arms, ] )
    #}
             
    timeScale <- match.arg(timeScale)
    type <- match.arg(type)


    ### Define 4 censoring functions, for all combinations of type and timeScale

    ## (1) function to right censor a single trial at a single calendar time
    right.censor.Calendar <- function(trial, time) {
          ## restrict to those enrolled before 'time'
          cens <- trial[trial$entry<time, ]

          ## censor events that occur after 'time'
          cens$event[ cens$exit > time ] <- 0

          ## alter 'exit' time to reflect censoring
          cens$exit <- pmin( cens$exit, time)

          return(cens)
    }

    ## (2) function to left censor a single trial at a single calendar time
    left.censor.Calendar <- function(trial, time) {
          ## restrict to those still active at time 'time'
          cens <- trial[trial$exit > time, ]

          ## alter 'entry' time to reflect censoring, it must be >= time
          cens$entry <- pmax( cens$entry, time)

          return(cens)
    }

    ## (3) function to right censor a single trial at a single follow-up time
    ## follow-up time = (exit_time - entry_time)
    right.censor.Followup <- function(trial, time) {

          ## No exclusion of participants
          cens <- trial

          ## censor events with follow-up time greater than 'time'
          cens$event[ (cens$exit - cens$entry) > time ] <- 0

          ## alter 'exit' time to reflect censoring
          cens$exit <- pmin( cens$exit, cens$entry + time ) 

          return(cens)
    }

    ## (4) function to left censor a single trial at a single follow-up time
    left.censor.Followup <- function(trial, time) {
          ## restrict to participants with at least 'time' followup
          cens <- trial[ (trial$exit - trial$entry) > time, ]

          ## -- not sure that we should do this -- leave it out for now...
          ## alter 'entry' time to reflect censoring (move up by 'time') to
          ## reduce FU by 'time' 
          ## cens$entry <- cens$entry + time 

          return(cens)
    }
    
    ## choose appropriate censoring function
    censFunc <- switch( type, 
                  "right"= switch(timeScale, "calendar"=right.censor.Calendar,
                                    "follow-up"=right.censor.Followup, NA ),
                  "left" = switch(timeScale, "calendar"=left.censor.Calendar,
                                    "follow-up"=left.censor.Followup, NA ) )


    ## apply censoring
    if (!is.list(times)) {
        ## apply all times to all trials.  This returns a nested list:
        ##   outer list over trials, inner list over times within trial
        cens <- lapply(d, FUN = function(d.i, time, cenFunc) 
                         lapply(X=time, FUN=cenFunc, trial=d.i),
                       time=times, cenFunc=censFunc )
    } else {
        ## otherwise if 'times' is a list (same length as 'd') do: 
        cens <- mapply(FUN= function(d.i, time, cenFunc) 
                          lapply(X=time, FUN=cenFunc, trial=d.i ),
                       d.i=d, time=times, 
                       MoreArgs=list(cenFunc=censFunc),
                       SIMPLIFY=FALSE, USE.NAMES=FALSE)
    }

  if ( SIMPLIFY ) {
    ## - If the output is only one data.frame, remove it from its list,
    ## - If the output is one trial at mulitple times, remove the outer list.
    ## - If the output is multiple trials each at a single time, then remove
    ##     the inner list.
    ## [note: 'unlist()' not used as it strips off the data.frame attribute ]
    if (nTrials==1 && nTimes==1) 
        return( cens[[1]][[1]] )
    

    if (nTrials==1 && nTimes>1)
        return( cens[[1]] )

    if (nTrials>1 && (nTimes==1 || 
          (is.list(times) && all(unlist(lapply(times,length))==1)) ) ) 
        return( lapply(cens, function(x) x[[1]] ) )
    
    return( cens )

  } else {
    return ( cens )
  }
}

# equation to be solved
# all arguments are scalars to have a single equation
eqEstHR <- function(hr, boundType, nullHR, alpha, nEvents, p){
  confLim <- log(hr) + ifelse(boundType=="eff", 1, -1) * qnorm(1 - alpha / 2) * 
    sqrt((1 / nEvents) * (2 + p * hr / (1 - p) + (1 - p) / (p * hr)))
  return(confLim - log(nullHR))
}

# get the root of 'eqEstHR' wrt its first argument
# all arguments are scalars to solve a single equation
getEstHR <- function(boundType, nullHR, alpha, nEvents, randFrac){
  
  int <- if (boundType=="eff"){ c(0.05, nullHR) } else { c(nullHR, 5) }
  estHR <- try(uniroot(eqEstHR, interval=int, boundType=boundType, nullHR=nullHR, 
                       alpha=alpha, nEvents=nEvents, p=randFrac, tol=1e-9), silent=TRUE)
  root <- ifelse(inherits(estHR, "try-error"), NA, estHR$root)
  return(root)
}

#' Estimate hazard ratios at an efficacy or non-efficacy stopping boundary defined using the Wald CI approach in an event-driven 2-arm trial design
#' 
#' Assuming an exponential survival model, hazard ratios are estimated at an efficacy or non-efficacy stopping boundary, defined using the Wald CI approach, at each group-sequential analysis in an event-driven 2-arm trial design.
#' 
#' @param boundType a character string specifying if the one-sided null hypothesis is of the form \eqn{H_0: \theta \geq \theta_0} (\code{"eff"}, default) or \eqn{H_0: \theta \leq \theta_0} (\code{"nonEff"}), where \eqn{\theta} is the hazard ratio and \eqn{\theta_0} is specified by \code{nullHR}
#' @param nullHR a nonnegative numeric value specifying the hazard ratio, \eqn{\theta_0}, under the null hypothesis. If the null hypothesis differs across multiple analyses, \code{nullHR} may be a numeric vector of equal length as \code{alpha}.
#' @param alpha a numeric vector of two-sided nominal significance levels (e.g., those defined by the O'Brien-Fleming group-sequential test)
#' @param nEvents a numeric vector of numbers of events at which analyses are performed. The lengths of \code{alpha} and \code{nEvents} must be the same, and the components of the two vectors must correspond to each other.
#' @param randFrac a fraction of subjects randomized to the group considered in the hazard ratio's numerator
#' 
#' @details Using an exponential survival model and sample estimates \eqn{\widehat{\lambda}_1} and \eqn{\widehat{\lambda}_2} of the group-specific hazard rates, the asymptotic variance of the log hazard ratio estimator 
#' \eqn{\log \widehat{\theta} = \log (\widehat{\lambda}_1 / \widehat{\lambda}_2)} is employed together with the approximation \eqn{E\{\delta | \lambda_1\} = (\widehat{\lambda}_1 / \widehat{\lambda}_2)\, E\{\delta | \lambda_2\}}.
#' The resultant variance approximation is \eqn{\mathrm{var} \{\log \widehat{\theta}\} = (1/D) \{ 2 + p \, \widehat{\theta} / (1 - p) + (1 - p) / (p \, \widehat{\theta}) \}},
#' where \eqn{D} is the arm-pooled number of events \code{nEvents} and \eqn{p} is the randomization fraction \code{randFrac}.
#' 
#' @return A data frame (with rows corresponding to the components of \code{alpha} and \code{nEvents}) of point estimates of the hazard ratio at the stopping boundary and the pertaining monitoring-adjusted \eqn{(1 - \alpha^{\ast}) \times 100\%} confidence intervals, where \eqn{\alpha^{\ast}} is the overall two-sided type 1 error rate.
#' 
#' @examples
#' ## O'Brien-Fleming test of H0: HR >= 0.7 (for efficacy) at 
#' ## 35%, 70%, and 100% of the total information under 1:1 randomization
#' estHRbound("eff", nullHR=0.7, alpha=c(0.00030, 0.01466, 0.04548), 
#'            nEvents=c(53, 106, 151), randFrac=0.5)
#' 
#' ## O'Brien-Fleming test of H0: HR <= 0.5 (for non-efficacy) at
#' ## 35%, 70%, and 100% of the total information under 1:1 randomization
#' estHRbound("nonEff", nullHR=0.5, alpha=c(0.00030, 0.01466, 0.04548), 
#'            nEvents=c(53, 106, 151), randFrac=0.5)
#'
#' @export
estHRbound <- function(boundType=c("eff", "nonEff"), nullHR, alpha, nEvents, randFrac){
  if (length(alpha)!=length(nEvents)){
    stop("The arguments 'alpha' and 'nEvents' must be of equal length.")
  }
  
  if (length(nullHR)>1){
    if (length(nullHR)!=length(alpha)){
      stop("Since 'nullHR' is a vector, it must be of the same length as 'alpha'.\nAlternatively, 'nullHR' may be a scalar.")
    }
  } else {
    nullHR <- rep(nullHR, length(alpha))
  }
  
  boundType <- match.arg(boundType)
  
  estHR <- NULL
  for (i in 1:length(alpha)){
    estHR <- c(estHR, getEstHR(boundType, nullHR[i], alpha[i], nEvents[i], randFrac))
  }
  
  ciHR <- exp(log(estHR) + qnorm(1 - alpha / 2) * sqrt((1 / nEvents) * (2 + randFrac * estHR / (1 - randFrac) + (1 - randFrac) / (randFrac * estHR))) %o% c(-1, 1))
  
  out <- data.frame(estHR, ciHR)
  colnames(out) <- c("estHR", "lb", "ub")
  
  return(out)
}
mjuraska/seqDesign documentation built on Dec. 14, 2022, 4:26 p.m.