R/design_optimizer.R

Defines functions is.scalar is.whole.number in.open.interval reals.to.probability squash get.sample.sizes.per.stage list.potential.trials list.potential.trials.survival get.trial.criteria linear.threshold.penalty design.performance.continuous.or.binary.outcome.type design.performance.survival.outcome.type triage.based.on.outcome.type transform.parameters power.penalized.weighted get.optim.trajectory sa.optimize sa.temperature

#Uses: library(mvtnorm), library(plyr)
#
# See https://svn.r-project.org/R/trunk/src/appl/optim.c and ?optim for
# details on simulated annealing.
#
# In brief, an initial temperature ti is supplied: if no function for generating
# new candidates is supplied, the default is a gaussian kernel centered at the
# current value of the parameter (see genptry - lines 48-77; call to gaussian
# kernel at line 75).
# 
# The actual function which performs simulated annealing is samin on lines
# 722-788. 
#
#
#
#


### is.scalar ##################################################################
# Description: test whether an object is a scalar, i.e. an atomic value with
#   exactly one element.
# Input:
#   x (object): object to test
# Output:
#   (logical): TRUE if x is atomic and of length one, FALSE otherwise
is.scalar <- function(x) {
  is.atomic(x) && length(x) == 1L
}




### is.whole.number ############################################################
# Description: test whether numeric values are whole numbers up to a tolerance
#   (see ?is.integer for why this differs from integer type checking).
# Input:
#   x (numeric): numeric value(s)
#   tol (numeric): tolerance for the distance to the nearest integer
# Output:
#   (logical): elementwise, is each value within tol of a whole number
is.whole.number <-
  function(x, tol=.Machine$double.eps^0.5) {
    stopifnot(is.numeric(x))
    # Distance from each value to the nearest integer
    deviation <- abs(x - round(x))
    deviation < tol
  }




### in.open.interval ###########################################################
# Description: test whether numeric values fall strictly inside the open
#   interval (lb, ub).
# Input:
#   x (numeric): value(s) to test
#   lb (numeric): lower bound (excluded)
#   ub (numeric): upper bound (excluded); must exceed lb
# Output:
#   (logical): elementwise, is each value strictly between lb and ub
in.open.interval <-
  function(x, lb=0, ub=1) {
    stopifnot(is.numeric(x), is.numeric(lb), is.numeric(ub), ub>lb)
    # Strictly inside means neither at/below lb nor at/above ub
    !(x <= lb | x >= ub)
  }





### reals.to.probability #######################################################
# Description: map matrix of reals to a vector of probabilities by a logistic
#   transformation and normalization.
# Input:
#   x (numeric matrix): real values to be mapped
# Output:
#   (numeric matrix): non-negative values in (0, 1) summing to 1
reals.to.probability <- function(x) {
  # Evaluate the logistic transform once rather than twice
  p <- plogis(x)
  p/sum(p)
}




### squash #####################################################################
# Description: map matrix of reals to an interval (min, max) using a linear
#   threshold (method="linear") or a logistic function (method="logit").
#   The method argument is resolved with match.arg(), so an invalid method
#   now raises an error instead of silently returning NULL from switch().
# Input:
#   x (numeric matrix): real values to be mapped to (x.min, x.max)
#   x.min (numeric scalar): lower bound of the target interval
#   x.max (numeric scalar): upper bound of the target interval; must exceed
#     x.min
#   method (string): "linear" (default) clips values to [x.min, x.max];
#     "logit" maps smoothly via a logistic centered at the interval midpoint
#     with scale (x.max-x.min)/6
# Output:
#   (numeric matrix): values in (x.min, x.max)
squash <- 
  function(x,
           x.min=0,
           x.max=1,
           method=c("linear", "logit")) {
  stopifnot(is.numeric(x), 
            is.numeric(x.min),
            is.numeric(x.max),
            x.max>x.min)
  # match.arg() defaults to "linear", allows partial matching, and errors
  # on unrecognized methods (previously switch() returned NULL silently).
  method <- match.arg(method)
  switch(method,
         linear=pmin(pmax(x.min, x), x.max), # Linear Interpolation 
         logit=x.min + (x.max-x.min)*plogis(x, location=(x.max+x.min)/2,
                                            scale=(x.max-x.min)/6)
  )
}




### get.sample.sizes.per.stage #################################################
# Description: Calculate the number of patients per arm in multi-stage clinical
#   trials with one or more treatments compared with controls in one population,
#   potentially divided into disjoint subgroups. Single subpopulations and 
#   single-stage trials are also supported.
#
# Input:
#   n.arms (numeric scalar - whole): number of total arms (L+1)
#   n.per.arm (numeric scalar - whole): number of patients per arm
#   subpopulation.sizes (numeric vector - vector of probabilities): the 
#     proportion of the population in each disjoint subgroup. Must sum to 1.
#     Using subpopulation.sizes = NULL or 1 gives a trial in a single
#     population.
#   interim.info.times (numeric vector - sorted proportions, ending in 1): The 
#     proportion of patients enrolled by each interim analysis. If no interim
#     analyses are desired, set to NULL or 1. 
#   accrual.rate (numeric scalar): number of patients enrolled per year
#   delay - numeric : time in years from randomization to observation of primary
#     outcome
#   warn.last.accrual.before.interim (logical scalar): warn if an interim 
#     analysis occurs after enrollment has stopped: such an interim analysis can
#     not reduce sample size, but may reduce costs of follow-up visits.
#
# Output: 
#   analytic.n.per.stage ([K x J(L+1)] matrix), where K is the number of stages,
#     J is the number of subpopulations, and L is the number of treatments
#     compared to the control arm. For continuous or binary outcomes, this 
#     represents the patients with primary outcome at each interim analysis; for
#     survival outcome, this represents number of patients enrolled. Each row is
#     one stage of the study, with each column indicating the number of outcomes
#     available in that (treatment x subpopulation) stratum. Columns are
#     arranged by subpopulation (S1, ..., SJ) within treatments (T0, ..., TL):
#
#     stage 1: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#     stage 2: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#       ...
#     stage k: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#
#   total.n.per.stage ([K x J(L+1)] matrix): all enrolled patients at each
#     interim analysis, irrespective of when the primary outcome is observed.
#     stage 1: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#     stage 2: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#       ...
#     stage k: T0S1 T0S2 ... T0SJ ... TLS1 TLS2 ... TLSJ
#
#   accrual.years (K x 1 numeric vector): time to accrue the patients in each
#     stage of a trial, assuming no interim stopping.
#
#   outcome.years (K x 1 numeric vector): time to observe the outcomes of the
#     patients accrued in each of K stages of a trial: may exceed the length
#     of enrollment, depending on enrollment rate and delay. Again, this assumes
#     no interim stopping.
#   enrollment.period.normalized (numeric scalar): the enrollment period
#     normalized to be a proportion of the time until the final analysis time 
#     (e.g. if the enrollment period is 4 years and the time of the last
#     analysis is 6 years then enrollment.period.normalized = 4/6.)
#   
#
# NOTE: When delay = 0, total.n.per.stage and analytic.n.per.stage will be
#   equal when sample sizes are integers: when not integers, the number of
#   observed outcomes is rounded down using floor() and the number of enrolled
#   participants is rounded up using ceiling() to be conservative. For survival
#   outcomes, delay should be set to 0.
get.sample.sizes.per.stage <-
  function(n.arms, n.per.arm, subpopulation.sizes,
           interim.info.times, accrual.rate, delay,
           warn.last.accrual.before.interim=TRUE,
           restrict.enrollment=FALSE,
           enrollment.period.normalized=NULL){

    max.arms <- 8 # This can be increased, but not beyond 26 (=dim(letters))

    # Input Checks: all scalar arguments must be finite scalars of the
    # expected type; n.arms and n.per.arm must be positive whole numbers.
    stopifnot(is.whole.number(n.arms) & n.arms > 0 & n.arms <= max.arms,
              is.finite(n.arms) & is.scalar(n.arms),
              is.whole.number(n.per.arm) & n.per.arm > 0,
              is.finite(n.per.arm) & is.scalar(n.per.arm),
              is.numeric(accrual.rate) & accrual.rate > 0,
              is.finite(accrual.rate) & is.scalar(accrual.rate),
              is.numeric(delay) & delay >=0,
              is.finite(delay) & is.scalar(delay),
              is.logical(warn.last.accrual.before.interim) &
                is.scalar(warn.last.accrual.before.interim))
    
    # Check interim.info.times and subpopulation.sizes for coherence.
    # NULL subpopulation.sizes collapses to a single population of size 1.
    if(!is.null(subpopulation.sizes)){
      # NOTE(review): all.equal() returns a character string (not FALSE) on
      # mismatch, which stopifnot() treats as a failure; isTRUE(all.equal(...))
      # would be the more explicit idiom.
      stopifnot(is.numeric(subpopulation.sizes),
                length(subpopulation.sizes) >= 1,
                all.equal(sum(subpopulation.sizes), 1))
      if(length(subpopulation.sizes)>1) {
        stopifnot(in.open.interval(subpopulation.sizes, lb=0, ub=1))
      }
    } else {
      subpopulation.sizes <- 1
    }
    
    if(!is.null(interim.info.times)){
      # Numeric, Not empty, strictly increasing
      stopifnot(is.numeric(interim.info.times),
                length(interim.info.times) >= 1, 
                !is.unsorted(interim.info.times, strictly=TRUE))
      if(length(interim.info.times)>=1) {
        # Positive, less than or equal to 1, last value == 1.
        # If the final analysis time (1) was not supplied, append it.
        stopifnot(sum(interim.info.times<0)==0,
                  mean(interim.info.times<=1)==1)
        if(tail(interim.info.times, 1)!=1) interim.info.times <-
            c(interim.info.times, 1)
      }
    } else {
      interim.info.times <- 1
    }
    
    # Adjust interim.info.times if enrollment.period might be smaller
    # than some of the analysis time: analyses falling after the end of
    # enrollment are capped at full information (1).
    if(!is.null(enrollment.period.normalized)){
      interim.info.times <-
        pmin(interim.info.times/enrollment.period.normalized, 1)
    }

    # Enforce n.per.arm limit: round each subpopulation's share, then give
    # the last subpopulation the remainder so the shares sum to n.per.arm.
    if(length(subpopulation.sizes)>1){
      max.per.subpop <- round(head(subpopulation.sizes, -1)*n.per.arm)
      max.per.subpop <- c(max.per.subpop, n.per.arm-sum(max.per.subpop))
    } else max.per.subpop <- n.per.arm
    
    # See if a subpopulation is empty or near empty
    if(sum(round(subpopulation.sizes*n.per.arm)<1)){
      warning(paste0("Empty subgroup: Check n.per.arm (", n.per.arm,
                     ") and subpopulation.sizes (", 
                     paste(subpopulation.sizes, collapse=", "), ")."))
    }
    
    # Calculate time to accrue n.per.arm.
    # NOTE(review): 'stages' is computed here but never used later in this
    # function; candidate for removal.
    stages <- length(interim.info.times) + 1*(tail(interim.info.times, 1)!=1)
    n.subgroups <- length(subpopulation.sizes)
    accrual.years <- n.arms*interim.info.times*n.per.arm/accrual.rate
    outcome.years <- accrual.years + delay
    
    # Calculate total patients accrued at interim analyses: per-arm,
    # per-subpopulation counts (rounded down) at each outcome time.
    accrued.patients <-
      floor(kronecker(outcome.years*accrual.rate/n.arms,
                        t(subpopulation.sizes)))
    
    # Cap max enrollment - can overshoot with long delay: any row exceeding
    # the per-subpopulation cap is replaced by the cap itself.
    for(i in 1:nrow(accrued.patients)) {
      if(sum(accrued.patients[i,] > max.per.subpop)>0) {
        accrued.patients[i,] <- max.per.subpop
      }
    }
    
    # Count the number of outcomes observed at each information time
    accrued.outcomes <-  
      floor(kronecker(interim.info.times,
                      t(subpopulation.sizes))*n.per.arm)
    
    # Enforce n.per.arm limit: at the final stage, the observed outcomes
    # equal the total patients enrolled.
    accrued.outcomes[nrow(accrued.outcomes),] <- tail(accrued.patients, 1)
    
    # Apply names to each arm, subgroup; C for control is column 1 in each
    # block. Arm labels run C, A, B, D, E, ... (C moved to the front).
    arm.names <- c(LETTERS[3], LETTERS[1:max.arms][-3])[1:n.arms]
    analytic.n.per.stage <- kronecker(t(rep(1, n.arms)), accrued.outcomes)
    
    colnames(analytic.n.per.stage) <-
      paste0(rep(arm.names, each=n.subgroups), rep(1:n.subgroups, n.arms))
    
    total.n.per.stage <- kronecker(t(rep(1, n.arms)), accrued.patients)
    colnames(total.n.per.stage) <-
      paste0(rep(arm.names, each=n.subgroups), rep(1:n.subgroups, n.arms))

    # Optionally warn if accrual completes before interim analysis - 
    # If all patients are enrolled before an interim analysis, sample size
    # can't be reduced by efficacy/futility stopping.
    #if(warn.last.accrual.before.interim &
    #   sum(duplicated(total.n.per.stage)>0)) {
    #  warning("Accrual completed before interim analyses.")
    #}
    
    if(sum(analytic.n.per.stage==0)>0){
      warning("Some stage did not accrue a patient in one subpopulation.")
    }
    
    # Re-write in terms of labels instead of column indices
    # NOTE(review): the 2*k indexing below selects every second column and
    # subtracts the stage-1 counts; this appears to assume exactly 2
    # subpopulations per arm - confirm before using restrict.enrollment with
    # other configurations.
    if(restrict.enrollment == TRUE){
      for(k in 1:n.arms){
        analytic.n.per.stage[, 2*k] <-
          (analytic.n.per.stage[, 2*k] - analytic.n.per.stage[1, 2*k])
        total.n.per.stage[, 2*k] <- 
          (total.n.per.stage[, 2*k] - total.n.per.stage[1, 2*k])
      }
    }
    
    # Return sample-size matrices plus the accrual and outcome timelines
    return(list(analytic.n.per.stage=analytic.n.per.stage,
                total.n.per.stage=total.n.per.stage,
                accrual.years=accrual.years,
                outcome.years=outcome.years))
  }




### list.potential.trials ######################################################
# Description: Create a look-up table that gives the sample size and duration
# of each potential outcome of a clinical trial. This can be used to calculate
# ESS and duration from a matrix of simulated trial results. If each row is a
# simulated trial, and each column specifies the stage at which each (arm x 
# subgroup) strata ended the trial, simply tabulate the number of outcomes of
# each type, merge with the look-up table, and then use weighted.mean() on
# sample size and duration.
# Input:
#   total.n.per.stage (K x J(L+1) numeric matrix): enrolled patients per
#     (arm x subpopulation) column at each of K stages; columns named as in
#     get.sample.sizes.per.stage ("C1", "A1", ...)
#   accrual.rate (numeric scalar): patients enrolled per year in the
#     combined population
#   subpopulation.sizes (numeric vector): proportion of the population in
#     each disjoint subgroup
# Output:
#   potential.trials (data.frame): one row per possible trial outcome (the
#     stage at which each stratum stopped), with sample.size and duration
#     columns appended.
list.potential.trials <-
  function(total.n.per.stage,
           accrual.rate,
           subpopulation.sizes){
    
    # Get number of arms, subgroups, and stages from total.n.per.stage:
    # the first character of a column name is the arm, the rest the subgroup.
    n.arms <- 
      length(unique(substring(colnames(total.n.per.stage), first=1, last=1)))
    n.subgroups <-
      length(unique(substring(colnames(total.n.per.stage), first=2)))
    stages <- nrow(total.n.per.stage)
    
    # Get the increment in sample size at each stage (first differences,
    # with a zero row prepended so stage 1 is its own increment)
    incremental.n <- diff(rbind(0, total.n.per.stage))
    
    # Create a list of columns for each subgroup;
    # Vector of columns for each control arm ("C" prefix)
    subgroup.col <- list()
    for(j in 1:n.subgroups) subgroup.col[[j]] <-
      which(substring(colnames(total.n.per.stage), first=2)==j)
    control.col <-
      match(paste0("C", 1:n.subgroups), colnames(total.n.per.stage))
    
    ### Calculate possible sample sizes, durations
    # Create all potential outcomes: Arm x Subpopulation x Stage combinations
    # via the Cartesian product of stopping stages across columns.
    all.trials <- matrix(1:stages, nrow=stages, ncol=n.subgroups*n.arms)
    colnames(all.trials) <- colnames(total.n.per.stage)
    all.trials <- expand.grid(data.frame(all.trials))
    
    if(nrow(all.trials)>1){
      
      # Restrict such that active treatments can't enroll without controls:
      # Keep only trials where the last stage reached by any treatment is equal
      # to the last stage reached by control
      for(j in 1:n.subgroups){
        tx.cols <- setdiff(subgroup.col[[j]], control.col[j])
        if(length(tx.cols)>0){
          all.trials <-
            all.trials[which(apply(as.matrix(all.trials[,tx.cols]), 1, max) == 
                               all.trials[,control.col[j]]),]
        }
      }
      
      # Per-subgroup duration of the current stage (columns) for each
      # candidate trial (rows); trial.duration accumulates across stages.
      stage.duration <- matrix(0, nrow=nrow(all.trials), ncol=n.subgroups)
      trial.duration <- matrix(0, nrow=nrow(all.trials))
      
      for (k in 1:stages){
        # Incremental enrollment in stage k, zeroed out for strata that
        # have already stopped before stage k
        patients.stage.k <- 
          (all.trials >= k)* # If Treatment x Subpopulation strata in stage k
          kronecker(matrix(1, nrow=nrow(all.trials)), 
                    t(incremental.n[k,])) # Incremental sample size by strata
        
        # Count arms enrolling; Get accrual rate in subpopulation by adjusting
        # subpopulation accrual rate by arms enrolling in current stage
        for (j in 1:n.subgroups){
          arms.enrolling <- 
            rowSums(as.matrix((all.trials >= k)[, subgroup.col[[j]]]))
          randomization.rate <- 
            (accrual.rate)*subpopulation.sizes[j]*n.arms/arms.enrolling
          stage.duration[,j] <- 
            rowSums(as.matrix(patients.stage.k[, subgroup.col[[j]]]))/
            randomization.rate
        }
        # The stage lasts as long as its slowest subgroup
        trial.duration <- trial.duration + apply(stage.duration, 1, max)
      }
      
      # Calculate sample size of each trial: for each row, sum the
      # per-column enrollment at each column's stopping stage
      potential.trials <-
        data.frame(all.trials,
                   sample.size=apply(t(all.trials), 2, function(x) 
                     sum(total.n.per.stage[cbind(x, 1:ncol(all.trials))])),
                   duration=trial.duration)

    } else {
      # Single-stage, single-outcome case: duration is total enrollment
      # divided by the accrual rate
      potential.trials <-
        data.frame(all.trials,
                   sample.size=sum(total.n.per.stage),
                   duration=sum(total.n.per.stage)/accrual.rate)
    }
    return(potential.trials)
}



## Special Function for computing list of potential trials
## including duration, for survival outcomes: 
## only validated for 1 treatment versus control case
# Input:
#   total.n.per.stage (K x J(L+1) numeric matrix): enrolled patients per
#     (arm x subpopulation) column at each of K stages
#   accrual.rate (numeric scalar): patients enrolled per year
#   subpopulation.sizes (numeric vector): proportion of population per
#     subgroup
#   time (numeric vector): per-stage analysis times indexed by stage number
#     -- presumably calendar time of each analysis; confirm against caller
# Output:
#   potential.trials (data.frame): one row per possible trial outcome with
#     sample.size, duration, and person.time columns appended.
list.potential.trials.survival <-
  function(total.n.per.stage,
           accrual.rate,
           subpopulation.sizes,
           time){
    
    # Get number of arms, subgroups, and stages from total.n.per.stage:
    # the first character of a column name is the arm, the rest the subgroup.
    n.arms <- 
      length(unique(substring(colnames(total.n.per.stage), first=1, last=1)))
    n.subgroups <-
      length(unique(substring(colnames(total.n.per.stage), first=2)))
    stages <- nrow(total.n.per.stage)
    
    # Get the increment in sample size at each stage
    # NOTE(review): incremental.n is computed but not used in this function;
    # candidate for removal.
    incremental.n <- diff(rbind(0, total.n.per.stage))
    
    # Create a list of columns for each subgroup;
    # Vector of columns for each control arm ("C" prefix)
    subgroup.col <- list()
    for(j in 1:n.subgroups) subgroup.col[[j]] <-
      which(substring(colnames(total.n.per.stage), first=2)==j)
    control.col <-
      match(paste0("C", 1:n.subgroups), colnames(total.n.per.stage))
    
    ### Calculate possible sample sizes, durations
    # Create all potential outcomes: Arm x Subpopulation x Stage combinations
    all.trials <- matrix(1:stages, nrow=stages, ncol=n.subgroups*n.arms)
    colnames(all.trials) <- colnames(total.n.per.stage)
    all.trials <- expand.grid(data.frame(all.trials))
    
    if(nrow(all.trials)>1){
      
      # Restrict such that active treatments can't enroll without controls:
      # Keep only trials where the last stage reached by any treatment is equal
      # to the last stage reached by control
      for(j in 1:n.subgroups){
        tx.cols <- setdiff(subgroup.col[[j]], control.col[j])
        if(length(tx.cols)>0){
          all.trials <-
            all.trials[which(apply(as.matrix(all.trials[,tx.cols]), 1, max) == 
                               all.trials[,control.col[j]]),]
        }
      }
      
      # Trial duration is the analysis time of the latest stage reached by
      # any stratum in the trial
      trial.duration <- matrix(0, nrow=nrow(all.trials))
      for(row.number in 1:nrow(all.trials)){
        trial.duration[row.number] <- time[max(all.trials[row.number,])]
      }
      # Person-time sums the analysis time of each stratum's stopping stage
      # across all strata
      person.time <- matrix(0, nrow=nrow(all.trials))
      for(row.number in 1:nrow(all.trials)){
        person.time[row.number] <- sum(sapply(all.trials[row.number,],function(x){time[x]}))
      }
      
      # Calculate sample size of each trial: for each row, sum the
      # per-column enrollment at each column's stopping stage
      potential.trials <-
        data.frame(all.trials,
                   sample.size=apply(t(all.trials), 2, function(x) 
                     sum(total.n.per.stage[cbind(x, 1:ncol(all.trials))])),
                   duration=trial.duration,
                   person.time=person.time
        )
      
    } else {
      # Single-stage case: every stratum contributes time[1] of person-time
      potential.trials <-
        data.frame(all.trials,
                   sample.size=sum(total.n.per.stage),
                   duration=time[1],
                   person.time=time[1]*n.arms*n.subgroups)
    }
    return(potential.trials)
  }


### get.trial.criteria #########################################################
# Description: determine the maximum sample size, expected sample size, expected
#   duration, power, type I error, and familywise type I error rate (FWER) for
#   a clinical trial based on a trial.simulation object, a matrix of hypothesis
#   rejections (decisions.rejections), a matrix of stages at which decisions
#   were made (decisions.stages), the columns corresponding to null hypotheses
#   (null.hypotheses), a look-up table of potential trials with their sample
#   sizes and durations calculated (potential.trials)
#
#
#   if no hypotheses are null, specify null.hypotheses=NULL
# Input:
#   decisions.rejections (logical/0-1 matrix): one row per simulated trial,
#     one column per hypothesis; 1 where the hypothesis was rejected
#   decisions.stages (matrix): one row per simulated trial; the stage at
#     which each (arm x subgroup) stratum ended the trial
#   null.hypotheses (integer vector or NULL): columns of
#     decisions.rejections corresponding to true null hypotheses
#   potential.trials (data.frame): look-up table from
#     list.potential.trials() with sample.size and duration per outcome
# Output:
#   (list): distribution.of.trials, empirical.power, conjunctive and
#     disjunctive power (overall and per subgroup), type.1.error, and fwer
get.trial.criteria <-
  function(decisions.rejections,
           decisions.stages,
           null.hypotheses=NULL,
           potential.trials){
    
    # Add warning if names of decisions.stages don't match potential.trials
    
    # Get number of arms, subgroups, and stages from total.n.per.stage:
    # the first character of a column name is the arm, the rest the subgroup.
    n.arms <- 
      length(unique(substring(colnames(decisions.stages), first=1, last=1)))
    n.subgroups <-
      length(unique(substring(colnames(decisions.stages), first=2)))
    n.simulations <- nrow(decisions.stages)
    
    # Create a list of columns for each subgroup;
    # Vector of columns for each control arm
    subgroup.col <- list()
    for(j in 1:n.subgroups) subgroup.col[[j]] <-
      which(substring(colnames(decisions.stages), first=2)==j)
    
    # Per-subgroup conjunctive/disjunctive power accumulators
    conj.power.n.subgroups <-
      matrix(NA, nrow=n.simulations, ncol=n.subgroups)
    disj.power.n.subgroups <-
      matrix(NA, nrow=n.simulations, ncol=n.subgroups)
    
    # Copy decisions.rejections: power is read from the non-null columns,
    # type I error from the null columns; the other columns are masked NA.
    empirical.alpha <- empirical.power <- decisions.rejections
    if(length(null.hypotheses) > 0){
      empirical.power[, null.hypotheses] <- NA
      empirical.alpha[, -null.hypotheses] <- NA
    } else {
      # No null hypotheses: type I error is undefined (all NA)
      empirical.alpha <- empirical.alpha*NA
    }
    
    for(j in 1:n.subgroups){
      non.null.sub.cols <- setdiff(subgroup.col[[j]], null.hypotheses)
      if(length(non.null.sub.cols)>0){
        sub.cols <- as.matrix(empirical.power[,non.null.sub.cols])
        # NOTE(review): each assignment below broadcasts a single scalar
        # across the whole column, so the later colMeans() just recovers
        # that scalar; the matrices could be plain vectors.
        conj.power.n.subgroups[,j] <- mean(rowMeans(sub.cols)==1)
        disj.power.n.subgroups[,j] <- mean(rowMeans(sub.cols)>0)
      }
    }
    
    conj.power.n.subgroups <- colMeans(conj.power.n.subgroups)
    disj.power.n.subgroups <- colMeans(disj.power.n.subgroups)
    names(disj.power.n.subgroups) <- paste0("DIS_Power_", 1:n.subgroups)
    names(conj.power.n.subgroups) <- paste0("CON_Power_", 1:n.subgroups)

    # Overall power: conjunctive = all non-null hypotheses rejected;
    # disjunctive = at least one rejected; per-hypothesis = column means.
    conj.power <- mean(rowMeans(empirical.power, na.rm=TRUE)==1)
    disj.power <- mean(rowMeans(empirical.power, na.rm=TRUE)>0)
    empirical.power <- colMeans(empirical.power==1)
    
    if(length(null.hypotheses)>0){
      # FWER: proportion of simulations rejecting any true null
      fwer <- mean(rowMeans(empirical.alpha, na.rm=TRUE)>0)
      type.1.error <- colMeans(empirical.alpha==1)
    } else {
      fwer <- NA
      # Keep the column structure of decisions.stages, but all-NA values
      type.1.error <- head(decisions.stages, 1)*NA
    }
    
    # Tabulate how often each distinct trial outcome occurred
    # (requires the plyr package)
    distribution.of.trials <- plyr::count(decisions.stages)
    names(distribution.of.trials) <- c(colnames(decisions.stages), "frequency")
    distribution.of.trials$proportion <- 
      distribution.of.trials$frequency/n.simulations
    
    # Join observed outcome frequencies onto the full look-up table of
    # potential trials, then restore the original column ordering
    distribution.of.trials <- merge(potential.trials,
                                    distribution.of.trials, all=TRUE)
    distribution.of.trials <-
      distribution.of.trials[union(names(potential.trials),
                                   names(distribution.of.trials))]
    
    return(list(distribution.of.trials=distribution.of.trials,
                empirical.power=empirical.power,
                conj.power=conj.power,
                disj.power=disj.power,
                conj.power.n.subgroups=conj.power.n.subgroups,
                disj.power.n.subgroups=disj.power.n.subgroups,
                type.1.error=type.1.error,
                fwer=fwer))
  }


### linear.threshold.penalty ###################################################
# Description: compare a matrix of values against a matrix of thresholds and
#   return a loss matrix of the same dimension: zero wherever a value meets
#   or exceeds its threshold, and linear.penalty times the shortfall wherever
#   a value falls below its threshold.
#
# If any scenario x hypothesis combinations do not contribute to the penalty,
#   they should be replaced with NA values before using power.penalty().
#
# Input:
#   x (numeric matrix): a (RxC) matrix of values
#   threshold.matrix (numeric matrix): a (RxC) matrix of thresholds
#   linear.penalty (numeric scalar - positive): a positive linear penalty to
#     scale the absolute difference between x and threshold.matrix
#
# Output: 
#   loss (numeric matrix): a (RxC) matrix of loss values.
linear.threshold.penalty <-
  function(x,
           threshold.matrix,
           linear.penalty) {
    # Validate the penalty scalar and require conformable dimensions
    stopifnot(is.numeric(linear.penalty) & is.scalar(linear.penalty),
              linear.penalty>=0,
              identical(dim(x), dim(threshold.matrix)))
    # Indicator zeroes out cells meeting their threshold; the remaining
    # cells are penalized in proportion to how far they fall short
    below.threshold <- (x<threshold.matrix)
    shortfall <- below.threshold*abs(x-threshold.matrix)
    return(linear.penalty*shortfall)
  }




### design.performance.continuous.or.binary.outcome.type ##############################################################
# Description: For continuous or binary outcomes, evaluate trial 
#   design performance using simulated trials under each user-defined scenario
#   and return trial criteria on convergence. Performance includes expected
#   sample size, power, Type I error (per hypothesis, conjunctive, disjunctive),
#   and duration. 
#
#   This is done by calculating sample size per stage, and for each scenario:
#   the multivariate normal distribution of test statistics is constructed using
#   construct.test.statistics.joint.distribution, the stopping times and null
#   hypotheses rejected are computed for each simulated trial using 
#   design.evaluate and then performance metrics are computed using
#   get.trial.criteria.
#
# Input:
#   n.arms (numeric scalar - whole): number of total arms (currently 
#     restricted to 2)
#   n.per.arm (numeric scalar - whole): number of patients per arm
#   accrual.rate (positive numeric scalar): number of patients per year accrued
#     in the combined population
#   delay (numeric scalar): time in years from randomization to observation of
#     primary outcome
#   subpopulation.sizes (numeric vector - vector of probabilities): the 
#     proportion of the population in each disjoint subgroup. Must sum to 1.
#     Using subpopulation.sizes = NULL or 1 gives a trial in a single
#     population.
#   interim.info.times (numeric vector - sorted proportions, ending in 1): The 
#     proportion of patients enrolled by each interim analysis. If no interim
#     analyses are desired, set to NULL or 1. (currently restricted to length 
#     up to 2)
#   total.alpha (numeric scalar): total allowable familywise type I error rate.
#     Defaults to 0.025.
#   alpha.allocation (numeric vector): proportion of error rate spent on each
#     analysis. Must sum to 1.
#   outcome.type (string): outcome distribution: either "continuous" or "binary"
#   outcome.mean (M x JT numeric matrix): mean outcomes by scenario.
#   outcome.sd (M x JT numeric matrix): mean SDs by scenario.
#   mcid (numeric scalar): the minimum clinically important difference - on the
#     same scale as outcome.mean
#   futility.boundaries (numeric vector): futility boundaries
#   n.simulations (numeric scalar): Number of simulated trials to estimate trial
#     performance. Defaults to 10000.

design.performance.continuous.or.binary.outcome.type <- 
  function(n.arms,
           n.per.arm,
           accrual.rate,
           delay=0,
           subpopulation.sizes,
           interim.info.times,
           total.alpha=0.025,
           alpha.allocation,
           outcome.type,
           outcome.mean,
           outcome.sd=NULL,
           mcid=0,
           futility.boundaries=NULL,
           n.simulations=10000,
           relative.efficiency=1, 
           construct.joint.distribution.of.test.statistics,
           generate.efficacy.boundaries,
           design.evaluate){
    
    n.subgroups <- length(subpopulation.sizes)
    n.stages <- length(interim.info.times) + 1*(tail(interim.info.times, 1)!=1)
    n.scenarios <- nrow(outcome.mean)
    
    if(outcome.type=="binary"){
      outcome.sd <- sqrt(outcome.mean*(1-outcome.mean))
    }
    
    efficacy.boundary.parameters <- list(err.tol=10^-3)

    per.stage.sample.sizes <-
      get.sample.sizes.per.stage(n.arms=n.arms, 
                                 n.per.arm=n.per.arm,
                                 subpopulation.sizes=subpopulation.sizes,
                                 interim.info.times=interim.info.times,
                                 accrual.rate=accrual.rate,
                                 delay=delay)
    
    analytic.n.per.stage <- per.stage.sample.sizes$analytic.n.per.stage
    total.n.per.stage <- per.stage.sample.sizes$total.n.per.stage
    
    alpha.allocation <- alpha.allocation*total.alpha
    trial.criteria <- trial.criteria.by.scenario <- list()
    
    # List potential trial outcomes
    potential.trials <-
      list.potential.trials(total.n.per.stage=total.n.per.stage,
                            accrual.rate=accrual.rate,
                            subpopulation.sizes=subpopulation.sizes)
    
    # Determine which hypotheses are null, which are above MCID
    control.cols <-
      which(substring(colnames(outcome.mean), first=1, last=1)=="C")
    tx.cols <-
      which(substring(colnames(outcome.mean), first=1, last=1)!="C")
    
    mean.differences <- 
      matrix(outcome.mean[,tx.cols], nrow = nrow(outcome.mean)) -
      kronecker(matrix(1, ncol=(n.arms-1)), 
                matrix(outcome.mean[,control.cols],
                       nrow = nrow(outcome.mean)))
      

    null.hypotheses <- 1*(mean.differences<1e-6) #Null hypotheses are of the form: mean_difference <= 0.
    above.mcid <- 1*(mean.differences>=mcid)
    
    # In case of MCID=0:
    above.mcid[which(null.hypotheses==1, arr.ind=TRUE)] <- 0
    
    
    for(m in 1:n.scenarios){ 
      ### SPECIFIC TO 2-ARM and 3-ARM DESIGNS ###
      prop.pop.1 <- subpopulation.sizes[1] ### SPECIFIC TO 2 SUBPOPULATIONS ###
      
      if(n.arms==2){
        mean.sub.pop.1 <- outcome.mean[m, c("C1", "A1")]
        mean.sub.pop.2 <- outcome.mean[m, c("C2", "A2")]
        var.vec.pop.1 <- (outcome.sd[m, c("C1", "A1")])^2
        var.vec.pop.2 <- (outcome.sd[m, c("C2", "A2")])^2
      } else if(n.arms==3){
        mean.sub.pop.1 <- outcome.mean[m, c("C1", "A1", "B1")]
        mean.sub.pop.2 <- outcome.mean[m, c("C2", "A2", "B2")]
        var.vec.pop.1 <- (outcome.sd[m, c("C1", "A1", "B1")])^2
        var.vec.pop.2 <- (outcome.sd[m, c("C2", "A2", "B2")])^2
      } else {stop("Implementation of >3 arms is not currently implemented.")}
      
      joint.distribution.of.test.statistics <-
        construct.joint.distribution.of.test.statistics(analytic.n.per.stage,
                           mean.sub.pop.1=mean.sub.pop.1,
                           mean.sub.pop.2=mean.sub.pop.2,
                           var.vec.pop.1=var.vec.pop.1,
                           var.vec.pop.2=var.vec.pop.2,
                           outcome.type=outcome.type,
                           prop.pop.1=prop.pop.1,
                           relative.efficiency=relative.efficiency)
      
      # Get efficacy boundaries
      efficacy.boundaries <-
        do.call(what=generate.efficacy.boundaries,
                args=c(list(cov.mat.used=joint.distribution.of.test.statistics$cov.mat.used,
                            alpha.allocation=alpha.allocation),
                       efficacy.boundary.parameters))
      
      # Sample test statistics 
      test.statistics <- 
        with(joint.distribution.of.test.statistics,
             mvtnorm::rmvnorm(n=n.simulations,
                     mean=non.centrality.parameter.vec,
                     sigma=cov.mat.used))
      
      # Determine which hypotheses are rejected at each stage
      test.decisions <-
        do.call(what=design.evaluate,
                args=list(test.statistics=test.statistics,
                          efficacy.boundary=efficacy.boundaries,
                          ### SPECIFIC TO 2-ARM AND 3-ARM DESIGNS ###
                          futility.boundary=c(futility.boundaries,
                                              # No futility at last stage
                                              rep(-Inf, n.arms)),
                          alpha.allocation=alpha.allocation))
      
      # Get ESS, Duration, Power, Type I Error, FWER
      trial.criteria.by.scenario[[m]] <-
        get.trial.criteria(decisions.rejections=test.decisions$rejection.matrix,
                           decisions.stages=test.decisions$stage.decision,
                           null.hypotheses=which(null.hypotheses[m,]==1),
                           potential.trials=potential.trials)
      # Compute additional performance criteria: 
      # bias, variance, mse, and confidence interval coverage
      if(n.arms==2){
        true.estimand.value <-
          c(mean.sub.pop.1[2] - mean.sub.pop.1[1],
            mean.sub.pop.2[2] - mean.sub.pop.2[1])
        names(true.estimand.value) <- c("Subpop.1","Subpop.2")
      } else if (n.arms==3) {
        true.estimand.value = c(mean.sub.pop.1[2] - mean.sub.pop.1[1], mean.sub.pop.2[2] - mean.sub.pop.2[1], mean.sub.pop.1[3] - mean.sub.pop.1[1], mean.sub.pop.2[3] - mean.sub.pop.2[1])
        names(true.estimand.value) <- c("Arm.A.Subpop.1","Arm.A.Subpop.2","Arm.B.Subpop.1","Arm.B.Subpop.2")
      }
      bias.variance.mse.ci <- compute.performance(test.statistics,
                              cov.var.matrix=joint.distribution.of.test.statistics$cov.mat.used,
                              stage.decision=test.decisions$stage.decision,
                              information.level=joint.distribution.of.test.statistics$information.vector,
                              true.estimand.value=true.estimand.value,
                              n.treatment.excluding.control=n.arms-1)
      trial.criteria.by.scenario[[m]]$bias.variance.mse.ci <- bias.variance.mse.ci 
      trial.criteria.by.scenario[[m]]$true.estimand.value <- true.estimand.value 
      
      # Label distribution.of.trials by scenario
      trial.criteria.by.scenario[[m]]$distribution.of.trials <-
        data.frame(scenario=m,
                   trial.criteria.by.scenario[[m]]$distribution.of.trials)
    }
    
    # Unpack list of trial criteria - Label with scenario number
    criteria.names <- unique(unlist(lapply(trial.criteria.by.scenario, names)))
    for(i in 1:length(criteria.names)){
      criteria.result <- 
        do.call(what=rbind,
                args=lapply(trial.criteria.by.scenario, function(x)
                  get(criteria.names[i], x)))
      if(is.null(colnames(criteria.result))){
        colnames(criteria.result) <- criteria.names[i]
      }
      trial.criteria[[i]] <- criteria.result
    }
    
    names(trial.criteria) <- criteria.names
    
    trial.criteria$expected.sample.size.per.scenario <-
      aggregate(sample.size*proportion~scenario,
                FUN=sum,
                data=trial.criteria$distribution.of.trials)[,2]
    
    trial.criteria$expected.duration.per.scenario <-
      aggregate(duration*proportion~scenario,
                FUN=sum,
                data=trial.criteria$distribution.of.trials)[,2]
    trial.criteria$analysis.times <- per.stage.sample.sizes$outcome.years
    trial.criteria$null.hypotheses <- null.hypotheses
    trial.criteria$above.mcid <- above.mcid
    trial.criteria$per.stage.sample.sizes <- per.stage.sample.sizes
    trial.criteria$futility.boundaries <- futility.boundaries
    return(trial.criteria)
  }


### design.performance.survival.outcome.type ###########################################################
# Description: For survival outcomes, evaluate trial design performance using
#   simulated trials under each user-defined scenario and return trial criteria
#   on convergence. Performance includes expected sample size, power, Type I 
#   error (per hypothesis, conjunctive, disjunctive), and duration. 
#
#   This is done by calculating sample size per stage, and for each scenario:
#   the multivariate normal distribution of test statistics is constructed using
#   construct.joint.distribution.of.test.statistics, the stopping times and null
#   hypotheses rejected are computed for each simulated trial using 
#   design.evaluate and then performance metrics are computed using
#   get.trial.criteria.
#
# Input:
#   n.arms (numeric scalar - whole): number of total arms (currently 
#     restricted to 2)
#   accrual.rate (positive numeric scalar): number of patients per year accrued
#     in the combined population
#   subpopulation.sizes (numeric vector - vector of probabilities): the 
#     proportion of the population in each disjoint subgroup. Must sum to 1.
#     Using subpopulation.sizes = NULL or 1 gives a trial in a single
#     population.
#   hazard.rate (M x JT numeric matrix): hazard rates by scenario.
#   total.alpha (numeric scalar): total allowable familywise type I error rate.
#     Defaults to 0.025.
#   alpha.allocation (numeric vector): proportion of error rate spent on each
#     analysis. Must sum to 1.
#   time (numeric vector - sorted): The years at which analyses occur.
#   max.follow (numeric scalar): maximum follow-up time per participant
#     (passed through to the joint-distribution construction)
#   censoring.rate (numeric scalar): censoring rate (passed through to the
#     joint-distribution construction)
#   ni.margin (numeric scalar): non-inferiority margin on the hazard-ratio
#     scale; required when non.inferiority is TRUE
#   non.inferiority (logical scalar): if TRUE, test non-inferiority (null:
#     hazard ratio >= ni.margin); if FALSE, test superiority
#   mcid (numeric scalar): the minimum clinically important difference - on the
#     same scale as outcome.mean
#   futility.boundaries (numeric vector): futility boundaries
#   n.simulations (numeric scalar): Number of simulated trials to estimate trial
#     performance. Defaults to 10000.
design.performance.survival.outcome.type <-
  function(n.arms,
           accrual.rate,
           enrollment.period,
           subpopulation.sizes,
           hazard.rate,
           total.alpha=0.025,
           alpha.allocation,
           time,
           max.follow,
           censoring.rate,
           futility.boundaries,
           ni.margin=NULL,
           non.inferiority=FALSE,
           mcid=0,
           restrict.enrollment=FALSE,
           boundary.to.enroll=-Inf,
           n.simulations=10000,
           relative.efficiency=1,
           construct.joint.distribution.of.test.statistics,
           generate.efficacy.boundaries,
           design.evaluate){
    
    # NOTE(review): n.subgroups and n.stages are computed here but do not
    # appear to be used anywhere below in this function - confirm before
    # removing.
    n.subgroups <- length(subpopulation.sizes)
    n.stages <- length(time)
    
    # Columns of hazard.rate are labeled by arm in their first character:
    # "C" marks control columns; anything else is a treatment column.
    control.cols <-
      which(substring(colnames(hazard.rate), first=1, last=1)=="C")
    tx.cols <-
      which(substring(colnames(hazard.rate), first=1, last=1)!="C")
    # Hazard ratios (treatment vs control) per scenario: each treatment
    # column is divided by the control column of the same subgroup. The
    # kronecker() call tiles the control columns across the (n.arms - 1)
    # treatment arms so the elementwise division lines up.
    hazard.ratios <- 
      matrix(data = hazard.rate[,tx.cols], 
             nrow = nrow(hazard.rate))/
      kronecker(matrix(1, ncol=(n.arms-1)), 
                matrix(data = hazard.rate[,control.cols],
                       nrow = nrow(hazard.rate)))
    
    prop.pop.1 <- subpopulation.sizes[1]
    n.scenarios <- nrow(hazard.rate)
    
    # Maximum sample size per arm implied by the accrual rate and the
    # enrollment period, split evenly across arms.
    n.per.arm <- ceiling((accrual.rate*enrollment.period)/n.arms)
    
    # Functions for Enrollment Modification & Efficacy Boundaries
    # Error tolerance forwarded to generate.efficacy.boundaries below.
    efficacy.boundary.parameters=list(err.tol=10^-3)
    
    # Per-stage sample sizes; analysis times are normalized by the final
    # analysis time so they act as information fractions.
    per.stage.sample.sizes <-
      get.sample.sizes.per.stage(n.arms=n.arms, 
                                 n.per.arm=n.per.arm,
                                 subpopulation.sizes=subpopulation.sizes,
                                 interim.info.times=time/max(time),
                                 accrual.rate=accrual.rate,
                                 delay=0,
                                 restrict.enrollment=restrict.enrollment,
                                 enrollment.period.normalized=
                                   enrollment.period/max(time))

    analytic.n.per.stage <- per.stage.sample.sizes$analytic.n.per.stage
    total.n.per.stage <- per.stage.sample.sizes$total.n.per.stage
    
    # Convert the per-analysis proportions into absolute alpha levels.
    alpha.allocation <- alpha.allocation*total.alpha
    trial.criteria <- trial.criteria.by.scenario <- list()
    
    # List potential trial outcomes (enumeration of the possible stopping
    # configurations, consumed later by get.trial.criteria).
    if(n.arms==2){
      potential.trials <-
        list.potential.trials.survival(total.n.per.stage=total.n.per.stage,
                              accrual.rate=accrual.rate,
                              subpopulation.sizes=subpopulation.sizes,
                              time=time)
    } else {
    potential.trials <-
      list.potential.trials(total.n.per.stage=total.n.per.stage,
                            accrual.rate=accrual.rate,
                            subpopulation.sizes=subpopulation.sizes)
    }
    
    # Determine which hypotheses are truly null
    if(non.inferiority){
      # In non-inferiority trial, null hypothesis is that the treatment is inferior (hazard ratio greater or equal to the non-inferiority margin)
      null.hypotheses <- 1*(hazard.ratios>=ni.margin)
    } else { # Superiority
      # In superiority trial, the null hypothesis is that the hazard ratio is greater or equal 1.
      # The 1e-6 tolerance guards against floating-point hazard ratios
      # numerically just below 1.
      null.hypotheses <- 1*(hazard.ratios>=1-1e-6)
      # Flag alternatives at or beyond the minimum clinically important
      # difference (mcid is compared on the hazard-ratio scale, so <= mcid
      # means at least as large an effect); never flag truly-null entries.
      above.mcid <- 1*(hazard.ratios<=mcid)
      above.mcid[which(null.hypotheses==1, arr.ind=TRUE)] <- 0
    }
    
    # Simulate trial performance separately under each outcome scenario m.
    for(m in 1:n.scenarios){
      ### SPECIFIC TO 2-ARM AND 3-ARM DESIGNS ###
      # Split the scenario's hazard rates by subpopulation: columns
      # c(1,3[,5]) go to subpop 1, c(2,4[,6]) to subpop 2 (presumably the
      # control column comes first in each triple/pair, matching the
      # control/treatment order used for true.estimand.value below -
      # TODO confirm against the hazard.rate column layout).
      if(n.arms==2){
        hazard.rate.pop.1 <- hazard.rate[m, c(1,3)]
        hazard.rate.pop.2 <- hazard.rate[m, c(2,4)]
        ni.params <- c(NI=non.inferiority,
                       ni.margin=ni.margin,
                       restrict.enrollment=restrict.enrollment)
      } else if(n.arms==3){
        hazard.rate.pop.1 <- hazard.rate[m, c(1,3,5)]
        hazard.rate.pop.2 <- hazard.rate[m, c(2,4,6)]
        # Enrollment restriction and non-inferiority parameters are not
        # used for 3-arm designs; setting them to NULL drops the
        # corresponding arguments from the do.call()s below.
        restrict.enrollment <- NULL
        boundary.to.enroll <- NULL
        ni.params <- NULL
      } else {stop("More than 2 arms not yet supported.")}
      
      # Multivariate normal joint distribution of the test statistics
      # across stages and subpopulations for this scenario.
      joint.distribution.of.test.statistics <-
        do.call(what=construct.joint.distribution.of.test.statistics,
                args=c(ni.params,
                       list(analytic.n.per.stage=analytic.n.per.stage, 
                            outcome.type="survival",
                            prop.pop.1 = prop.pop.1,
                            max.follow=max.follow,
                            enrollment.period=enrollment.period,
                            hazard.rate.pop.1=hazard.rate.pop.1,
                            hazard.rate.pop.2=hazard.rate.pop.2,
                            time=time,
                            censoring.rate=censoring.rate,
                            relative.efficiency=relative.efficiency)))

      # Get efficacy boundaries
      efficacy.boundaries <-
        do.call(what=generate.efficacy.boundaries,
                args=c(list(cov.mat.used=joint.distribution.of.test.statistics$cov.mat.used,
                            alpha.allocation=alpha.allocation),
                       restrict.enrollment=restrict.enrollment,
                       efficacy.boundary.parameters))
      
      # Sample test statistics 
      test.statistics <- 
        with(joint.distribution.of.test.statistics,
             mvtnorm::rmvnorm(n=n.simulations,
                     mean=non.centrality.parameter.vec,
                     sigma=cov.mat.used))
      
      # Determine which hypotheses are rejected at each stage
      test.decisions <-
        do.call(what=design.evaluate,
                args=c(list(test.statistics=test.statistics,
                          efficacy.boundary=efficacy.boundaries,
                          ### SPECIFIC TO 2-ARM AND 3-ARM DESIGNS ###
                          futility.boundary=c(futility.boundaries,
                                              # No futility at last stage
                                              rep(-Inf, n.arms)),
                          alpha.allocation=alpha.allocation),
                          restrict.enrollment=restrict.enrollment,
                          boundary.to.enroll=boundary.to.enroll))
      
      # Get ESS, Duration, Power, Type I Error, FWER
      trial.criteria.by.scenario[[m]] <-
        get.trial.criteria(decisions.rejections=test.decisions$rejection.matrix,
                           decisions.stages=test.decisions$stage.decision,
                           null.hypotheses=which(null.hypotheses[m,]==1),
                           potential.trials=potential.trials)
      
      # Compute additional performance criteria: 
      # bias, variance, mse, and confidence interval coverage
      # True estimand: log hazard ratio (first element over second),
      # shifted by the log non-inferiority margin when one is supplied
      # (ni.margin NULL, i.e. not numeric, contributes a shift of 0).
      if(n.arms==2){
        true.estimand.value <- 
          c(log(hazard.rate.pop.1[1]/hazard.rate.pop.1[2]),
            log(hazard.rate.pop.2[1]/hazard.rate.pop.2[2])) -
          log(1/ifelse(!is.numeric(ni.margin), 1, ni.margin))
        names(true.estimand.value) <- c("Subpop.1","Subpop.2")
      } else if (n.arms == 3) {
         true.estimand.value <- c(log(hazard.rate.pop.1[1]/hazard.rate.pop.1[2]), log(hazard.rate.pop.2[1]/hazard.rate.pop.2[2]),log(hazard.rate.pop.1[1]/hazard.rate.pop.1[3]), log(hazard.rate.pop.2[1]/hazard.rate.pop.2[3])) - log(1/ifelse(is.null(ni.margin),1,ni.margin))
         names(true.estimand.value) <- c("Arm.A.Subpop.1","Arm.A.Subpop.2","Arm.B.Subpop.1","Arm.B.Subpop.2")  
      } 
      bias.variance.mse.ci <-
          compute.performance(test.statistics,
                              cov.var.matrix=joint.distribution.of.test.statistics$cov.mat.used,
                              stage.decision=test.decisions$stage.decision,
                              information.level=joint.distribution.of.test.statistics$information.vector,
                              true.estimand.value=true.estimand.value,
                              n.treatment.excluding.control=n.arms-1,
                              restrict.enrollment=restrict.enrollment,
                              convert.to.hazard.ratio.scale=TRUE,
                              ni.margin=
                                ifelse(is.null(ni.margin), 1, ni.margin))
        trial.criteria.by.scenario[[m]]$bias.variance.mse.ci <- 
          bias.variance.mse.ci 
        trial.criteria.by.scenario[[m]]$true.estimand.value <- 
          true.estimand.value 
      
      # Label distribution.of.trials by scenario
      trial.criteria.by.scenario[[m]]$distribution.of.trials <-
        data.frame(scenario=m,
                   trial.criteria.by.scenario[[m]]$distribution.of.trials)
    }
    
    # Unpack list of trial criteria - Label with scenario number
    # Row-bind each named criterion across scenarios; single-column
    # results get named after the criterion itself.
    criteria.names <- unique(unlist(lapply(trial.criteria.by.scenario, names)))
    for(i in 1:length(criteria.names)){
      criteria.result <- 
        do.call(what=rbind,
                args=lapply(trial.criteria.by.scenario, function(x)
                  get(criteria.names[i], x)))
      if(is.null(colnames(criteria.result))){
        colnames(criteria.result) <- criteria.names[i]
      }
      trial.criteria[[i]] <- criteria.result
    }
    
    names(trial.criteria) <- criteria.names
    
    # Expected sample size / duration per scenario: probability-weighted
    # sums over the distribution of potential trials.
    trial.criteria$expected.sample.size.per.scenario <-
      aggregate(sample.size*proportion~scenario,
                FUN=sum,
                data=trial.criteria$distribution.of.trials)[,2]
    
    trial.criteria$expected.duration.per.scenario <-
      aggregate(duration*proportion~scenario,
                FUN=sum,
                data=trial.criteria$distribution.of.trials)[,2]
    
    trial.criteria$null.hypotheses <- null.hypotheses
    
    # above.mcid only exists for superiority trials (see above).
    if(!non.inferiority){
      trial.criteria$above.mcid <- above.mcid
    }
    trial.criteria$analysis.times <- time
    trial.criteria$per.stage.sample.sizes <- per.stage.sample.sizes
    trial.criteria$futility.boundaries <- futility.boundaries
    return(trial.criteria)
  }



### triage.based.on.outcome.type ###############################################
# Description: a generic dispatcher that validates shared inputs and then calls
#   design.performance.survival.outcome.type for survival outcomes or
#   design.performance.continuous.or.binary.outcome.type for continuous or
#   binary outcomes.
# 
# Input:
#   outcome.type (string): outcome distribution: either "continuous", "binary",
#     or "survival"
#   ... : other arguments passed on to design.performance.survival.outcome.type
#     or design.performance.continuous.or.binary.outcome.type
triage.based.on.outcome.type <- function(outcome.type, ...){
  parameters <- list(...)
  if(outcome.type=="survival"){
    # Arm labels are encoded in the first character of the hazard.rate column
    # names ("C" = control); subgroup labels are the remaining characters.
    n.arms <- 
      length(unique(substring(colnames(parameters$hazard.rate),
                              first=1, last=1)))
    n.subgroups <- length(unique(substring(colnames(parameters$hazard.rate),
                                           first=2)))
    n.stages <- length(parameters$time)
    restrict.enrollment <- parameters$restrict.enrollment
    # BUG FIX: treat a missing restrict.enrollment (NULL) as FALSE, matching
    # the FALSE default of design.performance.survival.outcome.type. The
    # previous comparison `restrict.enrollment == FALSE` errored with
    # "argument is of length zero" whenever the argument was not supplied.
    if(!isTRUE(restrict.enrollment)){
      # One futility boundary per (non-final stage) x treatment arm x subgroup.
      with(parameters,
         stopifnot(length(futility.boundaries)==
                     (n.stages-1)*(n.arms-1)*n.subgroups,
                   n.subgroups==2))
    } else {
      # With restricted enrollment, one fewer futility boundary is expected.
      with(parameters,
           stopifnot(length(futility.boundaries)==
                       (n.stages-1)*(n.arms-1)*n.subgroups-1,
                     n.arms %in% 2:3,
                     n.subgroups==2))
    }
    
    design.performance.survival.outcome.type(...)
  } else if (outcome.type %in% c("continuous", "binary")){
    # Same column-name conventions as above, applied to outcome.mean.
    n.arms <- 
      length(unique(substring(colnames(parameters$outcome.mean),
                              first=1, last=1)))
    n.subgroups <- length(unique(substring(colnames(parameters$outcome.mean),
                                           first=2)))
    # A final analysis at information time 1 is implied when the last
    # interim information time is not already 1.
    n.stages <- length(parameters$interim.info.times) + 
      1*(tail(parameters$interim.info.times, 1)!=1)
    with(parameters,
         stopifnot(length(futility.boundaries)==
                     (n.stages-1)*(n.arms-1)*n.subgroups,
                   length(alpha.allocation)==
                     n.stages*n.subgroups,
                   n.arms %in% 2:3,
                   n.subgroups==2,
                   n.stages<=2))
    
    design.performance.continuous.or.binary.outcome.type(outcome.type=outcome.type, ...)
  } else {
    stop("Supported distributions are 'survival', 'continuous', and 'binary'.")
  }
}




### transform.parameters #######################################################
# Description: transform parameters to restricted parameter spaces, such as
#   non-negative reals, positive reals, probabilities, normalized vectors, etc.
#
# Input:
#   parameters (list): a named list of parameters which are assumed to take any
#     real value.
#
#   transforms (list or NULL): a named list of functions used to transform the 
#     parameters from real values to another set. This can be used to handle
#     parameters that are strictly positive, require normalization, and so on,
#     such as alpha (re)allocation and information times. The names of the list
#     should correspond to the parameters intended for the transform. If NULL,
#     parameters are returned unchanged.
#
# Output:
#   tf.params (list): a named list of transformed parameters
#
transform.parameters <- 
  function(parameters,
           transforms) {
  
  # 1. Make sure parameters and transforms are named.
  # (nchar(names(NULL)) is integer(0), so a NULL transforms list passes.)
  if(sum(nchar(names(parameters))==0) > 0) {
    stop("parameters must be named to avoid ambiguity")
  }
  
  if(sum(nchar(names(transforms))==0) > 0) {
    stop("transforms must be named to avoid ambiguity")
  }
  
  tf.params <- parameters
  
  # BUG FIX: the transform-application loop now lives inside this guard.
  # Previously it ran unconditionally, so transforms=NULL (the documented
  # "no transforms" case, and sa.optimize's default) errored on the
  # undefined `which.tf`.
  if(!is.null(transforms)){
    
    # 2. Make sure transforms map uniquely to parameters. Checking NA and
    # duplicates directly catches every bad mapping (the old combined
    # length test let a single unmatched transform through, which then
    # failed later with an opaque `[[NA]]` subscript error).
    which.tf <- match(names(transforms), names(parameters))
    
    if(anyNA(which.tf)){
      stop(paste0("parameters did not contain: ",
                  paste(names(transforms)[which(is.na(which.tf))],
                        collapse=", ")))
    }
    if(anyDuplicated(which.tf) > 0) {
      stop("transforms did not uniquely match to parameters")
    }
    
    # 3. Apply each transform to its matched parameter.
    for(i in seq_along(which.tf)) {
      tf.params[[which.tf[i]]] <- 
        transforms[[i]](parameters[[which.tf[i]]])
    }
  }
  
  return(tf.params)
}




### power.penalized.weighted ###############################################
# Description: an objective function that incorporates a weighted average of
#   expected sample size (ESS) or duration across outcome scenarios, with an
#   added penalty for each violation of the power constraints. 
#
# Input:
#   trial.performance (trial performance object): trial performance to evaluate.
#     Must contain empirical.power; also uses conj.power,
#     expected.sample.size.per.scenario / expected.duration.per.scenario, and
#     (for superiority trials) above.mcid.
#   scenario.weights (numeric vector): a numeric vector of weights for
#     calculating a weighted mean over outcome scenarios. If no weight is
#     supplied, the weights are assumed equal.
#   power.constraints (numeric matrix): a numeric matrix of power constraints
#     matching the dimensions of the power matrix. If no power constraint
#     is supplied, 80% power is taken as the constraint.
#   power.penalty (numeric scalar): a penalty applied to the difference between
#     power achieved and power desired.
#   objective.scale (positive numeric scalar): divisor applied to the final
#     objective value.
#   optimization.target (string): "size" (expected sample size) or "duration".
#
# Output: an objective function value (numeric scalar).
power.penalized.weighted <- 
  function(trial.performance,
           scenario.weights=NULL,
           power.constraints=NULL,
           power.penalty=100000,
           objective.scale=1,
           optimization.target="size") {
    stopifnot("empirical.power" %in% names(trial.performance),
              is.numeric(objective.scale) && is.scalar(objective.scale),
              is.finite(objective.scale) && objective.scale > 0)
    
    # Choose the quantity to minimize; fail loudly on an unknown target
    # (previously this fell through to an opaque "object not found" error).
    if(optimization.target=="size"){
      target.objective.value <- trial.performance$expected.sample.size.per.scenario
    } else if(optimization.target=="duration"){
      target.objective.value <- trial.performance$expected.duration.per.scenario
    } else {
      stop("optimization.target must be 'size' or 'duration'")
    }
    
    # Power matrix: per-hypothesis empirical power plus conjunctive power.
    trial.power <- cbind(trial.performance$empirical.power,
                         trial.performance$conj.power)
    
    # For superiority trials, don't penalize for power in alternatives < MCID.
    # BUG FIX: previously read trial.performance$mcid, which does not exist
    # (the element is named above.mcid, as the exists() check itself used),
    # so the NA masking silently never happened and below-MCID alternatives
    # were penalized.
    if("above.mcid" %in% names(trial.performance)){
      above.mcid <- trial.performance$above.mcid
      trial.power[which(above.mcid==0, arr.ind=TRUE)] <- NA
    }
    
    # Default constraint: 80% power for every power entry.
    if(is.null(power.constraints)){
      power.constraints <-
        matrix(0.8, nrow=nrow(trial.power), ncol=ncol(trial.power))
    }
    
    # Default: equal weight for every scenario.
    if(is.null(scenario.weights)){
      scenario.weights <- rep(1, length(target.objective.value))
    }

    # Weighted mean of the target plus a linear penalty for each power
    # shortfall; masked (NA) entries are excluded from the penalty sum.
    objective.value <- 
      weighted.mean(target.objective.value, w=scenario.weights) +
      sum(linear.threshold.penalty(trial.power,
                                   threshold.matrix=power.constraints,
                                   linear.penalty=power.penalty), na.rm=TRUE)
    stopifnot(is.scalar(objective.value))
    return(objective.value/objective.scale)
  }

### get.optim.trajectory ######################################################
# Description: optim() only returns the objective value at convergence, but
# with tracing enabled it prints the iteration history to the console, which
# capture.output() can collect into a character vector. This function parses
# such a character vector into a data frame of the iteration history.
#
# Input: the output of a call to capture.output(optim(...))
#
# Output: a data frame with columns iteration (the iteration number) and
# obj.fx (the objective function value at that iteration).
get.optim.trajectory <- 
  function(optim.text) {
    # Keep only trace lines of the form "iter <n> value <v>".
    trace.lines <- optim.text[grepl("iter\\s*\\d*\\s*value\\s*", optim.text)]
    # Strip the surrounding text, leaving just the number of interest.
    iteration <- as.numeric(gsub("(iter)|(value.*)", "", trace.lines))
    obj.fx <- as.numeric(gsub("iter\\s*\\d*\\s*value", "", trace.lines))
    data.frame(iteration=iteration, obj.fx=obj.fx)
  }




### sa.optimize ################################################################
# Description: this is a wrapper for optimizing objects, which requires two
#   functions: a function from the parameters to create an object, and a 
#   function that assigns a value to the object (a loss function that takes
#   in an object and produces a numeric scalar value). This allows optimization
#   of objects using simulated annealing by attempting to minimize the loss for
#   a given set of parameters.
#
#   create.object() is a function that takes the lists search.parameters and 
#   fixed.parameters, and produces an object. Optional transforms specified
#   by search.transforms can constrain this parameter space. The function 
#   evaluate.object() maps the object to a numeric scalar, using the optional
#   parameters specified by ellipsis (...).
#
#   search.parameters = {p1, p2, ..., pd}
#   search.transforms = {f1(), f2(), ... , fd()}
#   transformed.parameters := {x1=f1(p1), x2=f2(p2), ..., xd=fd(pd)
#   object.to.optimize <- create.object(transformed.parameters)
#   objective.value = evaluate.object(object.to.optimize, ...)
#
#   If G is create.object and F is evaluate.object, SA will try to find: 
#     xmin = argmin(F(G(x1, x2, ..., xd)))
#
# Input:
#   search.parameters (list): a named list of all parameters that are to be
#     searched over by simulated annealing, with starting values: passed to
#     create.object()
#
#   search.transforms (list): a named list of functions used to transform the
#     search parameters from real values to another set. This can be used to
#     handle parameters that are strictly positive, require normalization, and
#     so on. If NULL, parameters are assumed to be able to assume any real
#     value. See the documentation of transform.parameters() for more
#     information.
#
#   fixed.parameters (list): a named list of all parameters that are required to
#     fully specify the creation of the object, along with their values: passed
#     to create.object()
#
#   create.object (function): a function which accepts the arguments 
#     search.parameters (after transformation by search.transforms) and
#     fixed.parameters, and returns an object.
#
#   evaluate.object (function): a function that takes an object and assigns it
#     a scalar numeric value, with smaller values being preferred.
#
#   max.iterations (positive numeric scalar): number of iterations of simulated
#     annealing. See ?optim() for more details.
#
#   temperature (positive numeric scalar): 'temperature' parameter for simulated
#     annealing. See ?optim() for more details.
#
#   evals.per.temp (positive numeric scalar): number of evaluations per 
#     temperature. See ?optim() for more details.
#
#   report.iteration (positive numeric scalar): interval for reporting objective
#     function - report every x iterations. See ?optim() for more details.
#
# Output:
# parameters: the list of parameters for the optimized object, after any 
#   transformations were applied.
# optimization.trajectory: a data frame of the objective function values by
#   iteration. See get.optim.trajectory().
# optim.result: the object returned by the call to optim().
sa.optimize <-
  function(search.parameters,
           search.transforms=NULL,
           fixed.parameters=NULL,
           create.object,
           evaluate.object,
           max.iterations,
           temperature,
           evals.per.temp,
           report.iteration,
           function.scale=1,
           parameter.scale=1,
           ...){

    
    # optim() can only take vectors: need to unlist/relist to retain structure.
    initial.param <- as.relistable(search.parameters)
    
    # capture.output() collects the trace that optim() writes to the console
    # (trace=6 in the control list below) so the iteration history can be
    # parsed afterwards by get.optim.trajectory().
    optim.trajectory <-
      capture.output(
        sa.result <- 
          optim(par=unlist(initial.param),
                # NOTE(review): the objective.fx default argument is never
                # used; the body calls evaluate.object directly - confirm
                # before removing.
                fn=function(varying,
                            transforms=search.transforms,
                            fixed=fixed.parameters,
                            objective.fx=evaluate.object){ 
                  # Re-structure parameters as list
                  varying <- relist(varying, skeleton=search.parameters)
                  
                  # Call user-specified create.object function: capture any text
                  # (printed output would otherwise pollute the optim trace
                  # being captured above).
                  discard.text <-
                    capture.output(
                      sa.object <-
                        do.call(what=create.object,
                                args=c(list(),
                                       transform.parameters(varying,
                                                            transforms),
                                       fixed))
                    )
                      
                  # Call user-specified objective function: capture any text
                  discard.text <-
                    capture.output(
                      obj.fx <- evaluate.object(sa.object, ...)
                      )
                  obj.fx
                },
                method="SANN",
                control=list(trace=6,
                             maxit=max.iterations,
                             temp=temperature,
                             tmax=evals.per.temp,
                             REPORT=report.iteration,
                             fnscale=function.scale,
                             parscale=parameter.scale))
      )
        
    # Re-structure result as list
    search.parameters <- relist(sa.result$par, skeleton=search.parameters)
    
    # Transform results and return
    parameters <- 
      c(transform.parameters(search.parameters, search.transforms),
        fixed.parameters)
    
    # Re-create the object at the optimum so it can be returned directly.
    optima <- do.call(what=create.object, args=parameters)

    # NOTE(review): despite its name (and the header comment above this
    # function), optim.result is the relisted parameter vector, not the
    # full object returned by optim() - confirm whether sa.result itself
    # was intended here.
    return(list(parameters=parameters,
                optima=optima,
                optimization.trajectory=get.optim.trajectory(optim.trajectory),
                optim.result=relist(sa.result$par, skeleton=search.parameters),
                evaluate.object=evaluate.object,
                evaluate.object.parameters=list(...)))
  }




### sa.temperature #############################################################
# Description: a function for viewing the default cooling schedule used by
#   optim()'s implementation of simulated annealing: the current temperature
#   after a given number of iterations. Simulated annealing's behavior can
#   strongly depend on the initial temperature.
sa.temperature <- 
  function(iteration=1,
           max.iterations=10000,
           temperature=10,
           evals.per.temp=10) {
  # Temperature only drops after each full cycle of evals.per.temp
  # evaluations; exp(1) in the log keeps the first cycle at `temperature`.
  completed.cycles <- (iteration - 1) %/% evals.per.temp
  temperature / log(completed.cycles * evals.per.temp + exp(1))
}
# mrosenblum/AdaptiveDesignOptimizer documentation built on July 10, 2019, 2:46 p.m.