blockRAR: Block Design for Response-Adaptive Randomization

Documented in binomialfreq

#' @title Block Design for Response-Adaptive Randomization for Binomial Data
#'
#' @description Simulation for binomial counts for block design for
#'    response-adaptive randomization with time as a confounding
#' @param p_treatment scalar. Proportion of events under the treatment arm.
#' @param p_control scalar. Proportion of events under the control arm.
#' @param N_total scalar. Total sample size.
#' @param block_number scalar. Number of blocks or time levels. The default is set to 4.
#'   If \code{block_number} is set to 1. This is a traditional RCT design.
#' @param drift scalar. The increase or decrease in proportion of event over time.
#'   In this case, the proportion of failure changes in each block by the number of
#'   patient accured over the total sample size. The full drift effect is seen in the
#'   final block.
#' @param simulation scalar. Number of simulation to be ran. The default is set to 10000.
#' @param conf_int scalar. Confidence level of the interval.
#' @param alternative character. A string specifying the alternative hypothesis,
#'    must be one of "less" or "greater" (default).
#' @param correct logical. A logical indicating whether to apply continuity correction
#'    when computing the test statistic: one half is subtracted from all |O - E|
#'    differences; however, the correction will not be bigger than the differences themselves.
#' @param early_stop logical. A logical indicating whether the trials are stopped early
#'    for success or futility.
#' @param size_equal_randomization scalar. The number of run in patients because adaptive
#'    randomization is applied.
#' @param min_patient_earlystop scalar. Minimum number of patients before early stopping
#'    rule is applied.
#' @param max_prob scalar. The maximum probability for assigning to treatment/control
#'    group is 0.8.
#'
#'
#' @return a list with details on the simulation.
#' \describe{
#'   \item{\code{power}}{
#'     scalar. The power of the trial, ie. the proportion of success over the
#'     number of simulation ran.}
#'   \item{\code{N_enrolled}}{
#'     vector. The number of patients enrolled in the trial (sum of control
#'     and experimental group for each simulation. )}
#'   \item{\code{N_control}}{
#'     vector. The number of patients enrolled in the control group for
#'     each simulation.}
#'   \item{\code{N_control}}{
#'     vector. The number of patients enrolled in the experimental group for
#'     each simulation.}
#'   \item{\code{randomization_ratio}}{
#'     matrix. The randomization ratio allocated for each block.}
#' }
#'
#' @importFrom stats rbinom mantelhaen.test chisq.test qchisq
#' @importFrom ldbounds bounds
#' @importFrom dplyr mutate group_by summarize
#' @importFrom tibble as.tibble
#'
#' @export binomialfreq
#'
#' @examples
#' binomialfreq(p_control = 0.7, p_treatment = 0.65, N_total = 200,
#'             block_number = 2, simulation = 3)
#' binomialfreq(p_control = 0.5, p_treatment = 0.40, N_total = 200,
#'             block_number = 2, simulation = 3, drift = -0.15)

binomialfreq <- function(
  p_control,
  p_treatment,
  N_total,
  block_number             = 4,
  drift                    = 0,
  simulation               = 10000,
  conf_int                 = 0.95,
  alternative              = "greater",
  correct                  = FALSE,
  early_stop               = FALSE,
  size_equal_randomization = 20,
  min_patient_earlystop    = 20,
  max_prob                 = 0.8
){
   # stop if proportion of control is not between 0 and 1.
  if((p_control <= 0 | p_control >= 1)){
    stop("The proportion of event for the control group needs to between 0 and 1!")
  }

  # stop if proportion of treatment is not between 0 and 1.
  if((p_treatment <= 0 | p_treatment >= 1)){
    stop("The proportion of event for the treatment group needs to between 0 and 1!")
  }

  ## make sure sample size is an integer!
  if((N_total <= 0 | N_total %% 1 != 0)){
    stop("The sample size needs to be a positive integer!")
  }

  # the number of blocks need to be an integer.
  if((block_number <= 0 | block_number %% 1 != 0)){
    stop("The number of blocks needs to be a positve integer!")
  }

  # the number of blocks is bigger than the sample size.
  if(N_total/ block_number < 1){
    stop("The number of blocks is greater than sample size!")
  }

  # number of simulation needs to be a positive integer.
  if((simulation <= 0 | simulation %% 1 != 0)){
    stop("The number of simulation needs to be a positve integer!")
  }

  # the confidence interval needs to be between 0 and 1.
  if((conf_int <= 0 | conf_int >= 1)){
    stop("The confidence interval needs to between 0 and 1!")
  }

  # the alternative is either less or greater, two-sided not acceptable.
  if((alternative != "less" & alternative != "greater")){
    stop("The alternative can only be less or greater!")
  }

  # the drift value cant make the prop of control/treatment exceed 1 or below 0.
  if(drift + p_control >= 1 | drift + p_control <= 0 |
     drift + p_treatment >= 1 | drift + p_treatment <= 0){
    stop("The drift value is too high causing the proportion of event to exceed 1
         in either the control or treatment group, pick a lower value for drift!")
  }

  # if
  if(!(early_stop == FALSE | early_stop == TRUE)){
    stop("Early stopping can be only TRUE or FALSE!")
  }

  # total sample size is divided equally into blocks
  group <- rep(floor(N_total / block_number), block_number)

  # if the remainder of total sample size divided by block number is not 0,
  # randomly add patients to each block
  if((N_total - sum(group)) > 0){
    index <- sample(1:block_number, N_total - sum(group))
    group[index] <- group[index] + 1
  }

  # if we allow early stopping, compute the lan-demets bound
  if(early_stop){
    # divided time equally between 0 and 1 with the number of blocks
    time <- (min_patient_earlystop:(N_total - 1)) / N_total
    #using lan-demets bound, computing the early stopping criteria for the number of blocks
    bounds <- bounds(time, iuse = c(1, 1), alpha = c((1 - conf_int) / 2, (1 - conf_int) / 2))$upper.bounds
  }

  #assigning power to 0
  power <- 0

  # assigning overall variables as NULL and drift for patient by patient
  p_value              <- NULL
  N_control            <- NULL
  N_treatment          <- NULL
  sample_size          <- NULL
  p_control_estimate   <- NULL
  p_treatment_estimate <- NULL
  prop_diff_estimate   <- NULL
  drift_p              <- seq(drift / N_total, drift,  length.out = N_total)
  early_stopping       <- rep(0, simulation)
  randomization        <- array(NA, c(simulation, N_total))

  # looping overall all simulation
  for(k in 1:simulation){

    # assigning variables as NULL for each simulation
    data_total            <- data.frame()
    test_stat             <- 0
    time                  <- rep(1:block_number, group[1:block_number])
    index                 <- block_number

    #looping over all blocks
    for(i in 1:N_total){
      # create a data summary from previos block or if its null, create an empty
      # summary

      if(any((i - 1) == cumsum(group)) | i == 1){
        if(nrow(data_total) != 0 & nrow(data_total) >= min_patient_earlystop){
          y_ctr     <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
          N_ctr     <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
          y_trt     <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
          N_trt     <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
        }
        else{
          y_ctr     <- 0
          N_ctr     <- 0
          y_trt     <- 0
          N_trt     <- 0
        }

        # if the alternative is greater, use proportion to set randomization ratio
        if(alternative == "greater"){
          prob_trt <- (y_trt + 1) / (N_trt + 2) /
            ((y_trt + 1) / (N_trt + 2) + (y_ctr + 1) / (N_ctr + 2))
        }
        # if the alternative is greater, use 1 - proportion to set randomization ratio
        else{
          prob_trt <- (1 - (y_trt + 1) / (N_trt + 2)) /
            (1 - (y_trt + 1) / (N_trt + 2) + (1 - (y_ctr + 1) / (N_ctr + 2)))
        }

        # maximum probability assigning to the treatment group is 0.8
        if(prob_trt > max_prob){
          prob_trt <- max_prob
        }

        # maximum probability assigning to the control group is 0.8
        else if(prob_trt < (1 - max_prob)){
          prob_trt <- 1 - max_prob
        }
      }

      # storing info of prob_trt
      randomization[k, i] <- prob_trt

      # generate data frame treatment assignment based on sampling and
      # alternative hypothesis. leave the outcome variable empty.
      data <- data.frame(
        treatment = sample(x = 0:1, size = 1, prob = c(1 - prob_trt, prob_trt)),
        outcome = rep(NA, 1))

      # fill in the outcome variable based on the treatment assignement and proportion
      # of event in respective arm
      data$outcome <- rbinom(nrow(data), 1, prob = data$treatment * p_treatment +
                               (1 - data$treatment) * p_control +
                               drift_p[i])

      # bind the data with previous block if available
      data_total <- rbind(data_total, data)

      # convert the data_total to factor for outcome and treatment
      data_total$treatment <- as.factor(data_total$treatment)
      data_total$outcome <- as.factor(data_total$outcome)

      # making sure outcome level of 1 is present, even if its not present in the data
      if(is.na(match("1", levels(data_total$outcome)))){
        data_total$outcome <- factor(data_total$outcome,
                                     levels=c(levels(data_total$outcome), "1"))
      }

      # making sure outcome level of 0 is present, even if its not present in the data
      if(is.na(match("0", levels(data_total$outcome)))){
        data_total$outcome <- factor(data_total$outcome,
                                     levels=c(levels(data_total$outcome), "0"))
      }

      # making sure the treatment group is present, even if its not present in the data
      if(is.na(match("1", levels(data_total$treatment)))){
        data_total$treatment <- factor(data_total$treatment,
                                     levels=c(levels(data_total$treatment), "1"))
      }

      # making sure the control group is present, even if its not present in the data
      if(is.na(match("0", levels(data_total$treatment)))){
        data_total$treatment <- factor(data_total$treatment,
                                     levels=c(levels(data_total$treatment), "0"))
      }

      data_interim <- data_total %>%
        mutate(time = time[1:i])


      if(early_stop & (i >= min_patient_earlystop) & (i < N_total)){
        if(any(table(data_interim$time) == 1 & N_total / block_number >= 2)){
          remove             <- as.numeric(which(table(data_interim$time) == 1))
          data_interim       <- data_interim[!(data_interim$time == remove), ]
        }

        # if one treatment is not present or one type of outcome is not present,
        # set the test_statistics to 0.
        if(all(data_interim$outcome == 1) | all(data_interim$outcome == 0) |
           all(data_interim$treatment == 1) | all(data_interim$treatment == 0) |
           nrow(data_interim) == 0){
          test_stat <- 0
        }

        # else compute the test statistics
        else if(any(N_total / block_number < 2 | block_number == 1 | all(data_interim$time == 1))){
          test_stat         <- sqrt(as.numeric(chisq.test(data_interim$treatment,
                                                  data_interim$outcome,
                                                  correct = correct)$statistic))
        }
        else{
          p.val             <- tryCatch(expr = mantelhaen.test(table(data_interim),
                                                               correct = correct)$p.val,
                                        error   = function(data_interim =
                                                             data_interim[data_interim$time == 1, ]){
                                          as.numeric(chisq.test(data_interim$treatment,
                                                                     data_interim$outcome,
                                                                     correct = correct)$p.value / 2)})




          if(is.nan(p.val) | p.val > 0.5){
            p.val           <- 0.5
          }
          test_stat         <- sqrt(qchisq(1 - p.val, df = 1))
        }

      # if we allow early stopping and the
      # the test_statistics exceed the lan-demets bound, quit the loop
        if(test_stat > bounds[i - min_patient_earlystop + 1]){
          index                                  <- i
          early_stopping[k]                      <- 1
          time                                   <- time[1:i]
          randomization[k, (i + 1):N_total]      <- 0
          break
        }
      }

    }

    # adding the time factor to the data using group function
    data_total <- data_total %>%
      mutate(time = factor(time))

    # summarizing the data by treatment group
    ctrl_prop <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
    trt_prop  <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))

    # estimating prop_difference
    prop_diff <- prop_strata(treatment = data_total$treatment,
                             outcome   = data_total$outcome,
                             block     = data_total$time)


    if(any(table(data_total$time) == 1 & N_total / block_number >= 2)){
      remove          <- as.numeric(which(table(data_total$time) == 1))
      data_total      <- data_total[!(data_total$time == remove), ]
      data_total$time <- droplevels(data_total$time)
    }

    # if number of block is 1 or if any block has only one patients, then use chisq.test
    if(all(data_total$time == 1) | N_total / block_number <  2){
      # if the prop is in the right direction, compute p-value
      if(((ctrl_prop - trt_prop >= 0) & alternative == "less") |
         ((trt_prop - ctrl_prop >= 0) & alternative == "greater")){
        p.val <- chisq.test(data_total$treatment, data_total$outcome,
                            correct = correct)$p.value / 2
      }
      else{
        p.val <- 1
      }
    }
    # compute mantelhaen.test for number of block > 1.
    else{
      p.val             <- tryCatch(expr = mantelhaen.test(table(data_total),
                                                           correct = correct)$p.val,
                                    error   = function(data_total =
                                                         data_total[data_total$time == 1, ]){
                                      as.numeric(chisq.test(data_total$treatment,
                                                            data_total$outcome,
                                                            correct = correct)$p.value / 2)})
    }

    # compute the sample size for control, treatment and proportion for
    # control and treatment estimate
    p_value              <- c(p_value, p.val)
    N_control            <- c(N_control, sum(data_total$treatment == 0))
    N_treatment          <- c(N_treatment, sum(data_total$treatment == 1))
    sample_size          <- c(sample_size, nrow(data_total))
    p_control_estimate   <- c(p_control_estimate, ctrl_prop)
    p_treatment_estimate <- c(p_treatment_estimate, trt_prop)
    prop_diff_estimate   <- c(prop_diff_estimate, prop_diff)

    # compute the power if p-value is smaller than 0.05
    if(p.val < (1 - conf_int)){
      power <- power + 1
    }

  }

  #return all the output in a list
  output <- list(
    power                 = power / simulation,
    prop_diff_estimate    = prop_diff_estimate,
    N_enrolled            = sample_size,
    N_control             = N_control,
    N_treatment           = N_treatment,
    early_stop            = early_stopping,
    p_value               = p_value,
    prob_treatment        = randomization
    )

  return(output)
}

## quiets concerns of R CMD check re: the .'s that appear in pipelines
if(getRversion() >= "2.15.1")  utils::globalVariables(c("treatment", "outcome"))