#' @title Block Design for Response-Adaptive Randomization for Binomial Data
#'
#' @description Simulation for binomial counts for block design for
#' response-adaptive randomization with time as a confounding
#' @param p_treatment scalar. Proportion of events under the treatment arm.
#' @param p_control scalar. Proportion of events under the control arm.
#' @param N_total scalar. Total sample size.
#' @param block_number scalar. Number of blocks or time levels. The default is set to 4.
#' If \code{block_number} is set to 1. This is a traditional RCT design.
#' @param drift scalar. The increase or decrease in proportion of event over time.
#' In this case, the proportion of failure changes in each block by the number of
#' patient accured over the total sample size. The full drift effect is seen in the
#' final block.
#' @param simulation scalar. Number of simulation to be ran. The default is set to 10000.
#' @param conf_int scalar. Confidence level of the interval.
#' @param alternative character. A string specifying the alternative hypothesis,
#' must be one of "less" or "greater" (default).
#' @param correct logical. A logical indicating whether to apply continuity correction
#' when computing the test statistic: one half is subtracted from all |O - E|
#' differences; however, the correction will not be bigger than the differences themselves.
#' @param early_stop logical. A logical indicating whether the trials are stopped early
#' for success or futility.
#' @param size_equal_randomization scalar. The number of run in patients because adaptive
#' randomization is applied.
#' @param min_patient_earlystop scalar. Minimum number of patients before early stopping
#' rule is applied.
#' @param max_prob scalar. The maximum probability for assigning to treatment/control
#' group is 0.8.
#'
#'
#' @return a list with details on the simulation.
#' \describe{
#' \item{\code{power}}{
#' scalar. The power of the trial, ie. the proportion of success over the
#' number of simulation ran.}
#' \item{\code{N_enrolled}}{
#' vector. The number of patients enrolled in the trial (sum of control
#' and experimental group for each simulation. )}
#' \item{\code{N_control}}{
#' vector. The number of patients enrolled in the control group for
#' each simulation.}
#' \item{\code{N_control}}{
#' vector. The number of patients enrolled in the experimental group for
#' each simulation.}
#' \item{\code{randomization_ratio}}{
#' matrix. The randomization ratio allocated for each block.}
#' }
#'
#' @importFrom stats rbinom mantelhaen.test chisq.test qchisq
#' @importFrom ldbounds bounds
#' @importFrom dplyr mutate group_by summarize
#' @importFrom tibble as.tibble
#'
#' @export binomialfreq
#'
#' @examples
#' binomialfreq(p_control = 0.7, p_treatment = 0.65, N_total = 200,
#' block_number = 2, simulation = 3)
#' binomialfreq(p_control = 0.5, p_treatment = 0.40, N_total = 200,
#' block_number = 2, simulation = 3, drift = -0.15)
binomialfreq <- function(
p_control,
p_treatment,
N_total,
block_number = 4,
drift = 0,
simulation = 10000,
conf_int = 0.95,
alternative = "greater",
correct = FALSE,
early_stop = FALSE,
size_equal_randomization = 20,
min_patient_earlystop = 20,
max_prob = 0.8
){
# stop if proportion of control is not between 0 and 1.
if((p_control <= 0 | p_control >= 1)){
stop("The proportion of event for the control group needs to between 0 and 1!")
}
# stop if proportion of treatment is not between 0 and 1.
if((p_treatment <= 0 | p_treatment >= 1)){
stop("The proportion of event for the treatment group needs to between 0 and 1!")
}
## make sure sample size is an integer!
if((N_total <= 0 | N_total %% 1 != 0)){
stop("The sample size needs to be a positive integer!")
}
# the number of blocks need to be an integer.
if((block_number <= 0 | block_number %% 1 != 0)){
stop("The number of blocks needs to be a positve integer!")
}
# the number of blocks is bigger than the sample size.
if(N_total/ block_number < 1){
stop("The number of blocks is greater than sample size!")
}
# number of simulation needs to be a positive integer.
if((simulation <= 0 | simulation %% 1 != 0)){
stop("The number of simulation needs to be a positve integer!")
}
# the confidence interval needs to be between 0 and 1.
if((conf_int <= 0 | conf_int >= 1)){
stop("The confidence interval needs to between 0 and 1!")
}
# the alternative is either less or greater, two-sided not acceptable.
if((alternative != "less" & alternative != "greater")){
stop("The alternative can only be less or greater!")
}
# the drift value cant make the prop of control/treatment exceed 1 or below 0.
if(drift + p_control >= 1 | drift + p_control <= 0 |
drift + p_treatment >= 1 | drift + p_treatment <= 0){
stop("The drift value is too high causing the proportion of event to exceed 1
in either the control or treatment group, pick a lower value for drift!")
}
# if
if(!(early_stop == FALSE | early_stop == TRUE)){
stop("Early stopping can be only TRUE or FALSE!")
}
# total sample size is divided equally into blocks
group <- rep(floor(N_total / block_number), block_number)
# if the remainder of total sample size divided by block number is not 0,
# randomly add patients to each block
if((N_total - sum(group)) > 0){
index <- sample(1:block_number, N_total - sum(group))
group[index] <- group[index] + 1
}
# if we allow early stopping, compute the lan-demets bound
if(early_stop){
# divided time equally between 0 and 1 with the number of blocks
time <- (min_patient_earlystop:(N_total - 1)) / N_total
#using lan-demets bound, computing the early stopping criteria for the number of blocks
bounds <- bounds(time, iuse = c(1, 1), alpha = c((1 - conf_int) / 2, (1 - conf_int) / 2))$upper.bounds
}
#assigning power to 0
power <- 0
# assigning overall variables as NULL and drift for patient by patient
p_value <- NULL
N_control <- NULL
N_treatment <- NULL
sample_size <- NULL
p_control_estimate <- NULL
p_treatment_estimate <- NULL
prop_diff_estimate <- NULL
drift_p <- seq(drift / N_total, drift, length.out = N_total)
early_stopping <- rep(0, simulation)
randomization <- array(NA, c(simulation, N_total))
# looping overall all simulation
for(k in 1:simulation){
# assigning variables as NULL for each simulation
data_total <- data.frame()
test_stat <- 0
time <- rep(1:block_number, group[1:block_number])
index <- block_number
#looping over all blocks
for(i in 1:N_total){
# create a data summary from previos block or if its null, create an empty
# summary
if(any((i - 1) == cumsum(group)) | i == 1){
if(nrow(data_total) != 0 & nrow(data_total) >= min_patient_earlystop){
y_ctr <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
N_ctr <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
y_trt <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
N_trt <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
}
else{
y_ctr <- 0
N_ctr <- 0
y_trt <- 0
N_trt <- 0
}
# if the alternative is greater, use proportion to set randomization ratio
if(alternative == "greater"){
prob_trt <- (y_trt + 1) / (N_trt + 2) /
((y_trt + 1) / (N_trt + 2) + (y_ctr + 1) / (N_ctr + 2))
}
# if the alternative is greater, use 1 - proportion to set randomization ratio
else{
prob_trt <- (1 - (y_trt + 1) / (N_trt + 2)) /
(1 - (y_trt + 1) / (N_trt + 2) + (1 - (y_ctr + 1) / (N_ctr + 2)))
}
# maximum probability assigning to the treatment group is 0.8
if(prob_trt > max_prob){
prob_trt <- max_prob
}
# maximum probability assigning to the control group is 0.8
else if(prob_trt < (1 - max_prob)){
prob_trt <- 1 - max_prob
}
}
# storing info of prob_trt
randomization[k, i] <- prob_trt
# generate data frame treatment assignment based on sampling and
# alternative hypothesis. leave the outcome variable empty.
data <- data.frame(
treatment = sample(x = 0:1, size = 1, prob = c(1 - prob_trt, prob_trt)),
outcome = rep(NA, 1))
# fill in the outcome variable based on the treatment assignement and proportion
# of event in respective arm
data$outcome <- rbinom(nrow(data), 1, prob = data$treatment * p_treatment +
(1 - data$treatment) * p_control +
drift_p[i])
# bind the data with previous block if available
data_total <- rbind(data_total, data)
# convert the data_total to factor for outcome and treatment
data_total$treatment <- as.factor(data_total$treatment)
data_total$outcome <- as.factor(data_total$outcome)
# making sure outcome level of 1 is present, even if its not present in the data
if(is.na(match("1", levels(data_total$outcome)))){
data_total$outcome <- factor(data_total$outcome,
levels=c(levels(data_total$outcome), "1"))
}
# making sure outcome level of 0 is present, even if its not present in the data
if(is.na(match("0", levels(data_total$outcome)))){
data_total$outcome <- factor(data_total$outcome,
levels=c(levels(data_total$outcome), "0"))
}
# making sure the treatment group is present, even if its not present in the data
if(is.na(match("1", levels(data_total$treatment)))){
data_total$treatment <- factor(data_total$treatment,
levels=c(levels(data_total$treatment), "1"))
}
# making sure the control group is present, even if its not present in the data
if(is.na(match("0", levels(data_total$treatment)))){
data_total$treatment <- factor(data_total$treatment,
levels=c(levels(data_total$treatment), "0"))
}
data_interim <- data_total %>%
mutate(time = time[1:i])
if(early_stop & (i >= min_patient_earlystop) & (i < N_total)){
if(any(table(data_interim$time) == 1 & N_total / block_number >= 2)){
remove <- as.numeric(which(table(data_interim$time) == 1))
data_interim <- data_interim[!(data_interim$time == remove), ]
}
# if one treatment is not present or one type of outcome is not present,
# set the test_statistics to 0.
if(all(data_interim$outcome == 1) | all(data_interim$outcome == 0) |
all(data_interim$treatment == 1) | all(data_interim$treatment == 0) |
nrow(data_interim) == 0){
test_stat <- 0
}
# else compute the test statistics
else if(any(N_total / block_number < 2 | block_number == 1 | all(data_interim$time == 1))){
test_stat <- sqrt(as.numeric(chisq.test(data_interim$treatment,
data_interim$outcome,
correct = correct)$statistic))
}
else{
p.val <- tryCatch(expr = mantelhaen.test(table(data_interim),
correct = correct)$p.val,
error = function(data_interim =
data_interim[data_interim$time == 1, ]){
as.numeric(chisq.test(data_interim$treatment,
data_interim$outcome,
correct = correct)$p.value / 2)})
if(is.nan(p.val) | p.val > 0.5){
p.val <- 0.5
}
test_stat <- sqrt(qchisq(1 - p.val, df = 1))
}
# if we allow early stopping and the
# the test_statistics exceed the lan-demets bound, quit the loop
if(test_stat > bounds[i - min_patient_earlystop + 1]){
index <- i
early_stopping[k] <- 1
time <- time[1:i]
randomization[k, (i + 1):N_total] <- 0
break
}
}
}
# adding the time factor to the data using group function
data_total <- data_total %>%
mutate(time = factor(time))
# summarizing the data by treatment group
ctrl_prop <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
trt_prop <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
# estimating prop_difference
prop_diff <- prop_strata(treatment = data_total$treatment,
outcome = data_total$outcome,
block = data_total$time)
if(any(table(data_total$time) == 1 & N_total / block_number >= 2)){
remove <- as.numeric(which(table(data_total$time) == 1))
data_total <- data_total[!(data_total$time == remove), ]
data_total$time <- droplevels(data_total$time)
}
# if number of block is 1 or if any block has only one patients, then use chisq.test
if(all(data_total$time == 1) | N_total / block_number < 2){
# if the prop is in the right direction, compute p-value
if(((ctrl_prop - trt_prop >= 0) & alternative == "less") |
((trt_prop - ctrl_prop >= 0) & alternative == "greater")){
p.val <- chisq.test(data_total$treatment, data_total$outcome,
correct = correct)$p.value / 2
}
else{
p.val <- 1
}
}
# compute mantelhaen.test for number of block > 1.
else{
p.val <- tryCatch(expr = mantelhaen.test(table(data_total),
correct = correct)$p.val,
error = function(data_total =
data_total[data_total$time == 1, ]){
as.numeric(chisq.test(data_total$treatment,
data_total$outcome,
correct = correct)$p.value / 2)})
}
# compute the sample size for control, treatment and proportion for
# control and treatment estimate
p_value <- c(p_value, p.val)
N_control <- c(N_control, sum(data_total$treatment == 0))
N_treatment <- c(N_treatment, sum(data_total$treatment == 1))
sample_size <- c(sample_size, nrow(data_total))
p_control_estimate <- c(p_control_estimate, ctrl_prop)
p_treatment_estimate <- c(p_treatment_estimate, trt_prop)
prop_diff_estimate <- c(prop_diff_estimate, prop_diff)
# compute the power if p-value is smaller than 0.05
if(p.val < (1 - conf_int)){
power <- power + 1
}
}
#return all the output in a list
output <- list(
power = power / simulation,
prop_diff_estimate = prop_diff_estimate,
N_enrolled = sample_size,
N_control = N_control,
N_treatment = N_treatment,
early_stop = early_stopping,
p_value = p_value,
prob_treatment = randomization
)
return(output)
}
## quiets concerns of R CMD check re: the .'s that appear in pipelines
if(getRversion() >= "2.15.1") utils::globalVariables(c("treatment", "outcome"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.