Nothing
#' @title Block Design for Response-Adaptive Randomization for Binomial Data
#'
#' @description Simulation for binomial counts for block design for
#' response-adaptive randomization with time as a confounding
#' @param p_treatment scalar. Proportion of events under the treatment arm.
#' @param p_control scalar. Proportion of events under the control arm.
#' @param N_total scalar. Total sample size.
#' @param block_number scalar. Number of blocks or time levels. The default is set to 4.
#' If \code{block_number} is set to 1. This is a traditional RCT design.
#' @param drift scalar. The increase or decrease in proportion of event over time.
#' In this case, the proportion of failure changes in each block by the number of
#' patient accured over the total sample size. The full drift effect is seen in the
#' final block.
#' @param simulation scalar. Number of simulation to be ran. The default is set to 10000.
#' @param conf_int scalar. Confidence level of the interval.
#' @param alternative character. A string specifying the alternative hypothesis,
#' must be one of "less" or "greater" (default).
#' @param correct logical. A logical indicating whether to apply continuity correction
#' when computing the test statistic: one half is subtracted from all |O - E|
#' differences; however, the correction will not be bigger than the differences themselves.
#' @param replace logical. should sampling be with replacement? If replace is set to
#' FALSE (default), the 0 for control, 1 for treatment is replicated to the closest
#' integer and this vector is sampled with no replacement. If replace is set to TRUE,
#' the sampling is done based on randomization ratio provided with replacement.
#' @param early_stop logical. A logical indicating whether the trials are stopped early
#' for success or futility.
#' @param size_equal_randomization scalar. The number of run in patients because adaptive
#' randomization is applied.
#' @param min_patient_earlystop scalar. Minimum number of patients before early stopping
#' rule is applied.
#' @param max_prob scalar. The maximum probability for assigning to treatment/control
#' group is 0.8.
#'
#'
#' @return a list with details on the simulation.
#' \describe{
#' \item{\code{power}}{
#' scalar. The power of the trial, ie. the proportion of success over the
#' number of simulation ran.}
#' \item{\code{p_control_estimate}}{
#' scalar. The estimated proportion of events under the control group.}
#' \item{\code{p_treatment_estimate}}{
#' scalar. The estimated proportion of events under the treatment group.}
#' \item{\code{N_enrolled}}{
#' vector. The number of patients enrolled in the trial (sum of control
#' and experimental group for each simulation. )}
#' \item{\code{N_control}}{
#' vector. The number of patients enrolled in the control group for
#' each simulation.}
#' \item{\code{N_control}}{
#' vector. The number of patients enrolled in the experimental group for
#' each simulation.}
#' \item{\code{randomization_ratio}}{
#' matrix. The randomization ratio allocated for each block.}
#' }
#'
#' @importFrom stats rbinom mantelhaen.test chisq.test
#' @importFrom ldbounds bounds
#' @importFrom dplyr mutate group_by summarize
#' @importFrom tibble as.tibble
#'
#' @export binomialfreq
#'
#' @examples
#' binomialfreq(p_control = 0.7, p_treatment = 0.65, N_total = 200,
#' block_number = 2, simulation = 100)
#' binomialfreq(p_control = 0.5, p_treatment = 0.40, N_total = 200,
#' block_number = 2, simulation = 100, drift = -0.15)
binomialfreq <- function(
p_control,
p_treatment,
N_total,
block_number = 4,
drift = 0,
simulation = 10000,
conf_int = 0.95,
alternative = "greater",
correct = FALSE,
replace = TRUE,
early_stop = FALSE,
size_equal_randomization = 20,
min_patient_earlystop = 20,
max_prob = 0.8
){
# stop if proportion of control is not between 0 and 1.
if((p_control <= 0 | p_control >= 1)){
stop("The proportion of event for the control group needs to between 0 and 1!")
}
# stop if proportion of treatment is not between 0 and 1.
if((p_treatment <= 0 | p_treatment >= 1)){
stop("The proportion of event for the treatment group needs to between 0 and 1!")
}
## make sure sample size is an integer!
if((N_total <= 0 | N_total %% 1 != 0)){
stop("The sample size needs to be a positive integer!")
}
# the number of blocks need to be an integer.
if((block_number <= 0 | block_number %% 1 != 0)){
stop("The number of blocks needs to be a positve integer!")
}
# the number of blocks is bigger than the sample size.
if(N_total/ block_number < 1){
stop("The number of blocks is greater than sample size!")
}
# number of simulation needs to be a positive integer.
if((simulation <= 0 | simulation %% 1 != 0)){
stop("The number of simulation needs to be a positve integer!")
}
# the confidence interval needs to be between 0 and 1.
if((conf_int <= 0 | conf_int >= 1)){
stop("The confidence interval needs to between 0 and 1!")
}
# the alternative is either less or greater, two-sided not acceptable.
if((alternative != "less" & alternative != "greater")){
stop("The alternative can only be less or greater!")
}
# sampling accept either true or false
if((replace != "TRUE" & replace != "FALSE")){
stop("The replacement for sampling is either TRUE or FALSE!")
}
# the drift value cant make the prop of control/treatment exceed 1 or below 0.
if(drift + p_control >= 1 | drift + p_control <= 0 |
drift + p_treatment >= 1 | drift + p_treatment <= 0){
stop("The drift value is too high causing the proportion of event to exceed 1
in either the control or treatment group, pick a lower value for drift!")
}
# if
if(!(early_stop == FALSE | early_stop == TRUE)){
stop("Early stopping can be only TRUE or FALSE!")
}
# if number of patient in each block is 2 or smaller, sampling is done with replacement
if(replace == FALSE & N_total / block_number <= 2){
replace <- TRUE
}
# total sample size is divided equally into blocks
group <- rep(floor(N_total / block_number), block_number)
# if the remainder of total sample size divided by block number is not 0,
# randomly add patients to each block
if((N_total - sum(group)) > 0){
index <- sample(1:block_number, N_total - sum(group))
group[index] <- group[index] + 1
}
# if we allow early stopping, compute the lan-demets bound
if(early_stop){
# divided time equally between 0 and 1 with the number of blocks
time <- cumsum(group)[cumsum(group) > min_patient_earlystop] / N_total
#using lan-demets bound, computing the early stopping criteria for the number of blocks
bounds <- bounds(time, iuse = c(1, 1), alpha = c((1 - conf_int) / 2, (1 - conf_int) / 2))$upper.bounds
}
#assigning power to 0
power <- 0
# assigning overall variables as NULL and drift for patient by patient
N_control <- NULL
N_treatment <- NULL
sample_size <- NULL
p_control_estimate <- NULL
p_treatment_estimate <- NULL
prop_diff_estimate <- NULL
drift_p <- seq(drift / N_total, drift, length.out = N_total)
early_stopping <- rep(0, simulation)
randomization <- array(NA, c(simulation, block_number))
# looping overall all simulation
for(k in 1:simulation){
# assigning variables as NULL for each simulation
data_total <- data.frame()
test_stat <- 0
index <- block_number
#looping over all blocks
for(i in 1:block_number){
# create a data summary from previos block or if its null, create an empty
# summary
if(nrow(data_total) != 0 & nrow(data_total) >= min_patient_earlystop){
y_ctr <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
N_ctr <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
y_trt <- sum(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
N_trt <- length(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
}
else{
y_ctr <- 0
N_ctr <- 0
y_trt <- 0
N_trt <- 0
}
# if the alternative is greater, use proportion to set randomization ratio
if(alternative == "greater"){
prob_trt <- (y_trt + 1) / (N_trt + 2) /
((y_trt + 1) / (N_trt + 2) + (y_ctr + 1) / (N_ctr + 2))
}
# if the alternative is greater, use 1 - proportion to set randomization ratio
else{
prob_trt <- (1 - (y_trt + 1) / (N_trt + 2)) /
(1 - (y_trt + 1) / (N_trt + 2) + (1 - (y_ctr + 1) / (N_ctr + 2)))
}
# maximum probability assigning to the treatment group is 0.8
if(prob_trt > max_prob){
prob_trt <- max_prob
}
# maximum probability assigning to the control group is 0.8
else if(prob_trt < (1 - max_prob)){
prob_trt <- 1 - max_prob
}
# storing info of prob_trt
randomization[k, i] <- prob_trt
# generate data frame treatment assignment based on sampling and
# alternative hypothesis. leave the outcome variable empty.
data <- data.frame(
treatment =
if(replace == TRUE){
sample(0:1, replace = T, group[i], prob = c(1 - prob_trt, prob_trt))
}
else{
sampling <- rep(c(0, 1), round(c(group[i] * (1 - prob_trt) - 0.0001,
group[i] * prob_trt + 0.0001)))
sample(sampling, length(sampling))
},
outcome = rep(NA, group[i]))
# fill in the outcome variable based on the treatment assignement and proportion
# of event in respective arm
data$outcome <- rbinom(nrow(data), 1, prob = data$treatment * p_treatment +
(1 - data$treatment) * p_control +
drift_p[((1:nrow(data)) + nrow(data_total))])
# bind the data with previous block if available
data_total <- rbind(data_total, data)
# convert the data_total to factor for outcome and treatment
data_total$treatment <- as.factor(data_total$treatment)
data_total$outcome <- as.factor(data_total$outcome)
# making sure outcome level of 1 is present, even if its not present in the data
if(is.na(match("1", levels(data_total$outcome)))){
data_total$outcome <- factor(data_total$outcome,
levels=c(levels(data_total$outcome), "1"))
}
# making sure outcome level of 0 is present, even if its not present in the data
if(is.na(match("0", levels(data_total$outcome)))){
data_total$outcome <- factor(data_total$outcome,
levels=c(levels(data_total$outcome), "0"))
}
# making sure the treatment group is present, even if its not present in the data
if(is.na(match("1", levels(data_total$treatment)))){
data_total$treatment <- factor(data_total$treatment,
levels=c(levels(data_total$treatment), "1"))
}
# making sure the control group is present, even if its not present in the data
if(is.na(match("0", levels(data_total$treatment)))){
data_total$treatment <- factor(data_total$treatment,
levels=c(levels(data_total$treatment), "0"))
}
# if one treatment is not present or one type of outcome is not present,
# set the test_statistics to 0.
if(all(data_total$outcome == 1) | all(data_total$outcome == 0) |
all(data_total$treatment == 1) | all(data_total$treatment == 0)){
test_stat <- 0
}
# else compute the test statistics
else{
test_stat <- sqrt(as.numeric(chisq.test(data_total$treatment,
data_total$outcome,
correct = correct)$statistic))
}
if(N_total / block_number > min_patient_earlystop){
bound_index <- i
}
else{
bound_index <- i - min_patient_earlystop / (N_total / block_number)
}
# if we allow early stopping and the
# the test_statistics exceed the lan-demets bound, quit the loop
if(early_stop & (nrow(data_total) > min_patient_earlystop) & (nrow(data_total) < N_total)){
if(test_stat > bounds[bound_index]){
index <- i
early_stopping[k] <- 1
randomization[k, (i + 1):block_number] <- 0
break
}
}
}
# adding the time factor to the data using group function
data_total <- data_total %>%
mutate(time = factor(rep(1:index, group[1:index])))
# summarizing the data by treatment group
ctrl_prop <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 0])))
trt_prop <- mean(as.numeric(as.character(data_total$outcome[data_total$treatment == 1])))
# estimating prop_difference
prop_diff <- prop_strata(treatment = data_total$treatment,
outcome = data_total$outcome,
block = data_total$time)
# if number of block is 1 or if any block has only one patients, then use chisq.test
if(all(data_total$time == 1) | N_total / block_number < 2){
# if the prop is in the right direction, compute p-value
if(((ctrl_prop - trt_prop >= 0) & alternative == "less") |
((trt_prop - ctrl_prop >= 0) & alternative == "greater")){
p.val <- chisq.test(data_total$treatment, data_total$outcome,
correct = correct)$p.value / 2
}
else{
p.val <- 1
}
}
# compute mantelhaen.test for number of block > 1.
else{
p.val <- mantelhaen.test(table(data_total), alternative = alternative,
correct = correct)$p.val
}
# compute the sample size for control, treatment and proportion for
# control and treatment estimate
N_control <- c(N_control, sum(data_total$treatment == 0))
N_treatment <- c(N_treatment, sum(data_total$treatment == 1))
sample_size <- c(sample_size, nrow(data_total))
p_control_estimate <- c(p_control_estimate, ctrl_prop)
p_treatment_estimate <- c(p_treatment_estimate, trt_prop)
prop_diff_estimate <- c(prop_diff_estimate, prop_diff)
# compute the power if p-value is smaller than 0.05
if(p.val < (1 - conf_int)){
power <- power + 1
}
}
#return all the output in a list
output <- list(
power = power / simulation,
p_control_estimate = p_control_estimate ,
p_treatment_estimate = p_treatment_estimate,
prop_diff_estimate = prop_diff_estimate,
N_enrolled = sample_size,
N_control = N_control,
N_treatment = N_treatment,
early_stop = early_stopping,
randomization_ratio = randomization
)
return(output)
}
## quiets concerns of R CMD check re: the .'s that appear in pipelines
if(getRversion() >= "2.15.1") utils::globalVariables(c("treatment", "outcome"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.