```r
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)
library(BOSSS)
```
```r
# n is the total sample size
# ninterim is the proportion of patients included in the interim analysis
# ainterim is alpha at the interim analysis (threshold p-value at interim analysis)
# afinal is alpha at the final analysis (threshold p-value for the 2nd and final analysis)
# this means the overall alpha for the trial is ainterim*afinal
#
# pcontshort is the probability of 1 day PoSH in the control arm
# pexpshort is the probability of 1 day PoSH in the experimental arm
# pcontlong is the probability of 6 month PoSH in the control arm
# pexplong is the probability of 6 month PoSH in the experimental arm
# p01_relative is such that p01_relative*pcontlong (or p01_relative*pexplong) is the
#   probability of a long term outcome after no short term outcome in the respective arm

# sim_trial <- function(design = c(300, 0.5, 0.4, 0.1),
#                       hypothesis = c(0.25, 0.125, 0.1, 0.03, 0))

sim_trial <- function(n = 300, ninterim = 0.5, ainterim = 0.4, afinal = 0.1,
                      pcontshort = 0.25, pexpshort = 0.125,
                      pcontlong = 0.1, pexplong = 0.03, p01_relative = 0) {

  ninterim <- floor(ninterim*n)

  patients <- c(1:n)                       # create patients
  treat <- rep(c(1,2), ceiling(n/2))[1:n]
  n_cont <- sum(treat == 1)
  short <- rep(0,n)                        # short term outcome
  long <- rep(0,n)                         # long term outcome
  data <- data.frame(patients, treat, short, long)  # combine into dataset

  # Generate result 1/0 for short term outcome. If short term outcome = 0 then
  # long term outcome = 0; if short term outcome = 1 then long term outcome has
  # probability pcontlong/pcontshort. Repeat for treatment = 2.

  # for(i in 1:n){
  #   if(treat[i]==1){
  #     data$short[i]<-rbinom(1,1,pcontshort)
  #     if(data$short[i]==0){
  #       data$long[i]<-0
  #     }
  #     if(data$short[i]==1){
  #       data$long[i]<-rbinom(1,1,pcontlong/pcontshort)
  #     }
  #   }
  #   else{
  #     data$short[i]<-rbinom(1,1,pexpshort)
  #     if(data$short[i]==0){
  #       data$long[i]<-0
  #     }
  #     if(data$short[i]==1){
  #       data$long[i]<-rbinom(1,1,pexplong/pexpshort)
  #     }
  #   }
  # }

  # An alternative and more general parameterisation of the above model,
  # allowing different values of the probability of a long outcome after
  # no short outcome (previously hard coded as 0).
  # Probability vector p00, p01, p10, p11 in (short, long) form.

  # Control arm:
  p01 <- p01_relative*pcontlong
  p11 <- pcontlong - p01
  p10 <- pcontshort - p11

  # Simulate outcomes in multinomial format
  cont_count <- rmultinom(1, n_cont, c((1 - p01 - p10 - p11), p01, p10, p11))

  # Translate these to short and long outcomes
  cont_out <- matrix(c(rep(c(0,0), cont_count[1]),
                       rep(c(0,1), cont_count[2]),
                       rep(c(1,0), cont_count[3]),
                       rep(c(1,1), cont_count[4])),
                     ncol = 2, byrow = TRUE)

  # Shuffle the list randomly
  cont_out <- cont_out[sample(1:nrow(cont_out)),]

  # Experimental arm:
  p01 <- p01_relative*pexplong
  p11 <- pexplong - p01
  p10 <- pexpshort - p11

  exp_count <- rmultinom(1, n - n_cont, c((1 - p01 - p10 - p11), p01, p10, p11))

  exp_out <- matrix(c(rep(c(0,0), exp_count[1]),
                      rep(c(0,1), exp_count[2]),
                      rep(c(1,0), exp_count[3]),
                      rep(c(1,1), exp_count[4])),
                    ncol = 2, byrow = TRUE)

  exp_out <- exp_out[sample(1:nrow(exp_out)),]

  data[data$treat == 1, c("short", "long")] <- cont_out
  data[data$treat == 2, c("short", "long")] <- exp_out

  # Perform chi squared test on the short term outcome for the first ninterim patients
  data2 <- data[1:ninterim,]
  tbl <- table(data2$short, data2$treat)
  test <- suppressWarnings(chisq.test(tbl)$p.value)  # get p-value

  # If p < ainterim, perform chi squared test on the long term outcome for all patients
  if(test < ainterim){
    tbl2 <- table(data$long, data$treat)
    test2 <- suppressWarnings(chisq.test(tbl2)$p.value)
  }

  # If test >= ainterim, the trial is unsuccessful at the interim analysis
  # If test < ainterim, the trial continues to the final analysis:
  #   if test2 > afinal, the trial is unsuccessful at the final analysis
  #   if test2 < afinal, the trial is successful at the final analysis
  if(test >= ainterim){
    return(c(g = FALSE, s = TRUE, n = ninterim))
  }
  if(test < ainterim){
    if(mean(data2[data2$treat == 1, "short"]) < mean(data2[data2$treat == 2, "short"])){
      return(c(g = FALSE, s = TRUE, n = ninterim))
    } else {
      if(test2 > afinal){
        return(c(g = FALSE, s = TRUE, n = n))
      }
      if(test2 < afinal){
        return(c(g = TRUE, s = FALSE, n = n))
      }
    }
  }
}

# For example,
sim_trial()
```
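Before wrapping this simulation up in a BOSSS problem, it can be useful to sanity-check it by estimating its operating characteristics directly. The following is only an illustrative check (not part of the original specification); the number of replicates is an arbitrary choice.

```r
# Quick Monte Carlo check under the default (alternative) parameter values;
# 1000 replicates is an arbitrary choice
res <- replicate(1000, sim_trial())
rowMeans(res)  # estimated P(go), P(no-go) and expected sample size
```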
```r
# Problem specification
design_space <- design_space(lower = c(300, 0.05, 0, 0),
                             upper = c(700, 0.5, 1, 1),
                             sim = sim_trial)

hypotheses <- hypotheses(values = matrix(c(0.25, 0.25, 0.1, 0.1, 0,
                                           0.25, 0.125, 0.1, 0.03, 0), ncol = 2),
                         hyp_names = c("null", "alt"),
                         sim = sim_trial)

constraints <- constraints(name = c("a", "b"),
                           out = c("g", "s"),
                           hyp = c("null", "alt"),
                           nom = c(0.1, 0.2),
                           delta = c(0.95, 0.95))

objectives <- objectives(name = c("TI", "TII", "EN"),
                         out = c("g", "s", "n"),
                         hyp = c("null", "alt", "null"),
                         weight = c(100, 100, 1),
                         binary = c(TRUE, TRUE, FALSE))

prob <- BOSSS_problem(sim_trial, design_space, hypotheses, objectives, constraints)

# Initialisation
size <- 40
N <- 500
sol <- BOSSS_solution(size, N, prob)
print(sol)
plot(sol)

# Iterations
N <- 500
for(i in 1:10) {
  sol <- iterate(sol, prob, N)
}
print(sol)
plot(sol)
```
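Once the iterations have finished, a specific design can be pulled out of the Pareto set and examined, mirroring the `one_d_plots` usage shown later in this vignette:

```r
# Pick a specific design from the Pareto set and plot its one-dimensional
# projections (same pattern as used in the later examples)
design <- sol$p_set[nrow(sol$p_set),]
one_d_plots(design, prob, sol)
```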
```r
library(msm)       # for msm() and hazard.msm()
library(reshape2)  # for melt()

sim_P2 <- function(n = 500, na = 30000, af = 1,
                   q12 = 0.05, q23 = 0.05, q34 = 0.03,
                   b12 = 0.67, b23 = 0.67, b34 = 0.67){

  fu <- na*af/n

  # Assessment times
  as_times <- floor(seq(af, fu, af))

  # Simulate numbers starting in each state
  starts <- rep(1:3, times = rmultinom(1, n, c(0.15, 0.7, 0.15)))
  trt <- rbinom(n, 1, 0.5)

  # Simulate transitions into each remaining state
  enter2 <- (starts == 1)*rexp(n, q12*exp(log(b12)*trt))
  enter3 <- (starts != 3)*(enter2 + rexp(n, q23*exp(log(b23)*trt)))
  enter4 <- (starts != 3)*enter3 + rexp(n, q34*exp(log(b34)*trt))

  # Matrix of transition times into each state for each patient
  enter_m <- matrix(c(enter2, enter3, enter4), ncol = 3)
  # Add time of "transitioning" to the end of the follow-up period
  enter_m <- cbind(enter_m, pmax(enter_m[,3], fu))
  # Cap all times at length of follow-up
  enter_m <- pmin(enter_m, fu)
  # Ceiling all times, assuming assessment is at the start of the day and so
  # any transitions after x.0 will be seen at (x+1).0
  enter_m <- ceiling(enter_m)

  # Translate into number of days spent in each state
  times_m <- cbind(enter_m[,1],
                   enter_m[,2] - enter_m[,1],
                   enter_m[,3] - enter_m[,2],
                   enter_m[,4] - enter_m[,3])

  y <- t(apply(times_m, 1, function(x) rep(1:4, times = x)))

  # Reshape to long
  y <- melt(y)
  colnames(y) <- c("id", "time", "state")
  y$trt <- trt[y$id]

  # Keep observations on assessment times and order by patient
  y <- y[y$time %in% as_times,]
  y <- y[order(y$id),]

  Q <- rbind(c(0, 0.1, 0,   0),
             c(0, 0,   0.1, 0),
             c(0, 0,   0,   0.1),
             c(0, 0,   0,   0))

  # If msm returns an error we take this as a non-significant result
  suppressWarnings(fit <- try({
    msm(state ~ time, subject = id, data = y, qmatrix = Q, covariates = ~ trt)
  }, silent = TRUE))

  if (inherits(fit, "try-error")) {
    sig <- 0
  } else {
    # If not converged, will not return CIs
    r1.67 <- hazard.msm(fit, cl = 1 - 0.0167)$trt
    if(ncol(r1.67) != 3){
      sig <- 0
    } else {
      sig1.67 <- sum(!(r1.67[,2] < 1 & r1.67[,3] > 1)) >= 1
      r2.5 <- hazard.msm(fit, cl = 1 - 0.025)$trt
      sig2.5 <- sum(!(r2.5[,2] < 1 & r2.5[,3] > 1)) >= 2
      r5 <- hazard.msm(fit, cl = 1 - 0.05)$trt
      sig5 <- sum(!(r5[,2] < 1 & r5[,3] > 1)) >= 3
      sig <- any(c(sig1.67, sig2.5, sig5))
    }
  }
  return(c(s = !sig))
}

det_P2 <- function(n = 500, na = 30000, af = 1,
                   q12 = 0.05, q23 = 0.05, q34 = 0.03,
                   b12 = 0.67, b23 = 0.67, b34 = 0.67){
  fu <- na*af/n
  return(c(n = n, a = na, f = fu))
}

sim_P2()
```
```r
design_space <- design_space(lower = c(100, 700, 1),
                             upper = c(2000, 120000, 14),
                             sim = sim_P2)

hypotheses <- hypotheses(values = matrix(c(0.05, 0.05, 0.03, 0.67, 0.67, 0.67), ncol = 1),
                         hyp_names = c("alt"),
                         sim = sim_P2)

constraints <- constraints(name = c("a", "b"),
                           out = c("s", "f"),
                           hyp = c("alt", "alt"),
                           nom = c(0.2, 200),
                           delta = c(0.95, 1),
                           binary = c(TRUE, FALSE))

objectives <- objectives(name = c("n", "a"),
                         out = c("n", "a"),
                         hyp = c("alt"),
                         weight = c(10, 1),
                         binary = c(FALSE, FALSE))

prob <- BOSSS_problem(sim_P2, design_space, hypotheses, objectives, constraints,
                      det_func = det_P2)
```
```r
size <- 30
N <- 100
sol <- BOSSS_solution(size, N, prob)
print(sol)
plot(sol)
```
```r
N <- 50
for(i in 1:5) {
  sol <- iterate(sol, prob, N)
}
print(sol)
plot(sol)
```
```r
# Pick a specific design from the Pareto set
design <- sol$p_set[nrow(sol$p_set),]
one_d_plots(design, prob, sol)
```
Consider the design of a cluster randomised trial with $m$ clusters per arm, $n$ patients per cluster and a binary outcome for each patient. The trial will be analysed using a mixed effects model fitted using the `glmer` function from the `lme4` package, using a Wald test of the null hypothesis of no difference between arms at the 0.025 (one-sided) level.
A design is defined by the choice of $m$ and $n$. We want to find designs which minimise the cost of sampling (a weighted combination of the number of clusters and the number of patients), and the overall probability of failure when marginalising over a joint prior distribution on the model parameters. A hypothesis, then, is not a set of point parameter values but a set of prior hyperparameter values.
```r
library(lme4)

sim_bayes <- function(m = 20, n = 10,
                      theta_0 = 0.2, eff_n_0 = 15,
                      theta_1 = 0.3, eff_n_1 = 15,
                      het_n_mean = 100, het_n_sd = 10) {

  library(lme4)

  # m = Number of clusters in each arm
  # n = Number of patients per cluster
  m <- round(m); n <- round(n)

  # thetas, eff_ns = Overall probability of outcome and effective prior
  # sample size in each arm

  # het_n is the effective sample size used in the beta prior of cluster
  # rates - it encapsulates our degree of certainty about the
  # between-cluster variance.
  het_n <- rgamma(1, shape = (het_n_mean/het_n_sd)^2, rate = het_n_mean/het_n_sd^2) + 2

  a_0 <- theta_0*eff_n_0; b_0 <- eff_n_0 - a_0
  a_1 <- theta_1*eff_n_1; b_1 <- eff_n_1 - a_1

  # Simulate overall rate in control arm
  r_0 <- rbeta(1, a_0, b_0)
  # Simulate cluster rates in control arm
  c_r_0 <- rbeta(m, r_0*het_n, het_n - r_0*het_n)
  # Simulate patient outcomes in control arm
  p_0 <- rbinom(m*n, 1, rep(c_r_0, each = n))

  # Simulate overall rate in exp arm
  r_1 <- rbeta(1, a_1, b_1)
  # Simulate cluster rates in exp arm
  c_r_1 <- rbeta(m, r_1*het_n, het_n - r_1*het_n)
  # Simulate patient outcomes in exp arm
  p_1 <- rbinom(m*n, 1, rep(c_r_1, each = n))

  # Create data frame
  df <- data.frame(trt = rep(c(0, 1), each = n*m),
                   c_id = rep(1:(2*m), each = n),
                   y = c(p_0, p_1))

  fit <- suppressMessages(glmer(y ~ trt + (1 | c_id), family = binomial, data = df))

  est <- fixef(fit)[[2]]
  se <- sqrt(diag(vcov(fit)))[[2]]
  s <- est/se < qnorm(0.975)

  return(c(s = s))
}

sim_bayes()

det_bayes <- function(m = 20, n = 10,
                      theta_0 = 0.2, eff_n_0 = 50,
                      theta_1 = 0.4, eff_n_1 = 50,
                      het_n_mean = 100, het_n_sd = 10) {
  m <- round(m); n <- round(n)
  return(c(c = 2*m*n + 10*m))
}

det_bayes()
```
```r
design_space <- design_space(lower = c(5, 5),
                             upper = c(50, 50),
                             sim = sim_bayes)

hypotheses <- hypotheses(values = matrix(c(0.2, 30, 0.4, 30, 100, 10), ncol = 1),
                         hyp_names = c("alt"),
                         sim = sim_bayes)

objectives <- objectives(name = c("pow", "cost"),
                         out = c("s", "c"),
                         hyp = c("alt", "alt"),
                         weight = c(100, 1),
                         binary = c(TRUE, FALSE))

problem <- BOSSS_problem(sim_bayes, design_space, hypotheses, objectives,
                         det_func = det_bayes)

size <- 50
N <- 50
solution <- BOSSS_solution(size, N, problem)
print(solution)
plot(solution)
```
```r
for(i in 1:10) {
  solution <- iterate(solution, problem, N)
}
print(solution)
plot(solution)
```
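As in the previous example, a specific design from the Pareto set can then be examined in more detail:

```r
# Pick a specific design from the Pareto set (same pattern as the earlier example)
design <- solution$p_set[nrow(solution$p_set),]
one_d_plots(design, problem, solution)
```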
```r
library(AER)

sim_trial <- function(n = 200, mu = 0.3) {
  z <- rep(c(0, 1), n)
  u <- rnorm(2*n)
  lp_x <- 4*z - 2 + u
  x <- rbinom(2*n, 1, exp(lp_x)/(exp(lp_x) + 1))
  y <- mu*x + u
  s <- summary(ivreg(y ~ x | z))$coefficients[2,4] > 0.05

  # Analytic sample size calc from Freeman 2013
  # (computed for reference only; not part of what the function returns)
  (qnorm(0.025) + qnorm(0.2))^2*var(y - x*cov(y, z)/cov(x, z))/(var(x)*mu^2*cor(x, z)^2)

  return(c(s = s))
}

mean(replicate(10^4, sim_trial(n = 734/2)))
```
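A corresponding BOSSS problem could then be set up along the same lines as the earlier examples. The following is only a sketch: the deterministic helper `det_trial`, the design space bounds, the nominal constraint level and the objective weights are all illustrative assumptions rather than values taken from the original analysis.

```r
# Illustrative sketch only - det_trial, the bounds, nominal level and weights
# below are assumptions, following the pattern of the earlier examples
det_trial <- function(n = 200, mu = 0.3) return(c(n = n))

design_space <- design_space(lower = c(100), upper = c(1000), sim = sim_trial)

hypotheses <- hypotheses(values = matrix(c(0.3), ncol = 1),
                         hyp_names = c("alt"), sim = sim_trial)

constraints <- constraints(name = c("a"), out = c("s"), hyp = c("alt"),
                           nom = c(0.2), delta = c(0.95))

objectives <- objectives(name = c("TII", "EN"), out = c("s", "n"),
                         hyp = c("alt", "alt"), weight = c(100, 1),
                         binary = c(TRUE, FALSE))

prob <- BOSSS_problem(sim_trial, design_space, hypotheses, objectives, constraints,
                      det_func = det_trial)
```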
source("C:/Users/meddwilb/OneDrive - University of Leeds/Documents/Research/Projects/BOSSS example/PowCalc_Binary_DGM_5_freq.R") source("C:/Users/meddwilb/OneDrive - University of Leeds/Documents/Research/Projects/BOSSS example/PowCalc_Binary_Analysis_5_freq.R") source("C:/Users/meddwilb/OneDrive - University of Leeds/Documents/Research/Projects/BOSSS example/PowCalc_Binary_DGM_5a_freq.R") source("C:/Users/meddwilb/OneDrive - University of Leeds/Documents/Research/Projects/BOSSS example/PowCalc_Binary_BPPcalculator_5_freq.R") source("C:/Users/meddwilb/OneDrive - University of Leeds/Documents/Research/Projects/BOSSS example/PowCalc_Binary_Design_5_freq.R") # n = Total sample size # n_interim = Interim sample size # p0_true and p1_true:= the assumed true probabilities of an event in the control (0) and experimental (1) arms # alpha = the threshold used at the final analysis to determine whether the result is significant or not. At final analysis, the bounds of the 100*(1 - alpha/2)% credible interval of the log-odds ratio is compared to 0. # BPP_numsims:= how many simulations are used to evaluate Bayesian Predictive Power at the interim analysis # BPP_threshold:= BPP must be >= this value in order for the trial to continue to final analysis # Remaining: the analysis of the trial assumes beta prior distributions on p0 and p1. p0 ~ beta(APr_p0_alpha, APr_p0_beta) and p1 ~ beta(APr_p1_alpha, APr_p1_beta) # by default these are both set to be beta(1,1), non-informative. sim_trial <- function(design = c(400, 200, 0.2, 0.05), hypothesis = c(0.2, 0.1)) { design <- as.numeric(design); hypothesis <- as.numeric(hypothesis) n <- design[1]; n_interim <- design[2]; BPP_threshold <- design[3]; alpha <- design[4] p0_true <- hypothesis[1]; p1_true <- hypothesis[2] BPP_numsims=200; APr_p0_alpha=1; APr_p0_beta=1; APr_p1_alpha=1; APr_p1_beta=1 # Generate a dataset chk_DGM_output <- DGM_run(n, n_interim, p0_true, p1_true) #Debugging # print("DGM output DONE") # Analyse chk_Analysis_output <- Analysis_run(compiled_model, interim_data=chk_DGM_output$interim_data, chk_DGM_output$full_data, alpha, BPP_numsims, APr_p0_alpha, APr_p0_beta, APr_p1_alpha, APr_p1_beta) #Debugging # print("Analysis output DONE") BPP_sig <- (chk_Analysis_output$BPP >= BPP_threshold) final_sig <- (chk_Analysis_output$beta1quants[2] <= 0 | chk_Analysis_output$beta1quants[1] >= 0) trial_sig <- (BPP_sig & final_sig) trial_non_sig <- !trial_sig N_trial <- n_interim + BPP_sig*(n-n_interim) # Return the results results <- c( # BPP_sig = BPP_sig, trial_sig = trial_sig, trial_non_sig = trial_non_sig, N_trial = N_trial ) # Return the results # results <- c( # beta1_est_int = chk_Analysis_output$beta1mean_interim, # beta1_lower_int = chk_Analysis_output$beta1quants_interim[1], # beta1_upper_int = chk_Analysis_output$beta1quants_interim[2], # beta0_est_int = chk_Analysis_output$beta0mean_interim, # BPP = chk_Analysis_output$BPP, # BPP_sig = BPP_sig, # beta1_est_final = chk_Analysis_output$beta1mean, # beta1_lower_final = chk_Analysis_output$beta1quants[1], # beta1_upper_final = chk_Analysis_output$beta1quants[2], # beta0_est_final = chk_Analysis_output$beta0mean, # final_sig = final_sig, # trial_sig = trial_sig, # trial_non_sig = trial_non_sig, # N_trial = N_trial # ) return(results) } # For example, sim_trial()
```r
design_space <- design_space(name = c("n", "n_interim", "BPP_threshold", "alpha"),
                             lower = c(700, 300, 0, 0),
                             upper = c(1400, 650, 1, 0.2))

hypotheses <- hypotheses(par_name = c("p0_true", "p1_true"),
                         values = matrix(c(0.12, 0.12, 0.12, 0.06), ncol = 2),
                         hyp_names = c("null", "alt"))

constraints <- constraints(name = c("a", "b"),
                           out = c("trial_sig", "trial_non_sig"),
                           hyp = c("null", "alt"),
                           nom = c(0.2, 0.4),
                           delta = c(0.95, 0.95),
                           stoch = c(TRUE, TRUE))

objectives <- objectives(name = c("f1", "f2", "f3"),
                         out = c("trial_sig", "trial_non_sig", "N_trial"),
                         hyp = c("null", "alt", "null"),
                         weight = c(100, 100, 1),
                         stoch = c(TRUE, TRUE, TRUE),
                         binary = c(TRUE, TRUE, FALSE))
```
Suppose we plan a Bayesian analysis of a trial comparing two therapies, where we expect a multiple membership structure. In particular, each of the $2n$ patients in the trial will receive six treatment sessions, but these sessions can be delivered by any of the $m$ therapists in their arm of the trial. Considering the number of therapists in each arm as fixed at $m = 10$, we want to know how many patients are required for the trial to have sufficient power whilst controlling the type I error rate (both being frequentist concepts).
We start with the simulation. To simulate the data we first construct the matrix $\mathbf{V}$ (`v_m` in the code), which has a row for every patient and a column for every therapist, with $\mathbf{V}_{ij}$ denoting the proportion of patient $i$'s treatment delivered by therapist $j$, for $i = 1, \ldots, 2n$ and $j = 1, \ldots, 2m$. Denoting the within-cluster variance by $\sigma_e^2$ (`var_e`), the between-cluster variance by $\sigma_v^2$ (`var_v`), and the ICC by $\rho$ (`rho`), the outcome data can be simulated as

$$
\mathbf{y} \sim N\left(E[\mathbf{y}], \sigma_e^2 \mathbf{I}_{2n} + \sigma_v^2 \mathbf{V} \mathbf{V}^T \right).
$$

These data are then analysed using Stan via the `brm` function from the `brms` package.
```r
library(dirmult)   # for rdirichlet()
library(brms)      # for brm(), prior() and fixef()

sim_trial <- function(n = 100, var_e = 0.9, rho = 0.1, mu = 0.3){

  # Number of therapists per arm; fixed at 10 as described in the text,
  # so it is not a design variable
  m <- 10

  v_0 <- cbind(t(rmultinom(n = n, size = 6, prob = rdirichlet(1, rep(0.7, m)))),
               matrix(rep(0, n*m), nrow = n))
  v_1 <- cbind(matrix(rep(0, n*m), nrow = n),
               t(rmultinom(n = n, size = 6, prob = rdirichlet(1, rep(0.7, m)))))
  v_m <- rbind(v_0, v_1)

  var_v <- rho*var_e/(1 - rho)
  Sigma <- var_e * diag(2*n) #+ var_v * (v_m %*% t(v_m))

  X <- matrix(c(rep(1, 2*n), rep(0:1, each = n)), ncol = 2)
  y <- t(mvtnorm::rmvnorm(1, mean = X %*% c(0, mu), sigma = Sigma))

  df <- data.frame(y = y, trt = X[,2])
  v_m <- as.data.frame(v_m)
  df <- cbind(df, t(apply(v_m, 1, function(x) rep(names(x), x))))
  names(df)[3:8] <- paste0("v", 1:6)

  fit_empty <- brm(y ~ trt, #+ (1| mm(v1, v2, v3, v4, v5, v6)),
                   prior = prior(normal(0, 5), class = b),
                   data = df, chains = 0, silent = 2,
                   file = "fit_empty")
  fit_empty <- readRDS("fit_empty.rds")

  fit <- update(fit_empty, newdata = df, chains = 1)

  g <- fixef(fit)[2,3] > 0
  s <- !g

  return(c(g = g, s = s))
}

sim_trial()
```
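The remaining BOSSS specification for this example would follow the same pattern as before. The sketch below is illustrative only: the deterministic helper `det_trial`, the design space bounds, the hypothesis values (beyond those given in the text), the nominal constraint levels and the weights are all assumptions.

```r
# Illustrative sketch only - all numerical choices below are assumptions,
# following the pattern of the earlier examples
det_trial <- function(n = 100, var_e = 0.9, rho = 0.1, mu = 0.3) return(c(n = n))

design_space <- design_space(lower = c(50), upper = c(500), sim = sim_trial)

hypotheses <- hypotheses(values = matrix(c(0.9, 0.1, 0,
                                           0.9, 0.1, 0.3), ncol = 2),
                         hyp_names = c("null", "alt"),
                         sim = sim_trial)

constraints <- constraints(name = c("a", "b"), out = c("g", "s"),
                           hyp = c("null", "alt"),
                           nom = c(0.025, 0.2), delta = c(0.95, 0.95))

objectives <- objectives(name = c("TI", "TII", "EN"), out = c("g", "s", "n"),
                         hyp = c("null", "alt", "alt"),
                         weight = c(100, 100, 1),
                         binary = c(TRUE, TRUE, FALSE))

prob <- BOSSS_problem(sim_trial, design_space, hypotheses, objectives, constraints,
                      det_func = det_trial)
```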