###################################################################################################
############# Bayesian Multilevel Analysis of Cluster Randomised Education Trials
##################################################################################################
#' Bayesian analysis of cluster randomised education trials using Vague Priors.
#'
#' \code{crtBayes} performs analysis of cluster randomised education trials using a multilevel model under a Bayesian setting,
#' assuming vague priors.
#'
#' @export
#' @param formula the model to be analysed is of the form y ~ x1+x2+.... Where y is the outcome variable and Xs are the independent variables.
#' @param random a string variable specifying the "clustering variable" as contained in the data. See example below.
#' @param intervention a string variable specifying the "intervention variable" as appearing in the formula and the data. See example below.
#' @param baseln A string variable allowing the user to specify the reference category for intervention variable. When not specified, the first level will be used as a reference.
#' @param adaptD As this function uses rstanarm, this term provides the target average proposal acceptance probability during Stan’s adaptation period. Default is NULL.
#' @param nsim number of MCMC iterations per chain. Default is 2000.
#' @param threshold a scalar or vector of pre-specified threshold(s) for estimating Bayesian posterior probability such that the observed effect size is greater than or equal to the threshold(s).
#' @param ... additional arguments of \code{\link[rstanarm]{stan_lmer}} to be passed to the function.
#' @param data data frame containing the data to be analysed.
#' @return S3 object; a list consisting of
#' \itemize{
#' \item \code{Beta}: Estimates and credible intervals for variables specified in the model. Use \code{summary.eefAnalytics} to get Rhat and effective sample size for each estimate.
#' \item \code{ES}: Conditional Hedges' g effect size and its 95% credible intervals.
#' \item \code{covParm}: A vector of variance decomposition into between cluster variance (Schools) and within cluster variance (Pupils). It also contains intra-cluster correlation (ICC).
#' \item \code{SchEffects}: A vector of the estimated deviation of each school from the intercept.
#' \item \code{ProbES}: A matrix of Bayesian Posterior Probabilities such that the observed effect size is greater than or equal to a pre-specified threshold(s).
#' \item \code{Model}: A stan_glm object used in ES computation, this object can be used for convergence diagnostic.
#' \item \code{Unconditional}: A list of unconditional effect sizes, covParm and ProbES obtained based on between and within cluster variances from the unconditional model (model with only the intercept as a fixed effect).
#' }
#' @example inst/examples/crtBExample.R
crtBayes <- function(formula,random,intervention,baseln,adaptD,nsim=2000,data,threshold=1:10/10,...)UseMethod("crtBayes")
#' @export
crtBayes.default <- function(formula,random,intervention,baseln,adaptD,nsim=2000,data,threshold=1:10/10,...){stop("No correct formula input given.")}
#' @export
crtBayes.formula <- function(formula,random,intervention,baseln,adaptD,nsim=2000,data,threshold=1:10/10,...){
#if(nsim<2000){stop("nsim must be at least 2000")}
data <- na.omit(data.frame(data)[ ,unique(c(all.vars(formula),random, intervention))])
data <- data[order(data[,which(colnames(data)==random)]),]
intervention <- intervention
trt <- data[,which(colnames(data)==intervention)]
tmp2 <- which(colnames(data)==random)
cluster2 <- data[,tmp2]
chk <- sum(rowSums(table(cluster2,trt)!=0)>1)
if(chk >0){stop("This is not a CRT design")}
stp <- as.character(row.names(table(cluster2,trt)))
stp2 <- (apply(table(cluster2,trt),1,function(x)colnames(table(cluster2,trt))[x!=0]))
tmp3 <- which(colnames(data)==intervention)
#data[,tmp3] <- as.factor(data[,tmp3])
if(missing(baseln)){data[,tmp3] <- as.factor(data[,tmp3])}
if(!missing(baseln)){data[,tmp3] <- relevel(as.factor(data[,tmp3]),baseln)}
if(missing(adaptD)){adaptD=NULL}
mf <- model.frame(formula=formula, data=data)
mf <- mf[order(cluster2),]
cluster <- cluster2[order(cluster2)]
trt <- trt[order(cluster2)]
fixedDesignMatrix <- as.matrix(data.frame(model.matrix(attr(mf, "terms"), data=data)))
tmp <- colnames(fixedDesignMatrix )
tmp[1] <- "Intercept"
colnames(fixedDesignMatrix)<- tmp
posttest <- model.response(mf)
new.data <- data.frame(post=posttest, fixedDesignMatrix[,-1],cluster=cluster)
btp <- which(tmp %in% paste0(intervention, unique(data[, intervention]) ))
LHS.formula <- "post"
RHS.formula <-paste(tmp[-1], collapse = "+")
RHS.cluster <-"+(1|cluster)"
new.formula <- as.formula(paste0(LHS.formula,"~",RHS.formula,RHS.cluster))
new.formula0 <- as.formula(paste0(LHS.formula,"~",RHS.cluster))
nsim=nsim
if(length(tmp2)!= 1){stop("Cluster variable misspecified")}
if(length(tmp3)!= 1){stop("Intervention variable misspecified")}
#if(nsim < 2000){stop("nsim >= 10000 is recommended")}
output <- erantBAYES(formula=new.formula, unc.formula=new.formula0,intervention=intervention,data1=new.data,cluster=random, adaptD=adaptD,nsim=nsim,threshold=threshold,btp=btp,...)
output$Method <- "MLM"
output$Function <- "crtBayes"
class(output) <- "eefAnalytics"
invisible(output)
}
#############################################################################################################################
################################################################################################################################
## - internal
erantBAYES<- function(formula,unc.formula,intervention,data1,cluster,btp, adaptD=adaptD,nsim=nsim,threshold,...){
freqFit <- stan_lmer(formula=formula, data=data1,adapt_delta =adaptD, seed = 1234,iter=nsim,...)
freqFit0 <-stan_lmer(formula=unc.formula,data=data1,adapt_delta =adaptD, seed = 1234,iter=nsim,...)
Sim_Beta <- data.frame(as.matrix(freqFit))
# btp <- which(substring(names(Sim_Beta),1,nchar(intervention))==intervention &
# nchar(names(Sim_Beta))==(nchar(intervention)+1))
#btp <- which(names(Sim_Beta) %in% Int.lev)
output1 <- list()
output2 <- list()
ProbES <- list()
for( i in 1:length(btp )){
Output0=errantSummary(freqFit=freqFit,freqFit0=freqFit0,Sim_Beta= Sim_Beta, btp= btp[i])
Beta <- Output0$Beta
ICC1 <- Output0$ICC1
Sim_Beta_t <- Output0$Sim_Beta_t
var.B1 <- Output0$var.B1
var.W1 <- Output0$var.W1
var.tt1 <- Output0$var.tt1
sim_ES.tt1 <- Sim_Beta_t/sqrt(var.tt1)
sim_ES.W1 <- Sim_Beta_t/sqrt(var.W1)
ICC2 <- Output0$ICC2
var.W2 <- Output0$var.W2
var.tt2 <- Output0$var.tt2
sim_ES.tt2 <- Sim_Beta_t/sqrt(var.tt2)
sim_ES.W2 <- Sim_Beta_t/sqrt(var.W2)
ProbES0 <- data.frame(cbind(Total1=sapply(threshold, function(j) mean(sim_ES.tt1 > j)),
Within1 =sapply(threshold, function(j) mean(sim_ES.W1 > j)),
Total2=sapply(threshold, function(j) mean(sim_ES.tt2 > j)),
Within2 =sapply(threshold, function(j) mean(sim_ES.W2 > j))))
row.names(ProbES0) <- paste0("P(ES>",threshold,")")
ProbES[[i]] <- round(ProbES0,3)
gtmp.W1 <- round(c("ES"=mean(sim_ES.W1), quantile(sim_ES.W1,probs=c(0.025,0.975))),2)
gtmp.W2 <- round(c("ES"=mean(sim_ES.W2), quantile(sim_ES.W2,probs=c(0.025,0.975))),2)
gtmp.tt1 <- round(c("ES"=mean(sim_ES.tt1), quantile(sim_ES.tt1,probs=c(0.025,0.975))),2)
gtmp.tt2 <- round(c("ES"=mean(sim_ES.tt2), quantile(sim_ES.tt2,probs=c(0.025,0.975))),2)
output.1 <- rbind(Within=gtmp.W1,Total=gtmp.tt1)
output.2 <- rbind(Within=gtmp.W2,Total=gtmp.tt2)
colnames(output.1) <- c("Estimate","95% LB","95% UB")
colnames(output.2) <- c("Estimate","95% LB","95% UB")
output1[[i]] <- round(output.1,2)
output2[[i]] <- round(output.2,2)
}
names(output1) <- names(Sim_Beta)[btp]
names(output2) <- names(Sim_Beta)[btp]
Output <- list(Beta=round(Beta,2), ES=output1, covParm=Output0$sigmaBE1, SchEffects=Output0$SchEffects, ProbES=lapply(ProbES, function(x) x[, c("Within1","Total1")]),Model=freqFit,
Unconditional= list(ES=output2, covParm=Output0$sigmaBE2, ProbES=lapply(ProbES, function(x) x[, c("Within2","Total2")])))
return(Output)
}
## - internal
errantSummary <- function(freqFit,freqFit0,Sim_Beta, btp){
Sim_Beta_t <- as.matrix(freqFit,pars=names(Sim_Beta[btp])) #treatment effect
Beta <- data.frame( summary(freqFit,pars=c("alpha","beta"), probs = c(0.025, 0.975), digits=2 ))[c("mean","X2.5.","X97.5.")]
names(Beta)<- c("Estimate","95% LB","95% UB")
#conditional ouptut
Sim_var.W1 <- as.matrix(freqFit,pars="sigma")^2 #sigma(pupil)
Sim_var.B1 <- as.matrix(freqFit,pars="Sigma[cluster:(Intercept),(Intercept)]")
Sim_var.tt1 <- Sim_var.B1+Sim_var.W1
Sim_ICC1 <- Sim_var.B1/Sim_var.tt1
sigmaBE1 <- c(mean(Sim_var.B1),mean(Sim_var.W1),mean(Sim_var.tt1),mean(Sim_ICC1))
names(sigmaBE1)<- c("Schools","Pupils","Total","ICC")
#conditional ouptut
Sim_var.W2 <- as.matrix(freqFit0,pars="sigma")^2 #sigma(pupil)
Sim_var.B2 <- as.matrix(freqFit0,pars="Sigma[cluster:(Intercept),(Intercept)]")
Sim_var.tt2 <- Sim_var.B2+Sim_var.W2
Sim_ICC2 <- Sim_var.B2/Sim_var.tt2
sigmaBE2 <- c(mean(Sim_var.B2),mean(Sim_var.W2),mean(Sim_var.tt2),mean(Sim_ICC2))
names(sigmaBE2)<- c("Schools","Pupils","Total","ICC")
#random effects
SchEffects0 <- summary(freqFit, regex_pars = "b\\[\\(Intercept\\) cluster\\:", probs = c(0.025, 0.975),digits = 2)
SchEffects1 <- data.frame(SchEffects0)[,"mean"]
schl_ID <- as.numeric(as.character(substr(rownames(SchEffects0), nchar("b[(Intercept) cluster\\:"), nchar(rownames(SchEffects0))-1)))
SchEffects <- round(data.frame(cbind(Schools=schl_ID,Intercept=SchEffects1)),2)
#all output
ModelSmry <- list(Beta=Beta, Sim_Beta_t=Sim_Beta_t,
var.B1=Sim_var.B1, var.W1=Sim_var.W1, var.tt1=Sim_var.tt1, ICC1=Sim_ICC1, sigmaBE1=round(sigmaBE1,2),
var.B2=Sim_var.B2, var.W2=Sim_var.W2, var.tt2=Sim_var.tt2, ICC2=Sim_ICC2, sigmaBE2=round(sigmaBE2,2),
SchEffects =SchEffects)
return(ModelSmry)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.