#' Approximate Bayesian algorithm for the Common Language Effect Size (CLES) estimator
#'
#' @description Computes the posterior CLES without a likelihood function via an ABC-rejection algorithm.
#' CLES (McGraw and Wong, 1992) also called Exceedance probability (Huang, 2021), Probability of
#' Superiority or Area Undere the Curve (AUC; Ruscio, 2008; Ruscio and Mullen, 2010) is a metric that
#' calculates the proportion of samples from group y that exceeds a random sample of group x. While often
#' Confidence intervals are calculated, this is not inference based on the posterior probability, but on the
#' likelihood, thus the data. This function is a simple ABC-rejection algorithmn to calculate posterior
#' mean and sd of x and y an infer CLES.
#'
#' @param x A numeric vector for sample x.
#' @param y A numeric vector for sample y.
#' @param prior A list with two priors for location an scale parameters.
#' @param qtol The value for the quantile tolerance. A lower value selects simulated parameters that fall closer to the observed parameters. Default is 0.01 and indicates that the closest 1 percent is accepted and the rest rejected.
#' @param adjustment The adjustment method to act if the simulated values actually originated closer from the prior. The current method is the Linear Method (LM), but will also be accompanied by a non-linear RF model.
#' @param print.progress Prints the progress of the number of simulations (nsim) completed. By default is TRUE
#' @param timing Prints the time that was needed to complete the simulations. By default is TRUE.
#' @param seed Default is the Devil. Devils seed.
#'
#' @return Values for CLES derived from the posterior means of samples x and y.
#' @export abcCLES
#'
#' @examples
#' \dontrun{
#'#Create random example for CLES P(y > x)
#' set.seed(666)
#' x <- rnorm(20, 23, 2)
#' y <- rnorm(20, 32, 2)
#' sdx <- mean(x)
#' sdy <- mean(y)
#' nsim<- 250000
#'
#'#Create priors
#'set.seed(666)
#'priors <- list(prior1 = rlnorm(nsim, 3.2, 0.15),
#' prior2 = rgamma(nsim, sdx),
#' prior3 = rlnorm(nsim, 3.4, 0.15),
#' prior4 = rgamma(nsim, sdy))
#'
#'#Control the priors by eye-balling
#'par(mfrow=c(2,2))
#'hist(priors[[1]], xlab = "", main="Prior1")
#'hist(priors[[2]], xlab = "", main="Prior2")
#'hist(priors[[3]], xlab = "", main="Prior3")
#'hist(priors[[4]], xlab = "", main="Prior4")
#'dev.off()
#'
#Run simulations
#'res <- abcCLES(x, y, prior = priors)
#'
#Plot CLES obtaind by inference from posteriors means and sd
#'hist(res, breaks=20, main="CLES (Posterior)", xlab="")}
abcCLES <- function(x, y, prior=NULL, qtol=0.005,
adjustment="LM", print.progress = T,
timing = T, seed=666){
#Start timing
if(timing == T){stime <- Sys.time()}
if(is.null(prior)){stop("Prior cannot be empty")}
if(all(sapply(prior,length)==length(prior[[1]])) == F){stop("Priors need to be of the same length")}
if(is.null(x)){stop("x cannot be empty")}
if(is.null(y)){stop("y cannot be empty")}
nx <- length(na.omit(x))
ny <- length(na.omit(y))
lp <- length(prior)
nsim <- length(prior[[1]])
simlist <- list()
observations <- c(mean(x, na.rm = T), sd(x, na.rm = T), mean(y, na.rm = T), sd(y, na.rm = T))
for(i in 1:4){
simlist[[i]] <- rep(NA, nsim)}
if(is.numeric(seed)==T){set.seed(seed)}
for (p in 1:nsim){
if(print.progress == T){print(p)}
simx <- rnorm(nx, prior[[1]][p], prior[[2]][p])
simy <- rnorm(ny, prior[[3]][p], prior[[4]][p])
mux <- mean(simx)
sdx <- sd(simx)
muy <- mean(simy)
sdy <- sd(simy)
sim <- c(mux, sdx, muy, sdy)
extsim <- lapply(1:4, function(x) sim[x])
for(s in 1:4){
simlist[[s]][p] <- extsim[[s]]}
}
#Statistics
statistics <- as.data.frame(matrix(ncol=8, nrow=nsim))
for(i in 1:4){
statistics[,i] <- simlist[[i]]
statistics[,i+4] <- priors[[i]]}
colnames(statistics) <- c(paste0(rep("simulate",lp),1:4), paste0(rep("prior",4),1:4))
#Distance (Epsilon)
statistics$distance <- rowSums(sqrt((statistics[,c(1:4)]-matrix(rep(observations, each=nsim), ncol=4))^2))
#Select closest quantile qtol
parameters <- statistics[order(statistics$distance),][1:c(nsim*qtol),]
rownames(parameters) <- NULL
#Linear adjustment
if(adjustment == "LM"){
parameters <- cbind.data.frame(as.data.frame(matrix(ncol=4, nrow=nrow(parameters))), parameters)
colnames(parameters)[1:4] <- paste0(rep("LMadjusted",4),1:4)
nr <- 1
for(c in 1:4){
si <- c+4
pr <- c+8
parameters[,nr] <- mean(parameters[,pr])+resid(lm(parameters[,pr]~parameters[,si]))
nr <-nr+1}}
md <- parameters$LMadjusted1-parameters$LMadjusted3
sdz <- sqrt(parameters$LMadjusted2^2 + parameters$LMadjusted4^2)
cles <- pnorm(0, md, sdz)
parameters <- data.frame(CLES=cles, parameters)
#Stop timing
if(timing == T){etime <- Sys.time()
print(etime-stime)}
return(parameters)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.