R/abcCLES.R

Defines functions abcCLES

Documented in abcCLES

#' Approximate Bayesian algorithm for the Common Language Effect Size (CLES) estimator
#'
#' @description Computes the posterior CLES without a likelihood function via an ABC-rejection algorithm.
#' CLES (McGraw and Wong, 1992) also called  Exceedance probability (Huang, 2021), Probability of
#' Superiority or Area Undere the Curve (AUC; Ruscio, 2008; Ruscio and Mullen, 2010) is a metric that
#' calculates the proportion of samples from group y that exceeds a random sample of group x. While often
#' Confidence intervals are calculated, this is not inference based on the posterior probability, but on the
#' likelihood, thus the data. This function is a simple ABC-rejection algorithmn to calculate posterior
#' mean and sd of x and y an infer CLES.
#'
#' @param x A numeric vector for sample x.
#' @param y A numeric vector for sample y.
#' @param prior A list with two priors for location an scale parameters.
#' @param qtol The value for the quantile tolerance. A lower value selects simulated parameters that fall closer to the observed parameters. Default is 0.01 and indicates that the closest 1 percent is accepted and the rest rejected.
#' @param adjustment The adjustment method to act if the simulated values actually originated closer from the prior. The current method is the Linear Method (LM), but will also be accompanied by a non-linear RF model.
#' @param print.progress Prints the progress of the number of simulations (nsim) completed. By default is TRUE
#' @param timing Prints the time that was needed to complete the simulations. By default is TRUE.
#' @param seed Default is the Devil. Devils seed.
#'
#' @return Values for CLES derived from the posterior means of samples x and y.
#' @export abcCLES
#'
#' @examples
#' \dontrun{
#'#Create random example for CLES P(y > x)
#' set.seed(666)
#' x   <- rnorm(20, 23, 2)
#' y   <- rnorm(20, 32, 2)
#' sdx <- mean(x)
#' sdy <- mean(y)
#' nsim<- 250000
#'
#'#Create priors
#'set.seed(666)
#'priors <- list(prior1  = rlnorm(nsim, 3.2, 0.15),
#'               prior2  = rgamma(nsim, sdx),
#'               prior3  = rlnorm(nsim, 3.4, 0.15),
#'               prior4  = rgamma(nsim, sdy))
#'
#'#Control the priors by eye-balling
#'par(mfrow=c(2,2))
#'hist(priors[[1]], xlab = "", main="Prior1")
#'hist(priors[[2]], xlab = "", main="Prior2")
#'hist(priors[[3]], xlab = "", main="Prior3")
#'hist(priors[[4]], xlab = "", main="Prior4")
#'dev.off()
#'
#Run simulations
#'res <- abcCLES(x, y, prior = priors)
#'
#Plot CLES obtaind by inference from posteriors means and sd
#'hist(res, breaks=20, main="CLES (Posterior)", xlab="")}
abcCLES <- function(x, y, prior=NULL, qtol=0.005,
                    adjustment="LM", print.progress = T,
                    timing = T, seed=666){

  #Start timing
  if(timing == T){stime <- Sys.time()}
  if(is.null(prior)){stop("Prior cannot be empty")}
  if(all(sapply(prior,length)==length(prior[[1]])) == F){stop("Priors need to be of the same length")}
  if(is.null(x)){stop("x cannot be empty")}
  if(is.null(y)){stop("y cannot be empty")}

  nx           <- length(na.omit(x))
  ny           <- length(na.omit(y))
  lp           <- length(prior)
  nsim         <- length(prior[[1]])
  simlist      <- list()

  observations <- c(mean(x, na.rm = T), sd(x, na.rm = T), mean(y, na.rm = T), sd(y, na.rm = T))

    for(i in 1:4){
    simlist[[i]] <- rep(NA, nsim)}

  if(is.numeric(seed)==T){set.seed(seed)}

  for (p in 1:nsim){

    if(print.progress == T){print(p)}

    simx    <- rnorm(nx, prior[[1]][p], prior[[2]][p])
    simy    <- rnorm(ny, prior[[3]][p], prior[[4]][p])

    mux     <- mean(simx)
    sdx     <- sd(simx)

    muy     <- mean(simy)
    sdy     <- sd(simy)

    sim     <- c(mux, sdx, muy, sdy)

    extsim  <- lapply(1:4, function(x) sim[x])
    for(s in 1:4){
      simlist[[s]][p] <- extsim[[s]]}

  }

  #Statistics
  statistics <- as.data.frame(matrix(ncol=8, nrow=nsim))
  for(i in 1:4){
    statistics[,i]       <- simlist[[i]]
    statistics[,i+4]     <- priors[[i]]}

  colnames(statistics) <- c(paste0(rep("simulate",lp),1:4), paste0(rep("prior",4),1:4))

  #Distance (Epsilon)
  statistics$distance   <- rowSums(sqrt((statistics[,c(1:4)]-matrix(rep(observations, each=nsim), ncol=4))^2))

  #Select closest quantile qtol
  parameters             <- statistics[order(statistics$distance),][1:c(nsim*qtol),]
  rownames(parameters)   <- NULL

  #Linear adjustment
  if(adjustment == "LM"){
    parameters                 <- cbind.data.frame(as.data.frame(matrix(ncol=4, nrow=nrow(parameters))), parameters)
    colnames(parameters)[1:4] <- paste0(rep("LMadjusted",4),1:4)
    nr <- 1
    for(c in 1:4){
      si <- c+4
      pr <- c+8
      parameters[,nr]   <- mean(parameters[,pr])+resid(lm(parameters[,pr]~parameters[,si]))
      nr <-nr+1}}

  md         <- parameters$LMadjusted1-parameters$LMadjusted3
  sdz        <- sqrt(parameters$LMadjusted2^2 + parameters$LMadjusted4^2)
  cles       <- pnorm(0, md, sdz)
  parameters <- data.frame(CLES=cles, parameters)

  #Stop timing
  if(timing == T){etime <- Sys.time()
  print(etime-stime)}

  return(parameters)

}
snwikaij/NEMO documentation built on March 18, 2022, 12:03 a.m.