#' Permutation Test
#'
#' Performs a permutation test to see if there is an association between a region set and some other feature using
#' an evaluation function.
#'
#' @usage permTest(A, ntimes=100, randomize.function, evaluate.function, alternative="auto", min.parallel=1000, force.parallel=NULL, randomize.function.name=NULL, evaluate.function.name=NULL, verbose=FALSE, ...)
#'
#' @param A a region set in any of the accepted formats by \code{\link{toGRanges}} (\code{\link{GenomicRanges}}, \code{\link{data.frame}}, etc...)
#' @param ntimes number of permutations
#' @param randomize.function function to create random regions. It must return a set of regions.
#' @param evaluate.function function to search for association. It must return a numeric value.
#' @param alternative the alternative hypothesis must be one of \code{"greater"}, \code{"less"} or \code{"auto"}. If \code{"auto"}, the alternative will be decided depending on the data.
#' @param min.parallel if force.parallel is not specified, this will be used to determine the threshold for parallel computation. If \code{length(A) * ntimes > min.parallel}, it will activate the parallel computation. Single threaded otherwise.
#' @param force.parallel logical indicating if the computation must be paralelized.
#' @param randomize.function.name character. If specified, the permTestResults object will have this name instead of the name of the randomization function used. Useful specially when using unnamed anonymous functions.
#' @param evaluate.function.name character. If specified, the permTestResults object will have this name instead of the name of the evaluation function used. Useful specially when using unnamed anonymous functions.
#' @param verbose a boolean. If verbose=TRUE it creates a progress bar to show the computation progress. When combined with parallel computation, it might have an impact in the total computation time.
#' @param ... further arguments to be passed to other methods.
#'
#' @details permTest performs a permutation test of the regions in RS to test the association with the feature evaluated with the evaluation function.
#' The regions are randomized using the randomization.function and the evaluation.function is used to evaluate them. More information can be found in
#' the vignette.
#'
#' @return
#' A list of class \code{permTestResults} containing the following components:
#' \itemize{
#' \item \bold{\code{pval}} the p-value of the test.
#' \item \bold{\code{ntimes}} the number of permutations.
#' \item \bold{\code{alternative}} a character string describing the alternative hypotesis.
#' \item \bold{\code{observed}} the value of the statistic for the original data set.
#' \item \bold{\code{permuted}} the values of the statistic for each permuted data set.
#' \item \bold{\code{zscore}} the value of the standard score. \code{(observed-\link{mean}(permuted))/\link{sd}(permuted)}
#' \item \bold{\code{randomize.function}} the randomization function used.
#' \item \bold{\code{randomize.function.name}} the name of the randomization used.
#' \item \bold{\code{evaluate.function}} the evaluation function used.
#' \item \bold{\code{evaluate.function.name}} the name of the evaluation function used.
#' }
#'
#' @seealso \code{\link{overlapPermTest}}
#'
#' @examples
#' genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1")
#' A <- createRandomRegions(nregions=20, length.mean=10000000, length.sd=20000, genome=genome, non.overlapping=FALSE)
#' B <- c(A, createRandomRegions(nregions=10, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE))
#'
#'
#' pt2 <- permTest(A=A, B=B, ntimes=10, alternative="auto", verbose=TRUE, genome=genome, evaluate.function=meanDistance, randomize.function=randomizeRegions, non.overlapping=FALSE)
#' summary(pt2)
#' plot(pt2)
#' plot(pt2, plotType="Tailed")
#'
#' @references
#' Davison, A. C. and Hinkley, D. V. (1997) Bootstrap methods and their application, Cambridge University Press, United Kingdom, 156-160
#'
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom methods hasArg is
#' @importFrom S4Vectors subjectHits queryHits
#'
#'
#' @export permTest
#'
#' @importFrom stats sd
#min.parallel is specifies the minimum amount of work (number of regions in A per ntimes) to activate parallelization.
permTest <- function(A, ntimes=100, randomize.function, evaluate.function, alternative="auto", min.parallel=1000, force.parallel=NULL, randomize.function.name=NULL, evaluate.function.name=NULL, verbose=FALSE, ...) {
#check arguments
alternative<-match.arg(alternative,c("less","greater", "auto"))
if(!hasArg(A)) stop("A is missing")
if(!is.numeric(ntimes)) stop("ntime must be numeric")
if(!hasArg(randomize.function)) stop("randomize.function is missing")
if(!is.function(randomize.function)) stop("randomize.function must be a function")
if(!hasArg(evaluate.function)) stop("evaluate.function is missing")
if(!(is.function(evaluate.function) | is.list(evaluate.function))) stop("evaluate.function must be a function")
if(!is.numeric(min.parallel)) stop("min.parallel must be numeric")
if(ntimes<100) print(paste0("Note: The minimum p-value with only ",ntimes," permutations is ",1/(ntimes+1),". You should consider increasing the number of permutations."))
A <- toGRanges(A) #does nothing if already a GRanges object
if(!is.null(force.parallel)) {
doParallel <- force.parallel
} else {
doParallel <- (length(A)*ntimes > min.parallel)
if(verbose) message("Auto-setting parallel computing to: ", doParallel)
}
#Evaluation Function: get the function name and convert to list if its not yet
if(!is.list(evaluate.function)) { #if it's a single function
if(is.null(evaluate.function.name)) {
evaluate.function.name <- as.character(match.call()["evaluate.function"])
}
ef <- list()
ef[[evaluate.function.name]] <- evaluate.function
evaluate.function <- ef
} else { #if it's a list of functions
if(!is.null(evaluate.function.name)) { #if names were explicitely provided
names(evaluate.function) <- evaluate.function.name
} else { #try to leave the current names or create new ones if no names present
if(is.null(names(evaluate.function))) {
names(evaluate.function) <- paste0("Function", seq_len(length(evaluate.function)))
}
}
}
#Randomization Function: Get a name
if(is.null(randomize.function.name)) {
randomize.function.name <- match.call()["randomize.function"]
}
#Start the permutation test
#compute the evaluation function(s) using the original region set A
original.evaluate <- sapply(seq_len(length(evaluate.function)), function(i,...) {return(evaluate.function[[i]](A,...))}, ...)
if(!is.numeric(original.evaluate)) {
stop(paste0("The evaluation function must return a numeric value but it returned an object of class ", class(original.evaluate)))
}
if(verbose) {
#WARNING: to give some visual information about the computation done, we use a Progress Bar. However, since the GUI will not be updated
#whilst in multi core processing (mclapply), we partition the work in chunks and repeatedly call the parallel computation, once per chunk.
#Between chunks, the progess bar will be updated. The problem is that the total computation time might increase due to waiting and joining
#multiple times.
#Create the progress bar
pb <- txtProgressBar(min = 0, max = ntimes, style = 3)
setTxtProgressBar(pb, 0)
}
#define the function to create and evaluate the random sets
randomize_and_evaluate <- function(foo, ...) {
#randomize
randomA <- randomize.function(A,...)
#evaluate the random region set
if(verbose) {
setTxtProgressBar(pb, foo)
}
#compute the evaluation function(s) using the RANDOMIZED region set randomA
rand.evaluate <- sapply(seq_len(length(evaluate.function)), function(i, ...) {return(evaluate.function[[i]](randomA,...))}, ...)
return(rand.evaluate)
}
#create the random sets and evaluate them
if(doParallel) {
if(verbose) { #if verbose, we will do the computations in chunks and update the progress bar in between
random.evaluate <- numeric()
chunk.size <- max(round(ntimes/100+1), 10)
e <- 0
done <- FALSE
while(!done) {
s <- e + 1
e <- s + chunk.size
if(e >= ntimes) {
e <- ntimes
done <- TRUE
}
random.evaluate <- rbind(random.evaluate, do.call(rbind, mclapply(c(s:e), randomize_and_evaluate, ...)))
setTxtProgressBar(pb, e)
}
} else { #if not verbose, just do it
random.evaluate <- do.call(rbind, mclapply(seq_len(ntimes), randomize_and_evaluate, ...))
}
} else {
random.evaluate <- do.call(rbind, lapply(seq_len(ntimes), randomize_and_evaluate, ...))
}
#The simulation process has finished. Now build a permTestResults object for each evaluate.function
results <- list()
for(i in seq_len(length(evaluate.function))) {
#Get the data for the i-th function
func.name <- names(evaluate.function)[i]
orig.ev <- original.evaluate[i]
rand.ev <- random.evaluate[,i]
#warn if any NA or NaN
num.nas <- length(which(is.na(rand.ev)))
num.valid.values <- ntimes-num.nas
if(num.valid.values < ntimes) {
if(num.valid.values>0) {
warning(paste0(num.nas, " iterations returned NA or NaN. Only ", ," iterations have been used to compute the p-value."))
} else {
warning(paste0("All ", num.nas, " iterations returned NA or NaN. No valid values returned. It is not possible to compute the p-value nor z-score."))
}
}
if(num.valid.values > 0) {
#decide the alternative if alternative == "auto"
if(alternative == "auto") {
alt <- ifelse(orig.ev < mean(rand.ev, na.rm=TRUE), "less", "greater")
} else {
alt <- alternative
}
#Compute the p-value
if (alt == "less") {
pval <- (sum(orig.ev >= rand.ev, na.rm=TRUE) + 1) / (num.valid.values + 1)
} else { #alt == "greater"
pval <- (sum(orig.ev <= rand.ev, na.rm=TRUE) + 1) / (num.valid.values + 1)
}
#if the original alternative was not the best one, suggest the user to change it
if(alternative=="greater" & orig.ev<mean(rand.ev,na.rm=TRUE)) message("Alternative is greater and the observed statistic is less than the permuted statistic mean. Maybe you want to use recomputePermTest to change the alternative hypothesis.")
if(alternative=="less" & orig.ev>mean(rand.ev,na.rm=TRUE)) message("Alternative is less and the observed statistic is greater than the permuted statistic mean. Maybe you want to use recomputePermTest to change the alternative hypothesis.")
#Compute the z-score
if(orig.ev == 0 & all(rand.ev == 0)){ #If everything is 0, warning and "empty" results
warning(paste0("All permuted values and the original evaluation value are equal to 0. Z-score cannot be computed."))
pval <- 1
zscore <- NA
} else{
zscore <- round((orig.ev - mean(rand.ev, na.rm=TRUE)) / stats::sd(rand.ev, na.rm=TRUE), 4)
}
} else {
pval <- NA
zscore <- NA
alt <- alternative
}
if(!is.na(pval)) {
if(!is.finite(zscore)){ #if all evaluations are equal, the sd is 0 and the z-score is infinite
warning(paste0("All permuted values are equal to ", rand.ev[1], ". Z-score is infinite."))
}
}
#Create the permTestResults object
res<-list(pval=pval, ntimes=ntimes, alternative=alt, observed=orig.ev, permuted=rand.ev, zscore=zscore,
evaluate.function=evaluate.function[[i]], evaluate.function.name=func.name,
randomize.function=randomize.function, randomize.function.name=randomize.function.name)
class(res) <- "permTestResults"
results[[func.name]] <- res
}
class(results) <- "permTestResultsList"
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.