#' Evaluate (run) a benchmark
#'
#' This function evaluates a benchmark pipeline, specified by an object of type \code{Benchmark}.
#' This means that all the projection, clustering or projection->clustering subpipelines that were set up when creating the benchmark object are executed, and their performance is scored.
#' Both the benchmark object and its auxiliary HDF5 file (created when the \code{Benchmark} constructor was called) are needed for this.
#'
#' # Optional scoring of projection steps
#'
#' Optionally, results of projection steps (if included) can be scored using evaluation metrics designed to measure the quality of dimension reduction (preservation of information versus original high-dimensional data).
#' This makes sense for methods that reduce dimensionality of the original data for the purposes of visualisation or to make the data more amenable to clustering.
#' To turn on scoring of projection steps, set parameter \code{score_projections} to \code{TRUE}.
#' Based on a numeric bound (parameter \code{projection_collapse_n}), metrics based exclusively on *k*-nearest-neighbour graphs of original data and each projection will be computed if the row count of input data exceeds that value.
#' If the row count is lower than or equal to the limit, full distance matrices (quadratic complexity) will be computed.
#' In the first case, the local continuity meta-criterion (LCMC) as well as B_{NX} ('local intrusiveness versus extrusiveness') can be computed.
#' In the second case, trustworthiness and continuity are also computed.
#' By default, \code{projection_collapse_n} is set to \code{500}, preventing the computation of full distance matrices except for very small datasets.
#' Additionally, the parameter \code{projection_neighbourhood} specifies the number of nearest neighbours used for partitioning the full co-ranking matrix (if the size of data is less than or equal to \code{projection_collapse_n}).
#'
#' # Parallelisation
#'
#' For stability analysis of clustering tools, repeated runs of the tool can be run in parallel (unless this is forbidden in the tool wrapper).
#' To do this, specify the parameter \code{n_cores}.
#' To use all available CPU cores, you can use \code{parallel::detectCores()} as the value of \code{n_cores}.
#'
#' @param benchmark object of class \code{Benchmark}, as generated by the constructor \code{Benchmark}
#' @param score_projections logical: whether results of projection steps should be scored. Default value is \code{FALSE}
#' @param projection_collapse_n integer: upper bound of dataset size for which full distance matrices should be computed in evaluation (if \code{score_projections} is set to \code{TRUE}). Default value is \code{500}
#' @param projection_neighbourhood integer: number of nearest neighbours to use in K-ary neighbourhood-based evaluation of projection quality (if \code{score_projections} is set to \code{TRUE} and size of input dataset is less than or equal to \code{projection_collapse_n}). Default value is \code{100}
#' @param n_cores optional integer: number of CPU threads to use for parallelisation of repeated runs of clustering for stability analysis. Default value is \code{NULL} (no parallelisation)
#' @param which_python optional string: path to Python if Python needs to be used via \code{reticulate}. Default value is \code{NULL} (\code{reticulate} uses its default Python configuration)
#' @param seed.projection optional numeric value: random seed to be set prior to each deployment of a projection method. (Use \code{NULL} to avoid setting a seed.) Default value is \code{1}
#' @param seed.clustering optional numeric value: random seed to be set prior to each deployment of a clustering method. (Use \code{NULL} to avoid setting a seed.) Default value is \code{1}
#' @param ask_overwrite logical: if \code{benchmark} was evaluated before, should the user be asked prior to overwriting the previous evaluation results? Default value is \code{TRUE}
#' @param verbose logical: should progress messages be printed during evaluation? Default value is \code{TRUE}
#'
#' @return The evaluated \code{Benchmark} object, returned invisibly, or \code{NULL} if the user declines to overwrite the results of a previous evaluation.
#'
#' @seealso
#'
#' * **\code{AddLayout}**: allows you to add a separate 2-dimensional layout of the input dataset or to use an existing projection (produced in the evaluation) as a visualisation layout.
#'
#' @export
Evaluate <- function(
  benchmark,
  score_projections,
  projection_collapse_n,
  projection_neighbourhood,
  n_cores,
  which_python,
  seed.projection,
  seed.clustering,
  ask_overwrite,
  verbose
) UseMethod('Evaluate', benchmark)
# S3 method of `Evaluate` for objects of class `Benchmark`.
#
# Order of operations:
#   1. check that the auxiliary HDF5 file exists,
#   2. if the benchmark was evaluated before, optionally ask for confirmation,
#   3. optionally point reticulate at a specific Python,
#   4. decide whether a kNN matrix or a full distance matrix is needed for
#      scoring projections, and compute + save whichever is missing,
#   5. store the evaluation settings on the benchmark object,
#   6. initialise the results storage and evaluate projection and clustering
#      steps.
#
# Returns `benchmark` invisibly, or NULL if the user declines to overwrite
# previous results.
Evaluate.Benchmark <- function(
  benchmark,
  score_projections = FALSE,
  projection_collapse_n = 500,
  projection_neighbourhood = 100,
  n_cores = NULL,
  which_python = NULL,
  seed.projection = 1,
  seed.clustering = 1,
  ask_overwrite = TRUE,
  verbose = TRUE
) {
  # Nothing can be evaluated without the auxiliary HDF5 file
  if (!file.exists(benchmark$h5_path)) {
    stop('Auxiliary HDF5 file ', benchmark$h5_path, ' not found')
  }

  # Only an exact 'Y' confirms the overwrite; any other response (including an
  # empty one) aborts
  if (benchmark$evaluated_previously && ask_overwrite) {
    cat(crayon::bgRed(' ')); .msg(' (?) '); cat(crayon::bgRed(' \n'))
    response <- readline(prompt = 'Benchmark was evaluated previously. Overwrite evaluation results? (Y/n) ')
    if (response != 'Y') {
      .msg_alt_bad('Aborting Benchmark evaluation, returning NULL\n')
      return(NULL)
    }
  }

  # A Python path is only applied if the benchmark uses Python at all
  if (benchmark$uses_python) {
    if (!is.null(which_python)) {
      if (verbose) { .msg_python('Configuring reticulate') }
      reticulate::use_python(which_python, required = TRUE)
    }
  }

  # Projection scoring: kNN-based for data with more rows than
  # `projection_collapse_n`, full distance matrix otherwise
  if (score_projections) {
    if (benchmark$row_count > projection_collapse_n) {
      benchmark$compute_knn <- TRUE
    } else {
      benchmark$compute_dist <- TRUE
    }
  }

  if (verbose) {
    .msg('Starting evaluation of '); .msg_name(benchmark$name)
    .msg(', time stamp: '); .msg_alt(as.character(Sys.time()), '\n')
  }

  # Compute and store the kNN matrix if it is needed and not yet available
  if (benchmark$compute_knn && !benchmark$knn_available) {
    knn <- Evaluate_ComputekNNMatrix(benchmark, verbose)
    SavekNNMatrix(benchmark, knn, verbose)
    benchmark$knn_available <- TRUE
  }

  # Compute and store the distance matrix if it is needed and not yet available
  if (benchmark$compute_dist && !benchmark$dist_available) {
    if (verbose) .msg('Computing distance matrix... ')
    systime <- system.time({
      # NOTE(review): relies on an unexported coRanking function via `:::`
      d <- coRanking:::euclidean_C(GetExpressionMatrix(benchmark, concatenate = TRUE))
    })
    if (verbose) {
      .msg_alt_good('done in ', round(systime['elapsed'], 2), ' seconds\n')
    }
    SaveDistanceMatrix(benchmark, d, verbose)
    if (verbose) {
      .msg_alt_good('done\n')
    }
    # Mirror the kNN branch: record that the matrix has been computed and saved
    benchmark$dist_available <- TRUE
  }

  # Store the evaluation settings on the benchmark object before the
  # evaluation steps below are run
  benchmark$n_cores <- n_cores
  benchmark$seed.projection <- seed.projection
  benchmark$seed.clustering <- seed.clustering
  benchmark$score_projections <- score_projections
  benchmark$projection_collapse_n <- projection_collapse_n
  benchmark$projection_neighbourhood <- projection_neighbourhood

  HDF5_InitialiseEvaluationResults(benchmark)
  EvalProjection(benchmark, verbose)
  EvalClustering(benchmark, verbose)
  benchmark$evaluated_previously <- TRUE

  if (verbose) {
    .msg_alt_good('Evaluation complete'); .msg(', time stamp: ')
    # Trailing newline, as in the opening message, so later output starts on a fresh line
    .msg_alt(as.character(Sys.time()), '\n')
  }

  gc(verbose = FALSE)
  invisible(benchmark)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.