# tuning of methods
#' Evaluate parameters
#'
#' Evaluate a set of parameters for a given method with a given metric.
#'
#'
#' @param data_list a list of data matrices with continuous data of format
#' samples x features (with the same number of samples).
#'
#' @param method a string being the name of the built-in method to be used or a
#' 'method list' with the same format as the built-in ones.
#'
#' @param grid the grid generated in `tuning()`.
#'
#' @param metric a string being the name of the built-in metric to be used or a
#' 'metric list' with the same format as the built-in ones. If the chosen
#' metric is internal, be sure to choose a metric corresponding to the kind of
#' data returned by the chosen method.
#'
#' @param true_partition a factor or integer vector being a classification of the
#' patients. It will be used with external metrics.
#'
#' @param grid_row the row indicating the set of parameters to be tested in the grid
#' @param return_metric logical. To return the result of the method instead of the value of the metric.
#'
#' @return the value of the metric or the result of the method if `return_metric` is TRUE
#'
evaluation <- function(data_list, method, grid, grid_row,
metric = NULL,
true_partition = NULL,
return_metric = TRUE) {
## Preconditions & preparation:
# get all the args from the method:
full_args <- as.list(base::formals(method))
# add the data: (not in the grid because of it's size):
full_args$data_list <- data_list
# add the args of the grid:
full_args[colnames(grid)] <- as.list(grid[grid_row, ])
if (return_metric) {
full_args$minimal_return <- TRUE
}
## Main
res <- do.call(method, full_args)
## Postconditions & return
if (!return_metric) {
res
} else {
metric$metric(
pred_partition = res$partition,
true_partition = true_partition,
data_for_metric = res[[res$element_for_metric]]
)
}
}
#' Tunes the methods to have the best set of parameters
#'
#' tuning considers a list of values of parameters (grid_support) to be tested
#' (all the combinations are tested) and find the set of parameters that have
#' the best value for the metric selected.
#'
#' To use a custom method, just pass your function method. Your function has to
#' respect the following structure:
#' * Input: function(data_list, param_1, param_2, ..... , param_x, ...)
#' * Output: A list with:
#' * partition: an integer vector labelling the sample
#' * element_for_metric: a character that indicates the name of the element
#' used with an internal metric for validation. It can be NULL.
#' * output_1
#' * ...
#' * output_x
#'
#' @param data_list a list of data matrices with continuous data of format
#' samples x features (with the same number of samples).
#'
#' @param method a string being the name of the built-in method to be used or a
#' 'method list' with the same format as the built-in ones. See Details
#'
#' @param grid_support a list with a set of value for each parameter to be
#' tuned. The typo has to be correct. Use formals(method$func) to know all the
#' available parameters.
#'
#' @param metric a string being the name of the built-in metric to be used or a
#' 'metric list' with the same format as the built-in ones. If the chosen
#' metric is internal, be sure to choose a metric corresponding to the kind of
#' data returned by the chosen method.
#'
#' @param true_partition a factor or integer vector being a classification of the
#' patients. It will be used with external metrics.
#'
#' @param parallel logical, TRUE for parallel computing (default and
#' recommanded).
#'
#' @param plot logical, set TRUE to plot the metric evaluation of each grid
#' point to have an overview of the impact of the variation of the parameters
#' on the value of the considered metric.
#'
#' @param verbose logical, set FALSE to avoid printing of informations
#' @param save_results logical, to save the result of tuning in `path_to_file`
#' @param file_name filename finishing by .RData or .rda
#' @param path_to_file if save_results, results of the tuning are save to this
#' path
#' @param ncores integer, the number of core to be used for the parallel
#' computation.
#' If NULL, it takes the maximum number of cores - 1, using the function
#' \code{\link[parallel]{detectCores}}.
#'
#'
#' @return a list:
#'
#' * $metric_val contain the best value of the metric
#' among all the grid
#'
#' * $parameters is the corresponding set of parameters
#'
#' * $method_used contains the name of the tuned method, to know where the
#' result comes from.
#'
#' * $method_res contains the result of the method, i.e. a partition and the
#' data returned by the method, to allow external and/or internal validation
#' by other metrics.
#'
#'
#' @export
tuning <- function(data_list, method, grid_support, metric,
true_partition = NULL,
parallel = TRUE,
ncores = NULL,
save_results = FALSE,
file_name = "./tuning_result.RData",
plot = TRUE,
verbose = TRUE) {
## Preconditions & preparation:
if (is.character(method)) {
method <- get_method(method)
}
if (is.character(metric)) {
metric <- get_metric(metric)
}
grid <- base::expand.grid(grid_support, stringsAsFactors = FALSE)
l <- dim(grid)[1]
## Main
# because sometimes it's too long, we warn the user:
if (verbose) print(paste0("Evaluation of ", l, " possibilities..."))
# evaluation of every possibilities:
if (parallel) {
# cluster setup
if (is.null(ncores)) ncores <- parallel::detectCores() - 1
cl <- parallel::makeCluster(ncores, type = "FORK")
# computation
execution_time <- system.time(evaluations <- parallel::parLapply(
cl = cl,
X = 1:l,
fun = function(grid_row) evaluation(
data_list = data_list,
method = method,
grid = grid,
grid_row = grid_row,
metric = metric,
true_partition = true_partition,
return_metric = TRUE
)
))
parallel::stopCluster(cl)
} else {
execution_time <- system.time(
evaluations <- lapply(1:l, function(grid_row) evaluation(
data_list = data_list,
method = method,
grid = grid,
grid_row = grid_row,
metric = metric,
true_partition = true_partition,
return_metric = TRUE
))
)
}
if (verbose) {
print(paste0(
"... done in ", round(execution_time[3], 3),
" seconds", "!"
))
}
## Postconditions & return
# Find the best set of parameters
if (verbose) print("Find the best parameters...")
if (metric$maximize) {
max_ind <- max.col(t(evaluations))
} else { # Minimizing x is equivalent to maximizing -x
max_ind <- max.col(-t(evaluations))
}
if (verbose) print("... done !")
if (plot) {
plot(
x = 1:l, y = evaluations,
xlab = "grid points", ylab = metric$label,
main = paste0(metric$label, " for all grid points")
)
}
# Reconstruct the result:
res <- evaluation(data_list, method, grid, max_ind, metric, return_metric = F)
# Build return list:
res_list <- list(
method = method,
method_res = res,
best_parameters = as.list(grid[max_ind, ]),
metric = metric,
best_metric_value = evaluations[[max_ind]],
all_metric_values = unlist(evaluations),
parameters_grid = grid,
grid_support = grid_support
)
# Save the computation:
if (save_results) {
tuning_result <- list(
date_of_execution = Sys.Date(),
time_of_execution = execution_time,
tuning_result_list = res_list
)
save(tuning_result, file = file_name)
}
# Return:
return(res_list)
}
#' An overview of the results with all available metrics
#'
#' overview_metrics use the result of the tuning and diagnoses it using a list
#' of selected metric. It's useful to see how other metrics evaluate the
#' result tuned with only one given metric which is subject to biases.
#'
#' @param method_result a list returned by built-in methods i.e. a list with an
#' element `partition` and `data_returned`. To use directly the result of the
#' function tuning, use the element $method_res of the result of the tuning.
#' @param internal_metrics a character vector indicating the name of internal
#' metrics to be used. Be careful, the internal metric used must correspond to
#' the data returned by the selected method (a distance matrix? an affinity
#' matrix?, a feature matrix? ...)
#' @param true_partition factor or integer vector being a classification of the
#' patients to be used with external metrics.
#' @param print logical. Print or not the values of metrics in the console.
#' @param plot logical. Plot or not silhouette graph
#'
#' @param external_metrics a character vector with the name of external metrics
#' to be used with the provided ground-truth. If NULL, all the external
#' metrics are evaluated.
#'
#' @return a data.frame with the value of metrics.
#' @export
#'
overview_metrics <- function(method_result,
internal_metrics = NULL,
true_partition = NULL,
external_metrics = NULL,
print = T,
plot = T) {
## Preconditions & preparation:
all_metrics <- get_metric(list_format = TRUE)
internal_metrics_list <- NULL
external_metrics_list <- NULL
# First select internal_metrics:
if (!is.null(internal_metrics)) {
internal_metrics_list <- all_metrics[names(all_metrics)
%in%
internal_metrics]
}
# Then select external_metrics
if (!is.null(external_metrics)) {
external_metrics_list <- all_metrics[names(all_metrics)
%in%
external_metrics]
} else {
# otherwise, if a ground-truth is provided, it selects all external metrics.
if (!is.null(true_partition)) {
mask <- rep(FALSE, length(all_metrics))
i <- 1
for (metric in all_metrics) {
if (!metric$internal) {
mask[i] <- TRUE
}
i <- i + 1
}
external_metrics_list <- all_metrics[mask]
}
}
# concatenate the list
actual_list <- do.call(c, list(internal_metrics_list, external_metrics_list))
l <- length(actual_list)
## Main
if (l == 0) {
stop("Neither internal metric nor ground-truth provided,
please provide a ground-truth (and/) or a list of internal metrics")
} else {
# compute the values of metrics in actual_list
metrics_values <- data.frame(value = 1:l, metric = names(actual_list))
rownames(metrics_values) = names(actual_list)
for (metric in actual_list) {
metrics_values[metric$name, "value"] <- metric$metric(
pred_partition = method_result$partition,
true_partition = true_partition,
data_for_metric = method_result[[method_result$element_for_metric]]
)
}
}
## Return
metrics_values
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.