# __________________ #< 1ee2f435e0cd344bcd4c561d5eb5542d ># __________________
# Evaluate ####
#' @title Evaluate your model's performance
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("maturing")}
#'
#' Evaluate your model's predictions
#' on a set of evaluation metrics.
#'
#' Create ID-aggregated evaluations by multiple methods.
#'
#' Currently supports regression and classification
#' (binary and multiclass). See \code{`type`}.
#' @param data \code{data.frame} with predictions, targets and (optionally) an ID column.
#' Can be grouped with \code{\link[dplyr]{group_by}}.
#'
#' \subsection{Multinomial}{
#' When \code{`type`} is \code{"multinomial"}, the predictions can be passed in one of two formats.
#'
#' \subsection{Probabilities (Preferable)}{
#'
#' One column per class with the probability of that class.
#' The columns should have the name of their class,
#' as they are named in the target column. E.g.:
#'
#' \tabular{rrrrr}{
#' \strong{class_1} \tab \strong{class_2} \tab
#' \strong{class_3} \tab \strong{target}\cr
#' 0.269 \tab 0.528 \tab 0.203 \tab class_2\cr
#' 0.368 \tab 0.322 \tab 0.310 \tab class_3\cr
#' 0.375 \tab 0.371 \tab 0.254 \tab class_2\cr
#' ... \tab ... \tab ... \tab ...}
#' }
#' \subsection{Classes}{
#'
#' A single column of type \code{character} with the predicted classes. E.g.:
#'
#' \tabular{rrrrr}{
#' \strong{prediction} \tab \strong{target}\cr
#' class_2 \tab class_2\cr
#' class_1 \tab class_3\cr
#' class_1 \tab class_2\cr
#' ... \tab ...}
#'
#' }
#' }
#' \subsection{Binomial}{
#' When \code{`type`} is \code{"binomial"}, the predictions can be passed in one of two formats.
#'
#' \subsection{Probabilities (Preferable)}{
#' One column with the \strong{probability that the observation belongs to the
#' second class alphabetically}
#' (i.e. class \code{1} if the classes are \code{0} and \code{1}). E.g.:
#'
#' \tabular{rrrrr}{
#' \strong{prediction} \tab \strong{target}\cr
#' 0.769 \tab 1\cr
#' 0.368 \tab 1\cr
#' 0.375 \tab 0\cr
#' ... \tab ...}
#' }
#'
#' Note: When ordering the class labels alphabetically, they are treated as type \code{character},
#' which is why e.g. \code{100} would come before \code{7}.
#'
#' \subsection{Classes}{
#'
#' A single column of type \code{character} with the predicted classes. E.g.:
#'
#' \tabular{rrrrr}{
#' \strong{prediction} \tab \strong{target}\cr
#' class_0 \tab class_1\cr
#' class_1 \tab class_1\cr
#' class_1 \tab class_0\cr
#' ... \tab ...}
#' }
#'
#' Note: The prediction column will be converted to the probability \code{0.0}
#' for the first class alphabetically and \code{1.0} for
#' the second class alphabetically.
#' }
#' \subsection{Gaussian}{
#' When \code{`type`} is \code{"gaussian"}, the predictions should be passed as
#' one column with the predicted values. E.g.:
#'
#' \tabular{rrrrr}{
#' \strong{prediction} \tab \strong{target}\cr
#' 28.9 \tab 30.2\cr
#' 33.2 \tab 27.1\cr
#' 23.4 \tab 21.3\cr
#' ... \tab ...}
#' }
#' @param target_col Name of the column with the true classes/values in \code{`data`}.
#'
#' When \code{`type`} is \code{"multinomial"}, this column should contain the class names,
#' not their indices.
#' @param prediction_cols Name(s) of column(s) with the predictions.
#'
#' Columns can be either numeric or character depending on which format is chosen.
#' See \code{`data`} for the possible formats.
#' @param id_col Name of ID column to aggregate predictions by.
#'
#' N.B. Current methods assume that the target class/value is constant within the IDs.
#'
#' N.B. When aggregating by ID, some metrics may be disabled.
#' @param id_method Method to use when aggregating predictions by ID.
#' Either \code{"mean"} or \code{"majority"}.
#'
#' When \code{`type`} is \code{"gaussian"}, only the \code{"mean"} method is available.
#'
#' \subsection{mean}{
#' The average prediction (value or probability) is calculated per ID and evaluated.
#' This method assumes that the target class/value is constant within the IDs.
#' }
#' \subsection{majority}{
#' The most predicted class per ID is found and evaluated. In case of a tie,
#' the winning classes share the probability (e.g. \code{P = 0.5} each when there are two majority classes).
#' This method assumes that the target class/value is constant within the IDs.
#' }
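#'
#' For instance, with \code{"binomial"} predictions and \code{id_method = "mean"}, an ID with the
#' (hypothetical) row-level probabilities \code{0.2}, \code{0.6} and \code{0.7} is aggregated to
#' their mean, \code{0.5}, which is then evaluated against the ID's (constant) target.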
#' @param apply_softmax Whether to apply the softmax function to the
#' prediction columns when \code{`type`} is \code{"multinomial"}.
#'
#' N.B. \strong{Multinomial models only}.
#' @param cutoff Threshold for predicted classes. (Numeric)
#'
#' N.B. \strong{Binomial models only}.
#' @param positive Level from dependent variable to predict.
#' Either as character (\emph{preferable}) or level index (\code{1} or \code{2} - alphabetically).
#'
#' E.g. if we have the levels \code{"cat"} and \code{"dog"} and we want \code{"dog"} to be the positive class,
#' we can either provide \code{"dog"} or \code{2}, as alphabetically, \code{"dog"} comes after \code{"cat"}.
#'
#' \strong{Note:} For \emph{reproducibility}, it's preferable to \strong{specify the name directly}, as
#' different \code{\link[base:locales]{locales}} may sort the levels differently.
#'
#' Used when calculating confusion matrix metrics and creating \code{ROC} curves.
#'
#' The \code{Process} column in the output can be used to verify this setting.
#'
#' N.B. Only affects the evaluation metrics. \strong{Does NOT affect what the probabilities are of (always the second class alphabetically).}
#'
#' N.B. \strong{Binomial models only}.
#' @param parallel Whether to run evaluations in parallel,
#' when \code{`data`} is grouped with \code{\link[dplyr:group_by]{group_by}}.
#' @param metrics \code{list} for enabling/disabling metrics.
#'
#' E.g. \code{list("RMSE" = FALSE)} would remove \code{RMSE} from the regression results,
#' and \code{list("Accuracy" = TRUE)} would add the regular \code{Accuracy} metric
#' to the classification results.
#' Default values (\code{TRUE}/\code{FALSE}) will be used for the remaining available metrics.
#'
#' You can enable/disable all metrics at once by including
#' \code{"all" = TRUE/FALSE} in the \code{list}. This is done prior to enabling/disabling
#' individual metrics, which is why e.g. \code{list("all" = FALSE, "RMSE" = TRUE)}
#' would return only the \code{RMSE} metric.
#'
#' The \code{list} can be created with
#' \code{\link[cvms:gaussian_metrics]{gaussian_metrics()}},
#' \code{\link[cvms:binomial_metrics]{binomial_metrics()}}, or
#' \code{\link[cvms:multinomial_metrics]{multinomial_metrics()}}.
#'
#' Also accepts the string \code{"all"}.
#' @param type Type of evaluation to perform:
#'
#' \code{"gaussian"} for regression (like linear regression).
#'
#' \code{"binomial"} for binary classification.
#'
#' \code{"multinomial"} for multiclass classification.
#' @param include_predictions Whether to include the predictions
#' in the output as a nested \code{tibble}. (Logical)
#' @param models Deprecated.
#' @details
#'
#' Packages used:
#'
#' \strong{Binomial} and \strong{Multinomial}:
#'
#' \code{ROC} and \code{AUC}:
#'
#' Binomial: \code{\link[pROC:roc]{pROC::roc}}
#'
#' Multinomial: \code{\link[pROC:multiclass.roc]{pROC::multiclass.roc}}
#' @return
#' ----------------------------------------------------------------
#'
#' \subsection{Gaussian Results}{
#'
#' ----------------------------------------------------------------
#'
#' \code{tibble} containing the following metrics by default:
#'
#' Average \strong{\code{RMSE}}, \strong{\code{MAE}}, \strong{\code{NRMSE(IQR)}},
#' \strong{\code{RRSE}}, \strong{\code{RAE}}, \strong{\code{RMSLE}}.
#'
#' See the additional metrics (disabled by default) at
#' \code{\link[cvms:gaussian_metrics]{?gaussian_metrics}}.
#'
#' Also includes:
#'
#' A nested \code{tibble} with the \strong{Predictions} and targets.
#'
#' A nested \strong{Process} information object with information
#' about the evaluation.
#' }
#'
#' ----------------------------------------------------------------
#'
#' \subsection{Binomial Results}{
#'
#' ----------------------------------------------------------------
#'
#' \code{tibble} with the following evaluation metrics, based on a
#' \code{confusion matrix} and a \code{ROC} curve fitted to the predictions:
#'
#' \code{Confusion Matrix}:
#'
#' \strong{\code{Balanced Accuracy}},
#' \strong{\code{Accuracy}},
#' \strong{\code{F1}},
#' \strong{\code{Sensitivity}},
#' \strong{\code{Specificity}},
#' \strong{\code{Positive Predictive Value}},
#' \strong{\code{Negative Predictive Value}},
#' \strong{\code{Kappa}},
#' \strong{\code{Detection Rate}},
#' \strong{\code{Detection Prevalence}},
#' \strong{\code{Prevalence}}, and
#' \strong{\code{MCC}} (Matthews correlation coefficient).
#'
#' \code{ROC}:
#'
#' \strong{\code{AUC}}, \strong{\code{Lower CI}}, and \strong{\code{Upper CI}}
#'
#' Note that the \code{ROC} curve is only computed if \code{AUC} is enabled. See \code{`metrics`}.
#'
#' Also includes:
#'
#' A nested \code{tibble} with the \strong{predictions} and targets.
#'
#' A \code{list} of \strong{ROC} curve objects (if computed).
#'
#' A nested \code{tibble} with the \strong{confusion matrix}.
#' The \code{Pos_} columns tell you whether a row is a
#' True Positive (\code{TP}), True Negative (\code{TN}),
#' False Positive (\code{FP}), or False Negative (\code{FN}),
#' depending on which level is the "\code{positive}" class.
#' I.e. the level you wish to predict.
#'
#' A nested \strong{Process} information object with information
#' about the evaluation.
#' }
#'
#' ----------------------------------------------------------------
#'
#' \subsection{Multinomial Results}{
#'
#' ----------------------------------------------------------------
#'
#' For each class, a \emph{one-vs-all} binomial evaluation is performed. This creates
#' a \strong{Class Level Results} \code{tibble} containing the same metrics as the binomial results
#' described above (excluding \code{Accuracy}, \code{MCC}, \code{AUC}, \code{Lower CI} and \code{Upper CI}),
#' along with a count of the class in the target column (\strong{\code{Support}}).
#' These metrics are used to calculate the \strong{macro-averaged} metrics.
#' The nested class level results \code{tibble} is also included in the output \code{tibble},
#' and could be reported along with the macro and overall metrics.
#'
#' The output \code{tibble} contains the macro and overall metrics.
#' The metrics that share their name with the metrics in the nested
#' class level results \code{tibble} are averages of those metrics
#' (note: does not remove \code{NA}s before averaging).
#' In addition to these, it also includes the \strong{\code{Overall Accuracy}} and
#' the multiclass \strong{\code{MCC}}.
#'
#' \strong{Note:} the \strong{\code{Balanced Accuracy}} is the macro-average of the class-level
#' balanced accuracies, \emph{not} the macro-averaged sensitivity that is sometimes used under that name!
#'
#' Other available metrics (disabled by default, see \code{metrics}):
#' \strong{\code{Accuracy}},
#' \emph{multiclass} \strong{\code{AUC}},
#' \strong{\code{Weighted Balanced Accuracy}},
#' \strong{\code{Weighted Accuracy}},
#' \strong{\code{Weighted F1}},
#' \strong{\code{Weighted Sensitivity}},
#' \strong{\code{Weighted Specificity}},
#' \strong{\code{Weighted Pos Pred Value}},
#' \strong{\code{Weighted Neg Pred Value}},
#' \strong{\code{Weighted Kappa}},
#' \strong{\code{Weighted Detection Rate}},
#' \strong{\code{Weighted Detection Prevalence}}, and
#' \strong{\code{Weighted Prevalence}}.
#'
#' Note that the "Weighted" average metrics are weighted by the \code{Support}.
#'
#' When working with a large set of classes, consider keeping \code{AUC} disabled.
#'
#' Also includes:
#'
#' A nested \code{tibble} with the \strong{Predictions} and targets.
#'
#' A \code{list} of \strong{ROC} curve objects when \code{AUC} is enabled.
#'
#' A nested \code{tibble} with the multiclass \strong{Confusion Matrix}.
#'
#' A nested \strong{Process} information object with information
#' about the evaluation.
#'
#' \subsection{Class Level Results}{
#'
#' Besides the binomial evaluation metrics and the \code{Support},
#' the nested class level results \code{tibble} also contains a
#' nested \code{tibble} with the \strong{Confusion Matrix} from the one-vs-all evaluation.
#' The \code{Pos_} columns tell you whether a row is a
#' True Positive (\code{TP}), True Negative (\code{TN}),
#' False Positive (\code{FP}), or False Negative (\code{FN}),
#' depending on which level is the "positive" class. In our case, \code{1} is the current class
#' and \code{0} represents all the other classes together.
#' }
#' }
#' @author Ludvig Renbo Olsen, \email{r-pkgs@@ludvigolsen.dk}
#' @export
#' @family evaluation functions
#' @examples
#' \donttest{
#' # Attach packages
#' library(cvms)
#' library(dplyr)
#'
#' # Load data
#' data <- participant.scores
#'
#' # Fit models
#' gaussian_model <- lm(age ~ diagnosis, data = data)
#' binomial_model <- glm(diagnosis ~ score, data = data, family = "binomial")
#'
#' # Add predictions
#' data[["gaussian_predictions"]] <- predict(gaussian_model, data,
#' type = "response",
#' allow.new.levels = TRUE
#' )
#' data[["binomial_predictions"]] <- predict(binomial_model, data,
#' allow.new.levels = TRUE
#' )
#'
#' # Gaussian evaluation
#' evaluate(
#' data = data, target_col = "age",
#' prediction_cols = "gaussian_predictions",
#' type = "gaussian"
#' )
#'
#' # Binomial evaluation
#' evaluate(
#' data = data, target_col = "diagnosis",
#' prediction_cols = "binomial_predictions",
#' type = "binomial"
#' )
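#'
#' # Select which metrics to compute
#' # A minimal sketch of the `metrics` argument; the list could
#' # also be built with `binomial_metrics()`
#' evaluate(
#' data = data, target_col = "diagnosis",
#' prediction_cols = "binomial_predictions",
#' type = "binomial",
#' metrics = list("all" = FALSE, "Accuracy" = TRUE, "F1" = TRUE)
#' )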
#'
#' #
#' # Multinomial
#' #
#'
#' # Create a tibble with predicted probabilities and targets
#' data_mc <- multiclass_probability_tibble(
#' num_classes = 3, num_observations = 45,
#' apply_softmax = TRUE, FUN = runif,
#' class_name = "class_",
#' add_targets = TRUE
#' )
#'
#' class_names <- paste0("class_", 1:3)
#'
#' # Multinomial evaluation
#' evaluate(
#' data = data_mc, target_col = "Target",
#' prediction_cols = class_names,
#' type = "multinomial"
#' )
#'
#' #
#' # ID evaluation
#' #
#'
#' # Gaussian ID evaluation
#' # Note that 'age' is the same for all observations
#' # of a participant
#' evaluate(
#' data = data, target_col = "age",
#' prediction_cols = "gaussian_predictions",
#' id_col = "participant",
#' type = "gaussian"
#' )
#'
#' # Binomial ID evaluation
#' evaluate(
#' data = data, target_col = "diagnosis",
#' prediction_cols = "binomial_predictions",
#' id_col = "participant",
#' id_method = "mean", # alternatively: "majority"
#' type = "binomial"
#' )
#'
#' # Multinomial ID evaluation
#'
#' # Add IDs and new targets (must be constant within IDs)
#' data_mc[["Target"]] <- NULL
#' data_mc[["ID"]] <- rep(1:9, each = 5)
#' id_classes <- tibble::tibble(
#' "ID" = 1:9,
#' "Target" = sample(x = class_names, size = 9, replace = TRUE)
#' )
#' data_mc <- data_mc %>%
#' dplyr::left_join(id_classes, by = "ID")
#'
#' # Perform ID evaluation
#' evaluate(
#' data = data_mc, target_col = "Target",
#' prediction_cols = class_names,
#' id_col = "ID",
#' id_method = "mean", # alternatively: "majority"
#' type = "multinomial"
#' )
#'
#' #
#' # Training and evaluating a multinomial model with nnet
#' #
#'
#' # Only run if `nnet` is installed
#' if (requireNamespace("nnet", quietly = TRUE)){
#'
#' # Create a data frame with some predictors and a target column
#' class_names <- paste0("class_", 1:4)
#' data_for_nnet <- multiclass_probability_tibble(
#' num_classes = 3, # Here, number of predictors
#' num_observations = 30,
#' apply_softmax = FALSE,
#' FUN = rnorm,
#' class_name = "predictor_"
#' ) %>%
#' dplyr::mutate(Target = sample(
#' class_names,
#' size = 30,
#' replace = TRUE
#' ))
#'
#' # Train multinomial model using the nnet package
#' mn_model <- nnet::multinom(
#' "Target ~ predictor_1 + predictor_2 + predictor_3",
#' data = data_for_nnet
#' )
#'
#' # Predict the targets in the dataset
#' # (we would usually use a test set instead)
#' predictions <- predict(
#' mn_model,
#' data_for_nnet,
#' type = "probs"
#' ) %>%
#' dplyr::as_tibble()
#'
#' # Add the targets
#' predictions[["Target"]] <- data_for_nnet[["Target"]]
#'
#' # Evaluate predictions
#' evaluate(
#' data = predictions,
#' target_col = "Target",
#' prediction_cols = class_names,
#' type = "multinomial"
#' )
#' }
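#'
#' #
#' # Evaluating a grouped data frame
#' #
#'
#' # A minimal sketch: when `data` is grouped with `dplyr::group_by()`,
#' # one row of results is returned per group
#' # (here assuming the 'session' column in `participant.scores`)
#' data %>%
#' dplyr::group_by(session) %>%
#' evaluate(
#' target_col = "diagnosis",
#' prediction_cols = "binomial_predictions",
#' type = "binomial"
#' )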
#' }
evaluate <- function(data,
target_col,
prediction_cols,
type,
id_col = NULL,
id_method = "mean",
apply_softmax = FALSE,
cutoff = 0.5,
positive = 2,
metrics = list(),
include_predictions = TRUE,
parallel = FALSE,
models = deprecated()) {
if (!rlang::is_missing(models)) {
deprecate_stop("1.0.0", "cvms::evaluate(models = )",
details = "Now only evaluates predictions."
)
}
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(
x = data, min.rows = 1,
min.cols = 2,
col.names = "named",
add = assert_collection
)
checkmate::assert_string(
x = target_col, min.chars = 1,
add = assert_collection
)
checkmate::assert_character(
x = prediction_cols,
min.len = 1,
min.chars = 1,
add = assert_collection
)
checkmate::assert_string(
x = id_col, min.chars = 1, null.ok = TRUE,
add = assert_collection
)
checkmate::reportAssertions(assert_collection)
group_columns <- colnames(dplyr::group_keys(data))
if (target_col %in% prediction_cols) {
assert_collection$push("'target_col' was in 'prediction_cols'.")
}
if (target_col %in% group_columns) {
assert_collection$push("'data' cannot be grouped by the 'target_col' column.")
}
if (length(intersect(prediction_cols, group_columns)) > 0) {
assert_collection$push("'data' cannot be grouped by a prediction column.")
}
if (!is.null(id_col)){
if (id_col %in% prediction_cols) {
assert_collection$push("'id_col' was in 'prediction_cols'.")
}
if (id_col == target_col) {
assert_collection$push("'id_col' and 'target_col' cannot be identical.")
}
if (id_col %in% group_columns) {
assert_collection$push("'data' cannot be grouped by the 'id_col' column.")
}
}
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
# Remove unnecessary columns
data <- base_select(
data,
c(
group_columns,
target_col,
prediction_cols,
id_col
)
) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
dplyr::as_tibble() %>% # removes grouping
dplyr::group_by_at(group_columns)
eval <- run_evaluate(
data = data,
target_col = target_col,
prediction_cols = prediction_cols,
type = type,
id_col = id_col,
id_method = id_method,
models = NULL,
apply_softmax = apply_softmax,
cutoff = cutoff,
positive = positive,
metrics = metrics,
include_predictions = include_predictions,
parallel = parallel,
caller = "evaluate()"
)
# Set extra class
class(eval) <- c("eval_results", class(eval))
eval
}
run_evaluate <- function(data,
target_col,
prediction_cols,
type = "gaussian",
id_col = NULL,
id_method = "mean",
models = NULL,
apply_softmax = FALSE,
cutoff = 0.5,
positive = 2,
metrics = list(),
fold_info_cols = NULL,
fold_and_fold_col = NULL,
include_predictions = TRUE,
include_fold_columns = TRUE,
parallel = FALSE,
na.rm = NULL,
caller = "evaluate()") {
if (checkmate::test_string(x = metrics, pattern = "^all$")) {
metrics <- list("all" = TRUE)
}
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
# Data frame
checkmate::assert_data_frame(
x = data, min.rows = 1,
min.cols = 2,
col.names = "named",
add = assert_collection
)
# String
checkmate::assert_string(
x = target_col, min.chars = 1,
add = assert_collection
)
checkmate::assert_character(
x = prediction_cols,
min.len = 1,
min.chars = 1,
add = assert_collection
)
checkmate::assert_string(
x = id_col, min.chars = 1, null.ok = TRUE,
add = assert_collection
)
checkmate::assert_string(x = caller, add = assert_collection)
# Names
checkmate::reportAssertions(assert_collection) # before names check
checkmate::assert_names(
x = colnames(data),
must.include = c(target_col, prediction_cols, id_col),
what = "colnames",
add = assert_collection
)
# Choice
checkmate::assert_choice(
x = type,
choices = c("gaussian", "binomial", "multinomial"),
add = assert_collection
)
checkmate::assert_choice(
x = id_method,
choices = c("mean", "majority"),
add = assert_collection
)
# Flag
checkmate::assert_flag(x = apply_softmax, add = assert_collection)
checkmate::assert_flag(x = include_predictions, add = assert_collection)
checkmate::assert_flag(x = include_fold_columns, add = assert_collection)
checkmate::assert_flag(x = parallel, add = assert_collection)
checkmate::assert_flag(x = na.rm, null.ok = TRUE, add = assert_collection)
# List
checkmate::assert_list(
x = models, null.ok = TRUE,
min.len = 1, names = "unnamed",
add = assert_collection
)
checkmate::assert_list(
x = metrics,
types = "logical",
any.missing = FALSE,
names = "named",
add = assert_collection
)
checkmate::assert_list(
x = fold_info_cols,
types = "character",
any.missing = FALSE,
names = "named",
null.ok = TRUE,
add = assert_collection
)
checkmate::assert_data_frame(
x = fold_and_fold_col,
col.names = "named",
any.missing = FALSE,
null.ok = TRUE,
add = assert_collection
)
# Number
checkmate::assert_number(
x = cutoff,
lower = 0,
upper = 1,
add = assert_collection
)
checkmate::assert(
checkmate::check_choice(
x = positive,
choices = c(1, 2)
),
checkmate::check_string(
x = positive,
min.chars = 1
)
)
checkmate::reportAssertions(assert_collection)
# Report assertions before this check so that `type` is not NULL below
if (type != "multinomial" &&
length(prediction_cols) != 1){
assert_collection$push(
paste0("When 'type' is '", type, "', 'prediction_cols' must have length 1.")
)
}
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
# Convert type to the internally used family name
family <- type
# Create basic model_specifics object
model_specifics <- list(
model_formula = "",
family = family,
REML = NULL,
link = NULL,
control = NULL,
cutoff = cutoff,
positive = positive,
model_verbose = FALSE,
model_fn = NULL,
predict_fn = NULL,
preprocess_fn = NULL,
preprocess_once = NULL,
for_process = list(
apply_softmax=apply_softmax,
id_col = id_col
),
hparams = NULL,
caller = caller
) %>%
update_model_specifics()
info_cols <- list("Results" = FALSE)
# One-hot encode predicted classes, if multinomial
# and prediction_cols is one column with classes
if (type == "multinomial" && length(prediction_cols) == 1) {
if (!is.character(data[[target_col]]) ||
!is.character(data[[prediction_cols]])){
assert_collection$push(paste0(
"When 'type' is 'multinomial' and 'prediction_cols' has length",
" 1, both 'data[[target_col]]' and 'data[[prediction_cols]]' must",
" have type character."))
}
if (isTRUE(apply_softmax)) {
assert_collection$push(paste0(
"When passing 'prediction_cols' as single column with multiple classes, ",
"'apply_softmax' should be 'FALSE'."
))
}
checkmate::reportAssertions(assert_collection)
# Extract the categorical levels in both target and prediction cols
c_levels <- union(data[[target_col]], data[[prediction_cols]])
data <- one_hot_encode(data, prediction_cols,
c_levels = c_levels,
use_epsilon = FALSE
)
prediction_cols <- sort(c_levels)
}
if (type == "binomial"){
if (!(is.numeric(data[[prediction_cols]]) ||
is.character(data[[prediction_cols]]))){
assert_collection$push(
paste0("When 'type' is 'binomial', 'data[[prediction_cols]]' mus",
"t be either numeric or character."))
}
if (is.numeric(data[[prediction_cols]])){
if (max(data[[prediction_cols]]) > 1 ||
min(data[[prediction_cols]]) < 0) {
assert_collection$push(
paste0(
"When 'type' is 'binomial' and 'data[[prediction_cols]]' ",
"is numeric, the values in 'data[[prediction_cols]]' must be b",
"etween 0 and 1."
))
}
# One may believe that setting the `positive` argument to the name
# of a class should mean that probabilities > `cutoff` would be
# considered that class, but this is not the case, so we
# make the user aware of this (once)
if (is.character(positive) && positive != sort(unique(as.character(data[[target_col]])))[[2]]){
inform_about_positive_no_effect_on_probs(positive=positive)
}
}
checkmate::reportAssertions(assert_collection)
if (is.character(data[[prediction_cols]])){
c_levels <- sort(union(data[[target_col]], data[[prediction_cols]]))
if (length(c_levels) != 2){
assert_collection$push(paste0("When 'type' is 'binomial' and 'data[[prediction_cols]]' ",
"has type character, the target and prediction columns must h",
"ave exactly 2 unique values combined. ",
"Did you mean to use type='multinomial'?"))
checkmate::reportAssertions(assert_collection)
}
# Replace with probabilities (1.0 or 0.0)
data[[prediction_cols]] <- ifelse(data[[prediction_cols]] == c_levels[[2]],
1.0, 0.0)
}
# Enable Accuracy if not otherwise specified
metrics <- add_metric_if_not_specified(metrics, "Accuracy", value=TRUE, check_all=TRUE)
}
# Find number of classes if classification
if (type == "binomial") {
num_classes <- 2
} else if (type == "multinomial") {
# We might not have every available class
# in the targets, so we rely on the number
# of prediction cols instead
num_classes <- length(prediction_cols)
} else {
num_classes <- NULL
}
# If the dataset is grouped, we need the indices and keys for the groups
# so we can evaluate group wise
# Get grouping keys
grouping_keys <- dplyr::group_keys(data)
# Make sure the grouping_keys and the dataset are in the same order,
# as we otherwise risk adding them in the wrong order later
data <- dplyr::arrange(data, !!!rlang::syms(colnames(grouping_keys)))
# Get group indices
grouping_factor <- dplyr::group_indices(data)
# Map for joining the grouping keys to the indices
grouping_keys_with_indices <- grouping_keys %>%
dplyr::mutate(.group = 1:dplyr::n())
if (!is.null(models) && length(unique(grouping_factor)) != length(models)) {
stop(paste0(
"When the dataframe is grouped, ",
"please provide a fitted model object per group or set models to NULL."
))
}
# Add grouping factor with a unique tmp var
local_tmp_grouping_factor_var <- create_tmp_name(data, ".group")
data[[local_tmp_grouping_factor_var]] <- grouping_factor
# Now that we've saved the groups
# we can ungroup the dataset
data <- data %>% dplyr::ungroup()
# Create temporary prediction column name
local_tmp_prediction_col_var <- create_tmp_name(data, "tmp_prediction_col")
local_tmp_std_col_var <- create_tmp_name(data, "tmp_std_col")
if (!is.null(id_col)) {
# ID level evaluation
# Currently don't support model object metrics
# in ID aggregation mode
if (!is.null(models)){
stop("When aggregating by ID, 'models' should be NULL.")
}
# Prepare data for ID level evaluation
data_for_id_evaluation <- prepare_id_level_evaluation(
data = data,
target_col = target_col,
prediction_cols = prediction_cols,
family = family,
cutoff = cutoff,
id_col = id_col,
id_method = id_method,
groups_col = local_tmp_grouping_factor_var,
apply_softmax = FALSE,
new_prediction_col_name = local_tmp_prediction_col_var,
new_std_col_name = local_tmp_std_col_var
) %>%
dplyr::ungroup() %>%
dplyr::left_join(grouping_keys_with_indices, by = ".group")
if (family == "multinomial") {
prediction_cols <- local_tmp_prediction_col_var
}
# Run ID level evaluation
evaluations <- run_internal_evaluate_wrapper(
data = data_for_id_evaluation,
type = family,
prediction_col = prediction_cols,
target_col = target_col,
id_col = id_col,
id_method = id_method,
fold_info_cols = fold_info_cols,
fold_and_fold_col = fold_and_fold_col,
groups_col = local_tmp_grouping_factor_var,
grouping_keys = grouping_keys,
stds_col = local_tmp_std_col_var,
models = models,
model_specifics = model_specifics,
metrics = metrics,
info_cols = info_cols,
num_classes = num_classes,
parallel = parallel,
include_predictions = include_predictions,
include_fold_columns = include_fold_columns,
na.rm = na.rm
)
} else {
# Regular evaluation
if (family == "multinomial") {
# Prepare data for multinomial evaluation
data <- prepare_multinomial_evaluation(
data = data,
target_col = target_col,
prediction_cols = prediction_cols,
apply_softmax = apply_softmax,
new_prediction_col_name = local_tmp_prediction_col_var
)
prediction_cols <- local_tmp_prediction_col_var
} else {
if (length(prediction_cols) > 1) {
stop(paste0("'prediction_cols' must have length 1 when type is '", type, "'."))
}
}
# Run evaluation
evaluations <- run_internal_evaluate_wrapper(
data = data,
type = family,
prediction_col = prediction_cols,
target_col = target_col,
models = models,
fold_info_cols = fold_info_cols,
fold_and_fold_col = fold_and_fold_col,
groups_col = local_tmp_grouping_factor_var,
grouping_keys = grouping_keys,
model_specifics = model_specifics,
metrics = metrics,
info_cols = info_cols,
num_classes = num_classes,
parallel = parallel,
include_predictions = include_predictions,
include_fold_columns = include_fold_columns,
na.rm = na.rm
)
}
evaluations
}
run_internal_evaluate_wrapper <- function(data,
type,
prediction_col,
target_col,
models,
groups_col,
grouping_keys,
id_col = NULL,
id_method = NULL,
stds_col = NULL,
fold_info_cols = NULL,
fold_and_fold_col = NULL,
model_specifics,
metrics = list(),
info_cols = list(),
include_predictions = TRUE,
include_fold_columns = TRUE,
num_classes = NULL,
na.rm = NULL,
parallel = FALSE) {
if (type != "gaussian") {
if (is.null(num_classes)) {
num_classes <- length(unique(data[[target_col]]))
}
}
if (is.null(fold_info_cols)) {
tmp_fold_cols_obj <- create_tmp_fold_cols(data)
data <- tmp_fold_cols_obj[["data"]]
fold_info_cols <- tmp_fold_cols_obj[["fold_info_cols"]]
include_fold_columns <- FALSE
} else {
include_fold_columns <- include_fold_columns
}
# Extract unique group identifiers
unique_group_levels <- unique(data[[groups_col]])
evaluations <- plyr::llply(seq_along(unique_group_levels),
.parallel = parallel, function(gr_ind) {
gr <- unique_group_levels[[gr_ind]]
data_for_current_group <- data[data[[groups_col]] == gr, ]
# Assign current model
if (is.null(models)) {
model <- NULL
} else {
model <- list(models[[gr_ind]])
}
internal_evaluate(
data = data_for_current_group,
type = type,
prediction_col = prediction_col,
target_col = target_col,
models = model,
id_col = id_col,
id_method = id_method,
fold_info_cols = fold_info_cols,
fold_and_fold_col = fold_and_fold_col,
grouping_key_names = colnames(grouping_keys),
stds_col = stds_col,
model_specifics = model_specifics,
metrics = metrics,
info_cols = info_cols,
include_fold_columns = include_fold_columns,
include_predictions = include_predictions,
na.rm = na.rm
)
}
) %>%
dplyr::bind_rows() %>%
tibble::as_tibble()
# Add group key to class level results
if (type == "multinomial") {
grouping_keys <- repeat_data_frame_if(grouping_keys, 2,
condition = na.rm == "both"
)
# Extract all the class level results tibbles
# Remove the Results column
# And add the grouping keys
class_level_results <- evaluations[["Class Level Results"]] %>%
# ".__grouping__" is used when na.rm = "both"!
dplyr::bind_rows(.id = ".__grouping__") %>%
tibble::as_tibble() %>%
base_deselect(cols = "Results")
evaluations[["Class Level Results"]] <- NULL
grouping_keys[[".__grouping__"]] <- as.character(seq_len(nrow(grouping_keys)))
evaluations[[".__grouping__"]] <- as.character(seq_len(nrow(evaluations)))
class_level_results <- grouping_keys %>%
# TODO This part might not work with na.rm = "both"
dplyr::right_join(class_level_results, by = ".__grouping__")
# Nest class level results
class_level_results <- class_level_results %>%
dplyr::group_by_at(c(".__grouping__", colnames(grouping_keys))) %>%
dplyr::group_nest(keep = TRUE) %>%
# Remove ".__grouping__" again
dplyr::mutate(data = purrr::map(.data$data,
.f = ~ .x %>%
base_deselect(cols = ".__grouping__")
)) %>%
base_rename(before = "data", after = "Class Level Results") %>%
base_select(c(".__grouping__", "Class Level Results"))
evaluations <- evaluations %>%
dplyr::left_join(class_level_results, by = ".__grouping__") %>%
base_deselect(".__grouping__")
}
# Add grouping keys
results <- grouping_keys %>%
dplyr::bind_cols(evaluations) %>%
base_deselect(".__grouping__")
# Move Process last
if ("Process" %in% colnames(results)){
results <- results %>%
dplyr::relocate("Process", .after = dplyr::last_col())
}
# If na.rm != "both" and it contains the NAs_removed column
if ((!is.character(na.rm) || na.rm != "both") &&
"NAs_removed" %in% names(results)) {
results[["NAs_removed"]] <- NULL
}
results
}
internal_evaluate <- function(data,
type = "gaussian",
prediction_col = "prediction",
target_col = "target",
model_was_null_col = NULL,
fold_info_cols = list(
rel_fold = "rel_fold",
abs_fold = "abs_fold",
fold_column = "fold_column"
),
fold_and_fold_col = NULL,
grouping_key_names = NULL,
stds_col = NULL,
models = NULL,
id_col = NULL,
id_method = NULL,
model_specifics = list(),
metrics = list(),
info_cols = list(),
include_fold_columns = TRUE,
include_predictions = TRUE,
na.rm = NULL) {
stopifnot(type %in% c("gaussian", "binomial", "multinomial")) # , "multiclass", "multilabel"))
# Fill metrics with default values for non-specified metrics
# and get the names of the metrics
metrics <- set_metrics(
family = type, metrics_list = metrics,
include_model_object_metrics = !is.null(models)
)
info_cols <- set_info_cols(family = type, info_cols_list = info_cols)
if (!is.null(na.rm) && na.rm == "both") {
info_cols <- c(info_cols, "NAs_removed")
}
# Set default na.rm if NULL
if (is.null(na.rm)) {
na.rm <- dplyr::case_when(
type == "gaussian" ~ TRUE,
type == "binomial" ~ FALSE,
type == "multinomial" ~ FALSE
)
}
# data is a table with predictions, targets and folds
# predictions can be values, logits, or classes, depending on evaluation type
# Unless specified otherwise, we don't care about non-converged models etc. here
# so we tell the eval that the models were not NULL. (TODO Would we even have predictions otherwise?)
if (is.null(model_was_null_col)) {
model_was_null_col <- "model_was_null"
data[[model_was_null_col]] <- FALSE
}
# Extract grouping key info
if (!is.null(grouping_key_names)) {
group_info <- data %>%
base_select(grouping_key_names)
} else {
group_info <- NULL
}
# Evaluate the predictions
prediction_evaluation <- internal_evaluate_predictions(
data = data,
prediction_col = prediction_col,
target_col = target_col,
model_was_null_col = model_was_null_col,
id_col = id_col,
id_method = id_method,
type = type,
fold_info_cols = fold_info_cols,
fold_and_fold_col = fold_and_fold_col,
group_info = group_info,
stds_col = stds_col,
model_specifics = model_specifics,
metrics = metrics,
include_fold_columns = include_fold_columns,
include_predictions = include_predictions,
na.rm = na.rm
)
if (!is.null(models)) {
model_evaluations <- plyr::ldply(seq_len(length(models)), function(i) {
internal_evaluate_model(
model = models[[i]],
train_data = NULL,
test_data = NULL,
type = type,
fold_info = NULL,
fold_info_cols = fold_info_cols,
model_specifics = model_specifics,
metrics = metrics,
include_fold_columns = include_fold_columns
)
})
output <- dplyr::bind_cols(model_evaluations, prediction_evaluation)
} else {
output <- prediction_evaluation
}
# Extract ROC from Results col
if (type == "multinomial") {
ROCs <- output[["Results"]] %c% "ROC" %>%
unlist(recursive = FALSE)
if ("mv.multiclass.roc" %ni% class(ROCs[[1]])){
ROCs <- unlist(ROCs, recursive = FALSE)
}
output[["ROC"]] <- ROCs
}
new_col_order <- c(metrics, intersect(info_cols, colnames(output)))
base_select(output, cols = new_col_order)
}
create_tmp_fold_cols <- function(data) {
# Create fold columns
local_tmp_fold_col_var <- create_tmp_name(data, "fold_column")
local_tmp_rel_fold_col_var <- create_tmp_name(data, "rel_fold")
local_tmp_abs_fold_col_var <- create_tmp_name(data, "abs_fold")
data[[local_tmp_fold_col_var]] <- as.character(1)
data[[local_tmp_rel_fold_col_var]] <- as.character(1)
data[[local_tmp_abs_fold_col_var]] <- as.character(1)
fold_info_cols <- list(
rel_fold = local_tmp_rel_fold_col_var,
abs_fold = local_tmp_abs_fold_col_var,
fold_column = local_tmp_fold_col_var
)
list(
"data" = data,
"fold_info_cols" = fold_info_cols
)
}