Nothing
#' @title Aggregation methods.
#'
#' @description
#' \describe{
#' \item{**test.mean**}{Mean of performance values on test sets.}
#' \item{**test.sd**}{Standard deviation of performance values on test sets.}
#' \item{**test.median**}{Median of performance values on test sets.}
#' \item{**test.min**}{Minimum of performance values on test sets.}
#' \item{**test.max**}{Maximum of performance values on test sets.}
#' \item{**test.sum**}{Sum of performance values on test sets.}
#' \item{**train.mean**}{Mean of performance values on training sets.}
#' \item{**train.sd**}{Standard deviation of performance values on training sets.}
#' \item{**train.median**}{Median of performance values on training sets.}
#' \item{**train.min**}{Minimum of performance values on training sets.}
#' \item{**train.max**}{Maximum of performance values on training sets.}
#' \item{**train.sum**}{Sum of performance values on training sets.}
#' \item{**b632**}{Aggregation for B632 bootstrap.}
#' \item{**b632plus**}{Aggregation for B632+ bootstrap.}
#' \item{**testgroup.mean**}{Performance values on test sets are grouped according
#' to resampling method. The mean for every group is calculated, then the mean of those means.
#' Mainly used for repeated CV.}
#' \item{**testgroup.sd**}{Similar to **testgroup.mean** - after
#' the mean for every group is calculated, the standard deviation of those means is obtained.
#' Mainly used for repeated CV.}
#' \item{**test.join**}{Performance measure on joined test sets.
#' This is especially useful for small sample sizes where unbalanced group sizes have a significant impact
#' on the aggregation, especially for cross-validation test.join might make sense now.
#' For the repeated CV, the performance is calculated on each repetition and then aggregated
#' with the arithmetic mean.}
#' }
#' @seealso [Aggregation]
#' @name aggregations
#' @usage NULL
#' @format NULL
#' @export
#' @rdname aggregations
test.mean = makeAggregation(
id = "test.mean",
name = "Test mean",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) mean(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.sd = makeAggregation(
id = "test.sd",
name = "Test sd",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) sd(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.median = makeAggregation(
id = "test.median",
name = "Test median",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) median(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.min = makeAggregation(
id = "test.min",
name = "Test minimum",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) min(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.max = makeAggregation(
id = "test.max",
name = "Test maximum",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) max(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.sum = makeAggregation(
id = "test.sum",
name = "Test sum",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) sum(perf.test)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.range = makeAggregation(
id = "test.range",
name = "Test range",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) diff(range(perf.test))
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.rmse = makeAggregation(
id = "test.rmse",
name = "Test RMSE",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) sqrt(mean(perf.test^2))
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.mean = makeAggregation(
id = "train.mean",
name = "Training mean",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) mean(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.sd = makeAggregation(
id = "train.sd",
name = "Training sd",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) sd(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.median = makeAggregation(
id = "train.median",
name = "Training median",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) median(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.min = makeAggregation(
id = "train.min",
name = "Training min",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) min(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.max = makeAggregation(
id = "train.max",
name = "Training max",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) max(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.sum = makeAggregation(
id = "train.sum",
name = "Training sum",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) sum(perf.train)
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.range = makeAggregation(
id = "train.range",
name = "Training range",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) diff(range(perf.train))
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
train.rmse = makeAggregation(
id = "train.rmse",
name = "Training RMSE",
properties = "req.train",
fun = function(task, perf.test, perf.train, measure, group, pred) sqrt(mean(perf.train^2))
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
b632 = makeAggregation(
id = "b632",
name = ".632 Bootstrap",
properties = c("req.train", "req.test"),
fun = function(task, perf.test, perf.train, measure, group, pred) {
mean(0.632 * perf.test + 0.368 * perf.train)
}
)
# FIXME: read this again properly and double check it
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
b632plus = makeAggregation(
id = "b632plus",
name = ".632 Bootstrap plus",
properties = c("req.train", "req.test"),
fun = function(task, perf.test, perf.train, measure, group, pred) {
df = as.data.frame(pred)
a = numeric(length(perf.test))
for (i in seq_along(a)) {
df2 = df[df$iter == i, , drop = FALSE]
y1 = df2$truth
y2 = df2$response
grid = expand.grid(y1, y2, KEEP.OUT.ATTRS = FALSE)
pred2 = makePrediction(
task.desc = pred$task.desc, row.names = rownames(grid),
id = NULL, truth = grid[, 1L], predict.type = "response", y = grid[, 2L],
time = NA_real_
)
gamma = performance(pred2, measures = measure)
R = (perf.test[i] - perf.train[i]) / (gamma - perf.train[i])
w = 0.632 / (1 - 0.368 * R)
a[i] = (1 - w) * perf.train[i] + w * perf.test[i]
}
return(mean(a))
}
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
testgroup.mean = makeAggregation(
id = "testgroup.mean",
name = "Test group mean",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) {
mean(vnapply(split(perf.test, group), mean))
}
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
testgroup.sd = makeAggregation(
id = "testgroup.sd",
name = "Test group standard deviation",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) {
sd(BBmisc::vnapply(split(perf.test, group), mean))
}
)
#' @export
#' @usage NULL
#' @format NULL
#' @rdname aggregations
test.join = makeAggregation(
id = "test.join",
name = "Test join",
properties = "req.test",
fun = function(task, perf.test, perf.train, measure, group, pred) {
df = as.data.frame(pred)
f = if (length(group)) group[df$iter] else factor(rep(1L, nrow(df)))
mean(vnapply(split(df, f), function(df) {
if (pred$predict.type == "response") y = df$response
if (pred$predict.type == "prob") {
y = df[, stri_startswith_fixed(colnames(df), "prob."), drop = FALSE]
colnames(y) = stri_sub(colnames(y), 6L)
}
npred = makePrediction(
task.desc = pred$task.desc, row.names = rownames(df),
id = NULL, truth = df$truth, predict.type = pred$predict.type, y = y,
time = NA_real_
)
performance(npred, measure)
}))
}
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.