#' selectPortfolio
#'
#' Method used to start the multi-objective algorithm portfolio selection
#'
#' @param data [\code{data.frame}] \cr
#' Data to be analysed. Must contain all \code{var.cols}, \code{algo.col}
#' and \code{repl.col}. \code{var.cols} should form a Pareto front for every
#' combination of \code{repl.col} and \code{algo.col}.
#' @param var.cols [\code{character(2)}] \cr
#' Names of columns with containing the values of the Pareto fronts.
#' @param algo.col [\code{character(1)}] \cr
#' Name of column containing the particular algorithm.
#' @param repl.col [\code{character(1)}] \cr
#' Name of column containing the particular replication.
#' @param repl.col [\code{character(1)}] \cr
#' Name of column containing the particular data set. Can be missing.
#' @param indicator [\code{character(1)}] \cr
#' Which multi-objective indicator should be used? Possible values are: \cr
#' \dQuote{hv}: Dominated Hypervolume (S-Metric), the default \cr
#' \dQuote{epsilon}: epsilon-indicator \cr
#' \dQuote{r2}: R2-indicator \cr
#' @param ref.point [\code{numeric(2)}] \cr
#' Reference point for the hypervolume indicator. Default is c(1.1, 1.1)
#' @param lambda [\code{numeric(1)}] \cr
#' Number of vector used in the calculation of the R2-indicator. Default is 100.
#' @param eta [\code{numeric(1)}] \cr
#' In how many replications each algorithm must have non-dominated points
#' to be selected in the first selection step? Must be in [0, 1] - 0.5
#' requres non-dominated points more than in half of all replications. Default is 0.5
#' @param w [\code{numeric(2)}] \cr
#' Weight vector for the augmented Tschebbyscheff Norm in the second selection
#' stept. The first elements corresponds to the number of algorithms, the second
#' to the optimality gap. If you want to have a low optimality gap, the second
#' element of w should be set to an high value, if a low number of algorithms
#' is more important the first element must be high. All positive real weights
#' are allowed and will be normalized to sum(w) = 1. Default is c(0.05, 0.95).
#' @param cp [\code{numeric(1)}] \cr
#' Complexity parameter of the decision tree used in step 3. Same parameter as
#' in \link[rpart]{rpart}. Default is 0.1.
#' @param normalize [\code{logical(1)}] \cr
#' Should the data be normalized to [0, 1]? Default is TRUE.
#'
#' @return
#' Object of class \code{frontTestResult}. Named list with the elements:
#'
#' \itemize{
#' \item{non.dominated.algos [\code{logical}]}{Named vector, each element
#' corresponds to one algorithm. TRUE if algorithm is selected in step 1.}
#' \item{algos.domination.count [\code{numeric}]}{Named vector, each element
#' corresponds to one algorithm. Number of replications with non-dominated
#' points for each algorithm.}
#' \item{relevant.algos [\code{logical}]}{Named vector, each element
#' corresponds to one algorithm selected in step 1. TRUE if the algorithm
#' is selected in step 2.}
#' \item{algos.selection.vals [\code{data.frame}]}{Data.frame with 4 cols and
#' 2^(number of algorithms after in step 2) - 1 rows. First cols active in
#' this subset. Last 2 rows are numeric. First is the number of algos in this
#' subset, second one the optimality gap..}
#' \item{best.algo.order [\code{Factor}]}{Vector gives the order of the algorithms
#' on the common Pareto front, algorithms low values of the first var.col first.}
#' \item{split.vals [\code{numeric}]}{Split values between the algorithms given
#' in best.algo.order.}
#' \item{args [\code{list}] List containing all input arguments}
#' }
#' @examples
#' \dontrun{
#' # Load data - for the data with subsampling enabled use apprSubsampleSVMParetoFronts
#' data(apprSVMParetoFronts)
#'
#' # Avaible datasets: codrna, mnist, protein, vehicle
#' data = subset(apprSVMParetoFronts, apprSVMParetoFronts$dataset == "mnist")
#'
#' # Start the front analysis with the main procedure
#' res = selectPortfolio(
#' data = data,
#' var.cols = c("error", "execTime"),
#' algo.col = "solver",
#' repl.col = "repl",
#' indicator = "hv",
#' ref.point = c(1.1, 1.1),
#' eta = 0.5,
#' w = c(0.05, 0.95),
#' cp = 0.01,
#' normalize = TRUE
#' )
#' print(res)
#' plot(res, colors = c("turquoise", "green", "violet", "red", "black", "blue"))}
#' @export
selectPortfolio = function(data, var.cols, algo.col, repl.col, data.col,
indicator = "hv", ref.point = c(1.1, 1.1), lambda = 100,
eta = 0.5, ny = 0.5, w = c(0.05, 0.95), cp = 0.1, normalize = TRUE) {
requirePackages(c("emoa"))
# data.col can be missing, analysis will be done on single data set
if (missing(data.col)) {
data$dataset = "dataset"
data.col = "dataset"
}
assertDataFrame(data, any.missing = FALSE)
assertSubset(var.cols, colnames(data))
assertCharacter(var.cols, len = 2)
assertChoice(algo.col, colnames(data))
assertChoice(repl.col, colnames(data))
assertChoice(indicator, c("hv", "r2", "epsilon"))
assertNumeric(ref.point, lower = 0L, len = 2L)
lambda = asCount(lambda)
assertNumber(eta, lower = 0, upper = 1)
assertNumeric(w, lower = 0, len = 2L)
w = w / sum(w)
assertNumber(cp, lower = 0L)
assertFlag(normalize)
if (length(unique(c(algo.col, repl.col, var.cols, data.col))) > 5L)
stop("algo.col, var.col, data.col and repl.col must be distinct.")
# Initialize a list with all input args for the result object
args = list(
data = data,
var.cols = var.cols,
algo.col = algo.col,
repl.col = repl.col,
data.col = data.col,
indicator = indicator,
ref.point = ref.point,
lambda = lambda,
eta = eta,
w = w,
cp = cp,
normalize = normalize
)
algos = factor(sort(unique(data[, algo.col])))
# Normalize Data par data set
if (normalize) {
splitted = split(data[, var.cols], data[, data.col])
normalized = lapply(splitted, function(d)
normalize(d[, var.cols], method = "range", range = c(0, 1))
)
data[, var.cols] = do.call(rbind, normalized)
}
# Select contribution function with respect to indicator character
contrFun = switch(indicator,
hv = function(points, o)
hypervolume_indicator(t(points), t(o), ref = ref.point),
epsilon = function(points, o)
epsilon_indicator(t(points), t(o)),
r2 = function(points, o)
r2_indicator(t(points), t(o), lambda = lambda)
)
# First selection: Remove all algorithms that are dominated in at least eta% repls
non.dom.algos = relevantAlgosDominationSelection(data, var.cols, algo.col,
repl.col, data.col, eta, ny)
# Now drop non-selected algos and set new factor levels
# FIXME: dropAlgorithms schreiben
algos = factor(algos[non.dom.algos$relevant.algos])
data = subset(data, data[, algo.col] %in% algos)
data[, algo.col] = factor(data[, algo.col], levels = algos)
# If only 1 algorithms remains, stop here. This algorithm dominates the complete
# front
if (length(algos) == 1) {
res = list(
non.dominated.algos = non.dom.algos$relevant.algos,
algos.domination.count = non.dom.algos$counts,
relevant.algos = NULL,
algos.selection.vals = NULL,
best.algo.order = algos,
split.vals = c(),
args = args
)
res = addClasses(res, "mosap_result")
return(res)
}
# Second Selection: Multicrit Selection
selected.algos = relevantAlgosMulticritSelection(data, var.cols, algo.col,
repl.col, data.col, contrFun, w)
# Now drop non selected algos and set new factor levels
# FIXME: funktion dropAlgorithms
algos = factor(algos[selected.algos$relevant.algos])
data = subset(data, data[, algo.col] %in% algos)
data[, algo.col] = factor(data[, algo.col], levels = algos)
# Third Step: Find the best order of algorithms
if (length(algos) == 1L) {
perms = list(perm = algos, split.vals = NULL)
}
if (length(algos) > 1L) {
perms = sortedParetoFrontClassification(data, var.cols, algo.col, repl.col,
data.col, contrFun, cp)
}
# Build result object
res = list(
non.dominated.algos = non.dom.algos$relevant.algos,
acc = perms$acc,
algos.domination.count = non.dom.algos$counts,
relevant.algos = selected.algos$relevant.algos,
algos.selection.vals = selected.algos$algo.contrs,
best.algo.order = perms$perm,
split.vals = perms$split.vals,
args = args
)
res = addClasses(res, "mosap_result")
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.