# R/core.benchmark.R

#' @title
#' Benchmarking of algorithms on objective functions / problems.
#'
#' @description
#' This function takes a set of algorithms \eqn{A} and a set of objective functions or
#' problems \eqn{P} and performs \code{repls} independent runs of each \eqn{A \times P}
#' combination. The result is a \code{data.frame} with the results of all runs.
#'
#' @note
#' The function works in both single- and multi-objective settings. Note that all objective
#' functions need to have the same objective space dimension; otherwise gathering the
#' results will fail. It is the user's responsibility to keep track of this.
#'
#' @details
#' The function uses \code{\link[future.apply]{future_apply}} internally and hence allows
#' for parallel execution of the independent jobs with any future plan (see
#' \code{\link[future]{plan}}).
#'
#' @param algos [\code{list}]\cr
#'   List of ecr3 algorithms, i.e., objects of class \dQuote{EA}.
#' @param repls [\code{integer(1)}]\cr
#'   Number of independent repetitions for each combination of algorithm and fun/problem.
#' @param funs [\code{named list}]\cr
#'   List of objective/fitness functions. If the list is named, the names are used to
#'   distinguish the results in the output. In case the list is unnamed the functions
#'   are named \dQuote{fun1, ..., funN} where N is the length of \code{funs}. If the
#'   passed functions are \pkg{smoof} functions, their names are extracted automatically.
#'   Default is \code{NULL}, i.e., no functions.
#' @param problems [\code{named list}]\cr
#'   List of problems as an alternative to \code{funs}. This should be used if the
#'   fitness function itself is the same, but depends on additional input. Think of the
#'   knapsack problem, where the fitness function is the total profit of a packing
#'   plan that satisfies the knapsack capacity limit. Here, the problem is the
#'   set of items with their respective weights/profits plus the knapsack capacity.
#'   Default is \code{NULL}, i.e., no problems.
#' @return [\code{data.frame}] Data frame with columns \dQuote{y1, ..., yN}, where
#'   N is the number of objectives of each fitness function, \dQuote{algorithm}
#'   (the algorithm name), \dQuote{repl} (the replication number) and \dQuote{prob}
#'   (the function or problem name).
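#' @examples
#' \dontrun{
#' # Illustrative sketch only: simpleEA() is a placeholder for whatever constructor
#' # returns objects of class "EA" in your setup; the smoof and future calls are real.
#' library(smoof)
#'
#' # two 2-dimensional single-objective test functions (smoof names are picked up automatically)
#' funs = list(makeSphereFunction(2L), makeRosenbrockFunction(2L))
#'
#' # optionally run the independent experiments in parallel with any future plan
#' future::plan("multisession")
#'
#' algos = list(simpleEA(mu = 10L, lambda = 10L)) # hypothetical EA constructor
#' res = benchmark(algos, repls = 5L, funs = funs)
#' head(res)
#' }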
#' @export
# NB: PRNG seeding is delegated to future via future.seed = TRUE in the future_apply() call.
benchmark = function(algos, repls, funs = NULL, problems = NULL) {
  #FIXME: add class check for list element
  checkmate::assertList(algos, min.len = 1L, any.missing = FALSE, all.missing = FALSE)
  repls = checkmate::asCount(repls, positive = TRUE)

  if (!xor(is.null(funs), is.null(problems))) {
    BBmisc::stopf("[ecr3] benchmark: You may either pass a list of funs or a list of problems, but not both.")
  }

  # internally we handle both as problems
  is.based.on.funs = !is.null(funs)
  if (is.null(problems))
    problems = funs

  n.algos = length(algos)
  n.probs = length(problems)

  prob.names = BBmisc::coalesce(names(problems), sprintf("fun%i", seq_len(n.probs)))

  # extract smoof names if possible
  if (all(sapply(problems, smoof::isSmoofFunction)))
    prob.names = sapply(problems, smoof::getName)

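  # full-factorial experimental design: one row of integer indices per
  # (algorithm, problem, replication) combination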
  grid = expand.grid(
    algo = seq_len(n.algos),
    prob = seq_len(n.probs),
    repl = seq_len(repls),
    stringsAsFactors = FALSE)

  lg$info("Running %i experiments via future.", nrow(grid))

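  # each experiment works on a deep clone of the algorithm object so that parallel runs
  # do not share mutable state, swaps in the fitness function or problem, runs it once
  # and returns a data.frame; failed runs are mapped to NULL and filtered out below;
  # future.seed = TRUE requests parallel-safe RNG streams for the stochastic runs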
  res = future.apply::future_apply(grid, 1L, function(row) {
    su = as.list(row)
    BBmisc::catf("Algorithm %s on fun %i (repl %i)", su$algo, su$prob, su$repl)
    algo = algos[[su$algo]]$clone(deep = TRUE)
    res = NULL
    if (is.based.on.funs) {
      algo$control$fitness.fun = problems[[su$prob]]
      # FIXME: I really don't get why algo$control$fitness.fun is changed, but algo$run() evaluates the old fitness.fun
      # BBmisc::catf("FF: %s", getName(algo$control$fitness.fun))
      # BBmisc::catf("FF sample: %.2f, %.2f", algo$control$fitness.fun(c(0.5, 0.5))[1L], algo$control$fitness.fun(c(0.5, 0.5))[2L])
      res = try({algo$run(1L)}, silent = TRUE)
    } else {
      # FIXME: problem must be named somehow?
      run.params = c(list(repl = 1L), problems[su$prob])
      res = try({do.call(algo$run, run.params)}, silent = TRUE)
    }

    # failed run?
    if (inherits(res, "try-error")) {
      lg$error("Expriment (algorithm = %s, problem = %s, repl = %i) failed.",
        algo$algo.name, prob.names[su$algo], su$repl)
      return(NULL)
    }

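    # getXY() is assumed to return the run results as a data.frame; drop its own
    # algorithm column and prepend the experiment meta data instead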
    xy = res$getXY()
    xy$algorithm = NULL
    cbind(data.frame(algorithm = algo$algo.name, prob = prob.names[su$prob], repl = su$repl), xy)
  }, future.seed = TRUE)

  # filter failed runs
  n = nrow(grid)
  n.failed = sum(sapply(res, is.null))
  n.done   = n - n.failed

  lg$info("benchmark() on experiments finished (%.2f success rate).", (n.done / n) * 100)

  BBmisc::catf("Benchmark summary:")
  BBmisc::catf("DONE  : %i/%i (%.2f%%)", n.done, n, (n.done / n) * 100)
  BBmisc::catf("FAILED: %i/%i (%.2f%%)", n.failed, n, (n.failed / n) * 100)
  #FIXME: more information on failed runs? Or keep it simple?

  res = BBmisc::filterNull(res)
  res = dplyr::as_tibble(do.call(rbind, res))
  return(res)
}