R/psRace.R
In irace: Iterated Racing for Automatic Algorithm Configuration

Documented in psRace

#' Post-selection race
#'
#' \code{psRace} performs a post-selection race of a set of configurations.
#'
#' @inheritParams has_testing_data
#'
#' @param max_experiments `numeric(1)`\cr Number of experiments for the
#'   post-selection race. If it is equal to or smaller than 1, then it is a
#'   fraction of the total budget given by
#'   `iraceResults$scenario$maxExperiments` or `iraceResults$scenario$maxTime /
#'   iraceResults$state$boundEstimate`.
#' @param conf_ids  IDs of the configurations in `iraceResults$allConfigurations` to be used for the post-selection.
#' If `NULL`, then the configurations are automatically selected.
#' @param iteration_elites  If `FALSE`, give priority to selecting the configurations that were elite in the last iteration.
#' If `TRUE`, select from all elite configurations of all iterations. This parameter only has an effect when `conf_ids` is not `NULL`.
#' @param psrace_logFile  `character(1)`\cr Log file to save the post-selection race log. If `NULL`, the log is saved in `iraceResults$scenario$logFile`.
#'
#' @return The elite configurations after the post-selection. In addition, if `iraceResults$scenario$logFile` is defined,
#' it saves an updated copy of `iraceResults` in that file, where `iraceResults$psrace_log` is a list with the following elements:
#'  \describe{
#'    \item{configurations}{Configurations used in the post-selection race.}
#'    \item{instances}{Data frame with the instances used in the post-selection race. First column has the
#'    instances IDs from `iraceResults$scenario$instances`, second column the seed assigned to the instance.}
#'    \item{max_experiments}{Configuration budget assigned to the post-selection race.}
#'    \item{experiments}{Matrix of results generated by the post-selection  race, in the same format as the matrix \code{iraceResults$experiments}. Column names are the configuration IDs and  row names are the instance IDs.}
#'    \item{elites}{Best configurations found in the experiments.}
#'  }
#'
#' @examples
#' \donttest{
#'   irace_log <- read_logfile(system.file(package="irace", "exdata", "sann.rda"))
#'   # Use a temporary file to not change the original "sann.rda".
#'   psrace_logFile <- withr::local_tempfile(fileext = ".Rdata")
#'   # Execute the post-selection after the execution of irace. Use 10% of the total budget.
#'   psRace(irace_log, max_experiments=0.1, psrace_logFile = psrace_logFile)
#'   # Print psrace_log
#'   irace_log <- read_logfile(psrace_logFile)
#'   str(irace_log$psrace_log)
#' }
#' @author Leslie Pérez Cáceres and Manuel López-Ibáñez
#' @export
psRace <- function(iraceResults, max_experiments, conf_ids = NULL, iteration_elites = FALSE,
                   psrace_logFile = NULL)
{
  irace_note("Starting post-selection:\n")
  if (missing(iraceResults)) stop("argument 'iraceResults' is missing.")
  iraceResults <- read_logfile(iraceResults)
  scenario <- iraceResults$scenario
  if (!is.null(psrace_logFile))
    scenario$logFile <- psrace_logFile

  race_state <- iraceResults$state
  race_state$initialize(scenario, new = FALSE) # Restores the random seed
  if (max_experiments <= 0) stop("'max_experiments' must be positive.")
  if (max_experiments <= 1) {
    budget <- if (scenario$maxTime == 0L)
                scenario$maxExperiments else scenario$maxTime / race_state$boundEstimate
    max_experiments <- as.integer(ceiling(max_experiments * budget))
    if (scenario$maxTime == 0L)
      cat(sep="", "# scenario maxExperiments:", scenario$maxExperiments, "\n")
    else
      cat(sep="", "# scenario maxTime:", scenario$maxTime, "\n")
  }
  # Get selected configurations.
  if (is.null(conf_ids)) {
    # FIXME: Handle scenario$maxTime > 0
    irace_assert(scenario$maxTime == 0)

    which_max_last <- function(x) 1L + length(x) - which.max(rev(x))

    # We want to race at least two configurations, so we generate all
    # integers between 3 and 2^max_confs - 1L that are not powers of 2, then convert
    # it to a bit-string then to a logical vector.
    generate_combs_2 <- function(max_confs) {
      max_pow <- as.integer(2^max_confs)
      pow_2 <- as.integer(2L^(0L:(max_confs-1L)))
      s <- seq.int(3L, max_pow - 1L, 2L)
      if (max_pow >= 8L)
        s <- c(s, setdiff(seq.int(6L, max_pow - 2L, 2L), pow_2[-1L:-3L]))
      lapply(s, function(z) as.logical((z %/% pow_2) %% 2L))
    }
    # A version of the above that is not limited to two configurations.
    generate_combs_1 <- function(max_confs) {
      max_pow <- as.integer(2^max_confs)
      pow_2 <- as.integer(2L^(0L:(max_confs-1L)))
      lapply(seq.int(1L, max_pow - 1L, 1L),
        function(z) as.logical((z %/% pow_2) %% 2L))
    }
    # FIXME: Is this really faster than head(x, n=max_len) ?
    truncate_conf_needs <- function(x, max_len) {
      if (length(x) <= max_len) return(x)
      x[seq_len(max_len)]
    }

    get_confs_for_psrace <- function(iraceResults, iteration_elites, max_experiments, conf_ids, rejected_ids) {
      scenario <- iraceResults$scenario
      report_selected <-
        if (scenario$quiet) do_nothing
        else function(conf_ids, conf_needs)
          cat(sep="", "# Configurations selected: ", paste0(collapse=", ", conf_ids),
            ".\n# Pending instances: ", paste0(collapse=", ", conf_needs), ".\n")

      allElites <- iraceResults$allElites
      experiments <- iraceResults$experiments
      conf_ids <- if (iteration_elites) unlist(rev(allElites)) else allElites[[length(allElites)]]
      conf_ids <- unique(c(conf_ids, iraceResults$allConfigurations[[".ID."]]))
      # NA may be generated if we skipped iterations.
      if (anyNA(conf_ids))
        conf_ids <- conf_ids[!is.na(conf_ids)]
      # Remove rejected configurations.
      if (length(rejected_ids))
        conf_ids <- setdiff(conf_ids, rejected_ids)
      experiments <- experiments[, conf_ids, drop = FALSE]
      conf_needs <- matrixStats::colCounts(experiments, value = NA, useNames = TRUE)
      n_done <- nrow(experiments) - min(conf_needs)
      # Remove any configuration that needs more than max_experiments.
      conf_needs <- conf_needs[conf_needs <= max_experiments]
      if (length(conf_needs) == 1L) {
        if (!scenario$quiet)
          cat("# Insufficient budget to race more than one configuration!\n")
        return(names(conf_needs))
      }
      if (!scenario$deterministic || n_done < length(scenario$instances)) {
        # We want to evaluate in at least n_new instances more.
        n_new <- scenario$eachTest * scenario$blockSize
        n_confs <- floor(max_experiments / n_new)
        # If we have n_confs that have been evaluated in all instances, select those.
        if (n_confs > 1L && sum(conf_needs == 0L) >= n_confs) {
          conf_needs <- conf_needs[conf_needs == 0L][1:n_confs]
          conf_ids <- names(conf_needs)
          report_selected(conf_ids, conf_needs)
          irace_assert(length(conf_ids) > 1L, eval_after = {
            cat("n_confs: ", n_confs, "\nn_new:", n_new, "\n")
            print(conf_needs)
            save(iraceResults, file="bug-conf_ids.Rdata")
          })
          return(conf_ids)
        }
        # Let's try first to evaluate on new instances.
        conf_needs_new <- truncate_conf_needs(conf_needs + n_new, max_len = 16L)
        irace_assert(length(conf_needs_new) > 1L, eval_after={
          cat("max_experiments: ", max_experiments, "\nn_new: ", n_new, "\nconf_needs:")
          print(conf_needs)
          save(iraceResults, file="bug-conf_ids.Rdata")
        })
        combs <- generate_combs_2(length(conf_needs_new))
        left <- sapply(combs, function(x) max_experiments - sum(conf_needs_new[x]), USE.NAMES=FALSE)
        irace_assert(!is.null(left) && length(left) > 0L && !anyNA(left),
          eval_after= {
            cat("max_experiments: ", max_experiments, "\n")
            cat("length(left): ", length(left), "\n")
            cat("sum(is.na(left)): ", sum(is.na(left)), "\n")
            save(iraceResults, file="bug-conf_ids.Rdata")
          })

        if (any(left >= 0L)) { # We have enough budget to evaluate on a new instance.
          # Select the combination that will allow to evaluate the most configurations.
          combs <- combs[left >= 0]
          n <- sapply(combs, sum, USE.NAMES=FALSE)
          winner <- which.max(n)
          conf_needs_new <- conf_needs_new[combs[[winner]]]
          conf_ids <- names(conf_needs_new)
          report_selected(conf_ids, conf_needs_new)
          irace_assert(length(conf_ids) > 1L, eval_after = {
            cat("max_experiments: ", max_experiments, "\n")
            cat("length(left): ", length(left), "\n")
            cat("sum(is.na(left)): ", sum(is.na(left)), "\n")
            cat("winner: ", winner, "\n")
            cat("combs[[winner]]: ", paste0(collapse=",", combs[[winner]]), "\n")
            save(iraceResults, file="bug-conf_ids.Rdata")
          })
          return(conf_ids)
        }
      }
      # We do not have enough budget to see new instances, so we need consider
      # at least 1 configuration that has NA values, but we still include the
      # configurations that have been evaluated on all instances.
      conf_needs_zero <- conf_needs[conf_needs == 0L]
      conf_needs <- truncate_conf_needs(conf_needs[conf_needs > 0L], 16L)
      combs <- generate_combs_1(length(conf_needs))
      left <- sapply(combs, function(x) max_experiments - sum(conf_needs[x]), USE.NAMES=FALSE)
      irace_assert(any(left >= 0), eval_after=save(iraceResults, file="bug-conf_ids.Rdata"))
      # Select the combination that will allow us to evaluate the most configurations.
      combs <- combs[left >= 0]
      n <- sapply(combs, sum, USE.NAMES=FALSE)
      winner <- which.max(n)
      conf_needs <- c(conf_needs_zero, conf_needs[combs[[winner]]])
      conf_ids <- names(conf_needs)
      report_selected(conf_ids, conf_needs)
      irace_assert(length(conf_ids) > 1L, eval_after = {
        print(conf_needs)
        cat("winner: ", winner, "\n")
        cat("combs[[winner]]: ", paste0(collapse=",", combs[[winner]]), "\n")
        save(iraceResults, file="bug-conf_ids.Rdata")
      })
      return(conf_ids)
    }
    conf_ids <- get_confs_for_psrace(iraceResults, iteration_elites, max_experiments,
      conf_ids = conf_ids, rejected_ids = race_state$rejected_ids)
    irace_assert(length(conf_ids) >= 1L, eval_after = {
      # Debug what happened if the assert failed.
      rejected_ids <- race_state$rejected_ids
      cat("blockSize:", scenario$blockSize, "\niteration_elites: ", as.character(iteration_elites),
        "\nrejected: ", paste0(collapse=", ", rejected_ids), "\n")
      allElites <- iraceResults$allElites
      experiments <- iraceResults$experiments
      conf_ids <- if (iteration_elites) unlist(rev(allElites)) else allElites[[length(allElites)]]
      cat("conf_ids1:", paste0(collapse=", ", conf_ids), "\n")
      conf_ids <- unique(c(conf_ids, iraceResults$allConfigurations[[".ID."]]))
      cat("conf_ids2:", paste0(collapse=", ", conf_ids), "\n")
      # NA may be generated if we skipped iterations.
      if (anyNA(conf_ids))
        conf_ids <- conf_ids[!is.na(conf_ids)]
      cat("conf_ids3:", paste0(collapse=", ", conf_ids), "\n")
      # Remove rejected configurations.
      if (length(rejected_ids))
        conf_ids <- setdiff(conf_ids, rejected_ids)
      cat("conf_ids4:", paste0(collapse=", ", conf_ids), "\n")
      conf_needs <- matrixStats::colCounts(experiments[, conf_ids, drop = FALSE], value = NA)
      cat("conf_needs:", paste0(collapse=", ", conf_needs), "\n")
    })
    if (length(conf_ids) == 1L) {
      # If we cannot run post-selection, we just return the elite configurations.
      allElites <- iraceResults$allElites
      return(allElites[[length(allElites)]])
    }

  } else if (length(conf_ids) <= 1L) {
    irace_error ("The number configurations provided should be larger than 1.")
  } else if (length(race_state$rejected_ids) && any(conf_ids %in% race_state$rejected_ids)) {
    irace_error ("Some configuration IDs provided were rejected in the previous run: ",
      paste0(collapse=", ", intersect(conf_ids, race_state$rejected_ids)), ".")
  }
  if (!all(conf_ids %in% iraceResults$allConfigurations[[".ID."]])) {
    irace_error("Some configuration IDs provided cannot be found in the configurations: ",
      paste0(collapse=", ", setdiff(conf_ids, iraceResults$allConfigurations[[".ID."]])), ".")
  }
  elite_configurations <- iraceResults$allConfigurations[iraceResults$allConfigurations[[".ID."]] %in% conf_ids, , drop=FALSE]

  # Generate new instances.
  generateInstances(race_state, scenario, max_experiments / nrow(elite_configurations), update = TRUE)
  elite_data <- iraceResults$experiments[, as.character(elite_configurations[[".ID."]]), drop=FALSE]
  race_state$next_instance <- nrow(elite_data) + 1L

  irace_note("seed: ", race_state$seed,
    "\n# Configurations: ", nrow(elite_configurations),
    "\n# Available experiments: ", max_experiments,
    "\n# minSurvival: 1\n")

  # We do not want to stop because of this limit.
  scenario$elitistLimit <- 0L
  # FIXME: elitist_race should not require setting this, but it currently does.
  scenario$elitist <- TRUE
  raceResults <- elitist_race(race_state,
    maxExp = max_experiments,
    minSurvival = 1L,
    configurations = elite_configurations,
    scenario = scenario,
    elite_data = elite_data,
    elitist_new_instances = 0L,
    # FIXME: This should be nrow(elite_data) + 1L if we have budget to evaluate on new instances.
    firstTest = nrow(elite_data) / scenario$blockSize)

  elite_configurations <- extractElites(raceResults$configurations,
    nbElites = race_state$minSurvival, debugLevel = scenario$debugLevel)
  irace_note("Elite configurations (first number is the configuration ID;",
               " listed from best to worst according to the ",
               test.type.order.str(scenario$testType), "):\n")
  if (!scenario$quiet)
    configurations_print(elite_configurations, metadata = scenario$debugLevel >= 1L)

  if (!is.null(scenario$logFile)) {
    elapsed <- race_state$time_elapsed()
    if (!scenario$quiet)
      cat("# Total CPU user time: ", elapsed["user"], ", CPU sys time: ", elapsed["system"],
        ", Wall-clock time: ", elapsed["wallclock"], "\n", sep="")
    indexIteration <- 1L + max(race_state$experiment_log[["iteration"]])
    # We add indexIteration as an additional column.
    set(raceResults$experiment_log, j = "iteration", value = indexIteration)
    race_state$experiment_log <- rbindlist(list(race_state$experiment_log, raceResults$experiment_log), fill=TRUE, use.names=TRUE)
    # Merge new results.
    iraceResults$experiments <- merge_matrix(iraceResults$experiments, raceResults$experiments)
    iraceResults$iterationElites[indexIteration] <- elite_configurations[[".ID."]][1L]
    iraceResults$allElites[[indexIteration]] <- elite_configurations[[".ID."]]
    iraceResults$scenario <- scenario
    iraceResults$state <- race_state
    # FIXME: This log should contain only information of what was done in the
    # psRace and avoid duplicating info from iraceResults.
    iraceResults$psrace_log <- list(configurations = elite_configurations,
      instances = race_state$instances_log[seq_len(nrow(raceResults$experiments)), , drop = FALSE],
      max_experiments = max_experiments,
      experiments = raceResults$experiments,
      elites = elite_configurations[[".ID."]])
    save_irace_logfile(iraceResults, logfile = scenario$logFile)
  }
  elite_configurations
}