Design for custom hyperparameter search and tuning metrics

1. New helper: createHyperparameterSettings()

Add a file R/HyperparameterSettings.R:

  #' Create settings for hyperparameter search
  #'
  #' @param search Search strategy: one of "grid", "random", or "custom".
  #' @param tuningMetric Optional metric object created by
  #'   \code{createTuningMetric()}; \code{NULL} uses the model default.
  #' @param sampleSize Number of candidates to draw when
  #'   \code{search = "random"}; must be a single number >= 1.
  #' @param randomSeed Optional single numeric seed making a random search
  #'   reproducible.
  #' @param searchControl A list of extra options passed to the search.
  #' @param generator Optional custom candidate generator: either a function
  #'   producing a candidate pool, or a list-like object with
  #'   \code{initialize}/\code{next} methods. Supplying one forces
  #'   \code{search = "custom"}.
  #' @return An object of class \code{hyperparameterSettings}.
  createHyperparameterSettings <- function(
      search = "grid",
      tuningMetric = NULL,
      sampleSize = NULL,
      randomSeed = NULL,
      searchControl = list(),
      generator = NULL) {
    # match.arg() errors with a message listing the valid choices, which is
    # friendlier than a bare stopifnot() on %in%.
    search <- match.arg(search, c("grid", "random", "custom"))
    if (identical(search, "random")) {
      # A random search is meaningless without a positive sample size.
      stopifnot(
        length(sampleSize) == 1,
        is.numeric(sampleSize),
        sampleSize >= 1
      )
    }
    if (!is.null(randomSeed)) {
      stopifnot(length(randomSeed) == 1, is.numeric(randomSeed))
    }
    stopifnot(is.list(searchControl))
    if (!is.null(generator)) {
      # A generator may be a plain function (produces the whole pool) or a
      # list-like object with initialize/next methods driving an adaptive
      # search (see prepareHyperparameterGrid()); either way it implies the
      # custom strategy.
      if (!is.function(generator) && !is.list(generator)) {
        stop("generator must be a function or a list-like generator object.",
             call. = FALSE)
      }
      search <- "custom"
    }

    structure(
      list(
        search = search,
        tuningMetric = tuningMetric,
        sampleSize = sampleSize,
        randomSeed = randomSeed,
        searchControl = searchControl,
        generator = generator
      ),
      class = "hyperparameterSettings"
    )
  }

Document the function with roxygen2 tags and run roxygen2::roxygenize() to regenerate the NAMESPACE and man pages.

———

2. Extend createModelDesign()

In R/RunMultiplePlp.R:

  # Extend createModelDesign() with a hyperparameterSettings argument that
  # defaults to the standard grid search, so existing callers are unaffected.
  createModelDesign <- function(
      ...,
      hyperparameterSettings = createHyperparameterSettings(),
      runCovariateSummary = TRUE) {
    ...
    # Store the settings on the design object so the runPlp pipeline can
    # retrieve them later when fitting the model.
    settings$hyperparameterSettings <- hyperparameterSettings
    class(settings) <- "modelDesign"
    settings
  }

———

3. Propagate through runPlp pipeline

  # Pull the settings from the model design, falling back to the default
  # grid search for designs created before this feature existed.
  hyperparameterSettings <- modelDesign$hyperparameterSettings %||% createHyperparameterSettings()
  # Bundle everything the fitting step needs into one settings list.
  settings <- list(
    trainData = data$Train,
    modelSettings = modelSettings,
    hyperparameterSettings = hyperparameterSettings,
    analysisId = analysisId,
    analysisPath = analysisPath
  )

Note: `x %||% y` evaluates to `x` unless `x` is NULL, in which case it evaluates to `y`; here it falls back to createHyperparameterSettings() when the model design carries no hyperparameter settings.

# Example: supplying the settings directly to runPlp().
runPlp(
    ...
    modelSettings = setLassoLogisticRegression(),
    hyperparameterSettings = createHyperparameterSettings(),
    ...
)
Update docs to describe `hyperparameterSettings`
  # fitPlp() gains a hyperparameterSettings argument; the legacy `search`
  # argument is retained for backward compatibility (see section 7).
  fitPlp <- function(trainData,
                     modelSettings,
                     hyperparameterSettings = createHyperparameterSettings(),
                     search = "grid",
                     analysisId,
                     analysisPath) {
    ...
    # Forward the settings to the model-specific fit function unchanged.
    args <- list(
      trainData = trainData,
      modelSettings = modelSettings,
      hyperparameterSettings = hyperparameterSettings,
      analysisId = analysisId,
      analysisPath = analysisPath
    )

(Deprecate search if you like, but keep it for compatibility and let it override hyperparameterSettings$search when non-default.)

———

4. Update classifier fitters

Each classifier with CV (e.g., fitSklearn, fitRclassifier, fitGradientBoostingMachine, fitLightGBM) gains a hyperparameterSettings argument.

Within these functions, replace the hard-coded enumeration of the parameter grid with the iterator produced by prepareHyperparameterGrid() (section 5), and resolve the tuning metric from hyperparameterSettings$tuningMetric.

———

5. Helpers for search preparation

Example code: prepareHyperparameterGrid() returns an iterator object with `next`/`finalize` methods so that every search path looks the same:

    # Build an iterator over hyperparameter candidates. The returned object
    # has two methods:
    #   `next`(history)   -> the next candidate (a list) or NULL when done
    #   finalize(history) -> optional cleanup after the search completes
    # NOTE: `next` is a reserved word in R, so it must always be written
    # and accessed with backticks.
    prepareHyperparameterGrid <- function(paramDefinition, hyperSettings, modelName = NULL) {
      # Explicit NULL check instead of %||% so this helper has no extra
      # dependencies.
      settings <- if (is.null(hyperSettings)) {
        createHyperparameterSettings()
      } else {
        hyperSettings
      }

      # Wrap a fixed pool of candidates in the iterator interface.
      makeSequentialIterator <- function(pool) {
        i <- 0L
        list(
          `next` = function(history) {
            i <<- i + 1L
            if (i > length(pool)) {
              return(NULL)
            }
            pool[[i]]
          },
          finalize = function(history) invisible(NULL)
        )
      }

      # No tunable parameters: iterate over a single empty candidate so the
      # calling code needs no special case.
      if (is.null(paramDefinition)) {
        return(makeSequentialIterator(list(list())))
      }

      expanded <- expandParamDefinition(paramDefinition)

      if (identical(settings$search, "grid")) {
        return(makeSequentialIterator(expanded))
      }

      if (identical(settings$search, "random")) {
        # Apply the user-supplied seed (if any) so the subsample is
        # reproducible.
        if (!is.null(settings$randomSeed)) {
          set.seed(settings$randomSeed)
        }
        idx <- sample.int(
          length(expanded),
          size = min(settings$sampleSize, length(expanded))
        )
        return(makeSequentialIterator(expanded[idx]))
      }

      if (identical(settings$search, "custom")) {
        generator <- settings$generator
        if (is.function(generator)) {
          # A plain function produces the whole candidate pool up front.
          pool <- generator(
            definition = paramDefinition,
            expanded = expanded,
            settings = settings
          )
          return(makeSequentialIterator(pool))
        }
        # Otherwise the generator is an object driving an adaptive search:
        # it is initialized once and then queried candidate by candidate.
        generator$initialize(
          definition = paramDefinition,
          settings = settings
        )
        return(list(
          `next` = function(history) generator$`next`(history),
          finalize = function(history) {
            finalizeFn <- generator$finalize
            if (is.null(finalizeFn)) {
              finalizeFn <- function(...) invisible(NULL)
            }
            finalizeFn(history)
          }
        ))
      }

      stop(sprintf("Unknown hyper-parameter search strategy '%s'.", settings$search))
    }
    # Drive the search: request candidates until the iterator is exhausted,
    # recording every evaluation in `history` so adaptive generators can
    # condition on past results.
    iterator <- prepareHyperparameterGrid(
      paramDefinition,
      hyperSettings,
      modelName
    )
    history <- list()

    repeat {
      # `next` is a reserved word in R, so the iterator method must be
      # accessed with backticks.
      candidate <- iterator$`next`(history)
      if (is.null(candidate)) break

      perf <- evaluateCandidate(candidate, data, metric)
      history[[length(history) + 1L]] <- list(param = candidate, performance = perf)
    }

    iterator$finalize(history)
    best <- selectBest(history, metric)
Grid, random, and adaptive custom searches all plug into this structure by returning an iterator that maintains its own state inside the closure or generator object.

———

6. Modify CV routines

```R
# Score every evaluated candidate with the configured tuning metric.
evaluated <- lapply(grid, function(entry) {
  computeGridPerformance(
    prediction = entry$prediction,
    param = entry$param,
    metric = metric
  )
})

# `[[` must be backtick-quoted when passed as a function to vapply().
cvPerformance <- vapply(evaluated, `[[`, numeric(1), "cvPerformance")
whichBest <- if (metric$maximize) {
  which.max(cvPerformance)
} else {
  which.min(cvPerformance)
}
```

  # Compute the tuning metric for one hyperparameter candidate, both on the
  # pooled cross-validation predictions and per fold.
  computeGridPerformance <- function(prediction, param, metric) {
    # Overall CV performance on the pooled out-of-fold predictions.
    perf <- metric$fun(prediction)
    # Per-fold performance; prediction$index identifies the CV fold
    # (assumed from the subsetting pattern — confirm against the CV code).
    folds <- vapply(unique(prediction$index), function(foldId) {
      metric$fun(prediction[prediction$index == foldId, ])
    }, numeric(1))

    ...
    list(
      metric = metric$label,
      cvPerformance = perf,
      cvPerformancePerFold = folds,
      param = param,
      hyperSummary = ...
    )
  }

For metrics needing thresholds, the supplied function can capture the threshold:

  # The 0.6 threshold is captured by the closure, so no extra arguments need
  # to be threaded through the CV machinery.
  createTuningMetric(
    fun = function(prediction) {
      pred <- ifelse(prediction$value >= 0.6, 1, 0)
      mean(pred == (prediction$outcomeCount > 0))
    },
    maximize = TRUE,
    label = "Accuracy@0.6"
  )

Because the function itself closes over 0.6, no extra plumbing is needed.

———

7. Backward compatibility

  # Let an explicitly supplied legacy `search` argument override the value
  # carried inside hyperparameterSettings.
  if (!missing(search)) {
    hyperparameterSettings$search <- search
  }

This means existing code that calls fitPlp(..., search = "grid") keeps working.

———

8. Usage example

  # Custom tuning metric: classification accuracy at a 0.55 threshold.
  metric <- createTuningMetric(
    fun = function(prediction) mean(ifelse(prediction$value >= 0.55, 1, 0) == (prediction$outcomeCount > 0)),
    maximize = TRUE,
    label = "Accuracy@0.55"
  )

  # Random search over 10 sampled candidates, reproducible via the seed.
  hyperSettings <- createHyperparameterSettings(
    search = "random",
    tuningMetric = metric,
    sampleSize = 10,
    randomSeed = 42
  )

  # Attach the settings to a model design and run the study as usual.
  modelDesign <- createModelDesign(
    targetId = 1,
    outcomeId = 2,
    modelSettings = setAdaBoost(),
    hyperparameterSettings = hyperSettings
  )

  result <- runMultiplePlp(databaseDetails = databaseDetails, modelDesignList = list(modelDesign))

———

9. Tests & docs

Add unit tests covering each search strategy (grid, random with a fixed seed, and a custom generator) as well as a custom tuning metric, and update the vignettes and function documentation accordingly.

Try the PatientLevelPrediction package in your browser

Any scripts or data that you put into this service are public.

PatientLevelPrediction documentation built on March 9, 2026, 5:07 p.m.