tests/testthat/helper_testhelpers.R

# Auxiliary functions for checking automlr: creating dummy learners and tasks,
# checking that the correct learners are present and that they get the
# expected parameters.

# TODO: put 'all = TRUE' in all expect_warning and expect_error

library("checkmate")

configureMlr()  # set defaults
configureMlr(show.learner.output = TRUE, on.learner.error = "warn")

DODEBUG$TEST = TRUE

# Create a data frame with the given numbers of features. Factor and ordered
# features take values sampled from the first nClasses letters.
createTestData = function(nrow, nNumeric = 0, nFactor = 0, nOrdered = 0,
    nClasses = 2) {
  res = as.data.frame(c(
                replicate(nNumeric, rnorm(nrow), FALSE),
                replicate(nFactor,
                    factor(sample(letters[1:nClasses], nrow, TRUE),
                        ordered = FALSE),
                    FALSE),
                replicate(nOrdered,
                    factor(sample(letters[1:nClasses], nrow, TRUE),
                        ordered = TRUE),
                    FALSE)))
  names(res) = c(
      if (nNumeric) paste0("num.", seq_len(nNumeric)),
      if (nFactor) paste0("fac.", seq_len(nFactor)),
      if (nOrdered) paste0("ord.", seq_len(nOrdered)))
  res
}
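
# Illustrative example (not evaluated when this helper is sourced):
#   createTestData(6, nNumeric = 1, nFactor = 2, nClasses = 3)
# gives a 6-row data.frame with columns num.1, fac.1, fac.2, where the factor
# columns contain values sampled from "a", "b", "c".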

# Create a classification task with the given number of features.
# Possibly add NAs.
createTestClassifTask = function(id, nrow, nNumeric = 0, nFactor = 0,
    nOrdered = 0, nClasses = 2, missings = FALSE, ...) {
  data = createTestData(nrow, nNumeric, nFactor + 1, nOrdered, nClasses)
  target = paste0("fac.", nFactor + 1)
  mrow = (seq_len(nrow) %% 3 == 0) & missings
  data[mrow, colnames(data) != target] = NA
  makeClassifTask(id, data, target, ...)
}

# Create a regression task with the given number of features.
# Possibly add NAs.
createTestRegrTask = function(id, nrow, nNumeric = 0, nFactor = 0, nOrdered = 0,
    nClasses = 2, missings = FALSE, ...) {
  data = createTestData(nrow, nNumeric + 1, nFactor, nOrdered, nClasses)
  target = paste0("num.", nNumeric + 1)
  mrow = (seq_len(nrow) %% 3 == 0) & missings
  data[mrow, colnames(data) != target] = NA
  makeRegrTask(id, data, target, ...)
}
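
# Illustrative example (not evaluated here):
#   createTestClassifTask("toy", 30, nNumeric = 2, missings = TRUE)
# creates a twoclass ClassifTask with features num.1 and num.2, target fac.1,
# and the features of every third row set to NA.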

# human readable string representation of a (possibly nested) list
debuglist = function(l, prefix = "") {
  res = ""
  if (length(l) && checkmate::testNamed(l)) {
    for (n in sort(names(l))) {
      res = paste0(res, paste0(prefix, n, ": "))
      if (is.list(l[[n]])) {
        res = paste0(res, "\n", debuglist(l[[n]], paste0(prefix, "+")))
      } else {
        res = paste0(res, paste(l[[n]], collapse = ", "), "\n")
      }
    }
  } else {
    for (i in seq_along(l)) {
      showname = names(l)[i]
      if (is.null(showname) || is.na(showname) || showname == "") {
        showname = i
      }
      res = paste0(res, paste0(prefix, showname, ": "))
      if (is.list(l[[i]])) {
        res = paste0(res, "\n", debuglist(l[[i]], paste0(prefix, "+")))
      } else {
        res = paste0(res, paste(l[[i]], collapse = ", "), "\n")
      }
    }
  }
  res
}
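
# Illustrative example of the format produced by debuglist:
#   cat(debuglist(list(b = 1:2, a = list(x = "u"))))
# prints roughly (names sorted, nested lists prefixed with "+"):
#   a:
#   +x: u
#   b: 1, 2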

# print a list, and build the corresponding output string expected by testthat
debuglistout = function(l) cat("###\n", debuglist(l), "###\n")
expectout = function(l) paste("###\n", debuglist(l), "###")

# train and predict the learner on the task.
# test that the learner reports the params in 'trainps' during training
# and the params in 'predps' during prediction
expect_learner_output = function(learner, task, name, trainps = list(),
    predps = list(), ...) {
  oldopts = getMlrOptions()
  configureMlr(show.learner.output = TRUE)
  wrapperflags = c(".remove.factors", ".remove.ordered", ".remove.NA",
      ".convert.fac2num", ".convert.ord2fac", ".convert.ord2num")
  wrapperArgs = list(...)
  # emulate wrapper output to get the expected string
  wrapperoutput = function(which) capture.output(
      for (w in names(wrapperArgs)) {
        catf("wrapper %s %s", w, which)
        if (which == "train") {
          debuglistout(wrapperArgs[[w]])
        }
      })
  expectedlout = expectout(c(list(myname = name), trainps))
  expect_output({
    catf("BEGINNING")
    model <- train(learner, task)
    catf("END")
  }, paste(c("BEGINNING", wrapperoutput("train"), expectedlout, "END"), collapse = "\n"),
  fixed = TRUE)
  expectedpout = expectout(c(list(myname = name), predps))
  expect_output({
    catf("BEGINNING")
    predict(model, task)
    catf("END")
  }, paste(c("BEGINNING", wrapperoutput("predict"), expectedpout, "END"), collapse = "\n"),
  fixed = TRUE)
  do.call(configureMlr, oldopts)
}
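
# For example, with name = "l1" and trainps = list(alpha = 1), the training
# output must contain the block produced by
# expectout(list(myname = "l1", alpha = 1)), i.e. roughly:
#   ###
#   alpha: 1
#   myname: l1
#   ###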

# Create a new learner in the global namespace with the given param set and
# properties. The created learner prints the parameters it receives and
# predicts randomly (by sampling from the training targets).
# isClassif == FALSE -> regr
testLearner = function(name, parset, properties, isClassif = TRUE, ...) {
  constructor = if (isClassif) makeRLearnerClassif else makeRLearnerRegr
  ret = constructor(name, character(0), parset, properties = properties, ...)
  if (isClassif) {
    ret$fix.factors.prediction = TRUE
  }
  pf = globalenv()

  assign(paste0("trainLearner.", name), envir = pf,
      value = function (.learner, .task, .subset, .weights = NULL, ...) {
    debuglistout(list(myname = name, ...))
    list(data = getTaskData(.task, .subset), target = getTaskTargetNames(.task))
  })
  assign(paste0("predictLearner.", name), envir = pf,
      value = function (.learner, .model, .newdata, ...) {
    debuglistout(list(myname = name, ...))
    sample(.model$learner.model$data[[.model$learner.model$target]],
        nrow(.newdata), replace = TRUE)
  })
  ret
}
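
# Illustrative example (not evaluated here; learner name and parameter are
# arbitrary):
#   lrn = testLearner("testlearner.1", makeParamSet(
#       makeNumericLearnerParam("alpha", lower = 0, upper = 1)),
#       properties = c("numerics", "twoclass"))
#   expect_learner_output(setHyperPars(lrn, alpha = 0.5),
#       createTestClassifTask("t", 30, nNumeric = 2), "testlearner.1",
#       trainps = list(alpha = 0.5))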


optionalProps = c("numerics", "factors", "ordered",
    "missings", "twoclass", "multiclass")

# all nonempty combinations of the optional properties
allOPs = do.call(c, lapply(seq_along(optionalProps),
        function(i) combn(optionalProps, i, simplify = FALSE)))

# one test learner for each property combination
names(allOPs) = as.character(seq_along(allOPs))
propertyLearners = mapply(testLearner, name = names(allOPs),
    properties = allOPs, MoreArgs = list(parset = makeParamSet()),
    SIMPLIFY = FALSE)
# these learners as automlr input objects
autolearnersBASIC = lapply(propertyLearners, autolearner)

# which learners to expect to be present:
# mustBeHandledList is a list of conditions, at least one of which must hold.
# A condition holds if all of its listed properties are contained in x (the
# candidate learner's properties).
defaultExpFun2 = function(mustBeHandledList)
  function(x) {  # yes we curry
    possible = sapply(mustBeHandledList, function(mustBeHandled) {
          # all things that must be handled are present
          all(mustBeHandled %in% x)
        })
    any(possible)  # at least one type of column can be processed
  }
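
# For example, defaultExpFun2(list("numerics", c("factors", "missings")))
# returns a function that is TRUE for a learner with properties
# c("numerics", "twoclass") (the numeric columns can be handled), but FALSE
# for one with properties c("factors", "twoclass") (neither condition holds).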

# check that the learners with properties that make `expfun` return TRUE are
# present in the automlr learner.
checkLearnersPresent = function(autolearnersPL, task, propertiesExpected,
    optionalProperties = character(0),
    expfun = defaultExpFun2(propertiesExpected), debugOut = FALSE) {
  expectedLearners = names(allOPs)[sapply(allOPs, expfun)]
  ps = getParamSet(buildLearners(autolearnersPL, task))
  presentLearners = unlist(ps$pars$selected.learner$values)
  if (debugOut) {
    catf("Expected Learners: %s", paste(expectedLearners, collapse = ", "))
    catf("Present Learners: %s", paste(presentLearners, collapse = ", "))
  }
  expect_set_equal(presentLearners, expectedLearners)
}

# create a classification task that requires learners to have the given
# properties.
createTestWithProperties = function(properties) {
  assert(any(c("twoclass", "multiclass") %in% properties))
  nClasses = 2 + ("multiclass" %in% properties)
  createTestClassifTask("t", 200,
                        nNumeric = as.numeric("numerics" %in% properties),
                        nFactor = as.numeric("factors" %in% properties),
                        nOrdered = as.numeric("ordered" %in% properties),
                        nClasses = nClasses,
                        missings = "missings" %in% properties)
}
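
# For example, createTestWithProperties(c("multiclass", "factors", "missings"))
# gives a 200-row three-class task with one factor feature and NAs in every
# third row.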

# check that for any combination of numerics, factors, ordered, missings, and
# for either twoclass or multiclass, the learners included by automlr conform
# to the required learners for the tasks created by createTestWithProperties.
checkWrapperEffectEx = function(autolearnersPL, transformation = list,
    debugOut = FALSE, ...) {
  testprops = c("numerics", "factors", "ordered")
  for (classness in c("twoclass", "multiclass")) {
    for (numprops in seq_along(testprops)) {
      for (chosenprops in combn(testprops, numprops, simplify = FALSE)) {
        for (doMissings in c(FALSE, TRUE)) {
          totalprops = c(classness, if (doMissings) "missings", chosenprops)
          testTask = createTestWithProperties(totalprops)
          if (debugOut) {
            print(totalprops)
          }
          checkLearnersPresent(autolearnersPL, testTask,
              transformation(totalprops), debugOut = debugOut, ...)
        }
      }
    }
  }
}

# shorthand for building learners for pid.task
bl = function(...) {
  buildLearners(list(...), pid.task, verbosity = 5)
}

blt = function(learners, task) {
  buildLearners(learners, task, verbosity = 5)
}



# check that x is feasible in the param set when all parameters not listed in
# x are set to NA, i.e. every parameter that may not be missing must be given
# in x.
isFeasibleNoneMissing = function(par, x) {
  nalist = rep(NA, length(par$pars))
  names(nalist) = getParamIds(par)
  isFeasible(par, as.list(insert(nalist, x)))
}
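
# Illustrative example (hypothetical param set; results depend on
# ParamHelpers' handling of NA for inactive parameters):
#   ps = makeParamSet(
#       makeLogicalParam("use.x"),
#       makeNumericParam("x", lower = 0, upper = 1,
#           requires = quote(use.x == TRUE)))
#   isFeasibleNoneMissing(ps, list(use.x = FALSE))  # TRUE: 'x' inactive, stays NA
#   isFeasibleNoneMissing(ps, list(use.x = TRUE))   # FALSE: 'x' active but missing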

# check that every parameter listed in x is feasible; parameters whose
# requirements are not satisfied must be NA.
isFeasibleMissingPossible = function(ps, x) {
  assertSubset(names(x), getParamIds(ps))
  for (n in names(x)) {
    par = ps$pars[[n]]
    val = x[[n]]
    if ((!ParamHelpers:::requiresOk(par, x) && !isScalarNA(val)) ||
      !isFeasible(par, val)) {
      return(FALSE)
    }
  }
  TRUE
}

# shorthand for getting list of parameters
getpars = function(learner) getParamSet(learner)$pars

# check that all parameters are set, and that the learner does what is expected.
checkLearnerBehaviour = function(learner, task, params, ...) {
  expect_true(isFeasibleNoneMissing(getParamSet(learner), params))
#predict(train(setHyperPars(learner, par.vals = params), task), task)
  expect_learner_output(setHyperPars(learner, par.vals = params), task, ...)
}
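
# Example usage (illustrative), with 'lrn' as in the testLearner example above:
#   checkLearnerBehaviour(lrn, createTestClassifTask("t", 30, nNumeric = 2),
#       list(alpha = 0.5), "testlearner.1", trainps = list(alpha = 0.5))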

# check that the data actually seen by the wrapped test learner during
# training equals 'testdata', up to column order.
checkLearnerData = function(learner, params, data, testdata) {
  capture.output(m <- train(setHyperPars(learner, par.vals = params), data))
  datacmp = m$learner.model$learner.model$next.model$learner.model$data
  testdata = unname(sort(sapply(testdata, collapse)))
  datacmp = unname(sort(sapply(datacmp, collapse)))
  expect_set_equal(testdata, datacmp)
}

# wrap an autolearner's CPO so that it announces itself and prints its
# hyperparameters during training and prediction (matching the wrapper output
# emulated in expect_learner_output)
pWW = function(al) {
  origcpo = al$learner$cpo
  al$learner$cpo = makeCPO(paste0("wrapped.", origcpo$bare.name),
    .par.set = getParamSet(origcpo), .par.vals = getHyperPars(origcpo),
    .datasplit = "task", cpo.trafo = function(data, target, ...) {
      catf("wrapper %s train", al$learner$name)
      debuglistout(list(...)[extractSubList(al$searchspace, "name")])
      data = data %>>% setHyperPars(origcpo, par.vals = list(...))
      control = retrafo(data)
      retrafo(data) = NULL
      data
    }, cpo.retrafo = function(data, control, ...) {
      catf("wrapper %s predict", al$learner$name)
      data %>>% control
    })()
  al$learner$cpo$properties = origcpo$properties
  al$learner$cpo$properties$properties.data = al$learner$cpo$properties$properties
  al
}