tests/testthat/helper_helpers.R

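# Helper functions shared by the testthat unit tests: package skipping,
# converting e1071 tuning results into mlr resample instances, and comparing
# mlr predictions and resampling results against reference values computed
# directly with the underlying packages.

# Load the given packages, skipping the current test if any of them are not installed.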
requirePackagesOrSkip = function(packs, default.method = "attach") {
  ok = requirePackages(packs, why = "unit test", stop = FALSE, suppress.warnings = TRUE, default.method = default.method)
  if (any(!ok)) {
    skip(sprintf("Required packages not installed: %s", collapse(names(ok)[!ok])))
  }
  invisible(TRUE)
}

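# Convert the train indices of an e1071::tune() cross-validation result into an
# mlr "CV" ResampleInstance with identical train/test splits.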
e1071CVToMlrCV = function(e1071.tune.result) {

  tr = e1071.tune.result
  inds = tr$train.ind
  size = max(unlist(inds))
  folds = length(inds)

  d = makeResampleDesc("CV", iters = folds)
  cv.instance = makeResampleInstance(d, size = size)

  for (i in 1:folds) {
    cv.instance$train.inds[[i]] = inds[[i]]
    cv.instance$test.inds[[i]] = setdiff(1:size, inds[[i]])
  }
  return(cv.instance)
}


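# Convert the train indices of an e1071::tune() bootstrap result into an
# mlr "Bootstrap" ResampleInstance with identical train/test splits.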
e1071BootstrapToMlrBootstrap = function(e1071.tune.result) {

  tr = e1071.tune.result
  inds = tr$train.ind

  size = length(inds[[1]])
  iters = length(inds)

  d = makeResampleDesc("Bootstrap", iters = iters)
  bs.instance = makeResampleInstance(d, size = size)

  for (i in 1:iters) {
    bs.instance$train.inds[[i]] = inds[[i]]
    bs.instance$test.inds[[i]] = setdiff(1:size, inds[[i]])
  }
  return(bs.instance)
}


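# Train learner `t.name` with parameters `parset` on rows `train.inds` of `df`
# (the task type is guessed from the target column(s)), predict on the remaining
# rows, and compare the predictions with `old.predicts`, the reference values
# obtained by calling the underlying package directly.
# Example usage (illustrative only; `rpart.preds` stands for predictions
# computed directly with rpart::rpart() on the same split):
#   testSimple("classif.rpart", iris, "Species", train.inds = seq(1, 150, 2),
#     old.predicts = rpart.preds, parset = list(minsplit = 10))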
testSimple = function(t.name, df, target, train.inds, old.predicts, parset = list()) {

  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  set.seed(getOption("mlr.debug.seed"))
  lrn = do.call("makeLearner", c(list(t.name), parset))
  # FIXME this heuristic will backfire eventually
  if (length(target) == 0) {
    task = makeClusterTask(data = df)
  } else if (is.numeric(df[, target])) {
    task = makeRegrTask(data = df, target = target)
  } else if (is.factor(df[, target])) {
    task = makeClassifTask(data = df, target = target)
  } else if (is.data.frame(df[, target]) && is.numeric(df[, target[1L]]) && is.logical(df[, target[2L]])) {
    task = makeSurvTask(data = df, target = target)
  } else if (is.data.frame(df[, target]) && is.logical(df[, target[1L]])) {
    task = makeMultilabelTask(data = df, target = target)
  } else {
    stop("Should not happen!")
  }
  m = train(lrn, task, subset = inds)

  if (inherits(m, "FailureModel")) {
    expect_s3_class(old.predicts, "try-error")
  } else {
    cp = predict(m, newdata = test)
    # Multilabel has a special data structure
    if (class(task)[1] == "MultilabelTask") {
      rownames(cp$data) = NULL
      expect_equal(unname(cp$data[, substr(colnames(cp$data), 1, 8) == "response"]), unname(old.predicts))
    } else {
      # to avoid issues with dropped levels in the class factor we only check the elements as chars
      if (is.numeric(cp$data$response) && is.numeric(old.predicts)) {
        if (lrn$predict.type == "se") {
          expect_equal(unname(cbind(cp$data$response, cp$data$se)), unname(old.predicts), tolerance = 1e-5)
        } else {
          expect_equal(unname(cp$data$response), unname(old.predicts), tolerance = 1e-5)
        }
      } else {
        expect_equal(as.character(cp$data$response), as.character(old.predicts))
      }
    }
  }
}


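# Run testSimple once for each parameter set in `parset.list`, paired with the
# corresponding element of `old.predicts.list`.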
testSimpleParsets = function(t.name, df, target, train.inds, old.predicts.list, parset.list) {
  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  for (i in seq_along(parset.list)) {
    parset = parset.list[[i]]
    old.predicts = old.predicts.list[[i]]
    testSimple(t.name, df, target, train.inds, old.predicts, parset)
  }
}


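# Like testSimple, but compare predicted probabilities (predict.type = "prob")
# with the reference values in `old.probs`; names and dimnames are dropped
# before the comparison.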
testProb = function(t.name, df, target, train.inds, old.probs, parset = list()) {

  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  if (length(target) == 1) {
    task = makeClassifTask(data = df, target = target)
  } else {
    task = makeMultilabelTask(data = df, target = target)
  }
  set.seed(getOption("mlr.debug.seed"))
  lrn = do.call("makeLearner", c(t.name, parset, predict.type = "prob"))
  m = try(train(lrn, task, subset = inds))

  if (inherits(m, "FailureModel")) {
    expect_s3_class(old.probs, "try-error")
  } else {
    cp = predict(m, newdata = test)
    # no names needed for a numeric vector (2-class case)
    if (is.numeric(old.probs)) {
      names(old.probs) = NULL
    } else {
      old.probs = as.matrix(old.probs)
    }

    p = getPredictionProbabilities(cp)
    if (is.data.frame(p)) {
      p = as.matrix(p)
    }
    # names may differ slightly between implementations, so don't check them
    colnames(p) = colnames(old.probs) = NULL
    rownames(p) = rownames(old.probs) = NULL
    class(old.probs) = NULL
    expect_equal(p, old.probs, tolerance = 1e-6)
  }
}


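# Same as testProb, but with a user-supplied comparison tolerance.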
testProbWithTol = function(t.name, df, target, train.inds, old.probs, parset = list(),
  tolerance = 1e-04) {

  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  if (length(target) == 1) {
    task = makeClassifTask(data = df, target = target)
  } else {
    task = makeMultilabelTask(data = df, target = target)
  }
  lrn = do.call("makeLearner", c(t.name, parset, predict.type = "prob"))
  m = try(train(lrn, task, subset = inds))

  if (inherits(m, "FailureModel")) {
    expect_s3_class(old.probs, "try-error")
  } else {
    cp = predict(m, newdata = test)
    # no names needed for a numeric vector (2-class case)
    if (is.numeric(old.probs)) {
      names(old.probs) = NULL
    } else {
      old.probs = as.matrix(old.probs)
    }

    p = getPredictionProbabilities(cp)
    if (is.data.frame(p)) {
      p = as.matrix(p)
    }
    # names may differ slightly between implementations, so don't check them
    colnames(p) = colnames(old.probs) = NULL
    rownames(p) = rownames(old.probs) = NULL
    class(old.probs) = NULL
    expect_equal(p, old.probs, tolerance = tolerance)
  }
}


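# Run testProb once for each parameter set in `parset.list`.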
testProbParsets = function(t.name, df, target, train.inds, old.probs.list, parset.list) {
  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  for (i in seq_along(parset.list)) {
    parset = parset.list[[i]]
    old.probs = old.probs.list[[i]]
    testProb(t.name, df, target, train.inds, old.probs, parset)
  }
}


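# Run testProbWithTol once for each parameter set in `parset.list`.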
testProbParsetsWithTol = function(t.name, df, target, train.inds, old.probs.list, parset.list,
  tolerance = 1e-04) {
  inds = train.inds
  train = df[inds, ]
  test = df[-inds, ]

  for (i in seq_along(parset.list)) {
    parset = parset.list[[i]]
    old.probs = old.probs.list[[i]]
    testProbWithTol(t.name, df, target, train.inds, old.probs, parset, tolerance = tolerance)
  }
}

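# Cross-validate learner `t.name` with mlr on exactly the same folds as an
# e1071::tune() run that uses `tune.train`/`tune.predict`, and check that the
# mean and sd of mmce (classification) or mse (regression) match e1071's
# reported performance values.
# Example usage (illustrative only; for a classifier the predict function must
# return class labels):
#   testCV("classif.rpart", iris, "Species", folds = 2, parset = list(),
#     tune.train = rpart::rpart,
#     tune.predict = function(m, newdata) predict(m, newdata, type = "class"))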
testCV = function(t.name, df, target, folds = 2, parset = list(),
  tune.train, tune.predict = predict) {

  requirePackages("e1071", default.method = "load")
  data = df
  formula = formula(paste(target, "~."))

  tt = function(formula, data, subset = seq_len(nrow(data)), ...) {
    pars = list(formula = formula, data = data[subset, ])
    pars = c(pars, parset)
    set.seed(getOption("mlr.debug.seed"))
    capture.output({
      m = do.call(tune.train, pars)
    })
    return(m)
  }

  tp = function(model, newdata) {
    set.seed(getOption("mlr.debug.seed"))
    p = tune.predict(model, newdata)
    return(p)
  }

  tr = e1071::tune(
    method = tt, predict.func = tp, train.x = formula,
    data = data,
    tunecontrol = e1071::tune.control(cross = folds, best.model = FALSE))

  cv.instance = e1071CVToMlrCV(tr)
  lrn = do.call("makeLearner", c(t.name, parset))
  if (is.numeric(df[, target])) {
    task = makeRegrTask(data = df, target = target)
  } else if (is.factor(df[, target])) {
    task = makeClassifTask(data = df, target = target)
  }
  ms = resample(lrn, task, cv.instance)$measures.test
  if (inherits(task, "ClassifTask")) {
    expect_equal(mean(ms[, "mmce"]), tr$performances[1, 2])
    expect_equal(sd(ms[, "mmce"]), tr$performances[1, 3])
  } else {
    expect_equal(mean(ms[, "mse"]), tr$performances[1, 2])
    expect_equal(sd(ms[, "mse"]), tr$performances[1, 3])
  }
  invisible(TRUE)
}

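# Run testCV once for each parameter set in `parset.list`.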
testCVParsets = function(t.name, df, target, folds = 2, tune.train,
  tune.predict = predict, parset.list) {
  for (i in seq_along(parset.list)) {
    parset = parset.list[[i]]
    testCV(t.name, df, target, folds, parset, tune.train, tune.predict)
  }
}

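# Bootstrap-resample learner `t.name` with mlr on exactly the same bootstrap
# samples as an e1071::tune() run and compare the mean and sd of mmce/mse
# against e1071's reported performance values.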
testBootstrap = function(t.name, df, target, iters = 3, parset = list(), tune.train, tune.predict = predict) {

  requirePackages("e1071", default.method = "load")
  data = df
  formula = formula(paste(target, "~."))
  tr = e1071::tune(
    method = tune.train, predict.func = tune.predict, train.x = formula, data = data,
    tunecontrol = e1071::tune.control(sampling = "bootstrap", nboot = iters, boot.size = 1))

  bs.instance = e1071BootstrapToMlrBootstrap(tr)
  lrn = do.call("makeLearner", c(t.name, parset))

  if (is.numeric(df[, target])) {
    task = makeRegrTask(data = df, target = target)
  } else if (is.factor(df[, target])) {
    task = makeClassifTask(data = df, target = target)
  }
  ms = resample(lrn, task, bs.instance)$measures.test
  if (inherits(task, "ClassifTask")) {
    expect_equal(mean(ms[, "mmce"]), tr$performances[1, 2])
    expect_equal(sd(ms[, "mmce"]), tr$performances[1, 3])
  } else {
    expect_equal(mean(ms[, "mse"]), tr$performances[1, 2])
    expect_equal(sd(ms[, "mse"]), tr$performances[1, 3])
  }
}


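# listLearners() wrapper that drops mock learners and the h2o-based
# classification/regression learners from the result.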
listLearnersCustom = function(..., create = FALSE) {
  lrns = listLearners(..., create = create)
  if (create) {
    ids = BBmisc::extractSubList(lrns, "id")
    return(lrns[!grepl("mock", ids) & !grepl("^(classif|regr).h2o", ids)])
  } else {
    ids = lrns$class
    return(lrns[!grepl("mock", ids) & !grepl("^(classif|regr).h2o", ids), ])
  }
}

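# Check the nrow/ncol facetting parameters of a ggplot object.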
testFacetting = function(obj, nrow = NULL, ncol = NULL) {
  expect_equal(obj$facet$params$nrow, nrow)
  expect_equal(obj$facet$params$ncol, ncol)
}

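# Check that an SVG document (parsed with the XML package) contains the expected
# number of text nodes for each string in `x`; `ns.svg` is assumed to be a
# namespace definition provided elsewhere in the test helpers.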
testDocForStrings = function(doc, x, grid.size = 1L, ordered = FALSE) {
  text.paths = paste("/svg:svg//svg:text[text()[contains(., '",
    x, "')]]",
    sep = "")
  nodes = XML::getNodeSet(doc, text.paths, ns.svg)
  expect_equal(length(nodes), length(x) * grid.size)
  if (ordered) {
    node.strings = vcapply(nodes, XML::getChildrenStrings)
    expect_equal(node.strings[seq_along(x)], x)
  }
}

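# Run resample() but replace all aggregated performance values with the constant 0.5.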
constant05Resample = function(...) {
  res = resample(...)
  res$aggr = rep(0.5, length(res$aggr))
  res
}

# evaluate expr without printing its output.
quiet = function(expr) {
  capture.output({
    ret = expr
  })
  ret
}