tests/testthat/test_cpo_regrResiduals.R

context("cpoRegrResiduals test")


test_that("cpoRegrResiduals trafo works as expected", {

  residuals = train("regr.lm", bh.task)$learner.model$residuals

  trafo = bh.task %>>% cpoRegrResiduals("regr.lm", crr.train.residuals = "plain")

  expect_equal(getTaskData(trafo, target.extra = TRUE)$target, unname(residuals))

  expect_equal(clearRI(bh.task %>>% retrafo(trafo)), clearRI(trafo))

  set.seed(123)
  preds = predict(train("regr.randomForest", bh.task, 1:100), bh.task)$data$response
  residuals = getTaskData(bh.task, target.extra = TRUE)$target - preds

  set.seed(123)
  trafo = subsetTask(bh.task, 1:100) %>>% cpoRegrResiduals("regr.randomForest", crr.train.residuals = "plain")

  expect_equal(getTaskData(trafo, target.extra = TRUE)$target, residuals[1:100])

  expect_equal(getTaskData(bh.task %>>% retrafo(trafo), target.extra = TRUE)$target, residuals)

})

test_that("cpoRegrResiduals with crr.train.residuals 'oob'", {

  expect_error(cpoRegrResiduals("regr.lm", crr.train.residuals = "oob"), "must support properties 'oobpreds'")

  set.seed(123)
  trafo = bh.task %>>% cpoRegrResiduals("regr.randomForest", crr.train.residuals = "oob")

  set.seed(123)
  rfoob = getOOBPreds(train("regr.randomForest", bh.task), bh.task)$data$response

  expect_equal(getCPOTrainedState(inverter(trafo))$control$response, rfoob)
  expect_equal(getTaskData(trafo, target.extra = TRUE)$target, getTaskData(bh.task, target.extra = TRUE)$target - rfoob)

  set.seed(123)
  trafo = bh.task %>>% cpoRegrResiduals("regr.randomForest", predict.se = TRUE, crr.train.residuals = "oob")

  set.seed(123)
  trafo2 = bh.task %>>% setHyperPars(cpoRegrResiduals("regr.randomForest", predict.se = TRUE), regr.residuals.crr.train.residuals = "oob")

  expect_equal(getCPOTrainedState(inverter(trafo)), getCPOTrainedState(inverter(trafo2)))

  cpo = setHyperPars(cpoRegrResiduals("regr.lm"), regr.residuals.crr.train.residuals = "oob")
  expect_error(bh.task %>>% cpo, "for crr.resampling == 'oob' the Learner needs property 'oobpreds'")

  set.seed(123)
  rfse = predict(train(makeLearner("regr.randomForest", predict.type = "se"), bh.task), bh.task)
  rfoob = getOOBPreds(train("regr.randomForest", bh.task), bh.task)$data$response

  expect_equal(getCPOTrainedState(inverter(trafo))$control$response, rfoob)
  expect_equal(getTaskData(trafo, target.extra = TRUE)$target, getTaskData(bh.task, target.extra = TRUE)$target - rfoob)

  expect_equal(getCPOTrainedState(inverter(trafo))$control$se, rfse$data$se)


  set.seed(123)
  trafo = bh.task %>>% cpoRegrResiduals(makeLearner("regr.randomForest", ntree = 3), crr.train.residuals = "oob")

  set.seed(123)
  resp = predict(train(makeLearner("regr.randomForest", ntree = 3), bh.task), bh.task)$data$response

  set.seed(123)
  rfoob = getOOBPreds(train(makeLearner("regr.randomForest", ntree = 3), bh.task), bh.task)$data$response

  expect_equal(getCPOTrainedState(inverter(trafo))$control$response[is.na(rfoob)], resp[is.na(rfoob)])
  expect_equal(getCPOTrainedState(inverter(trafo))$control$response[!is.na(rfoob)], rfoob[!is.na(rfoob)])

  expect_equal(getTaskData(trafo, target.extra = TRUE)$target + getCPOTrainedState(inverter(trafo))$control$response,
    getTaskData(bh.task, target.extra = TRUE)$target)
})

test_that("cpoRegrResiduals with crr.train.residuals 'resample'", {

  # "response"

  set.seed(123)
  trafo1 = bh.task %>>% cpoRegrResiduals("regr.lm", crr.resampling = hout, crr.train.residuals = "resample")

  cpo = cpoRegrResiduals("regr.lm", crr.train.residuals = "plain", id = NULL)
  set.seed(123)
  trafo2 = bh.task %>>% setHyperPars(cpo, crr.train.residuals = "resample", crr.resampling = hout)

  expect_equal(getCPOTrainedState(inverter(trafo1))$control, getCPOTrainedState(inverter(trafo2))$control)

  set.seed(123)
  rr = holdout("regr.lm", bh.task, show.info = FALSE)

  ctrl = getCPOTrainedState(inverter(trafo1))$control$response

  expect_equal(ctrl[rr$pred$data$id], rr$pred$data$response)
  expect_equal(ctrl[-rr$pred$data$id], predict(train("regr.lm", bh.task), bh.task)$data$response[-rr$pred$data$id])

  expect_equal(getTaskData(trafo1, target.extra = TRUE)$target + ctrl, getTaskData(bh.task, target.extra = TRUE)$target)
  expect_equal(getTaskData(trafo2, target.extra = TRUE)$target + ctrl, getTaskData(bh.task, target.extra = TRUE)$target)

  # "se"

  boots = makeResampleDesc("Bootstrap", iters = 2)
  set.seed(123)
  trafo1 = bh.task %>>% cpoRegrResiduals("regr.lm", crr.resampling = boots, predict.se = TRUE, crr.train.residuals = "resample")

  cpo = cpoRegrResiduals("regr.lm", crr.train.residuals = "plain", id = NULL, predict.se = TRUE)
  set.seed(123)
  trafo2 = bh.task %>>% setHyperPars(cpo, crr.train.residuals = "resample", crr.resampling = boots)

  expect_equal(getCPOTrainedState(inverter(trafo1))$control, getCPOTrainedState(inverter(trafo2))$control)

  set.seed(123)
  rr = resample(makeLearner("regr.lm", predict.type = "se"), bh.task, boots, show.info = FALSE)

  ctrl = getCPOTrainedState(inverter(trafo1))$control$response
  ctrlse = getCPOTrainedState(inverter(trafo1))$control$se

  idtimes = table(rr$pred$data$id)
  double = as.integer(names(idtimes)[idtimes == 2])
  single = as.integer(names(idtimes)[idtimes == 1])
  nocover = setdiff(seq_len(getTaskSize(bh.task)), c(double, single))

  expect_equal(ctrl[single], sapply(seq_along(single), function(i) rr$pred$data$response[rr$pred$data$id == single[i]]))
  expect_equal(ctrlse[single], sapply(seq_along(single), function(i) rr$pred$data$se[rr$pred$data$id == single[i]]))
  expect_equal(ctrl[nocover], predict(train("regr.lm", bh.task), bh.task)$data$response[nocover])
  expect_equal(ctrlse[nocover], predict(train(makeLearner("regr.lm", predict.type = "se"), bh.task), bh.task)$data$se[nocover])

  expect_equal(ctrl[double], sapply(seq_along(double), function(i) {
    res = rr$pred$data$response[rr$pred$data$id == double[i]]
    se = rr$pred$data$se[rr$pred$data$id == double[i]]
    prec = 1 / se^2
    sum(res * prec) / sum(prec)
  }))


  expect_equal(ctrlse[double], sapply(seq_along(double), function(i) {
    se = rr$pred$data$se[rr$pred$data$id == double[i]]
    prec = 1 / se^2
    1 / sqrt(sum(prec) / 2)
  }))

  expect_equal(getTaskData(trafo1, target.extra = TRUE)$target + ctrl, getTaskData(bh.task, target.extra = TRUE)$target)
  expect_equal(getTaskData(trafo2, target.extra = TRUE)$target + ctrl, getTaskData(bh.task, target.extra = TRUE)$target)

})

test_that("cpoRegrResiduals has expected properties and parameters", {

  expect_set_equal(intersect(getLearnerProperties("regr.lm"), cpo.dataproperties),
    intersect(getCPOProperties(cpoRegrResiduals("regr.lm", crr.train.residuals = "plain"))$handling, cpo.dataproperties))


  expect_set_equal(intersect(getLearnerProperties("regr.randomForest"), cpo.dataproperties),
    intersect(getCPOProperties(cpoRegrResiduals("regr.randomForest", crr.train.residuals = "plain"))$handling, cpo.dataproperties))

  delreq = function(ps) {
    ps$pars = lapply(ps$pars, function(x) {
      x$requires = NULL
      x
    })
    ps$pars = dropNamed(ps$pars, c("crr.train.residuals", "crr.resampling"))
    ps
  }

  expect_equal(delreq(getParamSet(makeLearner("regr.lm"))), delreq(getParamSet(cpoRegrResiduals("regr.lm", id = NULL))))

  expect_equal(delreq(getParamSet(makeLearner("regr.randomForest"))), delreq(getParamSet(cpoRegrResiduals("regr.randomForest", id = NULL))))
  expect_equal(delreq(getParamSet(makeLearner("regr.randomForest")))$pars[c("ntree", "se.boot")],
    delreq(getParamSet(cpoRegrResiduals("regr.randomForest", id = NULL, export = c("ntree", "se.boot"))))$pars)

  tlrn = setHyperPars(makeLearner("regr.lm"), tol = .2)
  expect_equal(dropNamed(getHyperPars(cpoRegrResiduals(tlrn, id = NULL, crr.train.residuals = "plain")), c("crr.train.residuals", "crr.resampling")),
    getHyperPars(tlrn))

  cpo = cpoRegrResiduals(setHyperPars(makeLearner("regr.randomForest"), ntree = 100, se.boot = 101), export = "ntree")
  expect_equal(getHyperPars(cpo), list(regr.residuals.ntree = 100))

  expect_equal(getBareHyperPars(cpo), list(ntree = 100, crr.train.residuals = "plain", crr.resampling = cv5, se.boot = 101))

})

test_that("cpoRegrResiduals hyperparameters are used", {

  lrn = setHyperPars(makeLearner("regr.rpart"), minsplit = 3, minbucket = 5, maxdepth = 4)

  crr = cpoRegrResiduals(lrn, crr.train.residuals = "plain", export = c("minsplit", "minbucket"), affect.index = 1:4)

  set.seed(123)
  trafo = bh.task %>>% setHyperPars(crr, regr.residuals.minsplit = 4)

  set.seed(123)
  model = train(makeLearner("regr.rpart", minsplit = 4, minbucket = 5, maxdepth = 4), subsetTask(bh.task, features = 1:4))

  model$time = 0

  ctrl = getCPOTrainedState(retrafo(trafo))$control
  ctrl$time = 0

  expect_identical(getHyperPars(ctrl$learner), getHyperPars(model$learner))  # in particular

  expect_equal(ctrl, model) # in general

})


test_that("cpoRegrResiduals inverts as expected", {

  trafo = bh.task %>>% cpoRegrResiduals("regr.lm", crr.train.residuals = "plain")

  expect_equal(getCPOTrainedCapability(retrafo(trafo)), c(retrafo = 1, invert = -1))

  expect_error(invert(inverter(trafo), 1), "prediction to be inverted has different length from original task used for retrafo")

  expect_equal(invert(inverter(subsetTask(bh.task, 1:10) %>>% retrafo(trafo)), 1:10),
    unname(1:10 - getCPOTrainedState(retrafo(trafo))$control$learner.model$residuals[1:10] + getTaskData(bh.task, target.extra = TRUE)$target[1:10]))

  expect_equal(unname(invert(inverter(subsetTask(bh.task, 1:10) %>>% retrafo(trafo)), getCPOTrainedState(retrafo(trafo))$control$learner.model$residuals[1:10])),
    getTaskData(bh.task, target.extra = TRUE)$target[1:10])


  set.seed(123)
  trafo = subsetTask(bh.task, 1:10) %>>% cpoRegrResiduals("regr.randomForest", crr.train.residuals = "plain")

  retrafo = subsetTask(bh.task, 11:20) %>>% retrafo(trafo)

  expect_equal(invert(inverter(retrafo), getTaskData(retrafo, target.extra = TRUE)$target), getTaskData(bh.task, 11:20, target.extra = TRUE)$target)

  retrafo = getTaskData(bh.task, 21:30, target.extra = TRUE)$data %>>% retrafo(trafo)

  expect_equal(clearRI(retrafo), getTaskData(bh.task, 21:30, target.extra = TRUE)$data)

  set.seed(123)
  prediction = predict(train("regr.randomForest", subsetTask(bh.task, 1:10)), subsetTask(bh.task, 21:30))$data$response

  expect_equal(invert(inverter(retrafo), getTaskData(bh.task, 21:30, target.extra = TRUE)$target - prediction),
    getTaskData(bh.task, 21:30, target.extra = TRUE)$target)

})

test_that("cpoRegrResiduals se inversion", {

  expect_error(setPredictType(makeLearner("regr.rpart"), "se"), "Trying to predict standard errors")

  expect_error(cpoRegrResiduals("regr.rpart", predict.se = TRUE), "must support properties 'se'")

  expect_equal(getCPOPredictType(cpoRegrResiduals("regr.lm")), c(response = "response"))

  expect_equal(getCPOPredictType(cpoRegrResiduals("regr.lm", predict.se = TRUE)), c(response = "response", se = "se"))

  trafo = subsetTask(bh.task, 100:200) %>>% cpoRegrResiduals("regr.lm", predict.se = TRUE, crr.train.residuals = "plain")

  model = train(setPredictType(makeLearner("regr.lm"), "se"), subsetTask(bh.task, 100:200))

  ctrl = getCPOTrainedState(retrafo(trafo))$control
  ctrl$time = 0
  model$time = 0
  expect_equal(ctrl, model)

  expect_equal(predict(model, subsetTask(bh.task, 100:200))$data[c("response", "se")],
    getCPOTrainedState(inverter(trafo))$control)

  expect_equal(predict(model, subsetTask(bh.task, 1:99))$data[c("response", "se")],
    getCPOTrainedState(inverter(subsetTask(bh.task, 1:99) %>>% retrafo(trafo)))$control)

  expect_equal(predict(model, subsetTask(bh.task, 1:99))$data[c("response", "se")],
    getCPOTrainedState(inverter(getTaskData(bh.task, 1:99, target.extra = TRUE)$data %>>% retrafo(trafo)))$control)

  inv = inverter(getTaskData(bh.task, 201:300, target.extra = TRUE)$data %>>% retrafo(trafo))

  expect_equal(getCPOPredictType(inv), c(response = "response", se = "se"))

  prd = predict(model, subsetTask(bh.task, 201:300))$data
  res = getTaskData(bh.task, 201:300, target.extra = TRUE)$target - prd$response
  pse = prd$se
  expect_equal(invert(inv, res), getTaskData(bh.task, 201:300, target.extra = TRUE)$target)

  expect = cbind(getTaskData(bh.task, 201:300, target.extra = TRUE)$target, sqrt(pse^2 + 1))
  expect_equal(unname(invert(inv, cbind(res, 1), predict.type = "se")), expect)  # the main test!

})


test_that("cpoRegrResiduals handles discrete parameters correctly", {

  # mlr apparently has no learner with discrete vector learner params

  lrx = makeRLearnerRegr(cl = "testCpoRegrResiduals", package = character(0),
    par.set = makeParamSet(
        makeDiscreteVectorLearnerParam("dvlp1", len = 2, values = c(a = "b", b = "a", c = "x"), when = "both"),
        makeDiscreteVectorLearnerParam("dvlp2", len = 2, values = c(a = environment(), b = function() NULL), when = "both")),
    name = "testing", properties = c("numerics", "factors", "ordered", "missings"))
  lrx$fix.factors.prediction = FALSE

  trainLearner.testCpoRegrResiduals = function(.learner, .task, .subset, .weights, ...) {
    list(...)
  }

  predictLearner.testCpoRegrResiduals = function(.learner, .model, .newdata, ...) {
    rep(1, nrow(.newdata))
  }

  registerS3method("trainLearner", "testCpoRegrResiduals", trainLearner.testCpoRegrResiduals)
  registerS3method("predictLearner", "testCpoRegrResiduals", predictLearner.testCpoRegrResiduals)

  # test that this learner does what it is supposed to
  expect_equal(train(lrx, bh.task)$learner.model, list())

  expect_equal(train(setHyperPars(lrx, dvlp1 = list("a", "a")), bh.task)$learner.model, list(dvlp1 = list("a", "a")))

  expect_equal(train(setHyperPars(lrx, dvlp1 = list("a", "b"), dvlp2 = list(a = function() NULL, b = function() NULL)), bh.task)$learner.model,
    list(dvlp1 = list("a", "b"), dvlp2 = list(a = function() NULL, b = function() NULL)))

  expect_equal(predict(train(lrx, bh.task), bh.task)$data$response, rep(1, getTaskSize(bh.task)))


  # test that the dvlp are given to the crr as they should
  crr = cpoRegrResiduals(setHyperPars(lrx, dvlp1 = list("a", "x"), dvlp2 = list(a = function() NULL, b = function() NULL)), crr.train.residuals = "plain")

  x = bh.task %>>% crr

  expect_equal(getTaskData(x, target.extra = TRUE)$target, getTaskData(bh.task, target.extra = TRUE)$target - 1)

  expect_equal(getCPOTrainedState(retrafo(x))$control$learner.model, list(dvlp1 = list("a", "x"), dvlp2 = list(a = function() NULL, b = function() NULL)))

  expect_equal(getCPOTrainedState(bh.task %>|% setHyperPars(crr, regr.residuals.dvlp1 = list("b", "a")))$control$learner.model,
    list(dvlp1 = list("b", "a"), dvlp2 = list(a = function() NULL, b = function() NULL)))

  expect_equal(getCPOTrainedState(bh.task %>|% setHyperPars(crr, regr.residuals.dvlp1 = list("x", "x")))$control$learner.model,
    list(dvlp1 = list("x", "x"), dvlp2 = list(a = function() NULL, b = function() NULL)))

  expect_equal(getCPOTrainedState(bh.task %>|% setHyperPars(crr, regr.residuals.dvlp2 = list(environment(), function() NULL)))$control$learner.model,
    list(dvlp1 = list("a", "x"), dvlp2 = list(environment(), function() NULL)))

})
mlr-org/mlrCPO documentation built on Nov. 18, 2022, 11:25 p.m.