tests/testthat/test_compboost_internal.R

context("Compboost internal")

test_that("Compboost loggs correctly", {

  df = mtcars
  df$hp2 = df[["hp"]]^2

  X_hp = as.matrix(df[["hp"]], ncol = 1)
  X_wt = as.matrix(df[["wt"]], ncol = 1)

  y = df[["mpg"]]
  response = ResponseRegr$new("mpg", as.matrix(y))
  response_oob = ResponseRegr$new("mpg_oog", as.matrix(y))

  expect_silent({ data_source_hp = InMemoryData$new(X_hp, "hp") })
  expect_silent({ data_source_wt = InMemoryData$new(X_wt, "wt") })

  eval_oob_test = list(data_source_hp, data_source_wt)
  learning_rate = 0.05
  iter_max      = 500

  expect_silent({ linear_factory_hp = BaselearnerPolynomial$new(data_source_hp,
    list(degree = 1, intercept = FALSE)) })
  expect_silent({ linear_factory_wt = BaselearnerPolynomial$new(data_source_wt,
    list(degree = 1, intercept = FALSE)) })
  expect_silent({ quadratic_factory_hp = BaselearnerPolynomial$new(data_source_hp,
    list(degree = 2, intercept = FALSE)) })
  expect_silent({ factory_list = BlearnerFactoryList$new() })
  expect_silent({ factory_list$registerFactory(linear_factory_hp) })
  expect_silent({ factory_list$registerFactory(linear_factory_wt) })
  expect_silent({ factory_list$registerFactory(quadratic_factory_hp) })
  expect_silent({ loss_quadratic = LossQuadratic$new() })
  expect_silent({ optimizer = OptimizerCoordinateDescent$new() })
  expect_silent({ log_iterations = LoggerIteration$new(" iterations", TRUE, iter_max) })
  expect_silent({ log_time_ms    = LoggerTime$new("time_microseconds", TRUE, 200000, "microseconds") })
  expect_silent({ log_time_sec   = LoggerTime$new("time_seconds", TRUE, 10, "seconds") })
  expect_silent({ log_time_min   = LoggerTime$new("time_minutes", TRUE, 10, "minutes") })
  expect_silent({ log_inbag      = LoggerInbagRisk$new("inbag_risk", FALSE, loss_quadratic, 0.01, 5) })
  expect_silent({ log_oob        = LoggerOobRisk$new("oob_risk", FALSE, loss_quadratic, 0.01, 5, eval_oob_test, response_oob) })
  expect_silent({ logger_list = LoggerList$new() })
  expect_silent({ logger_list$registerLogger(log_iterations) })
  expect_silent({ logger_list$registerLogger(log_time_ms) })
  expect_silent({ logger_list$registerLogger(log_time_sec) })
  expect_silent({ logger_list$registerLogger(log_time_min) })
  expect_silent({ logger_list$registerLogger(log_inbag) })
  expect_silent({ logger_list$registerLogger(log_oob) })

  expect_output(show(log_inbag))
  expect_output(show(log_oob))

  expect_output(logger_list$printRegisteredLogger())
  expect_silent({
    cboost = Compboost_internal$new(
      response      = response,
      learning_rate = learning_rate,
      stop_if_all_stopper_fulfilled = FALSE,
      factory_list = factory_list,
      loss         = loss_quadratic,
      logger_list  = logger_list,
      optimizer    = optimizer
    )
  })
  expect_output({ cboost$train(trace = 1) })
  expect_silent({ logger_data = cboost$getLoggerData() })
  expect_equal(logger_list$getNumberOfRegisteredLogger(), 6)
  expect_equal(dim(logger_data$logger_data), c(iter_max, logger_list$getNumberOfRegisteredLogger()))
  expect_equal(cboost$getLoggerData()$logger_data[, 1], 1:500)
  expect_equal(cboost$getLoggerData()$logger_data[, 2], cboost$getLoggerData()$logger_data[, 3])

})

test_that("compboost does the same as mboost", {

  df = mtcars
  df$hp2 = df[["hp"]]^2

  X_hp  = as.matrix(df[["hp"]], ncol = 1)
  X_hp2 = as.matrix(df[["hp2"]], ncol = 1)
  X_wt  = as.matrix(df[["wt"]], ncol = 1)

  y = df[["mpg"]]
  response = ResponseRegr$new("mpg", as.matrix(y))

  data_source_hp = expect_silent(InMemoryData$new(X_hp, "hp"))
  data_source_hp2 = expect_silent(InMemoryData$new(X_hp2, "hp2"))
  data_source_wt = expect_silent(InMemoryData$new(X_wt, "wt"))

  eval_oob_test = list(data_source_hp, data_source_wt, data_source_hp2)

  learning_rate = 0.05
  iter_max = 500

  linear_factory_hp = expect_silent(BaselearnerPolynomial$new(data_source_hp, list(degree = 1, intercept = FALSE)))
  linear_factory_wt = expect_silent(BaselearnerPolynomial$new(data_source_wt, list(degree = 1, intercept = FALSE)))
  quadratic_factory_hp = expect_silent(BaselearnerPolynomial$new(data_source_hp2, list(degree = 1, intercept = FALSE)))
  factory_list = expect_silent(BlearnerFactoryList$new())

  # Register factorys:
  expect_silent(factory_list$registerFactory(linear_factory_hp))
  expect_silent(factory_list$registerFactory(linear_factory_wt))
  expect_silent(factory_list$registerFactory(quadratic_factory_hp))

  loss_quadratic = expect_silent(LossQuadratic$new())
  optimizer = expect_silent(OptimizerCoordinateDescent$new())
  log_iterations = expect_silent(LoggerIteration$new(" iterations", TRUE, iter_max))
  log_time = expect_silent(LoggerTime$new("time_ms", FALSE, 500, "microseconds"))
  logger_list = expect_silent(LoggerList$new())
  expect_silent(logger_list$registerLogger(log_iterations))
  expect_silent(logger_list$registerLogger(log_time))
  cboost = expect_silent(Compboost_internal$new(
    response      = response,
    learning_rate = learning_rate,
    stop_if_all_stopper_fulfilled = TRUE,
    factory_list = factory_list,
    loss         = loss_quadratic,
    logger_list  = logger_list,
    optimizer    = optimizer))
  expect_output(cboost$train(trace = 100))
  suppressWarnings({
    library(mboost)

    mod = mboost(
      formula = mpg ~ bols(hp, intercept = FALSE) +
        bols(wt, intercept = FALSE) +
        bols(hp2, intercept = FALSE),
      data    = df,
      control = boost_control(mstop = iter_max, nu = learning_rate)
    )
  })

  # Create vector of selected baselearner:
  # --------------------------------------

  cboost_xselect = match(
    x     = cboost$getSelectedBaselearner(),
    table = c(
      "hp_poly1",
      "wt_poly1",
      "hp2_poly1"
    )
  )
  expect_equal(predict(mod), cboost$getPrediction(FALSE))
  expect_equal(mod$xselect(), cboost_xselect)
  expect_true(all(round(as.vector(unlist(coef(mod))), 4) %in%
    round(as.vector(unlist(cboost$getEstimatedParameter())), 4)
  ))
  expect_equal(cboost$getOffset()[1], attr(coef(mod), "offset"))
  expect_equal(dim(cboost$getLoggerData()$logger_data), c(500, 2))
  expect_equal(cboost$getLoggerData()$logger_data[, 1], 1:500)
  expect_equal(length(cboost$getLoggerData()$logger_data[, 2]), 500)

  # Check if paraemter getter of smaller iteration works:
  suppressWarnings({
    mod_reduced = mboost(
      formula = mpg ~ bols(hp, intercept = FALSE) +
        bols(wt, intercept = FALSE) +
        bols(hp2, intercept = FALSE),
      data    = df,
      control = boost_control(mstop = 200, nu = learning_rate)
    )
  })
  expect_true(all(round(as.vector(unlist(coef(mod_reduced))), 4) %in%
    round(as.vector(unlist(cboost$getParameterAtIteration(200))), 4)
  ))

  idx = 2:4 * 120
  matrix_compare = matrix(NA_real_, nrow = 3, ncol = 3)

  for (i in seq_along(idx)) {
    expect_silent({ matrix_compare[i, ] = unname(unlist(cboost$getParameterAtIteration(idx[i]))) })
  }
  expect_equal(cboost$getParameterMatrix()$parameter_matrix[idx, ], matrix_compare)
  expect_equal(cboost$predict(eval_oob_test, FALSE), predict(mod, df))
  expect_silent(cboost$setToIteration(200, -1))
  expect_equal(cboost$predict(eval_oob_test, FALSE), predict(mod_reduced, df))

  suppressWarnings({
    mod_new = mboost(
      formula = mpg ~ bols(hp, intercept = FALSE) +
        bols(wt, intercept = FALSE) +
        bols(hp2, intercept = FALSE),
      data    = df,
      control = boost_control(mstop = 700, nu = learning_rate)
    )
  })
  expect_output(cboost$setToIteration(700, -1))
  expect_equal(cboost$getPrediction(FALSE), predict(mod_new))
})
schalkdaniel/compboost documentation built on April 15, 2023, 9:03 p.m.