tests/testthat/test-cubist.R

# Tests for R/cubist.R

# --- cubist.default() tests ---

test_that("cubist works with basic numeric predictors", {
  data <- new_cubist_data(n = 100, p = 5)
  mod <- cubist(data$x, data$y)

  expect_s3_class(mod, "cubist")
  expect_equal(mod$dims, c(100L, 5L))
  expect_equal(mod$committees, 1)
  expect_true(!is.null(mod$model))
  expect_true(!is.null(mod$output))
})

test_that("cubist works with factor predictors", {
  set.seed(8361)
  x <- data.frame(
    num = rnorm(50),
    fac = factor(sample(letters[1:3], 50, replace = TRUE))
  )
  y <- ifelse(x$fac == "a", 1, 2) + x$num + rnorm(50, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")
})

test_that("cubist works with character predictors (converted to factors)", {
  set.seed(8361)
  x <- data.frame(
    num = rnorm(50),
    chr = sample(c("cat", "dog"), 50, replace = TRUE),
    stringsAsFactors = FALSE
  )
  y <- ifelse(x$chr == "cat", 1, 2) + x$num + rnorm(50, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")
})

test_that("cubist works with ordered factor predictors", {
  set.seed(8361)
  x <- data.frame(
    num = rnorm(50),
    ord = ordered(
      sample(c("low", "med", "high"), 50, replace = TRUE),
      levels = c("low", "med", "high")
    )
  )
  y <- as.numeric(x$ord) + x$num + rnorm(50, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")
})

test_that("cubist works with mixed predictor types", {
  x <- new_mixed_data(n = 100)
  y <- x$num + as.numeric(x$fac) + rnorm(100, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")
  expect_equal(mod$dims[2], 4L)
})

test_that("cubist converts tibble input to data.frame", {
  skip_if_not_installed("dplyr")

  data <- new_cubist_data(n = 50, p = 3)
  x_tbl <- dplyr::as_tibble(data$x)

  mod <- cubist(x_tbl, data$y)
  expect_s3_class(mod, "cubist")
})

test_that("cubist works with case weights", {
  data <- new_cubist_data(n = 100, p = 3)
  weights <- runif(100, 0.5, 2)

  mod <- cubist(data$x, data$y, weights = weights)
  expect_s3_class(mod, "cubist")
  expect_true(mod$caseWeights)
})

test_that("cubist handles reserved name 'sample' in predictors", {
  data <- new_sample_name_data(n = 50)

  mod <- cubist(data$x, data$y)
  expect_s3_class(mod, "cubist")
  # The variable 'sample' should appear in the usage/output
  expect_true("sample" %in% mod$vars$all)
})

test_that("cubist converts __Sample back to sample in output", {
  # Create data with a column starting with 'sample'
  set.seed(8361)
  x <- data.frame(
    sample_var = rnorm(100),
    other = rnorm(100)
  )
  y <- x$sample_var + x$other + rnorm(100, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")

  # Check that output contains 'sample' not '__Sample'
  # The conversion should happen
  expect_true(
    grepl("sample", mod$output, ignore.case = TRUE) ||
      !grepl("__Sample", mod$output)
  )
})

test_that("cubist works with single predictor", {
  set.seed(8361)
  x <- data.frame(single = rnorm(50))
  y <- x$single * 2 + rnorm(50, sd = 0.1)

  mod <- cubist(x, y)
  expect_s3_class(mod, "cubist")
  expect_equal(mod$dims[2], 1L)
})

test_that("cubist works with many predictors", {
  data <- new_cubist_data(n = 100, p = 20)

  mod <- cubist(data$x, data$y)
  expect_s3_class(mod, "cubist")
  expect_equal(mod$dims[2], 20L)
})

test_that("cubist stores usage statistics for unused variables", {
  data <- new_cubist_data(n = 100, p = 10)

  mod <- cubist(data$x, data$y)

  # All variables should be in usage, even if not used

  expect_all_true(names(data$x) %in% mod$usage$Variable)
})

# --- Error handling tests ---

test_that("cubist errors with non-numeric outcome", {
  data <- new_cubist_data(n = 50, p = 3)

  expect_snapshot(
    cubist(data$x, factor(data$y > 0)),
    error = TRUE
  )
})

test_that("cubist errors with committees outside 1-100", {
  data <- new_cubist_data(n = 50, p = 3)

  expect_snapshot(
    cubist(data$x, data$y, committees = 0),
    error = TRUE
  )
  expect_snapshot(
    cubist(data$x, data$y, committees = 101),
    error = TRUE
  )
})

test_that("cubist errors when x is not data.frame or matrix", {
  data <- new_cubist_data(n = 50, p = 3)

  expect_snapshot(
    cubist(as.list(data$x), data$y),
    error = TRUE
  )
})

test_that("cubist errors with non-numeric weights", {
  data <- new_cubist_data(n = 50, p = 3)

  expect_snapshot(
    cubist(data$x, data$y, weights = rep("a", 50)),
    error = TRUE
  )
})

test_that("cubist errors with missing column names", {
  data <- new_cubist_data(n = 50, p = 3)
  x_no_names <- as.matrix(data$x)
  colnames(x_no_names) <- NULL

  expect_snapshot(
    cubist(x_no_names, data$y),
    error = TRUE
  )
})

# --- cubistControl() tests ---

test_that("cubistControl returns correct default values", {
  ctrl <- cubistControl()

  expect_false(ctrl$unbiased)
  expect_equal(ctrl$rules, 100)
  expect_equal(ctrl$extrapolation, 1) # 100/100

  expect_equal(ctrl$sample, 0)
  expect_equal(ctrl$label, "outcome")
  expect_true(ctrl$seed >= 0 && ctrl$seed < 4096)
})

test_that("cubistControl with unbiased = TRUE", {
  ctrl <- cubistControl(unbiased = TRUE)
  expect_true(ctrl$unbiased)
})

test_that("cubistControl accepts valid rules parameter", {
  ctrl <- cubistControl(rules = 50)
  expect_equal(ctrl$rules, 50)

  ctrl_na <- cubistControl(rules = NA)
  expect_true(is.na(ctrl_na$rules))
})

test_that("cubistControl converts extrapolation to fraction", {
  ctrl <- cubistControl(extrapolation = 50)
  expect_equal(ctrl$extrapolation, 0.5)
})

test_that("cubistControl converts sample to fraction", {
  ctrl <- cubistControl(sample = 50)
  expect_equal(ctrl$sample, 0.5)
})

test_that("cubistControl applies modulo 4096 to seed", {
  ctrl <- cubistControl(seed = 5000)
  expect_equal(ctrl$seed, 5000 %% 4096)
})

test_that("cubistControl accepts custom label", {
  ctrl <- cubistControl(label = "my_outcome")
  expect_equal(ctrl$label, "my_outcome")
})

test_that("cubistControl errors with rules outside 1-1000000", {
  expect_snapshot(cubistControl(rules = 0), error = TRUE)
  expect_snapshot(cubistControl(rules = 1000001), error = TRUE)
})

test_that("cubistControl errors with extrapolation outside 0-100", {
  expect_snapshot(cubistControl(extrapolation = -1), error = TRUE)
  expect_snapshot(cubistControl(extrapolation = 101), error = TRUE)
})

test_that("cubistControl errors with sample outside 0-99.9", {
  expect_snapshot(cubistControl(sample = -1), error = TRUE)
  expect_snapshot(cubistControl(sample = 100), error = TRUE)
})

# --- print.cubist() tests ---

test_that("print.cubist displays single committee correctly", {
  data <- new_cubist_data(n = 100, p = 5)
  mod <- cubist(data$x, data$y, committees = 1)

  output <- capture.output(print(mod))
  expect_true(any(grepl("Number of committees: 1", output)))
  expect_true(any(grepl("Number of rules:", output)))
})

test_that("print.cubist displays case weights message", {
  data <- new_cubist_data(n = 100, p = 3)
  weights <- runif(100, 0.5, 2)
  mod <- cubist(data$x, data$y, weights = weights)

  output <- capture.output(print(mod))
  expect_true(any(grepl("Case weights used", output)))
})

test_that("print.cubist displays unbiased option", {
  data <- new_cubist_data(n = 100, p = 3)
  mod <- cubist(data$x, data$y, control = cubistControl(unbiased = TRUE))

  output <- capture.output(print(mod))
  expect_true(any(grepl("unbiased rules", output)))
})

test_that("print.cubist displays extrapolation option", {
  data <- new_cubist_data(n = 100, p = 3)
  mod <- cubist(data$x, data$y, control = cubistControl(extrapolation = 50))

  output <- capture.output(print(mod))
  expect_true(any(grepl("extrapolation", output)))
})

test_that("print.cubist displays sample option", {
  data <- new_cubist_data(n = 100, p = 3)
  mod <- cubist(data$x, data$y, control = cubistControl(sample = 50))

  output <- capture.output(print(mod))
  expect_true(any(grepl("sub-sampling", output)))
})

# --- summary.cubist() and print.summary.cubist() tests ---

test_that("summary.cubist returns correct class", {
  data <- new_cubist_data(n = 100, p = 5)
  mod <- cubist(data$x, data$y)

  summ <- summary(mod)
  expect_s3_class(summ, "summary.cubist")
  expect_true(!is.null(summ$output))
  expect_true(!is.null(summ$call))
})

test_that("print.summary.cubist outputs correctly", {
  data <- new_cubist_data(n = 100, p = 5)
  mod <- cubist(data$x, data$y)
  summ <- summary(mod)

  output <- capture.output(print(summ))
  expect_true(length(output) > 0)
  expect_true(any(grepl("Call:", output)))
})

test_that("print.summary.cubist returns invisible(x)", {
  data <- new_cubist_data(n = 100, p = 5)
  mod <- cubist(data$x, data$y)
  summ <- summary(mod)

  # Capture output to prevent printing during tests
  capture.output(result <- withVisible(print(summ)))
  expect_false(result$visible)
  expect_s3_class(result$value, "summary.cubist")
})

# --- truncateText() tests ---

test_that("truncateText handles short text (no truncation needed)", {
  short_text <- "short"
  result <- Cubist:::truncateText(short_text)
  expect_equal(result, short_text)
})

test_that("truncateText wraps long text", {
  # Create a very long text that exceeds typical width
  long_text <- paste(rep("word", 50), collapse = " ")
  result <- Cubist:::truncateText(long_text)

  # Result should contain newlines if text was wrapped
  if (nchar(long_text) > getOption("width")) {
    expect_true(grepl("\n", result))
  }
})

test_that("truncateText handles multi-element input", {
  # Pass a vector of strings
  input <- c("first part", "second part")
  result <- Cubist:::truncateText(input)

  # Should be collapsed into single string
  expect_type(result, "character")
  expect_length(result, 1)
})

test_that("truncateText handles very long single line", {
  # Create text that definitely exceeds width
  old_width <- options(width = 40)

  on.exit(options(old_width))

  long_text <- paste(rep("longword", 20), collapse = " ")
  result <- Cubist:::truncateText(long_text)

  # Should wrap to multiple lines
  expect_true(grepl("\n", result))
})

# --- check_names() tests ---

test_that("check_names passes with column names", {
  df <- data.frame(a = 1:3, b = 4:6)
  expect_silent(Cubist:::check_names(df))
})

test_that("check_names errors without column names", {
  mat <- matrix(1:6, ncol = 2)
  colnames(mat) <- NULL

  expect_snapshot(Cubist:::check_names(mat), error = TRUE)
})

# --- check_date_columns() tests ---

test_that("check_date_columns passes with no date columns", {
  df <- data.frame(a = 1:3, b = 4:6)
  expect_silent(Cubist:::check_date_columns(df))
})

test_that("check_date_columns errors with Date column", {
  df <- data.frame(
    date_col = as.Date("2020-01-01") + 1:3,
    num = 1:3
  )

  expect_snapshot(Cubist:::check_date_columns(df), error = TRUE)
})

test_that("check_date_columns errors with POSIXct column", {
  df <- data.frame(
    datetime_col = as.POSIXct("2020-01-01") + 1:3,
    num = 1:3
  )

  expect_snapshot(Cubist:::check_date_columns(df), error = TRUE)
})

test_that("check_date_columns errors with POSIXlt column", {
  df <- data.frame(num = 1:3)
  df$datetime_col <- as.POSIXlt("2020-01-01") + 1:3

  expect_snapshot(Cubist:::check_date_columns(df), error = TRUE)
})

Try the Cubist package in your browser

Any scripts or data that you put into this service are public.

Cubist documentation built on March 3, 2026, 1:06 a.m.