# tests/testthat/test-unsupervised.R

# Fits a PCA model on the four numeric iris columns and checks the model
# class, the recorded paradigm, the components stored on the fit, and the
# column layout of the table returned by predict().
test_that("PCA models work", {
  model <- tl_model(iris[, 1:4], method = "pca")

  expect_s3_class(model, "tidylearn_pca")
  expect_equal(model$spec$paradigm, "unsupervised")

  # Check PCA components
  expect_true("scores" %in% names(model$fit))
  expect_true("loadings" %in% names(model$fit))
  expect_true("variance_explained" %in% names(model$fit))

  # Transform data
  transformed <- predict(model)
  expect_s3_class(transformed, "tbl_df")

  # Anchor the pattern so only names that *start* with "PC" count as component
  # columns; this matches the "^PC" pattern used by the component-count test.
  col_names <- names(transformed)
  expect_true(all(grepl("^PC", col_names) | col_names == ".obs_id"))
})

# Fits k-means with k = 3 on the numeric iris columns, then checks the model
# class and the cluster-assignment table stored on the fit.
test_that("K-means clustering works", {
  km_model <- tl_model(iris[, 1:4], method = "kmeans", k = 3)
  expect_s3_class(km_model, "tidylearn_kmeans")

  # The fit must expose a "clusters" table with as many rows as the input
  fit_names <- names(km_model$fit)
  expect_true(is.element("clusters", fit_names))

  assignments <- km_model$fit$clusters
  expect_equal(nrow(assignments), nrow(iris))
  expect_true(is.element("cluster", names(assignments)))

  # Every assigned label has to be one of 1, 2 or 3
  labels_ok <- assignments$cluster %in% 1:3
  expect_true(all(labels_ok))
})

# Fits PAM with k = 3 on the numeric iris columns; skipped when the
# "cluster" package is not installed.
test_that("PAM (K-medoids) clustering works", {
  skip_if_not_installed("cluster")

  pam_model <- tl_model(iris[, 1:4], method = "pam", k = 3)
  expect_s3_class(pam_model, "tidylearn_pam")

  # The assignment table has as many rows as the input, labels within 1..3
  assignments <- pam_model$fit$clusters
  expect_equal(nrow(assignments), nrow(iris))

  labels_ok <- assignments$cluster %in% 1:3
  expect_true(all(labels_ok))
})

# Fits CLARA with k = 3 and 5 samples on a dataset built by stacking the
# numeric iris columns ten times; skipped when the "cluster" package is not
# installed.
test_that("CLARA clustering works", {
  skip_if_not_installed("cluster")

  # Create larger dataset for CLARA. seq_len() is used instead of 1:nrow()
  # so the row index stays well-formed even for a zero-row source.
  large_data <- iris[rep(seq_len(nrow(iris)), 10), 1:4]

  model <- tl_model(large_data, method = "clara", k = 3, samples = 5)

  expect_s3_class(model, "tidylearn_clara")

  # Check cluster assignments: the table has as many rows as the input
  clusters <- model$fit$clusters
  expect_equal(nrow(clusters), nrow(large_data))
})

# Fits hierarchical clustering on the numeric iris columns and checks that
# the fit stores an object of class "hclust" under "model".
test_that("Hierarchical clustering works", {
  hc_model <- tl_model(iris[, 1:4], method = "hclust")
  expect_s3_class(hc_model, "tidylearn_hclust")

  # The underlying hclust object must be present on the fit
  fit_names <- names(hc_model$fit)
  expect_true(is.element("model", fit_names))

  tree <- hc_model$fit$model
  expect_s3_class(tree, "hclust")
})

# Fits DBSCAN (eps = 0.5, minPts = 5) on the numeric iris columns; skipped
# when the "dbscan" package is not installed.
test_that("DBSCAN clustering works", {
  skip_if_not_installed("dbscan")

  db_model <- tl_model(
    iris[, 1:4],
    method = "dbscan",
    eps = 0.5,
    minPts = 5
  )
  expect_s3_class(db_model, "tidylearn_dbscan")

  # Assignment table (noise points are expected to be labelled 0): it has as
  # many rows as the input and carries a "cluster" column
  assignments <- db_model$fit$clusters
  expect_equal(nrow(assignments), nrow(iris))
  expect_true(is.element("cluster", names(assignments)))
})

# Fits MDS with k = 2 on the numeric iris columns and checks that the fit
# stores a "points" element with as many rows as the input.
test_that("MDS works", {
  mds_model <- tl_model(iris[, 1:4], method = "mds", k = 2)
  expect_s3_class(mds_model, "tidylearn_mds")

  # The embedded coordinates live under "points"
  fit_names <- names(mds_model$fit)
  expect_true(is.element("points", fit_names))

  coords <- mds_model$fit$points
  expect_equal(nrow(coords), nrow(iris))
})

# Trains k-means on the first 100 iris rows and checks that predict() on the
# remaining 50 rows returns one row per new observation with a "cluster"
# column.
test_that("clustering models predict on new data", {
  # Fit on rows 1..100
  km_model <- tl_model(iris[1:100, 1:4], method = "kmeans", k = 3)

  # Held-out rows 101..150 serve as new data
  holdout <- iris[101:150, 1:4]
  assigned <- predict(km_model, new_data = holdout)

  expect_equal(nrow(assigned), nrow(holdout))
  expect_true(is.element("cluster", names(assigned)))
})

# Checks that, with default settings, predict() on a PCA model of the four
# numeric iris columns yields four columns whose names start with "PC".
test_that("PCA retains specified number of components", {
  pca_model <- tl_model(iris[, 1:4], method = "pca")

  # With no component count given, all four components are expected
  projected <- predict(pca_model)
  is_pc_col <- grepl("^PC", names(projected))
  n_components <- sum(is_pc_col)

  expect_equal(n_components, 4)
})

# Checks that k-means fitting succeeds on both a 20-row subset of iris and a
# dataset built by stacking the numeric iris columns five times.
test_that("unsupervised methods handle different data sizes", {
  # Small dataset
  small_data <- iris[1:20, 1:4]
  model_small <- tl_model(small_data, method = "kmeans", k = 2)
  expect_s3_class(model_small, "tidylearn_kmeans")

  # Large dataset. seq_len() is used instead of 1:nrow() so the row index
  # stays well-formed even for a zero-row source.
  large_data <- iris[rep(seq_len(nrow(iris)), 5), 1:4]
  model_large <- tl_model(large_data, method = "kmeans", k = 3)
  expect_s3_class(model_large, "tidylearn_kmeans")
})

# Checks that requesting more clusters than there are rows raises an error,
# while a valid k still produces a k-means model.
test_that("clustering validates k parameter", {
  # One more cluster than observations must be rejected
  expect_error(tl_model(iris[, 1:4], method = "kmeans", k = nrow(iris) + 1))

  # A sensible k fits without error and yields the expected class
  valid_model <- tl_model(iris[, 1:4], method = "kmeans", k = 3)
  expect_s3_class(valid_model, "tidylearn_kmeans")
})

# Checks that tl_model() accepts a one-sided formula to select variables,
# for both PCA and k-means.
test_that("unsupervised methods work with formula", {
  # PCA on all four measurements, selected via formula
  pca_model <- tl_model(
    iris,
    ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
    method = "pca"
  )
  expect_s3_class(pca_model, "tidylearn_pca")

  # k-means on the two sepal measurements, selected via formula
  km_model <- tl_model(
    iris,
    ~ Sepal.Length + Sepal.Width,
    method = "kmeans",
    k = 3
  )
  expect_s3_class(km_model, "tidylearn_kmeans")
})

# Try the tidylearn package in your browser
#
# Any scripts or data that you put into this service are public.
#
# tidylearn documentation built on Feb. 6, 2026, 5:07 p.m.