# Nothing
# PCA reduction with a response column: the result is a list holding the
# transformed data and the reduction model, and the response is carried over.
test_that("tl_reduce_dimensions works with PCA", {
  reduced <- tl_reduce_dimensions(
    iris,
    response = "Species",
    method = "pca",
    n_components = 3
  )

  expect_type(reduced, "list")
  top_level <- names(reduced)
  expect_true(is.element("data", top_level))
  expect_true(is.element("reduction_model", top_level))

  # At least one column of the transformed data mentions "PC"
  data_cols <- names(reduced$data)
  expect_true(any(grepl("PC", data_cols)))

  # The response column is still present and unchanged
  expect_true(is.element("Species", data_cols))
  expect_equal(reduced$data$Species, iris$Species)

  # Number of columns named PC<digits> equals the requested component count
  n_pc <- sum(grepl("^PC\\d+$", data_cols))
  expect_equal(n_pc, 3)
})
# PCA on the four numeric iris columns, with no response argument supplied.
test_that("tl_reduce_dimensions works without response", {
  numeric_only <- iris[, 1:4]
  reduced <- tl_reduce_dimensions(
    numeric_only,
    method = "pca",
    n_components = 2
  )

  expect_type(reduced, "list")
  expect_true(is.element("data", names(reduced)))

  # Count columns whose name starts with "PC"; at least two are expected
  n_pc <- length(grep("^PC", names(reduced$data)))
  expect_gte(n_pc, 2)
})
# Adding k-means cluster features to iris: a cluster column appears, the
# original columns are kept, and every cluster column is a factor.
test_that("tl_add_cluster_features adds cluster columns", {
  data_with_clusters <- tl_add_cluster_features(
    iris,
    response = "Species",
    method = "kmeans",
    k = 3
  )

  # Should have at least one cluster column
  cluster_cols <- grep("cluster_", names(data_with_clusters), value = TRUE)
  expect_gt(length(cluster_cols), 0)

  # Original columns should be preserved
  expect_true(all(names(iris) %in% names(data_with_clusters)))

  # Each cluster column should be a factor. Checking one name at a time
  # matters: `[[` with a character vector of length > 1 indexes recursively
  # and would error instead of testing the columns.
  for (cluster_col in cluster_cols) {
    expect_s3_class(data_with_clusters[[cluster_col]], "factor")
  }
})
# Each clustering method should add a column named after the method.
test_that("tl_add_cluster_features works with different clustering methods", {
  # K-means: expects a "cluster_kmeans" column
  with_kmeans <- tl_add_cluster_features(
    iris,
    response = "Species",
    method = "kmeans",
    k = 3
  )
  expect_true(is.element("cluster_kmeans", names(with_kmeans)))

  # PAM: only exercised when the "cluster" package is installed
  skip_if_not_installed("cluster")
  with_pam <- tl_add_cluster_features(
    iris,
    response = "Species",
    method = "pam",
    k = 3
  )
  expect_true(is.element("cluster_pam", names(with_pam)))
})
# Semi-supervised fit on iris with 15 of the 150 rows treated as labeled.
test_that("tl_semisupervised performs label propagation", {
  # Seeded draw of the labeled row indices (10% of the data)
  set.seed(123)
  known_rows <- sample(nrow(iris), size = 15)

  fit <- tl_semisupervised(
    iris,
    Species ~ .,
    labeled_indices = known_rows,
    cluster_method = "kmeans",
    supervised_method = "logistic"
  )

  # The fitted object carries both classes
  expect_s3_class(fit, "tidylearn_semisupervised")
  expect_s3_class(fit, "tidylearn_supervised")

  # The semi-supervised metadata records the labeled indices that were passed
  expect_true(is.element("semisupervised_info", names(fit)))
  expect_equal(fit$semisupervised_info$labeled_indices, known_rows)

  # Predicting without new data yields one row per iris row
  fitted_preds <- predict(fit)
  expect_equal(nrow(fitted_preds), nrow(iris))
})
# Anomaly-aware modelling with DBSCAN, exercised for both the "flag" and
# the "remove" action. Skipped when the "dbscan" package is unavailable.
test_that("tl_anomaly_aware detects and handles outliers", {
  skip_if_not_installed("dbscan")

  # action = "flag": the action is recorded in the anomaly info
  flagged <- tl_anomaly_aware(
    iris,
    Species ~ .,
    response = "Species",
    anomaly_method = "dbscan",
    action = "flag",
    supervised_method = "logistic"
  )
  expect_s3_class(flagged, "tidylearn_anomaly_aware")
  expect_true(is.element("anomaly_info", names(flagged)))
  expect_equal(flagged$anomaly_info$action, "flag")

  # action = "remove": the result exposes an "anomalies_removed" element
  removed <- tl_anomaly_aware(
    iris,
    Species ~ .,
    response = "Species",
    anomaly_method = "dbscan",
    action = "remove",
    supervised_method = "logistic"
  )
  expect_s3_class(removed, "tidylearn_anomaly_aware")
  expect_true(is.element("anomalies_removed", names(removed)))
})
# Stratified modelling on mtcars: cluster with k-means, then fit one linear
# model per cluster.
test_that("tl_stratified_models creates cluster-specific models", {
  # k-means starts from random centers; seed the RNG so the partition (and
  # therefore the per-cluster fits) is the same on every run.
  set.seed(123)
  models <- tl_stratified_models(
    mtcars,
    mpg ~ .,
    cluster_method = "kmeans",
    k = 3,
    supervised_method = "linear"
  )

  expect_s3_class(models, "tidylearn_stratified")
  expect_true("cluster_model" %in% names(models))
  expect_true("supervised_models" %in% names(models))

  # Between one and k supervised models are expected
  expect_gte(length(models$supervised_models), 1)
  expect_lte(length(models$supervised_models), 3)
})
# Prediction from a stratified model, on the training data and on new rows.
test_that("predict.tidylearn_stratified assigns to clusters and predicts", {
  # k-means starts from random centers; seed the RNG so the partition (and
  # therefore the per-cluster fits) is the same on every run.
  set.seed(123)
  models <- tl_stratified_models(
    mtcars,
    mpg ~ .,
    cluster_method = "kmeans",
    k = 2,
    supervised_method = "linear"
  )

  # Predict on training data: one row per observation, with the prediction
  # and the assigned cluster as columns
  preds <- predict(models)
  expect_equal(nrow(preds), nrow(mtcars))
  expect_true(".pred" %in% names(preds))
  expect_true(".cluster" %in% names(preds))

  # Predict on new data: one row per supplied observation
  preds_new <- predict(models, new_data = mtcars[1:10, ])
  expect_equal(nrow(preds_new), 10)
})
# Both helpers must raise an error when the named response column is absent.
test_that("integration functions validate inputs", {
  # Pattern the error message is matched against in both cases
  missing_response <- "Response variable.*not found"

  expect_error(
    tl_reduce_dimensions(iris, response = "InvalidColumn", method = "pca"),
    regexp = missing_response
  )

  expect_error(
    tl_add_cluster_features(
      iris,
      response = "InvalidColumn",
      method = "kmeans",
      k = 3
    ),
    regexp = missing_response
  )
})
# Workflow check: PCA-reduced iris data feeds straight into tl_model().
test_that("reduced data can be used for supervised learning", {
  # Step 1: reduce to three components, keeping the response
  pca_out <- tl_reduce_dimensions(
    iris,
    response = "Species",
    method = "pca",
    n_components = 3
  )

  # Step 2: fit a logistic model on the reduced data
  fit <- tl_model(pca_out$data, Species ~ ., method = "logistic")
  expect_s3_class(fit, "tidylearn_logistic")

  # Step 3: predictions cover every original row
  fitted_preds <- predict(fit)
  expect_equal(nrow(fitted_preds), nrow(iris))
})
# Workflow check only: data augmented with cluster features can be modelled
# and predicted on. No comparison against a baseline model is made here.
test_that("cluster features improve model", {
  augmented <- tl_add_cluster_features(
    iris,
    response = "Species",
    method = "kmeans",
    k = 3
  )

  # Fit a logistic model that includes the cluster features
  fit <- tl_model(augmented, Species ~ ., method = "logistic")
  expect_s3_class(fit, "tidylearn_logistic")

  # Predictions cover every row of the augmented data
  fitted_preds <- predict(fit)
  expect_equal(nrow(fitted_preds), nrow(augmented))
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.