## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
## ----setup--------------------------------------------------------------------
library(tidylearn)
library(dplyr)
library(ggplot2)
## ----eval=FALSE---------------------------------------------------------------
# # Run AutoML on iris dataset
# result <- tl_auto_ml(iris, Species ~ .,
#                      task = "classification",
#                      time_budget = 60)
#
# # View best model
# print(result$best_model)
## ----eval=FALSE---------------------------------------------------------------
# # View all models tried
# names(result$models)
## ----eval=FALSE---------------------------------------------------------------
# # View leaderboard
# result$leaderboard
## ----eval=FALSE---------------------------------------------------------------
# # Run AutoML on regression problem
# result_reg <- tl_auto_ml(mtcars, mpg ~ .,
#                          task = "regression",
#                          time_budget = 60)
#
# # Best model
# print(result_reg$best_model)
## ----eval=FALSE---------------------------------------------------------------
# # AutoML with all features enabled
# result_full <- tl_auto_ml(
#   data = iris,
#   formula = Species ~ .,
#   task = "auto",          # Automatically detect task type
#   use_reduction = TRUE,   # Try PCA preprocessing
#   use_clustering = TRUE,  # Add cluster features
#   time_budget = 120,      # 2 minutes
#   cv_folds = 5,           # Cross-validation folds
#   metric = NULL           # Auto-select metric
# )
## ----eval=FALSE---------------------------------------------------------------
# # Task type is automatically detected
# result_auto <- tl_auto_ml(iris, Species ~ ., task = "auto")
# # Detects: Classification (factor response)
#
# result_auto_reg <- tl_auto_ml(mtcars, mpg ~ ., task = "auto")
# # Detects: Regression (numeric response)
## ----eval=FALSE---------------------------------------------------------------
# # Quick search (30 seconds)
# quick_result <- tl_auto_ml(iris, Species ~ ., time_budget = 30)
#
# # Thorough search (10 minutes)
# thorough_result <- tl_auto_ml(iris, Species ~ ., time_budget = 600)
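## ----eval=FALSE---------------------------------------------------------------
# # To see how long a search actually takes, base R's system.time() can wrap
# # the call (a sketch reusing the quick search above)
# system.time(
#   quick_result <- tl_auto_ml(iris, Species ~ ., time_budget = 30)
# )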
## ----eval=FALSE---------------------------------------------------------------
# # Disable dimensionality reduction
# no_reduction <- tl_auto_ml(iris, Species ~ .,
# use_reduction = FALSE,
# time_budget = 60)
#
# # Disable cluster features
# no_clustering <- tl_auto_ml(iris, Species ~ .,
# use_clustering = FALSE,
# time_budget = 60)
#
# # Baseline models only
# baseline_only <- tl_auto_ml(iris, Species ~ .,
# use_reduction = FALSE,
# use_clustering = FALSE,
# time_budget = 30)
## ----eval=FALSE---------------------------------------------------------------
# # Adjust cross-validation folds
# result_cv <- tl_auto_ml(iris, Species ~ .,
# cv_folds = 10, # More folds = better estimate, slower
# time_budget = 120)
#
# # Fewer folds for faster evaluation
# result_fast <- tl_auto_ml(iris, Species ~ .,
# cv_folds = 3,
# time_budget = 60)
## ----eval=FALSE---------------------------------------------------------------
# result <- tl_auto_ml(iris, Species ~ ., time_budget = 60)
#
# # Best performing model
# best_model <- result$best_model
#
# # All models trained
# all_models <- result$models
#
# # Specific model
# baseline_logistic <- result$models$baseline_logistic
# pca_forest <- result$models$pca_forest
## ----eval=FALSE---------------------------------------------------------------
# # View performance comparison
# leaderboard <- result$leaderboard
#
# # Sort by performance
# leaderboard <- leaderboard %>%
#   arrange(desc(performance))
#
# print(leaderboard)
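## ----eval=FALSE---------------------------------------------------------------
# # A quick visual comparison of the leaderboard (a sketch; it assumes the
# # `model` and `performance` columns used above)
# ggplot(leaderboard, aes(x = reorder(model, performance), y = performance)) +
#   geom_col() +
#   coord_flip() +
#   labs(x = "Model", y = "Cross-validated performance")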
## ----eval=FALSE---------------------------------------------------------------
# # Use best model for predictions
# predictions <- predict(result$best_model, new_data = new_data)
#
# # Or use a specific model
# predictions_pca <- predict(result$models$pca_forest, new_data = new_data)
## ----eval=FALSE---------------------------------------------------------------
# # Split data for evaluation
# split <- tl_split(iris, prop = 0.7, stratify = "Species", seed = 123)
#
# # Run AutoML on training data
# automl_iris <- tl_auto_ml(split$train, Species ~ .,
#                           time_budget = 90,
#                           cv_folds = 5)
#
# # Evaluate on test set
# test_preds <- predict(automl_iris$best_model, new_data = split$test)
# test_accuracy <- mean(test_preds$.pred == split$test$Species)
#
# cat("AutoML Test Accuracy:", round(test_accuracy * 100, 1), "%\n")
## ----eval=FALSE---------------------------------------------------------------
# # Compare models
# for (model_name in names(automl_iris$models)) {
#   model <- automl_iris$models[[model_name]]
#   preds <- predict(model, new_data = split$test)
#   acc <- mean(preds$.pred == split$test$Species)
#   cat(model_name, ":", round(acc * 100, 1), "%\n")
# }
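## ----eval=FALSE---------------------------------------------------------------
# # The same comparison collected into a sorted data frame (a sketch using
# # base R's sapply over the fitted model list)
# test_acc <- sapply(automl_iris$models, function(m) {
#   mean(predict(m, new_data = split$test)$.pred == split$test$Species)
# })
# test_acc <- sort(test_acc, decreasing = TRUE)
# data.frame(model = names(test_acc), accuracy = round(test_acc * 100, 1))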
## ----eval=FALSE---------------------------------------------------------------
# # Split mtcars data
# split_mtcars <- tl_split(mtcars, prop = 0.7, seed = 42)
#
# # Run AutoML
# automl_mpg <- tl_auto_ml(split_mtcars$train, mpg ~ .,
#                          task = "regression",
#                          time_budget = 90)
#
# # Evaluate
# test_preds_mpg <- predict(automl_mpg$best_model, new_data = split_mtcars$test)
# rmse <- sqrt(mean((test_preds_mpg$.pred - split_mtcars$test$mpg)^2))
#
# cat("AutoML Test RMSE:", round(rmse, 2), "\n")
## ----eval=FALSE---------------------------------------------------------------
# # Preprocess data first
# processed <- tl_prepare_data(
#   split$train,
#   Species ~ .,
#   scale_method = "standardize",
#   remove_correlated = TRUE
# )
#
# # Run AutoML on preprocessed data
# automl_processed <- tl_auto_ml(processed$data, Species ~ .,
#                                time_budget = 60)
#
# # Note: the test data needs the same preprocessing. Re-estimating the
# # scaling on the test set (as below) is only an approximation; ideally the
# # training-set scaling parameters would be reused.
# test_processed <- tl_prepare_data(
#   split$test,
#   Species ~ .,
#   scale_method = "standardize"
# )
#
# test_preds_proc <- predict(automl_processed$best_model,
#                            new_data = test_processed$data)
## ----eval=FALSE---------------------------------------------------------------
# # Manual approach: choose one model
# manual_model <- tl_model(split$train, Species ~ ., method = "forest")
# manual_preds <- predict(manual_model, new_data = split$test)
# manual_acc <- mean(manual_preds$.pred == split$test$Species)
#
# # AutoML approach
# automl_model <- tl_auto_ml(split$train, Species ~ ., time_budget = 60)
# automl_preds <- predict(automl_model$best_model, new_data = split$test)
# automl_acc <- mean(automl_preds$.pred == split$test$Species)
#
# cat("Manual Selection:", round(manual_acc * 100, 1), "%\n")
# cat("AutoML:", round(automl_acc * 100, 1), "%\n")
## ----eval=FALSE---------------------------------------------------------------
# # First pass: quick exploration
# quick_automl <- tl_auto_ml(split$train, Species ~ .,
#                            time_budget = 30,
#                            use_reduction = TRUE,
#                            use_clustering = FALSE)
#
# # Analyze what worked: the winning model's name (e.g. "pca_forest")
# # indicates which approach performed best
# best_approach <- with(quick_automl$leaderboard, model[which.max(performance)])
#
# # Second pass: focus on promising approaches
# if (grepl("pca", best_approach)) {
#   # If a PCA-based model won, keep dimensionality reduction enabled
#   refined_automl <- tl_auto_ml(split$train, Species ~ .,
#                                time_budget = 60,
#                                use_reduction = TRUE,
#                                use_clustering = TRUE)
# }
## ----eval=FALSE---------------------------------------------------------------
# # Get top 3 models
# top_models <- automl_iris$leaderboard %>%
#   arrange(desc(performance)) %>%
#   head(3)
#
# # Make predictions with each
# ensemble_preds <- list()
# for (i in 1:nrow(top_models)) {
#   model_name <- top_models$model[i]
#   model <- automl_iris$models[[model_name]]
#   # Coerce to character so the majority vote works even if .pred is a factor
#   ensemble_preds[[i]] <- as.character(predict(model, new_data = split$test)$.pred)
# }
#
# # Majority vote for classification
# final_pred <- apply(do.call(cbind, ensemble_preds), 1, function(x) {
#   names(which.max(table(x)))
# })
#
# ensemble_acc <- mean(final_pred == split$test$Species)
# cat("Ensemble Accuracy:", round(ensemble_acc * 100, 1), "%\n")
## ----eval=FALSE---------------------------------------------------------------
# # Accuracy is the default metric for classification; it can also be set explicitly
# result_class <- tl_auto_ml(iris, Species ~ .,
#                            metric = "accuracy",
#                            time_budget = 60)
## ----eval=FALSE---------------------------------------------------------------
# # RMSE is the default metric for regression; it can also be set explicitly
# result_reg <- tl_auto_ml(mtcars, mpg ~ .,
#                          metric = "rmse",
#                          time_budget = 60)
## ----eval=FALSE---------------------------------------------------------------
# # Reduce time budget
# quick_result <- tl_auto_ml(data, formula, time_budget = 30)
#
# # Reduce CV folds
# fast_result <- tl_auto_ml(data, formula, cv_folds = 3)
#
# # Disable feature engineering
# baseline_result <- tl_auto_ml(data, formula,
#                               use_reduction = FALSE,
#                               use_clustering = FALSE)
## ----eval=FALSE---------------------------------------------------------------
# # Increase time budget
# thorough_result <- tl_auto_ml(data, formula, time_budget = 300)
#
# # Ensure feature engineering is enabled
# full_result <- tl_auto_ml(data, formula,
#                           use_reduction = TRUE,
#                           use_clustering = TRUE)
## ----eval=FALSE---------------------------------------------------------------
# # Complete AutoML workflow
# workflow_split <- tl_split(iris, prop = 0.7, stratify = "Species", seed = 123)
#
# automl_result <- tl_auto_ml(
#   data = workflow_split$train,
#   formula = Species ~ .,
#   task = "auto",
#   use_reduction = TRUE,
#   use_clustering = TRUE,
#   time_budget = 120,
#   cv_folds = 5
# )
#
# # Evaluate best model
# final_preds <- predict(automl_result$best_model, new_data = workflow_split$test)
# final_accuracy <- mean(final_preds$.pred == workflow_split$test$Species)
#
# cat("Final AutoML Accuracy:", round(final_accuracy * 100, 1), "%\n")
# cat("Best approach:", automl_result$best_model$spec$method, "\n")