# Setup

# Set the knitr chunk default `echo = TRUE` so chunk source code is included
# in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Attach forester; the preprocessing, training and reporting functions called
# later in this script are presumably exported by it.
library(forester)

# Data preparation

# Load the `compas` dataset (presumably shipped with forester) and run the
# custom preprocessing pipeline with `Ethnicity` as the target column.
# The result, `compas_prep`, is reused by the later training calls.
data(compas)

compas_prep <- custom_preprocessing(
  data = compas,
  y = "Ethnicity",
  # Presumably: values listed here are treated as missing (only the empty
  # string in this script) -- confirm against the forester documentation.
  na_indicators = c(""),
  removal_parameters = list(
    # Every removal module is switched on.
    active_modules = c(
      duplicate_cols = TRUE,
      id_like_cols = TRUE,
      static_cols = TRUE,
      sparse_cols = TRUE,
      corrupt_rows = TRUE,
      correlated_cols = TRUE
    ),
    id_names = c("id", "nr", "number", "idx", "identification", "index"),
    static_threshold = 0.99,
    sparse_columns_threshold = 0.3,
    sparse_rows_threshold = 0.3,
    high_correlation_threshold = 0.7
  ),
  imputation_parameters = list(
    imputation_method = "median-other",
    # NOTE(review): `k` and `m` look like settings for other imputation
    # methods and may be unused with "median-other" -- confirm.
    k = 10,
    m = 5
  ),
  feature_selection_parameters = list(
    # Feature selection is turned off; the remaining entries are passed
    # through unchanged.
    feature_selection_method = "none",
    max_features = "default",
    nperm = 1,
    cutoffPermutations = 20,
    threadsNumber = NULL,
    method = "estevez"
  ),
  verbose = TRUE
)

# Training

# Baseline run: train on the raw `compas` data without a custom preprocessing
# object (`custom_preprocessing = NULL`), using two random evaluations and no
# Bayesian optimisation iterations.
# NOTE(review): `split_seed = NULL`, so the train/test/validation split is
# presumably not reproducible between runs -- confirm if that is intended.
train_out <- train(
  data = compas,
  y = "Ethnicity",
  time = NULL,
  status = NULL,
  type = "auto",
  engine = c("ranger", "xgboost", "decision_tree", "lightgbm", "catboost"),
  verbose = TRUE,
  train_test_split = c(0.6, 0.2, 0.2),
  split_seed = NULL,
  bayes_iter = 0,
  random_evals = 2,
  metrics = "auto",
  sort_by = "auto",
  metric_function = NULL,
  metric_function_name = NULL,
  metric_function_decreasing = TRUE,
  best_model_number = 5,
  custom_preprocessing = NULL
)
# Preprocessed run with random search: train on `compas_prep$data` and hand
# the whole `compas_prep` object to `custom_preprocessing`, using two random
# evaluations and no Bayesian optimisation iterations.
train_out_1 <- train(
  data = compas_prep$data,
  y = "Ethnicity",
  time = NULL,
  status = NULL,
  type = "auto",
  engine = c("ranger", "xgboost", "decision_tree", "lightgbm", "catboost"),
  verbose = TRUE,
  train_test_split = c(0.6, 0.2, 0.2),
  split_seed = NULL,
  bayes_iter = 0,
  random_evals = 2,
  metrics = "auto",
  sort_by = "auto",
  metric_function = NULL,
  metric_function_name = NULL,
  metric_function_decreasing = TRUE,
  best_model_number = 5,
  custom_preprocessing = compas_prep
)
# Preprocessed run with Bayesian optimisation: same data and preprocessing
# object as `train_out_1`, but with four Bayesian optimisation iterations and
# no random evaluations.
train_out_2 <- train(
  data = compas_prep$data,
  y = "Ethnicity",
  time = NULL,
  status = NULL,
  type = "auto",
  engine = c("ranger", "xgboost", "decision_tree", "lightgbm", "catboost"),
  verbose = TRUE,
  train_test_split = c(0.6, 0.2, 0.2),
  split_seed = NULL,
  bayes_iter = 4,
  random_evals = 0,
  metrics = "auto",
  sort_by = "auto",
  metric_function = NULL,
  metric_function_name = NULL,
  metric_function_decreasing = TRUE,
  best_model_number = 5,
  custom_preprocessing = compas_prep
)

# Report

# Generate one report per preprocessed training run, written to the current
# working directory. The baseline `train_out` run is not reported here.
report(
  train_output = train_out_1,
  output_file = "multiclass_classification_test_1",
  output_dir = getwd(),
  check_data = TRUE
)

report(
  train_output = train_out_2,
  output_file = "multiclass_classification_test_2",
  output_dir = getwd(),
  check_data = TRUE
)


# ModelOriented/forester documentation built on June 6, 2024, 7:29 a.m.