feature_selection: Feature selection wrapper

Description Usage Arguments Value Examples

View source: R/featureSelection.R

Description

Feature selection wrapper

Usage

1
feature_selection(dataset, method, class_attr = NULL, exclude = NULL, ...)

Arguments

dataset

Dataset on which we want to do feature selection

method

character. Name of the feature selection method to apply (e.g. "Boruta", "chi_squared" or "cfs"; see Examples for more method names)

class_attr

character. Indicates the class attribute or attributes from dataset. Must exist in it.

exclude

character. Vector of attributes to exclude from the feature selection process

...

Further arguments passed on to the selected method (for instance num_features or eval_fun, as used in Examples)

Value

The treated dataset (with the attributes discarded by the feature selection method removed)

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# smartdata provides feature_selection()
library("smartdata")
# rpart() fits the classification trees used by the evaluator function below
library("rpart")
# Example datasets used in the feature_selection() calls below
data(ecoli1, package = "imbalance")
data(HouseVotes84, package = "mlbench")

# Extracted from FSelector::best.first.search documentation
# Scores a subset of iris attributes by the accuracy of an rpart
# classification tree that predicts `Species` from them, averaged over a
# random k-fold split (k = 5).
#
# Args:
#   subset: character vector with the names of the iris columns to use as
#           predictors.
#
# Returns:
#   A single number: the mean accuracy (1 - error rate) over the k folds.
#   The subset and its score are also printed, so the progress of a search
#   can be followed on the console.
evaluator <- function(subset) {
  k <- 5
  # One uniform draw per row: a row belongs to fold i when its draw lies in
  # [(i - 1) / k, i / k), so fold sizes are random rather than exactly equal.
  splits <- runif(nrow(iris))
  results <- vapply(seq_len(k), function(i) {
    test.idx <- (splits >= (i - 1) / k) & (splits < i / k)
    train.idx <- !test.idx
    test <- iris[test.idx, , drop = FALSE]
    train <- iris[train.idx, , drop = FALSE]
    tree <- rpart(FSelector::as.simple.formula(subset, "Species"), train)
    # Spell out "class" instead of relying on partial matching of "c"
    predicted <- predict(tree, test, type = "class")
    error.rate <- sum(test$Species != predicted) / nrow(test)
    1 - error.rate
  }, numeric(1))
  score <- mean(results)
  print(subset)
  print(score)
  score
}



# Methods that select features with respect to a class attribute
super_iris <- feature_selection(iris, "Boruta", class_attr = "Species")
super_iris <- feature_selection(iris, "chi_squared",
                                class_attr = "Species", num_features = 3)
# Pick 3 attributes from the continuous ones
super_ecoli <- feature_selection(ecoli1, "information_gain",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "gain_ratio",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "sym_uncertainty",
                                 class_attr = "Class", num_features = 3)
# V1 and V2 are left out of the feature selection process
super_votes <- feature_selection(HouseVotes84, "oneR", exclude = c("V1", "V2"),
                                 class_attr = "Class", num_features = 3)
# This result is computed from iris, so it is stored in super_iris
super_iris <- feature_selection(iris, "RF_importance", class_attr = "Species",
                                num_features = 3, type = 2)

# Search methods: candidate subsets are scored with the user-supplied
# evaluator function. evaluator already models Species as the response, so
# Species is excluded here (presumably to keep it out of the candidates).
super_iris <- feature_selection(iris, "best_first_search", exclude = "Species",
                                eval_fun = evaluator)
super_iris <- feature_selection(iris, "forward_search", exclude = "Species",
                                eval_fun = evaluator)
super_iris <- feature_selection(iris, "backward_search", exclude = "Species",
                                eval_fun = evaluator)

super_iris <- feature_selection(iris, "cfs", class_attr = "Species")
super_iris <- feature_selection(iris, "consistency", class_attr = "Species")

smartdata documentation built on Dec. 19, 2019, 1:08 a.m.