Nothing
library(dplyr)
library(FSelectorRcpp)
library(Matrix)
if (require("FSelector")) {
test_that("Comparsion with FSelector", {
expect_equal(
information.gain(Species ~ ., data = iris)$attr_importance,
information_gain(formula = Species ~ ., data = iris)$importance
)
expect_equal(gain.ratio(Species ~ ., data = iris)$attr_importance,
information_gain(formula = Species ~ ., data = iris,
type = "gainratio")$importance)
expect_equal(symmetrical.uncertainty(Species ~ .,
data = iris)$attr_importance,
information_gain(formula = Species ~ ., data = iris,
type = "symuncert")$importance)
})
test_that("Test character table", {
set.seed(500)
dt <- lapply(1:50, function(xx) {
x <- rnorm(1000, mean = 10 * xx)
y <- rnorm(1000, mean = 0.5 * xx)
z <- 10 * xx + 0.5 * sqrt(xx)
data.frame(x, y, z)
})
dt <- Reduce(rbind, dt)
dt$z <- as.factor(as.integer(round(dt$z)))
formula <- z ~ .
data <- dt
expect_lt(sum(information.gain(z ~ ., data)[, 1]
- information_gain(
formula = z ~ ., data = data)$importance),
1e-10)
})
test_that("Equal bin discretization", {
testthat::skip(message = "due to behaviour change in FSelector information.gain")
fs <- information.gain(formula = Sepal.Length ~ ., data = iris)
fsrcpp <- information_gain(formula = Sepal.Length ~ ., data = iris,
equal = TRUE)
expect_equal(fs$attr_importance, fsrcpp$importance)
})
}
test_that("Sparse matrix - basics", {
species <- iris$Species
x <- as.matrix(iris[, 1:4])
mode(x) <- "integer"
x <- Matrix(x, sparse = TRUE)
iris2 <- iris
mode(iris2$Sepal.Length) <- "integer"
mode(iris2$Sepal.Width) <- "integer"
mode(iris2$Petal.Length) <- "integer"
mode(iris2$Petal.Width) <- "integer"
expect_equal(information_gain(x, species)$importance,
information_gain(formula = Species ~ ., data = iris2)$importance)
expect_equal(
information_gain(x, species, discIntegers = FALSE)$importance,
information_gain(
formula = Species ~ ., data = iris2,
discIntegers = FALSE
)$importance
)
expect_equal(information_gain(x, species, type = "gainratio")$importance,
information_gain(formula = Species ~ ., data = iris2,
type = "gainratio")$importance)
expect_equal(information_gain(x, species, type = "symuncert")$importance,
information_gain(formula = Species ~ ., data = iris2,
type = "symuncert")$importance)
# When there's no column names just indexes will be used
colnames(x) <- NULL
expect_equal(
information_gain(x, species, type = "symuncert")$attributes,
1:4
)
})
test_that("Removing NAs in formula (order)", {
xx <- tibble(x = as.character(c(1, 2, 3)), y = as.character(c(1, 2, 3)),
na = c(NA, NA, 1))
expect_equal(information_gain(xx[, "x"], xx$y)$importance,
information_gain(y ~ x, xx)$importance)
})
test_that("Interfaces errors", {
expect_error(information_gain())
x <- 1
y <- 1
expect_error(information_gain(formula = x, data = x, x = x, y = x))
expect_error(information_gain(formula = x, data = x))
xx <- tibble(x = as.character(c(1, 2, 3)), y = as.character(c(1, 2, 3)))
expect_error(information_gain(x = y ~ ., y = xx))
})
test_that("Incorrect interface parameter specification", {
irisX <- iris[-5]
y <- as.vector(iris$Species)
expect_error(information_gain(x = irisX))
expect_error(information_gain(formula = Species ~ .))
expect_error(information_gain(data = iris))
expect_error(information_gain(y = y))
expect_error(information_gain(x = irisX, data = iris))
expect_error(information_gain(y = y, data = iris))
expect_error(information_gain(x = irisX, data = iris))
expect_error(information_gain(x = irisX, formula = Species ~ .))
expect_error(information_gain(y = y, formula = Species ~ .))
expect_error(information_gain(y = y, x = irisX, data = iris))
expect_error(information_gain(y = y, formula = Species ~ ., data = iris))
expect_error(information_gain(x = irisX, formula = Species ~ ., data = iris))
expect_error(information_gain(y = y, x = irisX, formula = Species ~ .))
expect_error(information_gain(
y = y, x = irisX, data = iris,
formula = Species ~ .))
expect_error(information_gain(formula = x ~ 1, data = 1:10))
})
test_that("Warning when y is numeric", {
dt <- tibble(x = rnorm(10), y = rnorm(10))
z <- rnorm(10)
expect_warning(information_gain(y ~ x, dt))
expect_warning(information_gain(dt, z))
})
test_that("Compare interfaces - formula vs x,y", {
expect_equal(
information_gain(Species ~ ., iris),
information_gain(x = iris[, -5], y = iris$Species)
)
})
test_that("Information gain - integer column - discIntegers", {
dt <- tibble(
y = iris$Species,
x = as.integer(iris$Sepal.Length),
z = as.numeric(as.integer(iris$Sepal.Length))
)
# discretize integer value
result <- information_gain(y ~ ., dt, discIntegers = TRUE)
expect_equal(length(unique(result$importance)), 1)
set.seed(123)
x <- as.integer(runif(1000, 1, 100))
dt1 <- data.frame(
y = as.integer(runif(1000, 1, 100)),
x = x, # int
z = as.numeric(x), # numeric
fc = factor(x) # factor
)
# discretize integer
r1 <- information_gain(y ~ ., dt1)
expect_equal(r1[[2]][[1]], r1[[2]][[2]]) # int is equal to numeric
# do not discretize integer column
r2 <- information_gain(y ~ ., dt1, discIntegers = FALSE)
expect_equal(r2[[2]][[1]], r2[[2]][[3]]) # int is equal to factor
})
test_that("Information gain - character column", {
dt <- tibble(
y = datasets::iris$Species,
x = as.character(as.integer(datasets::iris$Sepal.Length)),
z = as.character(as.integer(datasets::iris$Sepal.Width))
)
expected <- structure(
list(
attributes = c("x", "z"),
importance = c(0.392253286074497,
0.190561000237304)
),
class = "data.frame",
row.names = c(NA,-2L)
)
expect_equal(information_gain(y ~ ., dt), expected)
dt2 <- dt
dt2[[2]][10:20] <- NA
dt2[[3]][15:25] <- NA
expectedNA <- structure(
list(
attributes = c("x", "z"),
importance = c(0.431330813628601,
0.236289962850477)
),
class = "data.frame",
row.names = c(NA,-2L)
)
expect_equal(information_gain(y ~ ., dt2), expectedNA)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.