library(dplyr)
library(FSelectorRcpp)
iris_plus <- setNames(iris, gsub(
pattern = "\\.",
replacement = "+",
x = colnames(iris)
))
test_that("Data frame output", {
expect_s3_class(discretize(Species ~ ., iris),
class = "data.frame")
expect_s3_class(discretize(Species ~ Sepal.Length, iris),
class = "data.frame")
expect_s3_class(discretize(iris$Sepal.Length, iris$Species),
class = "data.frame")
})
if (require("FSelector") && require("RWeka")) {
test_that("Discretization - basic", {
dt <- lapply(1:5, function(xx) {
x <- rnorm(1000, mean = 10 * xx)
y <- rnorm(1000, mean = 0.5 * xx)
z <- 10 * xx + 0.5 * sqrt(xx)
data.frame(x, y, z)
})
dt <- do.call(bind_rows, dt)
dt$z <- as.factor(as.integer(round(dt$z)))
weka <- as.numeric(RWeka::Discretize(z ~ x, dt)[, 1])
fs <- as.numeric(discretize(dt$x, dt$z)[[1]])
expect_equal(weka, fs)
weka <- RWeka::Discretize(z ~ x, dt)[, 1]
fs <- discretize(dt$x, dt$z)[[1]]
levels(weka)
levels(fs)
})
test_that("Discretization - single NA (independent variable)", {
iris$Sepal.Length[3] <- NA
Weka <- as.numeric(RWeka::Discretize(Species ~ Sepal.Length,
data = iris)[, 1])
Weka <- c(Weka[1:2], NA, tail(Weka, -2))
fs <- as.numeric(FSelectorRcpp::discretize(
iris$Sepal.Length, iris$Species)[[2]])
expect_equal(Weka, fs)
})
}
test_that("Discretization - not supported data type - throw error.", {
x <- "a"
y <- "b"
expect_error(discretize(x, y))
})
test_that("Discretization - formula works both ways.", {
expect_equal(discretize(Species ~ ., iris), discretize(iris, Species ~ .))
})
test_that("Discretization - formula returns data.frame.", {
expect_s3_class(discretize(Species ~ ., iris), "data.frame")
})
test_that("Discretization - expect warning when there is non numeric column in
formula and all=FALSE.", {
dt <- cbind(iris, b = "a")
expect_warning(discretize(Species ~ ., dt, all = FALSE))
})
test_that("Discretization - not implemented for data.frame", {
dt <- cbind(iris, b = "a")
expect_error(discretize(dt))
})
test_that("Discretization - not supported method", {
expect_error(discretize(Species ~ ., iris, control = list(method = "test")))
control <- structure(list(method = "test"), class = "discretizationControl")
expect_error(discretize(Species ~ ., iris, control = control))
})
test_that("Discretization - equalsize - ordered.", {
x <- as.numeric(1:6)
y <- 1:6
d <- discretize(x, y, control = equalsizeControl(k = 2))[[2]]
expect_equal(as.numeric(d), c(1, 1, 1, 2, 2, 2))
d <- discretize(x, y, control = equalsizeControl(k = 3))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 2, 3, 3))
d <- discretize(x, y, control = equalsizeControl(k = 4))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 2, 3, 4))
d <- discretize(x, y, control = equalsizeControl(k = 5))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 3, 4, 5))
})
test_that("Discretization - equalsize - reverse order", {
x <- as.numeric(6:1)
y <- 1:6
d <- discretize(x, y, control = equalsizeControl(k = 2))[[2]]
expect_equal(as.numeric(d), c(1, 1, 1, 2, 2, 2) %>% rev)
d <- discretize(x, y, control = equalsizeControl(k = 3))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 2, 3, 3) %>% rev)
d <- discretize(x, y, control = equalsizeControl(k = 4))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 2, 3, 4) %>% rev)
d <- discretize(x, y, control = equalsizeControl(k = 5))[[2]]
expect_equal(as.numeric(d), c(1, 1, 2, 3, 4, 5) %>% rev)
})
test_that("Discretization - equalsize - pseudo-random order", {
x <- c(6, 4, 2, 3, 1, 5)
y <- 1:6
d <- discretize(x, y, control = equalsizeControl(k = 2))[[2]]
expect_equal(as.numeric(d), c(2, 2, 1, 1, 1, 2))
d <- discretize(x, y, control = equalsizeControl(k = 3))[[2]]
expect_equal(as.numeric(d), c(3, 2, 1, 2, 1, 3))
d <- discretize(x, y, control = equalsizeControl(k = 4))[[2]]
expect_equal(as.numeric(d), c(4, 2, 1, 2, 1, 3))
d <- discretize(x, y, control = equalsizeControl(k = 5))[[2]]
expect_equal(as.numeric(d), c(5, 3, 1, 2, 1, 4))
})
test_that("Zero split points", {
set.seed(1)
x <- rep(0, 10)
y <- rep(0, 10)
expect_warning(discretize(x, y))
})
test_that("List interface inside function", {
fnc <- function(xx) {
discretize(list(xx$"Sepal+Length", xx[[2]],
xx[["Petal+Length"]]), xx$Species)
}
expect_equal(
colnames(fnc(iris_plus)),
c("Species", "Sepal+Length", "Sepal+Width", "Petal+Length"))
})
test_that("Interfaces", {
expect_equal(
colnames(discretize(iris$Sepal.Length, iris[["Species"]])),
c("Species", "Sepal.Length")
)
expect_equal(
colnames(discretize(iris$Sepal.Length, iris[[5]])),
c("Species", "Sepal.Length")
)
expect_equal(
colnames(discretize(list(iris$Sepal.Length, iris[[2]],
iris[["Petal.Length"]]), iris$Species)),
colnames(discretize(Species ~ . - Petal.Width, iris, all = FALSE))
)
expect_equal(
colnames(discretize(list(iris_plus$"Sepal+Length", iris_plus[[2]],
iris_plus[["Petal+Length"]]), iris_plus$Species)),
c("Species", "Sepal+Length", "Sepal+Width", "Petal+Length")
)
expect_s3_class(discretize(iris[-5], iris$Species), "data.frame")
})
test_that("Custom breaks", {
breaks <- c(0, 2, 4, 6, 8, 20, Inf)
disc <- discretize(iris, Species ~ Sepal.Length, customBreaksControl(breaks))
cc <- cut(iris$Sepal.Length, breaks = breaks, ordered_result = TRUE)
expect_true(all(disc$Sepal.Length == cc))
expect_error(customBreaksControl(c("A")))
})
test_that("Throw error for duplicated columns", {
x <- iris
colnames(x)[1:2] <- "X"
expect_error(discretize(Species ~ ., x))
expect_error(discretize(iris, iris$Species))
})
test_that("Throw an error when there's no numeric columns", {
expect_error(discretize(discretize(Species ~ ., iris), Species ~ .))
})
iris_num <- iris[, c(1, 5)]
iris_num[["SepLenInteger"]] <- as.integer(iris_num$Sepal.Length)
iris_num[["SepLenNumeric"]] <- as.numeric(as.integer(iris_num$Sepal.Length))
iris_num <- iris_num[, -1] # remove Sepal.Length column
test_that("to not discretize integer column set discIntegers = FALSE", {
expect_equal(
discretize(
Species ~ ., iris_num,
discIntegers = FALSE)[["SepLenInteger"]],
iris_num[["SepLenInteger"]]
)
expect_equal(
discretize(
iris_num, Species ~ .,
discIntegers = FALSE)[["SepLenInteger"]],
iris_num[["SepLenInteger"]]
)
expect_equal(
discretize(
iris_num[, 2:3], iris_num$Species,
discIntegers = FALSE)[["SepLenInteger"]],
iris_num[["SepLenInteger"]]
)
})
test_that("By default integer columns are discretized", {
res <- discretize(Species ~ ., iris_num)
expect_equal(
res[["SepLenInteger"]],
res[["SepLenNumeric"]]
)
res <- discretize(iris_num, Species ~ .)
expect_equal(
res[["SepLenInteger"]],
res[["SepLenNumeric"]]
)
res <- discretize(iris_num[, 2:3], iris_num$Species)
expect_equal(
res[["SepLenInteger"]],
res[["SepLenNumeric"]]
)
})
test_that("no double values error when discIntegers = FALSE", {
dt <- data.frame(y = c("A", "A"), x = c("A", "B"), z = 1:2)
expect_error(discretize(y~., dt, discIntegers = FALSE))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.