# Nothing
# Declares the testthat context under which the tests in this file are grouped.
context("test_discretization.R")
# Shared flag passed as `verbose = verbose` to the functions under test.
verbose <- TRUE
## build_bins
# ---------------
# build_bins with cols = "auto" is expected to return bins only for the
# numeric column of the input.
test_that("build_bins: with cols set to auto if perform it on all numerics", {
  # Given: one numeric column and one character column.
  # Both columns are named with `=`; using `<-` inside data.table() passes an
  # unnamed argument and creates a stray local variable instead of naming the
  # column.
  data_set <- data.table(
    num_col = c(1, 2, 3),
    not_num_col = c("a", "b", "c")
  )
  # When
  bins <- build_bins(data_set, cols = "auto", verbose = verbose)
  # Then: only the numeric column has bins (actual value first, expected second)
  expect_equal(names(bins), "num_col")
})
# Both binning types must produce results with the same shape: same number of
# entries, same names, and the same number of thresholds per entry.
test_that("build_bins: type doesn't affect result shape", {
  # Given
  data_set <- data.table(num_col = 1:100)
  # When
  bins_freq <- build_bins(
    data_set,
    cols = "num_col", type = "equal_freq", verbose = verbose
  )
  bins_width <- build_bins(
    data_set,
    cols = "num_col", type = "equal_width", verbose = verbose
  )
  # Then
  expect_equal(length(bins_width), length(bins_freq))
  expect_equal(names(bins_width), names(bins_freq))
  # lengths() always returns an integer vector, unlike sapply(), whose return
  # type depends on its input.
  expect_equal(lengths(bins_width), lengths(bins_freq))
})
# A character column requested explicitly must yield no bins at all.
test_that("build_bins: doesn't do anything on not numeric col", {
  # Given
  data_set <- data.table(not_numeric_col = c("a", "b", "c"))
  # When
  bins <- build_bins(data_set, cols = "not_numeric_col", verbose = verbose)
  # Then: actual value first, expected second, so failure output is labelled
  # correctly
  expect_equal(bins, list())
})
# A column holding a single repeated value must yield no bins at all.
test_that("build_bins: doesn't do anything on constant col", {
  # Given: 100 copies of the same value
  data_set <- data.table(constant_col = rep(1, 100))
  # When
  bins <- build_bins(data_set, cols = "constant_col", verbose = verbose)
  # Then: actual value first, expected second, so failure output is labelled
  # correctly
  expect_equal(bins, list())
})
# An unsupported `type` value must make build_bins raise an error whose
# message matches the pattern below.
test_that("build_bins: expect error when type is not correct", {
  # Given: a numeric column and a type that is not one of the two named in
  # the expected message
  numeric_table <- data.table(col = 1:10)
  unsupported_type <- "a"
  expected_message <- ": type should either be 'equal_width' or 'equal_freq'."
  # When and Then
  expect_error(
    build_bins(numeric_table, type = unsupported_type, verbose = verbose),
    expected_message
  )
})
# equal_width_splits
# ------------------
# Cutting into n_bins bins needs n_bins + 1 thresholds.
test_that("private function: equal_width_splits: Generate n_bins + 1 threshold to have n_bins bins", {
  # Given: 100 uniform draws to be cut into 9 bins
  values <- runif(100)
  bin_count <- 9
  # When
  thresholds <- equal_width_splits(values, n_bins = bin_count, verbose = verbose)
  # Then: one more threshold than bins
  expect_equal(length(thresholds), bin_count + 1)
})
# The threshold count must not depend on how many values are supplied.
test_that("private function: equal_width_splits: Generate n_bins + 1 threshold to have
n_bins bins even with less values than n_bins", {
  # Given: only 5 values for 9 requested bins
  values <- runif(5)
  bin_count <- 9
  # When
  thresholds <- equal_width_splits(values, n_bins = bin_count, verbose = verbose)
  # Then: still one more threshold than bins
  expect_equal(length(thresholds), bin_count + 1)
})
# Three evenly spaced values cut into two bins.
test_that("private function: equal_width_splits", {
  # Given
  values <- c(1, 2, 3)
  bin_count <- 2
  expected_thresholds <- c(1, 2, 3)
  # When
  thresholds <- equal_width_splits(values, n_bins = bin_count, verbose = verbose)
  # Then
  expect_equal(thresholds, expected_thresholds)
})
# Values clustered around 2 but spanning 1 to 3, cut into two bins.
# The description is made distinct from the other equal_width_splits tests so
# that a failure can be attributed to this case in the test report.
test_that("private function: equal_width_splits: unevenly distributed values", {
  # Given
  a_column <- c(1, 2, 2.1, 2.2, 3)
  n_bins <- 2
  # When
  bins <- equal_width_splits(a_column, n_bins = n_bins, verbose = verbose)
  # Then
  expect_equal(bins, c(1, 2, 3))
})
# A single value with ten requested bins is expected to come back as the only
# threshold.
# The description is made distinct from the other equal_width_splits tests so
# that a failure can be attributed to this case in the test report.
test_that("private function: equal_width_splits: with a single value", {
  # Given
  a_column <- c(1)
  n_bins <- 10
  # When
  bins <- equal_width_splits(a_column, n_bins = n_bins, verbose = verbose)
  # Then
  expect_equal(bins, c(1))
})
# equal_freq_splits
# ------------------
# Cutting into n_bins bins needs n_bins + 1 thresholds.
test_that("private function: equal_freq_splits: Generate n_bins + 1 threshold to have n_bins bins", {
  # Given: 100 uniform draws to be cut into 9 bins
  values <- runif(100)
  bin_count <- 9
  # When
  thresholds <- equal_freq_splits(values, n_bins = bin_count, verbose = verbose)
  # Then: one more threshold than bins
  expect_equal(length(thresholds), bin_count + 1)
})
# Three values cut into two bins: the expected result has infinite outer
# thresholds and one inner threshold.
test_that("private function: equal_freq_splits: ", {
  # Given
  values <- c(1, 2, 3)
  bin_count <- 2
  expected_thresholds <- c(-Inf, 2, Inf)
  # When
  thresholds <- equal_freq_splits(values, n_bins = bin_count, verbose = verbose)
  # Then
  expect_equal(thresholds, expected_thresholds)
})
# Five values cut into two bins: the expected inner threshold is 2.1.
test_that("private function: equal_freq_splits", {
  # Given
  values <- c(1, 2, 2.1, 2.2, 3)
  bin_count <- 2
  expected_thresholds <- c(-Inf, 2.1, Inf)
  # When
  thresholds <- equal_freq_splits(values, n_bins = bin_count, verbose = verbose)
  # Then
  expect_equal(thresholds, expected_thresholds)
})
# A single value with ten requested bins: the expected result is that value
# between the two infinite outer thresholds.
test_that("private function: equal_freq_splits: with more bins than values", {
  # Given
  values <- c(1)
  bin_count <- 10
  expected_thresholds <- c(-Inf, 1, Inf)
  # When
  thresholds <- equal_freq_splits(values, n_bins = bin_count, verbose = verbose)
  # Then
  expect_equal(thresholds, expected_thresholds)
})
# is.possible_to_split
# ------------------
# A character input passed as data_set must be rejected with the sanity-check
# error message.
test_that("private function: is.possible_to_split: control sanity check data_set should be a vector.", {
  # Given: a string where numerics are expected
  invalid_input <- "something"
  bin_count <- 2
  expected_message <- "data_set should be a vector of numerics and n_bins a numeric."
  # When and Then
  expect_error(
    is.possible_to_split(data_set = invalid_input, n_bins = bin_count),
    expected_message
  )
})
# A character n_bins must be rejected with the sanity-check error message.
test_that("private function: is.possible_to_split: control sanity check n_bins should be numeric.", {
  # Given: valid numeric data but a string for n_bins
  numeric_values <- c(1, 2)
  invalid_bin_count <- "something"
  expected_message <- "data_set should be a vector of numerics and n_bins a numeric."
  # When and Then
  expect_error(
    is.possible_to_split(data_set = numeric_values, n_bins = invalid_bin_count),
    expected_message
  )
})
## fast_discretization
# -------------------
# After discretization with bins = NULL, no column of the result may still be
# numeric, even when the input contains a missing value.
test_that("fast_discretization: after discretisation there are no more numerics", {
  # Given: a numeric column whose first value is missing
  data_set <- data.table(col = runif(10))
  data_set[["col"]][1] <- NA # add a NA
  # When
  discretized <- fast_discretization(data_set, bins = NULL, verbose = verbose)
  # Then
  # vapply() guarantees a logical vector; sapply() would return list() on an
  # empty result, which any() rejects.
  still_numeric <- vapply(discretized, is.numeric, logical(1))
  expect_false(any(still_numeric))
})
## build_splits_names
# -------------------
# Names built from finite thresholds: the last interval is closed on the right.
test_that("private function: build_splits_names: without inf", {
  # Given
  splits <- c(0, 1, 2)
  expected_split_names <- c("[0, 1[", "[1, 2]")
  # When
  split_names <- build_splits_names(splits)
  # Then: actual value first, expected second, so failure output is labelled
  # correctly
  expect_identical(split_names, expected_split_names)
  # Infinite outer thresholds are also checked here; they produce open outer
  # brackets.
  expect_identical(
    build_splits_names(c(-Inf, 2, Inf)),
    c("]-Inf, 2[", "[2, +Inf[")
  )
})
# Names built from thresholds with infinite outer bounds: both outer brackets
# are open.
test_that("private function: build_splits_names: with inf", {
  # Given
  splits <- c(-Inf, 2, Inf)
  expected_split_names <- c("]-Inf, 2[", "[2, +Inf[")
  # When
  split_names <- build_splits_names(splits)
  # Then: actual value first, expected second, so failure output is labelled
  # correctly
  expect_identical(split_names, expected_split_names)
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.