context("Test functions from data_prepare")
testthat::test_that("target_encode_create works as expected", {
require(tibble)
require(purrr)
aux_test_helper <- function(input_frame, expected, test_name = "", ...){
te <- target_encode_create(
input_frame,
grep(names(input_frame), pattern = "^var", value = TRUE),
"outcome"
)
expect_true(setequal(names(te), names(expected)), info = test_name)
expect_equal(te$group, expected$group, info = test_name)
expect_equal(te$group_variable, expected$group_variable, info = test_name)
expect_equal(te$positive, expected$positive, tolerance = 1e-3,
info = test_name)
expect_equal(te$n, expected$n, info = test_name)
}
set.seed(12345)
# in the tests grouping_variables = "var*"
# and outcome_variable = "outcome"
test_cases <- tibble::tribble(
~test_name,
~input,
~expected,
"Simple_case_1: one group",
tibble::tibble(var = factor(c("a", "b")), outcome = c(T, F)),
tibble::tibble(
group = factor(c("a", "b")),
positive = c(1, 0),
group_variable = "var",
n = c(1, 1)
),
"Simple_case_2: one group",
tibble::tibble(var = factor(rep(c("a", "b"), 2)), outcome = rep(c(T, F), each = 2)), #nolint
tibble::tibble(
group = factor(c("a", "b")),
positive = c(1, 1),
group_variable = "var",
n = c(2, 2)
),
"Random_case_1: one group",
tibble::tibble(var = factor(floor(runif(100) * 3), labels = c("a","b","c")), outcome = as.logical(floor(runif(100) * 2))), #nolint
tibble::tibble(
group = factor(c("a", "b", "c")),
positive = c(16, 21, 23),
group_variable = "var",
n = c(32, 30, 38)
),
"Simple_case_3: two groups",
tibble::tibble(
var1 = factor(c("a", "b")),
var2 = factor(c("a", "c")),
outcome = c(T, F)
),
tibble::tibble(
group = factor(c("a", "b", "a", "c")),
positive = c(1, 0, 1, 0),
group_variable = paste0("var", c(1, 1, 2, 2)),
n = c(1, 1, 1, 1)
),
"Simple_case_4: two groups",
tibble::tibble(
var1 = factor(rep(c("a", "b"), 2)),
var2 = factor(rep(c("a", "c"), 2)),
outcome = rep(c(T, F), each = 2)
),
tibble::tibble(
group = factor(c("a", "b", "a", "c")),
positive = rep(1, 4),
group_variable = paste0("var", c(1, 1, 2, 2)),
n = c(2, 2, 2, 2)
),
"Random_case_2: two groups",
tibble::tibble(
var1 = factor(floor(runif(100) * 3), labels = c("a", "b", "c")),
var2 = factor(floor(3 * runif(100)), labels = c("a", "b", "d")),
outcome = as.logical(floor(runif(100) * 2))
),
tibble::tibble(
group = factor(c("a", "b", "c", "a", "b", "d")),
positive = c(16, 14, 22, 16, 18, 18),
group_variable = rep(c("var1", "var2"), each = 3),
n = c(32, 30, 38, 27, 35, 38)
)
)
purrr::walk2(
test_cases$input,
test_cases$expected,
.f = aux_test_helper
)
purrr::pwalk(
list(
test_cases$input,
test_cases$expected,
other_column = "stuff"
),
.f = aux_test_helper,
test_name = test_cases$test_name
)
aux_convert_factor <- function(ex, converter){
ex <- ex %>%
dplyr::mutate_at(
dplyr::vars(tidyselect::starts_with("var")),
converter
)
return(ex)
}
## Working also with different datatypes instead of factors.
converters <- c(as.character)
for (converter in converters) {
purrr::pwalk(list(
purrr::map(
test_cases$input,
aux_convert_factor,
converter = converter
),
test_cases$expected
),
.f = aux_test_helper
)
}
})
testthat::test_that("target_encode_apply works as expected", {
aux_test_helper <- function(
data,
group_variables,
preparation_map,
holdout_type,
prior_sample_size,
noise_level,
expected,
test_name = ""
){
te <- target_encode_apply(
data = data,
group_variables,
"outcome",
preparation_map,
holdout_type,
prior_sample_size,
noise_level
)
purrr::walk2(
te %>% dplyr::select(tidyselect::starts_with("target_encode")),
expected %>% dplyr::select(tidyselect::starts_with("target_encode")),
expect_equal,
tolerance = 1e-3,
info = test_name
)
}
standard_test_data <- tibble::tribble(
~catvar1, ~catvar2, ~outcome,
"toto", "foo", T,
"tata", "foo", F
)
standard_prep_map <- tibble::tribble(
~group, ~positive, ~group_variable, ~n,
"toto", 5, "catvar1", 40,
"tata", 10, "catvar1", 40,
"foo", 10, "catvar2", 20,
"bar", 5, "catvar2", 40
)
test_cases <- tibble::tribble(
~test_name,
~data,
~group_variables,
~preparation_map,
~holdout_type,
~prior_sample_size,
~noise_level,
~expected,
"Simple_case_1: one group",
standard_test_data,
"catvar1",
standard_prep_map,
"none",
0,
0,
tibble::tibble(target_encode_catvar1 = c(0.125, 0.25)),
"leave_one_out",
standard_test_data,
"catvar1",
standard_prep_map,
"leave_one_out",
0,
0,
tibble::tibble(target_encode_catvar1 = c(0.1025641, 0.2564103)),
"random_noise",
standard_test_data,
"catvar1",
standard_prep_map,
"none",
0,
0.05,
tibble::tibble(target_encode_catvar1 = c(0.1352142845, 0.270788164)),
"bayesian average",
standard_test_data,
"catvar1",
standard_prep_map,
"none",
40,
0,
tibble::tibble(target_encode_catvar1 = c(0.15625, 0.21875)),
"three together",
standard_test_data,
"catvar1",
standard_prep_map,
"leave_one_out",
40,
0.05,
tibble::tibble(target_encode_catvar1 = c(0.1557839047, 0.2423071514)),
"Simple_case_2: two groups",
standard_test_data,
c("catvar1", "catvar2"),
standard_prep_map,
"none",
0,
0,
tibble::tibble(
target_encode_catvar1 = c(0.125, 0.25),
target_encode_catvar2 = c(0.5, 0.5)
),
"random noise two groups",
standard_test_data,
c("catvar1", "catvar2"),
standard_prep_map,
"none",
0,
0.05,
tibble::tibble(
target_encode_catvar1 = c(0.135214284, 0.270788),
target_encode_catvar2 = c(0.504431280, 0.509588)
),
"one person group",
standard_test_data,
"catvar1",
tibble::tribble(
~group, ~positive, ~group_variable, ~n,
"toto", 1, "catvar1", 1,
"tata", 0, "catvar1", 2
),
"leave_one_out",
0,
0,
tibble::tibble(target_encode_catvar1 = c(NA, 0)),
"one person group with bayesian average",
standard_test_data,
"catvar1",
tibble::tribble(
~group, ~positive, ~group_variable, ~n,
"toto", 1, "catvar1", 1,
"tata", 0, "catvar1", 2
),
"leave_one_out",
3,
0,
tibble::tibble(target_encode_catvar1 = c(0.33333333, 0.25))
)
purrr::pwalk(
list(
test_cases$data,
test_cases$group_variables,
test_cases$preparation_map,
test_cases$holdout_type,
test_cases$prior_sample_size,
test_cases$noise_level,
test_cases$expected
),
.f = aux_test_helper
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.