fte: Feature Target Encoding for classification

context("Test functions from data_prepare")

testthat::test_that("target_encode_create works as expected", {
  require(tibble)
  require(purrr)

  aux_test_helper <- function(input_frame, expected, test_name = "", ...){
    te <- target_encode_create(
      input_frame,
      grep(names(input_frame), pattern = "^var", value = TRUE),
      "outcome"
    )
    expect_true(setequal(names(te), names(expected)), info = test_name)
    expect_equal(te$group, expected$group, info = test_name)
    expect_equal(te$group_variable, expected$group_variable, info = test_name)
    expect_equal(te$positive, expected$positive, tolerance = 1e-3,
      info = test_name)
    expect_equal(te$n, expected$n, info = test_name)
  }



  set.seed(12345)
  # in the tests grouping_variables = "var*"
  # and outcome_variable = "outcome"
  test_cases <- tibble::tribble(
    ~test_name,
    ~input,
    ~expected,

    "Simple_case_1: one group",
    tibble::tibble(var = factor(c("a", "b")), outcome = c(T, F)),
    tibble::tibble(
      group = factor(c("a", "b")),
      positive = c(1, 0),
      group_variable = "var",
      n = c(1, 1)
    ),

    "Simple_case_2: one group",
    tibble::tibble(var = factor(rep(c("a", "b"), 2)), outcome = rep(c(T, F), each = 2)), #nolint
    tibble::tibble(
      group = factor(c("a", "b")),
      positive = c(1, 1),
      group_variable = "var",
      n = c(2, 2)
    ),

    "Random_case_1: one group",
    tibble::tibble(var = factor(floor(runif(100) * 3), labels = c("a","b","c")), outcome = as.logical(floor(runif(100) * 2))),  #nolint
    tibble::tibble(
      group = factor(c("a", "b", "c")),
      positive = c(16, 21, 23),
      group_variable = "var",
      n = c(32, 30, 38)
    ),

    "Simple_case_3: two groups",
    tibble::tibble(
      var1 = factor(c("a", "b")),
      var2 = factor(c("a", "c")),
      outcome = c(T, F)
    ),
    tibble::tibble(
      group = factor(c("a", "b", "a", "c")),
      positive = c(1, 0, 1, 0),
      group_variable = paste0("var", c(1, 1, 2, 2)),
      n = c(1, 1, 1, 1)
    ),

    "Simple_case_4: two groups",
    tibble::tibble(
      var1 = factor(rep(c("a", "b"), 2)),
      var2 = factor(rep(c("a", "c"), 2)),
      outcome = rep(c(T, F), each = 2)
    ),
    tibble::tibble(
      group = factor(c("a", "b", "a", "c")),
      positive = rep(1, 4),
      group_variable = paste0("var", c(1, 1, 2, 2)),
      n = c(2, 2, 2, 2)
    ),

    "Random_case_2: two groups",
    tibble::tibble(
      var1 = factor(floor(runif(100) * 3), labels = c("a", "b", "c")),
      var2 = factor(floor(3 * runif(100)), labels = c("a", "b", "d")),
      outcome = as.logical(floor(runif(100) * 2))
    ),
    tibble::tibble(
      group = factor(c("a", "b", "c", "a", "b", "d")),
      positive = c(16, 14, 22, 16, 18, 18),
      group_variable = rep(c("var1", "var2"), each = 3),
      n = c(32, 30, 38, 27, 35, 38)
    )
  )


  purrr::walk2(
    test_cases$input,
    test_cases$expected,
    .f = aux_test_helper
  )

  purrr::pwalk(
    list(
      test_cases$input,
      test_cases$expected,
      other_column = "stuff"
      ),
    .f = aux_test_helper,
    test_name = test_cases$test_name
  )

  aux_convert_factor <- function(ex, converter){
    ex  <- ex %>%
      dplyr::mutate_at(
        dplyr::vars(tidyselect::starts_with("var")),
        converter
      )
    return(ex)
  }

  ## Working also with different datatypes instead of factors.
  converters <- c(as.character)
  for (converter in converters) {
    purrr::pwalk(list(
        purrr::map(
          test_cases$input,
          aux_convert_factor,
          converter = converter
          ),
          test_cases$expected
        ),
      .f = aux_test_helper
    )
  }
})



testthat::test_that("target_encode_apply works as expected", {

  aux_test_helper <- function(
    data,
    group_variables,
    preparation_map,
    holdout_type,
    prior_sample_size,
    noise_level,
    expected,
    test_name = ""
  ){
    te <- target_encode_apply(
      data = data,
      group_variables,
      "outcome",
      preparation_map,
      holdout_type,
      prior_sample_size,
      noise_level
    )
    purrr::walk2(
      te %>% dplyr::select(tidyselect::starts_with("target_encode")),
      expected %>% dplyr::select(tidyselect::starts_with("target_encode")),
      expect_equal,
      tolerance = 1e-3,
      info = test_name
    )
  }

  standard_test_data <- tibble::tribble(
    ~catvar1, ~catvar2, ~outcome,
     "toto", "foo", T,
     "tata", "foo", F
  )
  standard_prep_map  <- tibble::tribble(
    ~group, ~positive, ~group_variable, ~n,
    "toto", 5, "catvar1", 40,
    "tata", 10, "catvar1", 40,
    "foo", 10, "catvar2", 20,
    "bar", 5, "catvar2", 40
  )

  test_cases <- tibble::tribble(
    ~test_name,
    ~data,
    ~group_variables,
    ~preparation_map,
    ~holdout_type,
    ~prior_sample_size,
    ~noise_level,
    ~expected,

    "Simple_case_1: one group",
    standard_test_data,
    "catvar1",
    standard_prep_map,
    "none",
    0,
    0,
    tibble::tibble(target_encode_catvar1 = c(0.125, 0.25)),

    "leave_one_out",
    standard_test_data,
    "catvar1",
    standard_prep_map,
    "leave_one_out",
    0,
    0,
    tibble::tibble(target_encode_catvar1 = c(0.1025641, 0.2564103)),

    "random_noise",
    standard_test_data,
    "catvar1",
    standard_prep_map,
    "none",
    0,
    0.05,
    tibble::tibble(target_encode_catvar1 = c(0.1352142845, 0.270788164)),

    "bayesian average",
    standard_test_data,
    "catvar1",
    standard_prep_map,
    "none",
    40,
    0,
    tibble::tibble(target_encode_catvar1 = c(0.15625, 0.21875)),

    "three together",
    standard_test_data,
    "catvar1",
    standard_prep_map,
    "leave_one_out",
    40,
    0.05,
    tibble::tibble(target_encode_catvar1 = c(0.1557839047, 0.2423071514)),

    "Simple_case_2: two groups",
    standard_test_data,
    c("catvar1", "catvar2"),
    standard_prep_map,
    "none",
    0,
    0,
    tibble::tibble(
      target_encode_catvar1 = c(0.125, 0.25),
      target_encode_catvar2 = c(0.5, 0.5)
    ),

    "random noise two groups",
    standard_test_data,
    c("catvar1", "catvar2"),
    standard_prep_map,
    "none",
    0,
    0.05,
    tibble::tibble(
      target_encode_catvar1 = c(0.135214284, 0.270788),
      target_encode_catvar2 = c(0.504431280, 0.509588)
    ),

    "one person group",
    standard_test_data,
    "catvar1",
    tibble::tribble(
      ~group, ~positive, ~group_variable, ~n,
      "toto", 1, "catvar1", 1,
      "tata", 0, "catvar1", 2
      ),
    "leave_one_out",
    0,
    0,
    tibble::tibble(target_encode_catvar1 = c(NA, 0)),

    "one person group with bayesian average",
    standard_test_data,
    "catvar1",
    tibble::tribble(
      ~group, ~positive, ~group_variable, ~n,
      "toto", 1, "catvar1", 1,
      "tata", 0, "catvar1", 2
      ),
    "leave_one_out",
    3,
    0,
    tibble::tibble(target_encode_catvar1 = c(0.33333333, 0.25))
    )

  purrr::pwalk(
    list(
      test_cases$data,
      test_cases$group_variables,
      test_cases$preparation_map,
      test_cases$holdout_type,
      test_cases$prior_sample_size,
      test_cases$noise_level,
      test_cases$expected
      ),
    .f = aux_test_helper
  )
})

signaux-faibles/fte documentation built on Jan. 29, 2020, 8:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

signaux-faibles/fte
Feature Target Encoding for classification

tests/testthat/test-fte.R
In signaux-faibles/fte: Feature Target Encoding for classification

R Package Documentation

Browse R Packages

We want your feedback!

signaux-faibles/fte Feature Target Encoding for classification

tests/testthat/test-fte.R In signaux-faibles/fte: Feature Target Encoding for classification

R Package Documentation

Browse R Packages

We want your feedback!

signaux-faibles/fte
Feature Target Encoding for classification

tests/testthat/test-fte.R
In signaux-faibles/fte: Feature Target Encoding for classification