tests/testthat/test-encodings.R

set.seed(1071)
design_mat <- cbind( data.frame(matrix(rnorm(5*100),ncol = 5)),
                     sample(sample(letters, 10), 100, replace = TRUE))
colnames(design_mat)[6] <- "factor_var"


test_that("Mean encoding works", {
  new_mat <- encode_mean(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 10)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 10)
  
  new_mat <- encode_mean(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 11)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 10)
})

test_that("Low rank encoding works", {
  new_mat <- encode_lowrank(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 10)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 10)
  
  new_mat <- encode_lowrank(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 11)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 10)
})

test_that("Median encoding works", {
  new_mat <- encode_median(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 10)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 10)
  
  new_mat <- encode_median(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 11)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 10)
})

test_that("Mnl encoding works", {
  new_mat <- suppressWarnings(encode_mnl(design_mat, "factor_var"))
  
  expect_equal(ncol(new_mat), 11)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 11)
  
  new_mat <- suppressWarnings( encode_mnl(design_mat, "factor_var",
                                          keep_factor = TRUE))
  
  expect_equal(ncol(new_mat), 12)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 11)
})


test_that("SPCA encoding works", {
  new_mat <- encode_SPCA(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 10)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 10)
  
  new_mat <- encode_SPCA(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 11)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 10)
})

test_that("Deviation encoding works", {
  new_mat <- encode_deviation(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 14)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 14)
  
  new_mat <- encode_deviation(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 15)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
})


test_that("Difference encoding works", {
  new_mat <- encode_difference(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 14)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
  
  new_mat <- encode_difference(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 15)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
})


test_that("Helmert encoding works", {
  new_mat <- encode_helmert(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 14)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 14)
  
  new_mat <- encode_helmert(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 15)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
})

test_that("Simple effect encoding works", {
  new_mat <- encode_simple_effect(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 14)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 14)
  
  new_mat <- encode_simple_effect(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 15)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
})

test_that("Repeated effect encoding works", {
  new_mat <- encode_repeated_effect(design_mat, "factor_var")
  
  expect_equal(ncol(new_mat), 14)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(apply(new_mat,2, is.numeric)), 14)
  
  new_mat <- encode_repeated_effect(design_mat, "factor_var", keep_factor = TRUE)
  
  expect_equal(ncol(new_mat), 15)
  expect_equal(nrow(new_mat), 100)
  expect_equal(sum(unlist(lapply(new_mat, is.numeric))), 14)
})

Try the categoryEncodings package in your browser

Any scripts or data that you put into this service are public.

categoryEncodings documentation built on March 2, 2020, 5:07 p.m.