# tests/testthat/test-preprocessing.R

# Checks the number of columns returned by subset_data() for a formula that
# lists its predictors explicitly and for one that uses `.` on the right-hand
# side.
test_that("subset_data works properly", {

  # Two-row fixture: two factor outcomes (y, y2), two numeric columns (x1, x2)
  # and two factor columns (x3, x4). x4 and y2 declare a level (1) that never
  # appears in the data.
  toy_data <- data.frame(
    y = as.factor(c(1, 0)),
    y2 = factor(c(0, 2), levels = c(0, 1, 2)),
    x1 = c(2, 3),
    x2 = c(4, 6),
    x3 = as.factor(c(0, 1)),
    x4 = factor(c(0, 2), levels = c(0, 1, 2))
  )

  explicit_formula <- as.formula("y ~ x1 + x2 + x3 + x4")
  dot_formula <- as.formula("y ~ .")

  selected <- subset_data(formula = explicit_formula, data = toy_data)
  everything <- subset_data(formula = dot_formula, data = toy_data)

  # The explicit formula names five variables (y, x1, x2, x3, x4).
  expect_equal(ncol(selected), 5)

  # With `y ~ .` all six columns of the fixture are expected back.
  expect_equal(ncol(everything), 6)

})

# Checks standarize_predictors() for norm_num_vars = "all" and for a single
# named column, by prepping and baking the resulting recipe on a two-row
# fixture and comparing the numeric columns against hand-computed values.
test_that("standarize works correctly", {

  # Two-row fixture shared by both scenarios below.
  toy_data <- data.frame(
    y = as.factor(c(1, 0)),
    y2 = factor(c(0, 2), levels = c(0, 1, 2)),
    x1 = c(2, 3),
    x2 = c(4, 6),
    x3 = as.factor(c(0, 1)),
    x4 = factor(c(0, 2), levels = c(0, 1, 2))
  )

  model_formula <- as.formula("y ~ x1 + x2 + x3 + x4")

  base_recipe <- recipes::recipe(formula = model_formula, data = toy_data)

  # Train a recipe on the fixture and apply it back to the same rows.
  prep_and_bake <- function(recipe_spec) {
    trained <- recipes::prep(recipe_spec, training = toy_data)
    recipes::bake(trained, new_data = toy_data)
  }

  # Hand-computed z-scores: x1 = (2, 3) has mean 2.5 and sd sqrt(0.5);
  # x2 = (4, 6) has mean 5 and sd sqrt(2). Both come out at +/- 0.7071.
  x1_scaled <- c(-0.5 * sqrt(2), 0.5 * sqrt(2))
  x2_scaled <- c(-1 / sqrt(2), 1 / sqrt(2))

  # Scenario 1: norm_num_vars = "all" -- both x1 and x2 are compared against
  # their z-scores.
  baked_all <- prep_and_bake(
    standarize_predictors(rec = base_recipe, norm_num_vars = "all")
  )

  expect_equal(baked_all$x1, x1_scaled, tolerance = 1e-3)

  expect_equal(baked_all$x2, x2_scaled, tolerance = 1e-3)

  # Scenario 2: only x1 requested -- x1 is compared against its z-scores and
  # x2 against its original values.
  baked_x1 <- prep_and_bake(
    standarize_predictors(rec = base_recipe, norm_num_vars = c("x1"))
  )

  expect_equal(baked_x1$x1, x1_scaled, tolerance = 1e-3)

  expect_equal(baked_x1$x2, toy_data$x2, tolerance = 1e-3)

})

# Checks one_hot_predictors() for encode_cat_vars = "all" and for a single
# named column, by prepping and baking the resulting recipe on a two-row
# fixture and inspecting the indicator columns it produces.
test_that("one_hot_predictors works correctly", {

  # Two-row fixture. x3 has levels 0/1; x4 declares levels 0/1/2 but level 1
  # never occurs in the data.
  toy_data <- data.frame(
    y = as.factor(c(1, 0)),
    y2 = factor(c(0, 2), levels = c(0, 1, 2)),
    x1 = c(2, 3),
    x2 = c(4, 6),
    x3 = as.factor(c(0, 1)),
    x4 = factor(c(0, 2), levels = c(0, 1, 2))
  )

  model_formula <- as.formula("y ~ x1 + x2 + x3 + x4")

  base_recipe <- recipes::recipe(formula = model_formula, data = toy_data)

  # Train a recipe on the fixture and apply it back to the same rows.
  prep_and_bake <- function(recipe_spec) {
    trained <- recipes::prep(recipe_spec, training = toy_data)
    recipes::bake(trained, new_data = toy_data)
  }

  # Scenario 1: encode_cat_vars = "all" -- indicator columns are expected for
  # every level of both x3 and x4.
  encoded_all <- prep_and_bake(
    one_hot_predictors(rec = base_recipe, encode_cat_vars = "all")
  )

  expect_equal(encoded_all$x3_X0, c(1, 0))

  expect_equal(encoded_all$x3_X1, c(0, 1))

  expect_equal(encoded_all$x4_X0, c(1, 0))

  # Level 1 of x4 is declared but unobserved, so its indicator is all zeros.
  expect_equal(encoded_all$x4_X1, c(0, 0))

  expect_equal(encoded_all$x4_X2, c(0, 1))

  # Scenario 2: only x4 requested -- x3 is compared against the original
  # factor, and x4 against the same indicator columns as above.
  encoded_x4 <- prep_and_bake(
    one_hot_predictors(rec = base_recipe, encode_cat_vars = c("x4"))
  )

  expect_equal(encoded_x4$x3, toy_data$x3)

  expect_equal(encoded_x4$x4_X0, c(1, 0))

  expect_equal(encoded_x4$x4_X1, c(0, 0))

  expect_equal(encoded_x4$x4_X2, c(0, 1))

})

# Try the MLwrap package in your browser
#
# Any scripts or data that you put into this service are public.
#
# MLwrap documentation built on Aug. 8, 2025, 6:43 p.m.