tests/testthat/test-column_order.R

library(testthat)

skip_if_not_installed("modeldata")
data(biomass, package = "modeldata")

biomass_tr <- biomass[biomass$dataset == "Training", ]
biomass_te <- biomass[biomass$dataset == "Testing", ]

test_that("basic steps", {
  rec_1 <-
    recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
      data = biomass_tr
    ) %>%
    step_mutate(hydrogen = hydrogen / 2) %>%
    step_ratio(hydrogen, oxygen, nitrogen, denom = vars(carbon)) %>%
    step_corr(all_predictors(), threshold = .6) %>%
    prep()

  cols_1 <- c("hydrogen", "nitrogen", "sulfur", "HHV", "oxygen_o_carbon")

  expect_equal(
    names(bake(rec_1, new_data = NULL)),
    cols_1
  )
  expect_equal(
    names(bake(rec_1, biomass_te)),
    cols_1
  )

  expect_equal(
    names(bake(rec_1, new_data = NULL, all_predictors())),
    cols_1[cols_1 != "HHV"]
  )
  expect_equal(
    names(bake(rec_1, biomass_te, all_predictors())),
    cols_1[cols_1 != "HHV"]
  )
})


test_that("skipped steps", {
  rec_2 <-
    recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
      data = biomass_tr
    ) %>%
    step_mutate(hydrogen = hydrogen / 2) %>%
    step_ratio(hydrogen, oxygen, nitrogen, denom = vars(carbon)) %>%
    step_corr(all_predictors(), threshold = .6, skip = TRUE) %>%
    prep()

  cols_1 <- c(
    "hydrogen",
    "nitrogen",
    "sulfur",
    "HHV",
    "oxygen_o_carbon"
  )

  # Ordering relative to the original specification of the recipe is
  # preserved (i.e. carbon was the first column specified, and since we
  # skipped the step that would drop it, it appears first in the result)
  cols_2 <- c(
    "carbon",
    "hydrogen",
    "oxygen",
    "nitrogen",
    "sulfur",
    "HHV",
    "hydrogen_o_carbon",
    "oxygen_o_carbon",
    "nitrogen_o_carbon"
  )

  expect_equal(
    names(bake(rec_2, new_data = NULL)),
    cols_1
  )
  expect_equal(
    names(bake(rec_2, biomass_te)),
    cols_2
  )

  expect_equal(
    names(bake(rec_2, new_data = NULL, all_predictors())),
    cols_1[cols_1 != "HHV"]
  )
  expect_equal(
    names(bake(rec_2, biomass_te, all_predictors())),
    cols_2[cols_2 != "HHV"]
  )
})


test_that("remove and add a column", {
  rec_3 <-
    recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
      data = biomass_tr
    ) %>%
    step_rm(HHV) %>%
    step_ratio(hydrogen, oxygen, nitrogen, denom = vars(carbon)) %>%
    step_corr(all_predictors(), threshold = .6) %>%
    step_mutate(HHV = 17) %>%
    prep()

  cols_3 <- c("hydrogen", "nitrogen", "sulfur", "oxygen_o_carbon", "HHV")

  expect_equal(
    names(bake(rec_3, new_data = NULL)),
    cols_3
  )
  expect_equal(
    names(bake(rec_3, biomass_te)),
    cols_3
  )

  expect_equal(
    names(bake(rec_3, new_data = NULL, all_predictors())),
    cols_3[cols_3 != "HHV"]
  )
  expect_equal(
    names(bake(rec_3, biomass_te, all_predictors())),
    cols_3[cols_3 != "HHV"]
  )
})

test_that("extra roles", {
  rec_4 <-
    recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
      data = biomass_tr
    ) %>%
    add_role(nitrogen, new_role = "drummer") %>%
    step_rm(HHV) %>%
    step_ratio(hydrogen, oxygen, nitrogen, denom = vars(carbon)) %>%
    step_corr(all_predictors(), threshold = .6) %>%
    step_mutate(HHV = 17) %>%
    prep()

  cols_3 <- c("hydrogen", "nitrogen", "sulfur", "oxygen_o_carbon", "HHV")

  expect_equal(
    names(bake(rec_4, new_data = NULL)),
    cols_3
  )
  expect_equal(
    names(bake(rec_4, biomass_te)),
    cols_3
  )

  expect_equal(
    names(bake(rec_4, new_data = NULL, all_predictors())),
    cols_3[cols_3 != "HHV"]
  )
  expect_equal(
    names(bake(rec_4, biomass_te, all_predictors())),
    cols_3[cols_3 != "HHV"]
  )
})

Try the recipes package in your browser

Any scripts or data that you put into this service are public.

recipes documentation built on Aug. 26, 2023, 1:08 a.m.