tests/testthat/test-roles.R

library(testthat)
library(recipes)

skip_if_not_installed("modeldata")
data(biomass, package = "modeldata")

test_that("default method", {
  rec <- recipe(x = biomass)
  exp_res <- tibble(
    variable = colnames(biomass),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(2, 6)
    ),
    role = NA_character_,
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("changing roles", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "some other role")
  exp_res <- tibble(
    variable = colnames(biomass),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(2, 6)
    ),
    role = rep(c("some other role", NA), c(1, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("change existing role", {
  rec <- recipe(x = biomass)

  expect_snapshot(error = TRUE,
    add_role(rec, sample, new_role = "some other role")
  )

  rec <- update_role(rec, sample, new_role = "some other role")
  rec <- update_role(rec, sample, new_role = "other other role")

  exp_res <- tibble(
    variable = colnames(biomass),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(2, 6)
    ),
    role = rep(c("other other role", NA), c(1, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("change only 1 role of variable with multiple roles", {
  rec <- recipe(x = biomass)
  rec <-
    rec %>%
    update_role(sample, new_role = "role 1") %>%
    add_role(sample, new_role = "role 2")

  orig_roles <- rec

  # changes only 1
  rec <- update_role(rec, sample, new_role = "role 3", old_role = "role 1")

  exp_res <- summary(orig_roles)
  exp_res$role[exp_res$role == "role 1"] <- "role 3"
  exp_res$required_to_bake <- TRUE
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("change every role of 2 variables", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, dataset, new_role = "role 1")
  orig_roles <- summary(rec)
  rec <- update_role(rec, sample, dataset, new_role = "role 2")

  exp_res <- orig_roles
  exp_res$role[exp_res$role == "role 1"] <- "role 2"
  exp_res$required_to_bake <- TRUE

  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("update only NA role", {
  rec <- recipe(x = biomass)
  orig_rec <- summary(rec)
  rec <- update_role(rec, sample, dataset, new_role = "some other role")

  exp_res <- orig_rec %>% arrange(variable)
  exp_res$role[exp_res$variable %in% c("sample", "dataset")] <- "some other role"
  exp_res$required_to_bake <- TRUE

  expect_equal(summary(rec, TRUE) %>% arrange(variable), exp_res)
})

test_that("new role for existing NA role", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "some other role")

  exp_res <- tibble(
    variable = colnames(biomass),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(2, 6)
    ),
    role = rep(c("some other role", NA), c(1, length(colnames(biomass)) - 1)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("new role with specified type", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "blah")
  rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type")

  exp_res <- tibble(
    variable = c("sample", colnames(biomass)),
    type = c(
      list(c("string", "unordered", "nominal")),
      list("new type"),
      list(c("string", "unordered", "nominal")),
      rep(list(c("double", "numeric")), 6)
    ),
    role = rep(c("blah", "some other role", NA), c(1, 1, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("add new role when two already exist with different types", {

  # type of the first existing role found is used
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "blah")
  rec <- add_role(rec, sample, new_role = "some other role", new_type = "new type")
  rec <- add_role(rec, sample, new_role = "another role")

  exp_res <- tibble(
    variable = c("sample", "sample", colnames(biomass)),
    type = c(
      list(c("string", "unordered", "nominal")),
      list("new type"),
      list(c("string", "unordered", "nominal")),
      list(c("string", "unordered", "nominal")),
      rep(list(c("double", "numeric")), 6)
    ),
    role = c("blah", "some other role", "another role", rep(NA, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("existing role is skipped", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "blah")
  rec <- add_role(rec, sample, new_role = "some other role")

  # skip me
  expect_snapshot(
    rec <- add_role(rec, sample, new_role = "some other role")
  )

  # also tests the order, new roles come directly after old ones
  exp_res <- tibble(
    variable = c("sample", colnames(biomass)),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(3, 6)
    ),
    role = rep(c("blah", "some other role", NA), c(1, 1, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("existing role is skipped, but new one is added", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "blah")
  rec <- add_role(rec, sample, new_role = "some other role")

  # partially skip me
  expect_snapshot(
    rec <- add_role(rec, sample, dataset, new_role = "some other role")
  )

  exp_res <- tibble(
    variable = c(
      rep(c("sample", "dataset"), c(2, 2)),
      setdiff(colnames(biomass), c("sample", "dataset"))
    ),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(4, 6)
    ),
    role = c("blah", "some other role", NA, "some other role", rep(NA, 6)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("cannot add roles if the current one is `NA`", {
  rec <- recipe(x = biomass)
  expect_snapshot(error = TRUE,
    add_role(rec, sample, sulfur)
  )
})

test_that("`update_role()` cannot be ambiguous", {
  rec <- recipe(HHV ~ ., data = biomass)
  rec <- add_role(rec, sample, new_role = "x")

  expect_snapshot(error = TRUE,
    update_role(rec, sample, new_role = "y")
  )
})

test_that("`new_role` cannot be `NA_character_`", {
  rec <- recipe(x = biomass)

  expect_snapshot(error = TRUE,
    add_role(rec, sample, new_role = NA_character_)
  )

  expect_snapshot(error = TRUE,
    update_role(rec, sample, new_role = NA_character_)
  )
})

test_that("remove roles", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "role1")
  expect_snapshot(error = TRUE,
    rec <- remove_role(rec, sample, old_role = NA)
  )
  expect_snapshot(error = TRUE,
    rec <- remove_role(rec, sample)
  )

  expect_snapshot(
    remove_role(rec, sample, old_role = "non-existant")
  )

  rec <- remove_role(rec, sample, old_role = "role1")

  exp_res <- tibble(
    variable = colnames(biomass),
    type = rep(
      list(c("string", "unordered", "nominal"), c("double", "numeric")),
      c(2, 6)
    ),
    role = NA_character_,
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("New type for an existing role can be added", {
  rec <- recipe(x = biomass)
  rec <- update_role(rec, sample, new_role = "role1")
  rec <- add_role(rec, sample, new_role = "role1", new_type = "text")

  exp_res <- tibble(
    variable = c("sample", colnames(biomass)),
    type = c(
      list(c("string", "unordered", "nominal")),
      list("text"),
      list(c("string", "unordered", "nominal")),
      rep(list(c("double", "numeric")), 6)
    ),
    role = c("role1", "role1", rep(NA, 7)),
    source = "original",
    required_to_bake = TRUE
  )
  expect_equal(summary(rec, TRUE), exp_res)
})

test_that("can use tidyselect ops in role selection", {
  rec <- recipe(mpg ~ ., mtcars) %>%
    step_center(all_predictors())

  # Swap "predictor" for "foo"
  rec <- update_role(
    rec,
    starts_with("c") & !carb,
    new_role = "foo",
    old_role = "predictor"
  )

  expect_identical(
    rec$term_info$role[rec$term_info$variable == "cyl"],
    "foo"
  )

  # Add "predictor" back
  rec <- add_role(
    rec,
    starts_with("c") & !carb,
    new_role = "predictor"
  )

  expect_identical(
    rec$term_info$role[rec$term_info$variable == "cyl"],
    c("foo", "predictor")
  )

  # Remove "foo"
  rec <- remove_role(
    rec,
    starts_with("c") & !carb,
    old_role = "foo"
  )

  expect_identical(
    rec$term_info$role[rec$term_info$variable == "cyl"],
    "predictor"
  )
})


test_that("empty dots and zero column selections return input with a warning", {
  rec <- recipe(x = biomass)

  expect_snapshot(
    rec2 <- add_role(rec)
  )
  expect_identical(rec2, rec)

  expect_snapshot(
    rec2 <- update_role(rec)
  )
  expect_identical(rec2, rec)

  expect_snapshot(
    rec2 <- remove_role(rec, old_role = "foo")
  )
  expect_identical(rec2, rec)

  expect_snapshot(
    rec2 <- add_role(rec, starts_with("foobar"))
  )
  expect_identical(rec2, rec)

  expect_snapshot(
    rec2 <- update_role(rec, starts_with("foobar"))
  )
  expect_identical(rec2, rec)

  expect_snapshot(
    rec2 <- remove_role(rec, starts_with("foobar"), old_role = "foo")
  )
  expect_identical(rec2, rec)
})

test_that("bad args", {
  expect_snapshot(error = TRUE,
    recipe(x = biomass) %>%
      add_role(carbon, new_role = letters[1:2])
  )

  expect_snapshot(error = TRUE,
    recipe(x = biomass) %>%
      add_role(carbon, new_role = "a", new_type = letters[1:2])
  )

  expect_snapshot(error = TRUE,
    recipe(x = biomass) %>%
      update_role(carbon, new_role = c("a", "b"))
  )

  expect_snapshot(error = TRUE,
    recipe(x = biomass) %>%
      update_role(carbon, old_role = c("a", "b"))
  )
})


# ------------------------------------------------------------------------------
# Multiples roles + Selection testing

test_that("adding multiple roles/types does not duplicate prepped columns", {
  rec <- recipe(HHV ~ ., data = biomass)

  # second role
  expect_equal(
    rec %>%
      add_role(carbon, new_role = "carb") %>%
      prep(training = biomass) %>%
      bake(new_data = NULL) %>%
      ncol(),
    8
  )

  # second type
  expect_equal(
    rec %>%
      add_role(carbon, new_type = "carb") %>%
      prep(training = biomass) %>%
      bake(new_data = NULL) %>%
      ncol(),
    8
  )
})

test_that("type selectors can be combined", {
  rec <- recipe(HHV ~ ., data = biomass)

  prepped <- rec %>%
    add_role(carbon, new_role = "predictor", new_type = "carb") %>%
    step_center(all_numeric(), -has_type("carb")) %>%
    prep(training = biomass)

  expect_equal(
    names(prepped$steps[[1]]$means),
    c("hydrogen", "oxygen", "nitrogen", "sulfur", "HHV")
  )
})

test_that("step_rm() removes ALL mention of variables with that role", {
  rec <- recipe(HHV ~ ., data = biomass)

  rec_prepped <- rec %>%
    add_role(carbon, new_role = "predictor", new_type = "carb") %>%
    step_rm(has_type("carb")) %>%
    prep(training = biomass) %>%
    summary()

  expect_false("carbon" %in% rec_prepped$variable)
})

# ------------------------------------------------------------------------------
# Tests related to #296
# https://github.com/tidymodels/recipes/issues/296

test_that("Existing `NA` roles are not modified in prep() when new columns are generated", {
  rec_dummy <- recipe(x = iris) %>%
    update_role(Sepal.Length, new_role = "outcome") %>%
    update_role(Species, new_role = "predictor") %>%
    step_dummy(Species)

  prepped_rec_dummy <- prep(rec_dummy, iris)

  orig <- summary(rec_dummy)
  new <- summary(prepped_rec_dummy)

  # These should be identical except for the modified Species term
  expect_equal(
    filter(orig, !grepl("Species", variable)),
    filter(new, !grepl("Species", variable))
  )

  expect_equal(
    filter(new, grepl("Species", variable)),
    tibble(
      variable = c("Species_versicolor", "Species_virginica"),
      type = rep(list(c("double", "numeric")), 2),
      role = rep("predictor", times = 2),
      source = rep("derived", times = 2)
    )
  )

  # Juicing with all predictors should only give these two columns
  expect_equal(
    colnames(bake(prepped_rec_dummy, new_data = NULL, all_predictors())),
    c("Species_versicolor", "Species_virginica")
  )
})


test_that("Existing `NA` roles are not modified in prep() when multiple new columns are generated", {
  rec <- recipe(x = iris) %>%
    update_role(Sepal.Length, new_role = "outcome") %>%
    update_role(Sepal.Width, new_role = "predictor") %>%
    update_role(Species, new_role = "predictor") %>%
    step_dummy(Species) %>%
    step_bs(Sepal.Width)

  prepped_rec <- prep(rec, iris)

  orig <- summary(rec)
  new <- summary(prepped_rec)

  # These should be identical except for the
  # modified Species and Sepal.Width terms
  expect_equal(
    filter(orig, !grepl("Species", variable), !grepl("Sepal.Width", variable)),
    filter(new, !grepl("Species", variable), !grepl("Sepal.Width", variable))
  )
})

test_that("Roles are correcly selected in bake", {
  x <- tibble::tibble(
    a = runif(10),
    b = runif(10),
    c = runif(10)
  )

  rec <- recipe(c ~ ., x) %>%
    update_role(b, new_role = "id") %>%
    add_role(a, new_role = "id") %>%
    prep()

  o <- recipes::bake(rec, x, recipes::has_role("id"))
  expect_equal(names(o), c("a", "b"))
})

test_that("role functions handle case weights correctly", {
  expect_snapshot(error = TRUE,
    recipe(mpg ~ ., data = mtcars) %>%
      update_role("disp", new_role = "case_weights")
  )

  expect_snapshot(error = TRUE,
    recipe(mpg ~ ., data = mtcars) %>%
      add_role("disp", new_role = "case_weights")
  )

  mtcars1 <- mtcars %>%
    mutate(wt = importance_weights(wt))

  expect_snapshot(error = TRUE,
    recipe(mpg ~ ., data = mtcars1) %>%
      remove_role(wt, old_role = "case_weights")
  )

  expect_snapshot(error = TRUE,
    recipe(mpg ~ ., data = mtcars1) %>%
      update_role(wt)
  )

  expect_snapshot(error = TRUE,
    recipe(mpg ~ ., data = mtcars1) %>%
      add_role(wt)
  )
})


test_that("role information from summary()", {

  # ----------------------------------------------------------------------------
  # non-formula method

  rec_roles <- recipe(mtcars) %>%
    update_role(-mpg, new_role = "predictor") %>%
    update_role(mpg, new_role = "outcome") %>%
    update_role(gear, new_role = "id") %>%
    update_role(carb, new_role = "important") %>%
    prep()

  req_roles <-
    rec_roles %>%
    update_role_requirements("important", bake = FALSE) %>%
    prep()

  expect_snapshot(summary(rec_roles, original = TRUE))
  expect_snapshot(summary(req_roles, original = TRUE))

  # ----------------------------------------------------------------------------
  # missing role values

  na_rec <-
    mtcars %>%
    recipe() %>%
    update_role(mpg, new_role = "outcome") %>%
    update_role(disp, wt, new_role = "predictor") %>%
    update_role(carb, new_role = "other") %>%
    prep()

  na_req_rec <-
    na_rec %>%
    update_role_requirements("NA", bake = FALSE) %>%
    prep()

  expect_snapshot(summary(na_rec, original = TRUE))
  expect_snapshot(summary(na_req_rec, original = TRUE))

})

Try the recipes package in your browser

Any scripts or data that you put into this service are public.

recipes documentation built on Aug. 26, 2023, 1:08 a.m.