# tests/testthat/test-term_info.R

library(testthat)
library(recipes)

# results based on second example at
# https://github.com/tidymodels/textrecipes/issues/17#issue-382503618
# `.old` term info for the tokenize case: ten essay columns, each listed
# twice (once per role), all typed "nominal".
tokenize_old <-
  tibble::tibble(
    variable = rep(paste0("essay", 0:9), each = 2),
    type = "nominal",
    role = rep(c("predictor", "textual"), times = 10),
    source = "original"
  )

# `.new` term info for the tokenize case: one row per essay column, all
# typed "list".
tokenize_new <-
  tibble::tibble(
    variable = paste0("essay", 0:9),
    type = "list"
  )

# Expected merge result for the tokenize case: the rows of the `.old` table
# with the type switched to "list".
tokenize_res <-
  tibble::tibble(
    variable = rep(paste0("essay", 0:9), each = 2),
    type = "list",
    role = rep(c("predictor", "textual"), times = 10),
    source = "original"
  )

# `.old` term info for the untokenize case: ten essay columns, each listed
# twice (once per role), all typed "nominal".
# NOTE(review): these values are identical to `tokenize_old`; confirm that is
# intended.
untokenize_old <-
  tibble::tibble(
    variable = rep(paste0("essay", 0:9), each = 2),
    type = "nominal",
    role = rep(c("predictor", "textual"), times = 10),
    source = "original"
  )

# `.new` term info for the untokenize case: one row per essay column, all
# typed "list".
untokenize_new <-
  tibble::tibble(
    variable = paste0("essay", 0:9),
    type = "list"
  )

# Expected merge result for the untokenize case: the rows of the `.old` table
# with the type switched to "list".
untokenize_res <-
  tibble::tibble(
    variable = rep(paste0("essay", 0:9), each = 2),
    type = "list",
    role = rep(c("predictor", "textual"), times = 10),
    source = "original"
  )

test_that("issue #", {
  # Merge each old/new pair first, then compare against the stored expected
  # table (fixtures follow the textrecipes issue linked at the top of the
  # file).
  tokenize_merged <- recipes:::merge_term_info(
    .new = tokenize_new,
    .old = tokenize_old
  )
  expect_equal(tokenize_merged, tokenize_res)

  untokenize_merged <- recipes:::merge_term_info(
    .new = untokenize_new,
    .old = untokenize_old
  )
  expect_equal(untokenize_merged, untokenize_res)
})


# tests based on the results of this code before the update:

# Load `credit_data` from the modeldata package, skipping when that package
# is not installed.
# NOTE(review): `credit_data` is only referenced by the commented-out recipe
# below; none of the fixtures or expectations visible in this file read it.
# Confirm whether this top-level skip also suppresses the later tests when
# modeldata is missing, and whether the load is still needed.
skip_if_not_installed("modeldata")
data(credit_data, package = "modeldata")
#
# recipe(Status ~ Seniority + Job + Price + Age, data = credit_data) %>%
#   step_dummy(Job) %>%
#   step_poly(Price) %>%
#   step_rm(Age) %>%
#   prep(credit_data)

# `.old` term info for the dummy-variable case: the five original columns,
# with `Job` still present as a nominal predictor.
dummy_old <-
  tibble::tibble(
    variable = c("Seniority", "Job", "Price", "Age", "Status"),
    type = c("numeric", "nominal", "numeric", "numeric", "nominal"),
    role = c("predictor", "predictor", "predictor", "predictor", "outcome"),
    source = "original"
  )
# `.new` term info for the dummy-variable case: `Job` is gone and three
# numeric `Job_*` columns are appended.
dummy_new <-
  tibble::tibble(
    variable = c(
      "Seniority", "Price", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime"
    ),
    type = c(
      "numeric", "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric"
    )
  )

# Expected merge result for the dummy-variable case: surviving columns keep
# their role and source, while the new `Job_*` rows have NA for both.
dummy_res <-
  tibble::tibble(
    variable = c(
      "Seniority", "Price", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime"
    ),
    type = c(
      "numeric", "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric"
    ),
    role = c("predictor", "predictor", "predictor", "outcome", NA, NA, NA),
    source = c("original", "original", "original", "original", NA, NA, NA)
  )

# recipes:::merge_term_info(.new = dummy_new, .old = dummy_old)

# `.old` term info for the polynomial case: four original columns plus the
# three derived `Job_*` predictors.
poly_old <-
  tibble::tibble(
    variable = c(
      "Seniority", "Price", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime"
    ),
    type = c(
      "numeric", "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric"
    ),
    role = c(
      "predictor", "predictor", "predictor", "outcome",
      "predictor", "predictor", "predictor"
    ),
    source = c(
      "original", "original", "original", "original",
      "derived", "derived", "derived"
    )
  )

# `.new` term info for the polynomial case: `Price` is gone and two numeric
# `Price_poly_*` columns are appended.
poly_new <-
  tibble::tibble(
    variable = c(
      "Seniority", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime",
      "Price_poly_1", "Price_poly_2"
    ),
    type = c(
      "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric",
      "numeric", "numeric"
    )
  )

# Expected merge result for the polynomial case: surviving columns keep their
# role and source, while the new `Price_poly_*` rows have NA for both.
poly_res <-
  tibble::tibble(
    variable = c(
      "Seniority", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime",
      "Price_poly_1", "Price_poly_2"
    ),
    type = c(
      "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric",
      "numeric", "numeric"
    ),
    role = c(
      "predictor", "predictor", "outcome",
      "predictor", "predictor", "predictor",
      NA, NA
    ),
    source = c(
      "original", "original", "original",
      "derived", "derived", "derived",
      NA, NA
    )
  )

# `.old` term info for the column-removal case: three original columns plus
# five derived predictors, with `Age` still present.
rm_old <-
  tibble::tibble(
    variable = c(
      "Seniority", "Age", "Status",
      "Job_freelance", "Job_others", "Job_partime",
      "Price_poly_1", "Price_poly_2"
    ),
    type = c(
      "numeric", "numeric", "nominal",
      "numeric", "numeric", "numeric",
      "numeric", "numeric"
    ),
    role = c(
      "predictor", "predictor", "outcome",
      "predictor", "predictor", "predictor",
      "predictor", "predictor"
    ),
    source = c(
      "original", "original", "original",
      "derived", "derived", "derived",
      "derived", "derived"
    )
  )

# `.new` term info for the column-removal case: the same columns as the
# `.old` table minus `Age`.
rm_new <-
  tibble::tibble(
    variable = c(
      "Seniority", "Status",
      "Job_freelance", "Job_others", "Job_partime",
      "Price_poly_1", "Price_poly_2"
    ),
    type = c(
      "numeric", "nominal",
      "numeric", "numeric", "numeric",
      "numeric", "numeric"
    )
  )

# Expected merge result for the column-removal case: every remaining column
# keeps its role and source, and no NA rows appear.
rm_res <-
  tibble::tibble(
    variable = c(
      "Seniority", "Status",
      "Job_freelance", "Job_others", "Job_partime",
      "Price_poly_1", "Price_poly_2"
    ),
    type = c(
      "numeric", "nominal",
      "numeric", "numeric", "numeric",
      "numeric", "numeric"
    ),
    role = c(
      "predictor", "outcome",
      "predictor", "predictor", "predictor",
      "predictor", "predictor"
    ),
    source = c(
      "original", "original",
      "derived", "derived", "derived",
      "derived", "derived"
    )
  )

test_that("typical use cases", {
  # Each case merges an old/new term-info pair and compares the outcome with
  # the stored expected table.

  # New columns appended, one original column dropped.
  dummy_merged <- recipes:::merge_term_info(
    .new = dummy_new,
    .old = dummy_old
  )
  expect_equal(dummy_merged, dummy_res)

  # Derived rows already present in `.old`, plus further new columns.
  poly_merged <- recipes:::merge_term_info(
    .new = poly_new,
    .old = poly_old
  )
  expect_equal(poly_merged, poly_res)

  # A column is removed and nothing new is added.
  rm_merged <- recipes:::merge_term_info(
    .new = rm_new,
    .old = rm_old
  )
  expect_equal(rm_merged, rm_res)
})

# Try the recipes package in your browser
#
# Any scripts or data that you put into this service are public.
#
# recipes documentation built on Aug. 26, 2023, 1:08 a.m.