test_that("can mold recipes", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
matrix_bp <- default_recipe_blueprint(composition = "matrix")
rec <- recipes::recipe(Species ~ Sepal.Length, data = iris)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
x3 <- mold(rec, iris, blueprint = matrix_bp)
expect_s3_class(x1$predictors, "data.frame")
expect_s4_class(x2$predictors, "dgCMatrix")
expect_matrix(x3$predictors)
expect_s3_class(x1$outcomes, "data.frame")
expect_s3_class(x2$outcomes, "data.frame")
expect_s3_class(x3$outcomes, "data.frame")
expect_equal(colnames(x1$predictors), "Sepal.Length")
expect_equal(colnames(x2$predictors), "Sepal.Length")
expect_equal(colnames(x3$predictors), "Sepal.Length")
expect_s3_class(x1$outcomes[[1]], "factor")
expect_s3_class(x1$blueprint, "default_recipe_blueprint")
# Training data should _not_ be in the recipe
expect_error(recipes::juice(x1$blueprint))
})
test_that("can mold recipes with intercepts", {
rec <- recipes::recipe(Species ~ Sepal.Length, data = iris)
x1 <- mold(
rec, iris,
blueprint = default_recipe_blueprint(intercept = TRUE)
)
x2 <- mold(
rec, iris,
blueprint = default_recipe_blueprint(
intercept = TRUE,
composition = "dgCMatrix"
)
)
x3 <- mold(
rec, iris,
blueprint = default_recipe_blueprint(
intercept = TRUE,
composition = "matrix"
)
)
expect_true("(Intercept)" %in% colnames(x1$predictors))
expect_true("(Intercept)" %in% colnames(x2$predictors))
expect_true("(Intercept)" %in% colnames(x3$predictors))
})
test_that("can pass `fresh` through to `prep()`", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
iris1 <- iris[1:50, ]
iris2 <- iris[51:100, ]
rec <- recipes::recipe(Species ~ Sepal.Length, data = iris1)
rec <- recipes::step_center(rec, Sepal.Length)
prepped_rec <- recipes::prep(rec, iris1)
non_fresh_mean <- prepped_rec$steps[[1]]$means
x1 <- mold(prepped_rec, iris2, blueprint = default_recipe_blueprint(fresh = FALSE))
mold_non_fresh_mean1 <- x1$blueprint$recipe$steps[[1]]$means
x2 <- mold(prepped_rec, iris2, blueprint = default_recipe_blueprint(fresh = FALSE, composition = "dgCMatrix"))
mold_non_fresh_mean2 <- x2$blueprint$recipe$steps[[1]]$means
y1 <- mold(prepped_rec, iris2, blueprint = default_recipe_blueprint(fresh = TRUE))
mold_fresh_mean1 <- y1$blueprint$recipe$steps[[1]]$means
y2 <- mold(prepped_rec, iris2, blueprint = default_recipe_blueprint(fresh = TRUE, composition = "dgCMatrix"))
mold_fresh_mean2 <- y2$blueprint$recipe$steps[[1]]$means
fresh_mean <- c(Sepal.Length = mean(iris2$Sepal.Length))
expect_equal(non_fresh_mean, mold_non_fresh_mean1)
expect_false(identical(non_fresh_mean, mold_fresh_mean1))
expect_equal(non_fresh_mean, mold_non_fresh_mean2)
expect_false(identical(non_fresh_mean, mold_fresh_mean2))
expect_equal(mold_fresh_mean1, fresh_mean)
expect_equal(mold_fresh_mean2, fresh_mean)
})
test_that("`fresh` defaults to `TRUE`", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
iris1 <- iris[1:50, ]
iris2 <- iris[51:100, ]
rec <- recipes::recipe(Species ~ Sepal.Length, data = iris1)
rec <- recipes::step_center(rec, Sepal.Length)
prepped_rec <- recipes::prep(rec, iris1)
non_fresh_mean <- prepped_rec$steps[[1]]$means
fresh_mean <- c(Sepal.Length = mean(iris2$Sepal.Length))
x1 <- mold(prepped_rec, iris2, blueprint = default_recipe_blueprint())
mold_fresh_mean1 <- x1$blueprint$recipe$steps[[1]]$means
x2 <- mold(prepped_rec, iris2, blueprint = sparse_bp)
mold_fresh_mean2 <- x2$blueprint$recipe$steps[[1]]$means
expect_false(identical(non_fresh_mean, mold_fresh_mean1))
expect_identical(mold_fresh_mean1, fresh_mean)
expect_false(identical(non_fresh_mean, mold_fresh_mean2))
expect_identical(mold_fresh_mean2, fresh_mean)
})
test_that("can pass `strings_as_factors` through to `prep()`", {
df <- tibble(y = 1, x = "a")
rec <- recipes::recipe(y ~ x, data = df)
# Defaults to `TRUE`
x <- mold(rec, df)
expect_identical(x$predictors$x, factor("a"))
x <- mold(rec, df, blueprint = default_recipe_blueprint(strings_as_factors = FALSE))
expect_identical(x$predictors$x, "a")
})
test_that("`data` is validated", {
expect_error(
mold(recipes::recipe(Species ~ Sepal.Length, data = iris), 1),
"`data` must be a data.frame or a matrix"
)
})
test_that("`extras` holds a slot for `roles`", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
matrix_bp <- default_recipe_blueprint(composition = "matrix")
rec <- recipes::recipe(Species ~ Sepal.Length, data = iris)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
x3 <- mold(rec, iris, blueprint = matrix_bp)
expect_equal(x1$extras, list(roles = NULL))
expect_equal(x2$extras, list(roles = NULL))
expect_equal(x3$extras, list(roles = NULL))
expect_equal(x1$blueprint$extra_role_ptypes, NULL)
expect_equal(x2$blueprint$extra_role_ptypes, NULL)
expect_equal(x3$blueprint$extra_role_ptypes, NULL)
})
test_that("non-standard roles ptypes are stored by default", {
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::update_role(rec, Sepal.Width, new_role = "dummy")
x <- mold(rec, iris)
expect_identical(
x$blueprint$extra_role_ptypes,
list(dummy = tibble::tibble(Sepal.Width = double()))
)
expect_identical(
x$extras$roles,
list(dummy = tibble::tibble(Sepal.Width = iris$Sepal.Width))
)
})
test_that("case weights is not considered a required extra role by default", {
iris$weight <- frequency_weights(seq_len(nrow(iris)))
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::update_role(rec, Sepal.Width, new_role = "dummy")
x <- mold(rec, iris)
expect_identical(
x$blueprint$extra_role_ptypes,
list(dummy = tibble::tibble(Sepal.Width = double()))
)
expect_identical(
x$extras$roles$dummy,
tibble::tibble(Sepal.Width = iris$Sepal.Width)
)
expect_identical(
x$extras$roles$case_weights,
tibble::tibble(weight = iris$weight)
)
})
test_that("case weights can be updated to be a required extra role", {
iris$weight <- frequency_weights(seq_len(nrow(iris)))
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::update_role(rec, Sepal.Width, new_role = "dummy")
rec <- recipes::update_role_requirements(rec, "case_weights", bake = TRUE)
x <- mold(rec, iris)
expect_identical(
x$blueprint$extra_role_ptypes$dummy,
tibble::tibble(Sepal.Width = double())
)
expect_identical(
x$blueprint$extra_role_ptypes$case_weights,
tibble::tibble(weight = frequency_weights(integer()))
)
expect_identical(
x$extras$roles$dummy,
tibble::tibble(Sepal.Width = iris$Sepal.Width)
)
expect_identical(
x$extras$roles$case_weights,
tibble::tibble(weight = iris$weight)
)
})
test_that("only original non-standard columns are in the extra roles ptype", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
# same custom role, but note step_bs() columns aren't original columns
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::update_role(rec, Sepal.Width, new_role = "dummy")
rec <- recipes::step_bs(rec, Sepal.Length, role = "dummy", deg_free = 3)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
# extra roles ptype only has original columns
expect_identical(
colnames(x1$blueprint$extra_role_ptypes$dummy),
"Sepal.Width"
)
expect_identical(
colnames(x2$blueprint$extra_role_ptypes$dummy),
"Sepal.Width"
)
# all `dummy` columns are returned in the `extras` slot
expect_identical(
colnames(x1$extras$roles$dummy),
c("Sepal.Width", paste("Sepal.Length", c("1", "2", "3"), sep = "_bs_"))
)
expect_identical(
colnames(x2$extras$roles$dummy),
c("Sepal.Width", paste("Sepal.Length", c("1", "2", "3"), sep = "_bs_"))
)
})
test_that("`extra_role_ptypes` and `extras` only hold roles that actually exist in the data", {
df <- tibble(y = 1, x = 1, z = 2)
rec <- recipes::recipe(y ~ ., df)
rec <- recipes::update_role(rec, x, new_role = "dummy")
# For example `default_bake_role_requirements()` specifies that `NA` is
# a required role, but there aren't any columns with that role in `df`
x <- mold(rec, df)
expect_named(x$blueprint$extra_role_ptypes, "dummy")
expect_named(x$extras$roles, "dummy")
})
test_that("multiple extra roles types can be stored", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::step_bs(rec, Sepal.Length, role = "dummy", deg_free = 3)
rec <- recipes::step_bs(rec, Sepal.Width, role = "dummy2", deg_free = 3)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
# these are not original columns
expect_equal(
x1$blueprint$extra_role_ptypes,
NULL
)
expect_equal(
x2$blueprint$extra_role_ptypes,
NULL
)
# They are automatically are returned in the `mold()` result
expect_equal(
names(x1$extras$roles),
c("dummy", "dummy2")
)
expect_equal(
names(x2$extras$roles),
c("dummy", "dummy2")
)
})
test_that("`NA` roles are treated as extra roles", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
rec <- recipes::recipe(iris)
rec <- recipes::update_role(rec, Sepal.Length, new_role = "predictor")
rec <- recipes::update_role(rec, Species, new_role = "outcome")
rec <- recipes::update_role(rec, Sepal.Width, new_role = "custom")
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
expect_equal(
colnames(x1$predictors),
"Sepal.Length"
)
expect_equal(
colnames(x2$predictors),
"Sepal.Length"
)
expect_equal(
colnames(x1$blueprint$ptypes$predictors),
"Sepal.Length"
)
expect_equal(
colnames(x2$blueprint$ptypes$predictors),
"Sepal.Length"
)
expect_equal(
colnames(x1$outcomes),
"Species"
)
expect_equal(
colnames(x2$outcomes),
"Species"
)
expect_equal(
colnames(x1$blueprint$ptypes$outcomes),
"Species"
)
expect_equal(
colnames(x2$blueprint$ptypes$outcomes),
"Species"
)
expect_equal(
colnames(x1$extras$roles$custom),
"Sepal.Width"
)
expect_equal(
colnames(x2$extras$roles$custom),
"Sepal.Width"
)
expect_equal(
colnames(x1$extras$roles$`NA`),
c("Petal.Length", "Petal.Width")
)
expect_equal(
colnames(x2$extras$roles$`NA`),
c("Petal.Length", "Petal.Width")
)
expect_equal(
colnames(x1$blueprint$extra_role_ptypes$custom),
"Sepal.Width"
)
expect_equal(
colnames(x2$blueprint$extra_role_ptypes$custom),
"Sepal.Width"
)
expect_equal(
colnames(x1$blueprint$extra_role_ptypes$`NA`),
c("Petal.Length", "Petal.Width")
)
expect_equal(
colnames(x2$blueprint$extra_role_ptypes$`NA`),
c("Petal.Length", "Petal.Width")
)
})
test_that("roles that aren't required are not retained as `extra_role_ptypes`, but are in the mold result", {
rec <- recipes::recipe(Species ~ ., iris)
rec <- recipes::update_role(rec, Sepal.Width, new_role = "dummy1")
rec <- recipes::update_role(rec, Sepal.Length, new_role = "dummy2")
rec <- recipes::update_role_requirements(rec, "dummy1", bake = FALSE)
x <- mold(rec, iris)
expect_equal(
x$blueprint$extra_role_ptypes,
list(dummy2 = tibble::tibble(Sepal.Length = double()))
)
expect_equal(
x$extras$roles$dummy1,
tibble::tibble(Sepal.Width = iris$Sepal.Width)
)
expect_equal(
x$extras$roles$dummy2,
tibble::tibble(Sepal.Length = iris$Sepal.Length)
)
})
test_that("Missing y value returns a 0 column tibble for `outcomes`", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
rec <- recipes::recipe(~Sepal.Width, data = iris)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
expect_equal(nrow(x1$outcomes), 150)
expect_equal(ncol(x1$outcomes), 0)
expect_equal(x1$outcomes, x2$outcomes)
})
test_that("Missing y value returns a 0 column / 0 row tibble for `ptype`", {
sparse_bp <- default_recipe_blueprint(composition = "dgCMatrix")
rec <- recipes::recipe(~Sepal.Width, data = iris)
x1 <- mold(rec, iris)
x2 <- mold(rec, iris, blueprint = sparse_bp)
expect_equal(x1$blueprint$ptypes$outcomes, tibble())
expect_equal(x2$blueprint$ptypes$outcomes, tibble())
})
test_that("`mold()` is compatible with hardhat 0.2.0 blueprints", {
path <- test_path("data", "hardhat-0.2.0-pre-mold-recipe.rds")
object <- readRDS(path)
data <- object$data
blueprint <- object$blueprint
rec <- recipes::recipe(y ~ ., data = data)
rec <- recipes::step_mutate(rec, z = 1)
out <- mold(rec, data = data, blueprint = blueprint)
expect <- tibble::tibble(x = data$x, z = 1)
expect_identical(out$predictors, expect)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.