Nothing
context("test-models.R")
test_that("Test predict_model (regression)", {
# Data -----------
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
x_var <- c("lstat", "rm", "dis", "indus")
y_var <- "medv"
x_train <- tail(Boston[, x_var], -6)
y_train <- tail(Boston[, y_var], -6)
x_test <- head(Boston[, x_var], 6)
str_formula <- "y_train ~ lstat + rm + dis + indus"
train_df <- cbind(y_train, x_train)
# List of models
l <- list(
stats::lm(str_formula, data = train_df),
stats::glm(str_formula, data = train_df)
)
if (requireNamespace("ranger", quietly = TRUE)) {
l[[length(l) + 1]] <- ranger::ranger(str_formula, data = train_df)
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l[[length(l) + 1]] <- xgboost::xgboost(data = as.matrix(x_train), label = y_train, nrounds = 3, verbose = FALSE)
}
if (requireNamespace("mgcv", quietly = TRUE)) {
l[[length(l) + 1]] <- mgcv::gam(as.formula(str_formula), data = train_df)
}
# Tests
for (i in seq_along(l)) {
# Input equals data.frame
expect_true(
is.vector(predict_model(l[[i]], x_test))
)
expect_true(
is.atomic(predict_model(l[[i]], x_test))
)
expect_true(
is.double(predict_model(l[[i]], x_test))
)
expect_true(
length(predict_model(l[[i]], x_test)) == nrow(x_test)
)
# Input equals matrix
expect_true(
is.double(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
is.atomic(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
is.vector(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
length(predict_model(l[[i]], as.matrix(x_test))) == nrow(x_test)
)
}
}
})
test_that("Test predict_model (binary classification)", {
# Data -----------
if (requireNamespace("datasets", quietly = TRUE)) {
data("iris", package = "datasets")
x_var <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
y_var <- "Species"
iris$Species <- as.character(iris$Species)
iris <- iris[which(iris$Species != "virginica"), ]
iris$Species <- as.factor(iris$Species)
x_train <- tail(iris[, x_var], -6)
y_train <- tail(iris[, y_var], -6)
x_test <- head(iris[, x_var], 6)
str_formula <- "y_train ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width"
train_df <- cbind(y_train, x_train)
# List of models
l <- list(
suppressWarnings(stats::glm(str_formula, data = train_df, family = "binomial"))
)
if (requireNamespace("mgcv", quietly = TRUE)) {
l[[length(l) + 1]] <- suppressWarnings(mgcv::gam(as.formula(str_formula), data = train_df, family = "binomial"))
}
if (requireNamespace("ranger", quietly = TRUE)) {
l[[length(l) + 1]] <- ranger::ranger(str_formula, data = train_df, probability = TRUE)
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l[[length(l) + 1]] <- xgboost::xgboost(
data = as.matrix(x_train),
label = as.integer(y_train) - 1,
nrounds = 2,
verbose = FALSE,
objective = "binary:logistic",
eval_metric = "error"
)
}
# Tests
for (i in seq_along(l)) {
# Input equals data.frame
expect_true(
is.vector(predict_model(l[[i]], x_test))
)
expect_true(
is.atomic(predict_model(l[[i]], x_test))
)
expect_true(
is.double(predict_model(l[[i]], x_test))
)
expect_true(
length(predict_model(l[[i]], x_test)) == nrow(x_test)
)
expect_true(
all(data.table::between(predict_model(l[[i]], x_test), 0, 1))
)
# Input equals matrix
expect_true(
is.double(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
is.atomic(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
is.vector(predict_model(l[[i]], as.matrix(x_test)))
)
expect_true(
length(predict_model(l[[i]], as.matrix(x_test))) == nrow(x_test)
)
expect_true(
all(data.table::between(predict_model(l[[i]], as.matrix(x_test)), 0, 1))
)
# Check that output is equal
expect_equal(
predict_model(l[[i]], x_test), predict_model(l[[i]], as.matrix(x_test))
)
}
# Errors
l <- list()
if (requireNamespace("ranger", quietly = TRUE)) {
l[[length(l) + 1]] <- ranger::ranger(str_formula, data = train_df)
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l[[length(l) + 1]] <- xgboost::xgboost(
data = as.matrix(x_train),
label = as.integer(y_train) - 1,
nrounds = 2,
verbose = FALSE,
objective = "reg:logistic"
)
}
# Tests
for (i in seq_along(l)) {
# Input equals data.frame
expect_error(
get_model_specs(l[[i]])
)
# Input equals matrix
expect_error(
get_model_specs(l[[i]])
)
}
}
})
test_that("Test predict_model (multi-classification)", {
# Data -----------
if (requireNamespace("datasets", quietly = TRUE)) {
data("iris", package = "datasets")
x_var <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
y_var <- "Species"
x_train <- tail(iris[, x_var], -6)
y_train <- tail(iris[, y_var], -6)
x_test <- head(iris[, x_var], 6)
str_formula <- "y_train ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width"
train_df <- cbind(y_train, x_train)
# List of models
l <- list()
if (requireNamespace("ranger", quietly = TRUE)) {
l[[length(l) + 1]] <- ranger::ranger(
str_formula,
data = train_df
)
l[[length(l) + 1]] <- ranger::ranger(
str_formula,
data = train_df,
probability = TRUE
)
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l[[length(l) + 1]] <- xgboost::xgboost(
as.matrix(x_train),
label = as.integer(y_train) - 1,
nrounds = 2,
verbose = FALSE,
objective = "multi:softprob",
eval_metric = "merror",
num_class = 3
)
l[[length(l) + 1]] <- xgboost::xgboost(
as.matrix(x_train),
label = as.integer(y_train) - 1,
nrounds = 2,
verbose = FALSE,
objective = "multi:softmax",
eval_metric = "merror",
num_class = 3
)
}
# Tests
for (i in seq_along(l)) {
# Input equals data.frame
expect_error(
get_model_specs(l[[i]], x_test)
)
# Input equals matrix
expect_error(
get_model_specs(l[[i]], as.matrix(x_test))
)
}
}
})
test_that("Test check_features + update_data", {
# Data -----------
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
y_var <- "medv"
x_train <- tail(Boston, -6)
y_train <- tail(Boston[, y_var], -6)
y_train_binary <- as.factor(tail((Boston[, y_var] > 20) * 1, -6))
# convert to factors for testing purposes
x_train$rad <- factor(round(x_train$rad))
x_train$chas <- factor(round(x_train$chas))
train_df <- cbind(x_train, y_train, y_train_binary)
x_var_numeric <- c("lstat", "rm", "dis", "indus")
x_var_factor <- c("lstat", "rm", "dis", "indus", "rad", "chas")
formula_numeric <- as.formula(paste0("y_train ~ ", paste0(x_var_numeric, collapse = "+")))
formula_factor <- as.formula(paste0("y_train ~ ", paste0(x_var_factor, collapse = "+")))
formula_binary_numeric <- as.formula(paste0("y_train_binary ~ ", paste0(x_var_numeric, collapse = "+")))
formula_binary_factor <- as.formula(paste0("y_train_binary ~ ", paste0(x_var_factor, collapse = "+")))
dummylist <- make_dummies(traindata = x_train[, x_var_factor], testdata = x_train[, x_var_factor])
# List of models to run silently
l_silent <- list(
stats::lm(formula_numeric, data = train_df),
stats::lm(formula_factor, data = train_df),
stats::glm(formula_numeric, data = train_df),
stats::glm(formula_factor, data = train_df),
stats::glm(formula_binary_numeric, data = train_df, family = "binomial"),
stats::glm(formula_binary_factor, data = train_df, family = "binomial")
)
l_message <- list()
if (requireNamespace("mgcv", quietly = TRUE)) {
l_silent[[length(l_silent) + 1]] <- mgcv::gam(formula_numeric, data = train_df)
l_silent[[length(l_silent) + 1]] <- mgcv::gam(formula_factor, data = train_df)
l_silent[[length(l_silent) + 1]] <- mgcv::gam(formula_binary_numeric, data = train_df, family = "binomial")
l_silent[[length(l_silent) + 1]] <- mgcv::gam(formula_binary_factor, data = train_df, family = "binomial")
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l_silent[[length(l_silent) + 1]] <- xgboost::xgboost(
data = dummylist$train_dummies, label = y_train,
nrounds = 3, verbose = FALSE
)
l_silent[[length(l_silent)]]$feature_list <- dummylist$feature_list
l_silent[[length(l_silent) + 1]] <- xgboost::xgboost(
data = dummylist$train_dummies,
label = as.integer(y_train_binary) - 1,
nrounds = 3,
verbose = FALSE,
objective = "binary:logistic",
eval_metric = "error"
)
l_silent[[length(l_silent)]]$feature_list <- dummylist$feature_list
l_message[[length(l_message) + 1]] <- xgboost::xgboost(
data = as.matrix(x_train[, x_var_numeric]),
label = y_train, nrounds = 3, verbose = FALSE
)
}
if (requireNamespace("ranger", quietly = TRUE)) {
l_message[[length(l_message) + 1]] <- ranger::ranger(formula_numeric, data = train_df)
l_message[[length(l_message) + 1]] <- ranger::ranger(formula_factor, data = train_df)
l_message[[length(l_message) + 1]] <- ranger::ranger(formula_binary_numeric, data = train_df, probability = TRUE)
l_message[[length(l_message) + 1]] <- ranger::ranger(formula_binary_factor, data = train_df, probability = TRUE)
}
data_features <- get_data_specs(train_df)
for (i in seq_along(l_silent)) {
model_features <- get_model_specs(l_silent[[i]])
expect_silent(check_features(model_features, data_features))
}
for (i in seq_along(l_message)) {
model_features <- get_model_specs(l_message[[i]])
expect_message(check_features(model_features, data_features))
}
# Checking all stops in check_features
data_features_ok <- get_data_specs(train_df)
# Non-matching labels
data_features_error <- get_data_specs(train_df)
data_features_error$labels <- NULL
expect_error(check_features(data_features_ok, data_features_error))
expect_message(check_features(data_features_error, data_features_ok, use_1_as_truth = T))
expect_error(check_features(data_features_error, data_features_ok, use_1_as_truth = F))
# Missing features
data_features_error <- get_data_specs(train_df[, -3])
expect_error(check_features(data_features_ok, data_features_error))
expect_error(check_features(data_features_error, data_features_ok, use_1_as_truth = F))
# Duplicated column names
data_features_error <- get_data_specs(cbind(crim = train_df[, 1], train_df))
expect_error(check_features(data_features_error, data_features_error))
# Empty column names
train_df_0 <- train_df
names(train_df_0)[1] <- ""
data_features_error <- get_data_specs(train_df_0)
expect_error(check_features(data_features_error, data_features_error))
# feature class is NA
data_features_error <- data_features_ok
data_features_error$classes <- rep(NA, length(data_features_error$classes))
expect_message(check_features(data_features_error, data_features_ok))
# feature classes are different
data_features_error <- data_features_ok
data_features_error$classes <- rev(data_features_error$classes)
names(data_features_error$classes) <- names(data_features_ok$classes)
expect_error(check_features(data_features_ok, data_features_error))
# invalid feature class
data_features_error <- data_features_ok
data_features_error$classes[1] <- "logical"
expect_error(check_features(data_features_error, data_features_error))
# non-matching factor levels
data_features_error <- data_features_ok
data_features_error$factor_levels$chas <- c(data_features_error$factor_levels$chas, "2")
expect_error(check_features(data_features_ok, data_features_error))
#### Now turning to update_data tests ####
model_features_ok <- get_model_specs(l_silent[[2]])
# Checking null output and message to remove features
train_dt <- as.data.table(train_df)
data_to_update <- copy(train_dt)
expect_message(expect_null(update_data(data_to_update, model_features_ok)))
# Checking that features are indeed removed
expect_equal(names(data_to_update), model_features_ok$labels)
# Second call with same input should do nothing
expect_silent(expect_null(update_data(data_to_update, model_features_ok)))
# Checking null output and message to shuffle factor levels
data_to_update_2 <- head(copy(train_dt), 20)
data_to_update_2$rad <- droplevels(data_to_update_2$rad)
org_levels_rad <- levels(data_to_update_2$rad)
expect_message(expect_null(update_data(data_to_update_2, model_features_ok)))
# Checking that levels are indeed updated
expect_true(length(org_levels_rad) < length(levels(data_to_update_2$rad)))
expect_equal(model_features_ok$factor_levels$rad, levels(data_to_update_2$rad))
}
})
test_that("Test missing colnames", {
# Data -----------
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
x_var <- c("lstat", "rm", "dis", "indus")
y_var <- "medv"
x_train <- as.matrix(tail(Boston[, x_var], -6))
y_train <- tail(Boston[, y_var], -6)
x_test <- as.matrix(head(Boston[, x_var]))
x_train_nonames <- x_train
colnames(x_train_nonames) <- NULL
x_test_nonames <- x_test
colnames(x_test_nonames) <- NULL
if (requireNamespace("xgboost", quietly = TRUE)) {
model <- xgboost::xgboost(
data = x_train, label = y_train, nrounds = 3, verbose = FALSE
)
model_nonames <- xgboost::xgboost(
data = x_train_nonames, label = y_train, nrounds = 3, verbose = FALSE
)
# missing colnames in model
expect_error(shapr(model_nonames, x_train))
# missing colnames in training data
expect_error(shapr(model, x_train_nonames))
# missing colnames in both model and training data
expect_error(shapr(model_nonames, x_train_nonames))
# missing colnames in test data
explain <- shapr(x_train, model)
p <- mean(y_train)
expect_error(
explain(
x_test_nonames,
approach = "empirical",
explainer = explainer,
prediction_zero = p
)
)
}
}
})
test_that("Test get_supported_models", {
org_models <- get_supported_models()
# Adding to .GlobalEnv
get_model_specs.test <<- function() 1
new_models <- get_supported_models()
# Removin form .GlobalEnv
rm("get_model_specs.test",pos = ".GlobalEnv")
expect_false("test" %in% org_models$model_class)
expect_true("test" %in% new_models$model_class)
})
test_that("Test get_model_specs", {
# Data -----------
if (requireNamespace("MASS", quietly = TRUE) & requireNamespace("gbm", quietly = TRUE)) {
data("Boston", package = "MASS")
y_var <- "medv"
x_train <- tail(Boston, -6)
y_train <- tail(Boston[, y_var], -6)
train_df <- cbind(x_train, y_train)
x_var_numeric <- c("lstat", "rm", "dis", "indus")
formula_numeric <- as.formula(paste0("y_train ~ ", paste0(x_var_numeric, collapse = "+")))
# Unsupported model
model_unsupported <- gbm::gbm(formula_numeric, distribution = "gaussian", data = train_df)
expect_error(get_model_specs(model_unsupported))
# This is further tested for custom models in the script tests/manual_test_scripts/test_custom_models.R
# run expect_silent(source("tests/testthat/manual_test_scripts/test_custom_models.R")) manually to run those tests
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.