Nothing
context("test-shapley.R")
suppressWarnings(RNGversion(vstr = "3.5.0"))
test_that("Basic test functions in shapley.R", {
# Load data -----------
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
x_var <- c("lstat", "rm", "dis", "indus")
x_train <- tail(Boston[, x_var], 50)
# Load premade lm model. Path needs to be relative to testthat directory in the package
model <- readRDS("model_objects/lm_model_object.rds")
# Prepare the data for explanation
explainer <- shapr(x_train, model)
expect_known_value(explainer,
file = "test_objects/shapley_explainer_obj.rds",
update = FALSE
)
}
})
test_that("Testing data input to shapr in shapley.R", {
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
y_var <- "medv"
x_train <- tail(Boston, -6)
y_train <- tail(Boston[, y_var], -6)
y_train_binary <- as.factor(tail((Boston[, y_var] > 20) * 1, -6))
# convert to factors for testing purposes
x_train$rad <- factor(round(x_train$rad))
x_train$chas <- factor(round(x_train$chas))
train_df <- cbind(x_train, y_train, y_train_binary)
x_var_numeric <- c("lstat", "rm", "dis", "indus")
x_var_factor <- c("lstat", "rm", "dis", "indus", "rad", "chas")
train_df_used_numeric <- x_train[, x_var_numeric]
train_df_used_factor <- x_train[, x_var_factor]
formula_numeric <- as.formula(paste0("y_train ~ ", paste0(x_var_numeric, collapse = "+")))
formula_factor <- as.formula(paste0("y_train ~ ", paste0(x_var_factor, collapse = "+")))
formula_binary_numeric <- as.formula(paste0("y_train_binary ~ ", paste0(x_var_numeric, collapse = "+")))
formula_binary_factor <- as.formula(paste0("y_train_binary ~ ", paste0(x_var_factor, collapse = "+")))
dummylist <- make_dummies(traindata = x_train[, x_var_factor], testdata = x_train[, x_var_factor])
# List of models to run silently
l_numeric <- list(
stats::lm(formula_numeric, data = train_df),
stats::glm(formula_numeric, data = train_df)
)
if (requireNamespace("mgcv", quietly = TRUE)) {
l_numeric[[length(l_numeric) + 1]] <- mgcv::gam(formula_numeric, data = train_df)
}
l_factor <- list(
stats::lm(formula_factor, data = train_df),
stats::glm(formula_factor, data = train_df)
)
if (requireNamespace("mgcv", quietly = TRUE)) {
l_factor[[length(l_factor) + 1]] <- mgcv::gam(formula_factor, data = train_df)
}
if (requireNamespace("xgboost", quietly = TRUE)) {
l_factor[[length(l_factor) + 1]] <- xgboost::xgboost(
data = dummylist$train_dummies,
label = y_train,
nrounds = 3, verbose = FALSE
)
l_factor[[length(l_factor)]]$feature_list <- dummylist$feature_list
}
for (i in seq_along(l_numeric)) {
expect_silent(shapr(train_df_used_numeric, l_numeric[[i]])) # No modification
expect_message(shapr(train_df, l_numeric[[i]])) # Features dropped
}
for (i in seq_along(l_factor)) {
expect_silent(shapr(train_df_used_factor, l_factor[[i]])) # No modification
expect_message(shapr(train_df, l_factor[[i]])) # Features dropped
}
# Testing errors on incompatible model and data
# Missing features
model <- stats::lm(formula_factor, data = train_df)
data_error <- train_df[, -3]
expect_error(shapr(data_error, model))
# Duplicated column names
data_error <- train_df_used_factor
data_error <- cbind(data_error, lstat = 1)
expect_error(shapr(data_error, model))
# Empty column names in data
data_error <- train_df
colnames(data_error) <- NULL
expect_error(shapr(data_error, model))
# Empty column names in model (ok if found in data -- and we trust it)
if (requireNamespace("xgboost", quietly = TRUE)) {
data_with_colnames <- data_without_colnames <- as.matrix(train_df_used_numeric)
colnames(data_without_colnames) <- NULL
model_xgb <- xgboost::xgboost(
data = data_without_colnames, label = y_train,
nrounds = 3, verbose = FALSE
)
expect_message(shapr(data_with_colnames, model_xgb))
}
# Data feature with incorrect class
data_error <- train_df_used_factor
data_error$lstat <- as.logical(data_error$lstat > 15)
expect_error(shapr(data_error, model))
# non-matching factor levels
data_error <- head(train_df_used_factor)
data_error$rad <- droplevels(data_error$rad)
expect_error(shapr(data_error, model))
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.