Nothing
context("test-features.R")
test_that("Test feature_combinations", {
# Example 1 -----------
m <- 3
exact <- TRUE
w <- 10^6
x1 <- feature_combinations(m = m, exact = exact, weight_zero_m = w)
x2 <- feature_exact(m, w)
# Example 2 -----------
m <- 10
exact <- FALSE
n_combinations <- 50
w <- 10^6
set.seed(1)
y1 <- feature_combinations(
m = m,
exact = exact,
n_combinations = n_combinations,
weight_zero_m = w
)
set.seed(1)
y2 <- feature_not_exact(
m = m,
n_combinations = n_combinations,
weight_zero_m = w
)
y2[, p := NULL]
# Example 3 -----------
m <- 3
exact <- FALSE
n_combinations <- 1e4
w <- 10^6
set.seed(1)
y3 <- feature_combinations(
m = m,
exact = exact,
n_combinations = n_combinations,
weight_zero_m = w
)
# Test results -----------
expect_equal(x1, x2)
expect_equal(y1, y2)
expect_equal(nrow(y3), 2^3)
expect_error(feature_combinations(100))
expect_error(feature_combinations(100, n_combinations = NULL))
})
test_that("Test feature_exact", {
# Example -----------
m <- 3
weight_zero_m <- 10^6
x <- feature_exact(m, weight_zero_m)
# Define results -----------
cnms <- c("id_combination", "features", "n_features", "N", "shapley_weight")
classes <- c("integer", "list", "integer", "integer", "double")
lfeatures <- list(
integer(0),
1L,
2L,
3L,
c(1L, 2L),
c(1L, 3L),
c(2L, 3L),
c(1L, 2L, 3L)
)
n_features <- c(0, rep(1, 3), rep(2, 3), 3)
n <- c(1, rep(3, 6), 1)
# Tests -----------
expect_true(data.table::is.data.table(x))
expect_equal(names(x), cnms)
expect_equal(unname(sapply(x, typeof)), classes)
expect_equal(x[["id_combination"]], seq(nrow(x)))
expect_equal(x[["features"]], lfeatures)
expect_equal(x[["n_features"]], n_features)
expect_equal(x[["N"]], n)
})
test_that("Test feature_not_exact", {
# Example -----------
m <- 10
exact <- FALSE
n_combinations <- 50
w <- 10^6
set.seed(1)
x <- feature_not_exact(
m = m,
n_combinations = n_combinations,
weight_zero_m = w
)
set.seed(1)
cnms <- c("id_combination", "features", "n_features", "N", "shapley_weight", "p")
classes <- c("integer", "list", "integer", "integer", "integer", "double")
n <- sapply(seq(m - 1), choose, n = m)
w_all <- shapley_weights(m = m, N = n, n_features = seq(m - 1)) * n
w_default <- w_all / sum(w_all)
# Test results -----------
expect_true(data.table::is.data.table(x))
expect_equal(names(x), cnms)
expect_equal(unname(sapply(x, typeof)), classes)
expect_true(nrow(x) <= n_combinations + 2)
expect_equal(x[["id_combination"]], seq(nrow(x)))
for (i in x[, .I]) {
f <- x[["features"]][[i]]
if (length(f) == 0) {
expect_equal(x[["n_features"]][[i]], 0)
expect_equal(x[["N"]][[i]], 1)
expect_equal(x[["shapley_weight"]][[i]], w)
expect_equal(x[["p"]][[i]], NA_real_)
} else if (length(f) == m) {
expect_equal(f, seq(m))
expect_equal(x[["n_features"]][[i]], m)
expect_equal(x[["N"]][[i]], 1)
expect_equal(x[["shapley_weight"]][[i]], w)
expect_equal(x[["p"]][[i]], NA_real_)
} else {
k <- length(f)
expect_equal(f, sort(f))
expect_equal(x[["n_features"]][[i]], k)
expect_equal(x[["N"]][[i]], choose(m, k))
expect_equal(x[["p"]][[i]], w_default[x[["n_features"]][[i]]])
expect_equal(between(x[["shapley_weight"]][[i]], 1L, n_combinations), TRUE)
}
}
})
test_that("Test helper_feature", {
# Example -----------
m <- 5
feature_sample <- list(
integer(0),
1:2,
3:5,
1:2,
1:5
)
x <- helper_feature(m, feature_sample)
# Define results -----------
x2 <- c(1, 2, 1, 2, 1)
x3 <- c(FALSE, FALSE, FALSE, TRUE, FALSE)
# Test results -----------
cnms <- c("sample_frequence", "is_duplicate")
classes <- c("integer", "logical")
expect_true(data.table::is.data.table(x))
expect_equal(names(x), cnms)
expect_equal(nrow(x), length(feature_sample))
expect_equal(classes, unname(sapply(x, typeof)))
expect_equal(x[["sample_frequence"]], x2)
expect_equal(x[["is_duplicate"]], x3)
})
test_that("Test make_dummies", {
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
x_var <- c("lstat", "rm", "dis", "indus")
y_var <- "medv"
x_train <- as.data.frame(Boston[401:411, x_var])
y_train <- Boston[401:408, y_var]
x_test <- as.data.frame(Boston[1:4, x_var])
# convert to factors for illustrational purpose
x_train$rm <- factor(round(x_train$rm))
x_test$rm <- factor(round(x_test$rm), levels = levels(x_train$rm))
factor_feat <- sapply(x_train, is.factor)
nb_factor_feat <- sum(factor_feat)
dummylist <- make_dummies(traindata = x_train, testdata = x_train)
# Tests
expect_type(dummylist, "list")
expect_equal(length(dummylist$feature_list$contrasts_list), nb_factor_feat)
expect_equal(length(dummylist$feature_list$labels), ncol(x_train))
expect_equal(sum(dummylist$feature_list$classes == "factor"), nb_factor_feat)
expect_equal(ncol(dummylist$feature_list$contrasts_list$rm), length(levels(x_train$rm)))
# 1) What if train has more features than test but features in test are contained in train
x_train1 <- cbind(x_train, 1)
x_test1 <- x_test
expect_error(make_dummies(traindata = x_train1, testdata = x_test1))
# 2) What if train has different feature types than test
x_train2 <- x_train
x_test2 <- x_test
x_test2$rm <- as.numeric(x_test2$rm)
expect_error(make_dummies(traindata = x_train2, testdata = x_test2))
# 3) What if test has more features than train but features in train are contained in test
x_train3 <- x_train
x_test3 <- cbind(x_test, 1)
expect_error(make_dummies(traindata = x_train3, testdata = x_test3))
# 4) What if train and test only have numerical features
x_train4 <- x_train
x_train4$rm <- as.numeric(x_train4$rm)
x_test4 <- x_test
x_test4$rm <- as.numeric(x_test4$rm)
expect_type(make_dummies(traindata = x_train4, testdata = x_test4), "list")
# 5) What if train and test only have categorical features
x_train5 <- x_train
x_train5 <- x_train5[, "rm", drop = FALSE]
x_test5 <- x_test
x_test5 <- x_test5[, "rm", drop = FALSE]
expect_type(make_dummies(traindata = x_train5, testdata = x_test5), "list")
# 6) What if test has the same levels as train but random ordering of levels
x_train6 <- x_train
x_train6$rm <- factor(x_train6$rm, levels = 4:9)
x_test6 <- x_test
x_test6$rm <- factor(x_test6$rm, levels = c(8, 9, 7, 4, 5, 6))
expect_type(make_dummies(traindata = x_train6, testdata = x_test6), "list")
# 7) What if test has different levels than train
x_train7 <- x_train
x_train7$rm <- factor(x_train7$rm, levels = 4:9)
x_test7 <- x_test
x_test7$rm <- factor(x_test7$rm, levels = 6:8)
expect_error(make_dummies(traindata = x_train7, testdata = x_test7))
# 8) What if train and test have different feature names
x_train8 <- x_train
x_test8 <- x_test
names(x_test8) <- c("lstat2", "rm2", "dis2", "indus2")
expect_error(make_dummies(traindata = x_train8, testdata = x_test8))
# 9) What if one variables has an empty name
x_train9 <- x_train
colnames(x_train9) <- c("", "rm", "dis", "indus")
x_test9 <- x_test
colnames(x_test9) <- c("", "rm", "dis", "indus")
expect_error(make_dummies(traindata = x_train9, testdata = x_test9))
# 10) What if traindata has a column that repeats
x_train10 <- cbind(x_train, lstat = x_train$lstat)
x_test10 <- cbind(x_test, lstat = x_test$lstat)
expect_error(make_dummies(traindata = x_train10, testdata = x_test10))
# 11) What if traindata has no column names
x_train11 <- x_train
colnames(x_train11) <- NULL
x_test11 <- x_test
colnames(x_test11) <- NULL
expect_error(make_dummies(traindata = x_train11, testdata = x_test11))
# 12 Test that traindata_new and testdata_new will be the same as the original
# x_train and x_test. The only time this is different is if the levels of train
# and test are different. See below.
dummylist12 <- make_dummies(traindata = x_train, testdata = x_test)
#
expect_true(all(data.frame(dummylist12$traindata_new) == x_train))
expect_true(all(levels(dummylist12$traindata_new$rm) == levels(x_train$rm)))
expect_true(all(data.frame(dummylist12$testdata_new) == x_test))
expect_true(all(levels(dummylist12$testdata_new$rm) == levels(x_test$rm)))
# 13 Different levels same as check # 12
#
x_train13 <- x_train
x_train13$rm <- factor(x_train13$rm, levels = 4:9)
x_test13 <- x_test
x_test13$rm <- factor(x_test13$rm, levels = c(8, 9, 7, 4, 5, 6))
dummylist13 <- make_dummies(traindata = x_train13, testdata = x_test13)
#
expect_true(all(data.frame(dummylist13$traindata_new) == x_train13))
expect_true(all(levels(dummylist13$traindata_new$rm) == levels(x_train13$rm)))
expect_true(all(data.frame(dummylist13$testdata_new) == x_test13))
# Important !!!!
expect_false(all(levels(dummylist13$testdata_new$rm) == levels(x_test13$rm)))
}
})
test_that("Test apply_dummies", {
if (requireNamespace("MASS", quietly = TRUE)) {
data("Boston", package = "MASS")
x_var <- c("lstat", "rm", "dis", "indus")
y_var <- "medv"
x_train <- as.data.frame(Boston[401:411, x_var])
y_train <- Boston[401:408, y_var]
x_test <- as.data.frame(Boston[1:4, x_var])
# convert to factors for illustrational purpose
x_train$rm <- factor(round(x_train$rm))
x_test$rm <- factor(round(x_test$rm), levels = levels(x_train$rm))
numeric_feat <- !sapply(x_train, is.factor)
nb_numeric_feat <- sum(numeric_feat)
dummylist <- make_dummies(traindata = x_train, testdata = x_test)
x_test_dummies <- apply_dummies(feature_list = dummylist$feature_list, testdata = x_test)
# Tests
expect_type(x_test_dummies, "double")
expect_equal(
ncol(x_test_dummies),
nb_numeric_feat + ncol(dummylist$feature_list$contrasts_list$rm)
)
# Test that make_dummies() and apply_dummies() gives the same output
# for a given traindata and testdata
expect_true(all(dummylist$test_dummies == x_test_dummies))
# 1) What if you re-arrange the columns in x_train
x_test1 <- x_test[, c(2, 3, 1, 4)]
diff_column_placements <- apply_dummies(dummylist$feature_list, testdata = x_test1)
expect_equal(colnames(diff_column_placements), colnames(x_test_dummies))
# 2) What if you put in less features then the original traindata
x_test2 <- x_test[, c(2, 1)]
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test2))
# 3) What if you change the feature types of testdata
x_test3 <- sapply(x_test, as.numeric)
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test3))
# 4) What if you add a feature
x_test4 <- cbind(x_train[, c(1, 2)], new_var = x_train[, 2], x_train[, c(3, 4)])
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test4))
# 6) What if test has the same levels as train but random ordering of levels
x_test6 <- x_test
x_test6$rm <- factor(x_test6$rm, levels = c(8, 9, 7, 4, 5, 6))
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test6))
# 7) What if test has different levels than train
x_test7 <- x_test
x_test7$rm <- factor(x_test7$rm, levels = 6:8)
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test7))
# 8) What if train and test have different feature names
x_test8 <- x_test
names(x_test8) <- c("lstat2", "rm2", "dis2", "indus2")
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test8))
# 9) What if one variables has an empty name
x_test9 <- x_test
colnames(x_test9) <- c("", "rm", "dis", "indus")
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test9))
# 10) What if traindata has a column that repeats
x_test10 <- cbind(x_test, lstat = x_test$lstat)
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test10))
# 11) What if testdata has no column names
x_test11 <- x_test
colnames(x_test11) <- NULL
expect_error(apply_dummies(dummylist$feature_list, testdata = x_test11))
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.