data(ionosphere, package = 'kknn')
ionosphere = ionosphere[, -2] # remove second column which has a single unique value
data(Boston, package = 'MASS')
data(iris)
context('Utils testing')
# acc - function
testthat::test_that("the acc function returns a correct result", {
y_true = c(1,1,1,0,0,0,2,2,2,3,3,3,3)
preds = model.matrix(~. - 1, data.frame(as.factor(c(1,1,1,0,0,0,2,2,2,3,3,3,3))))
res = acc(y_true, preds)
testthat::expect_identical(res, 1)
})
# function_grid
testthat::test_that("the function_grid samples one item from each sublist, so that the length of the initial sublists is equal to the length of the end sublists", {
lst_all = list(
args1 = list(ntree = seq(30, 50, 5), mtry = c(2:3), nodesize = seq(5, 15, 5)),
args2 = list(k = 3:20, distance = c(1:5), kernel = c("triweight", "cos", "inv", "gaussian", "rank", "optimal")),
args3 = list(k = 3:20),
args4 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight', 'triweight'), params_knn = list(k = 3:20, algorithm = c("kd_tree", "cover_tree"))),
args5 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight'),
params_knn = list(k = 3:20, treetype = "kd", searchtype = c("standard", "priority"),
radius = seq(3.0, 20.0, 0.5), eps = c(rep(0, 20), seq(0, 50, 1)), distance_metric = c('euclidean', 'manhattan'))),
args6 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight'),
params_knn = list(k = 3:20, eps = c(rep(0, 20), seq(0, 50, 1)), searchtype = c("auto", "brute"))),
args7 = list(control = RWeka::Weka_control(K = seq(3, 20, 1), I = c(TRUE, FALSE), X = c(TRUE, FALSE))),
args8 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight', 'triweight'), k = 3:20, h = c(0.1, 0.5, 1.0, 2.0),
method = c('euclidean', 'manhattan')))
res = as.vector(unlist(lapply(lst_all, function(x) length(function_grid(grid_params = x)))))
length_sublists = as.vector(unlist(lapply(lst_all, function(x) length(x))))
testthat::expect_identical(res, length_sublists)
})
testthat::test_that("the function_grid samples and returns a list for each grid_argument", {
lst_all = list(
args1 = list(ntree = seq(30, 50, 5), mtry = c(2:3), nodesize = seq(5, 15, 5)),
args2 = list(k = 3:20, distance = c(1:5), kernel = c("triweight", "cos", "inv", "gaussian", "rank", "optimal")),
args3 = list(k = 3:20),
args4 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight', 'triweight'), params_knn = list(k = 3:20, algorithm = c("kd_tree", "cover_tree"))),
args5 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight'),
params_knn = list(k = 3:20, treetype = "kd", searchtype = c("standard", "priority"),
radius = seq(3.0, 20.0, 0.5), eps = c(rep(0, 20), seq(0, 50, 1)), distance_metric = c('euclidean', 'manhattan'))),
args6 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight'),
params_knn = list(k = 3:20, eps = c(rep(0, 20), seq(0, 50, 1)), searchtype = c("auto", "brute"))),
args7 = list(control = RWeka::Weka_control(K = seq(3, 20, 1), I = c(TRUE, FALSE), X = c(TRUE, FALSE))),
args8 = list(weights_function = c('uniform', 'triangular', 'epanechnikov', 'biweight', 'triweight'), k = 3:20, h = c(0.1, 0.5, 1.0, 2.0),
method = c('euclidean', 'manhattan')))
res = unlist(lapply(lst_all, function(x) is.list(function_grid(grid_params = x))))
testthat::expect_true(sum(res) == length(lst_all))
})
# func_weka_rerun [ for the RWeka package, if re-run = TRUE ]
testthat::test_that("the func_weka_rerun function takes a list of parameters (those parameters result from the subset_mods function) and it returns a list of sublists", {
output_of_subset_mods_function = list(K = c(19, 13, 20, 6, 12),
I = c("FALSE", "TRUE", "TRUE", "TRUE", "TRUE"),
X = c("TRUE", "TRUE", "TRUE", "TRUE", "TRUE"))
res = func_weka_rerun(output_of_subset_mods_function)
len_each_sublist = sum(unlist(lapply(res, function(x) length(x[[1]]))))
len_initial = sum(unlist(lapply(output_of_subset_mods_function, length)))
len_list = length(res)
len_init_each = mean(unlist(lapply(output_of_subset_mods_function, length)))
testthat::expect_true(len_each_sublist == len_initial && len_list == len_init_each)
})
# optimal_grid [ all functions except for RWeka, if re-run = TRUE ]
testthat::test_that("the func_weka_rerun function takes a list of parameters (those parameters result from the subset_mods function) and it returns a list of sublists", {
output_of_subset_mods_function = list(k = c(3, 4, 4, 3, 7),
distance = c(1, 1, 1, 1, 1),
kernel = c("rank", "triangular","biweight", "cos", "cos"))
res = lapply(1:length(output_of_subset_mods_function[[1]]), function(x) optimal_grid(output_of_subset_mods_function, x))
len_each_sublist = sum(unlist(lapply(res, function(x) length(x))))
len_initial = sum(unlist(lapply(output_of_subset_mods_function, length)))
len_list = length(res)
len_init_each = mean(unlist(lapply(output_of_subset_mods_function, length)))
testthat::expect_true(len_each_sublist == len_initial && len_list == len_init_each)
})
# length_grid [ independently of which algorithm will be used ]
testthat::test_that("the length_grid function returns the maximum length of the parameter grid", {
output_of_subset_mods_function = list(k = c(3, 4, 4, 3, 7),
distance = c(1, 1, 1, 1, 1),
kernel = c("rank", "triangular","biweight", "cos", "cos"))
res = max(length_grid(output_of_subset_mods_function))
tes = max(unlist(lapply(output_of_subset_mods_function, length)))
testthat::expect_true(res == tes)
})
# predict.gbm
testthat::test_that("the work-around for the gbm prediction function returns mutli-column probability matrix in case of multinomial classification", {
fit = gbm::gbm(Species ~., iris, distribution = "multinomial", n.trees = 100)
pred = predict_gbm_workaround_probs(fit, iris[, -ncol(iris)], type = 'response')
testthat::expect_true(ncol(pred) == length(unique(iris[, 'Species'])))
})
testthat::test_that("the work-around for the gbm prediction function returns a two column probability matrix in case of binomial classification", {
X = iris
X$Species = as.numeric(X$Species)
X$Species[X$Species == 1] = 2
X$Species = X$Species - 2
fit = gbm::gbm(Species ~., X, distribution = "bernoulli", n.trees = 100)
pred = predict_gbm_workaround_probs(fit, X[, -ncol(X)], type = 'response')
testthat::expect_true(ncol(pred) == length(unique(X[, 'Species'])))
})
# EXCEPTIONS_preds function
testthat::test_that("the output of the predictions produced by the selected REGRESSION algorithms is in the correct form", {
X = Boston[, -ncol(Boston)]
y = Boston[, ncol(Boston)]
fit1 = nnet::nnet(medv~., Boston, size = 5, trace = F)
pred1 = EXCEPTIONS_preds(fit1, Boston[, -ncol(Boston)], T)
# fit2 = ranger::ranger(medv~., Boston, num.trees = 10, write.forest = T)
# pred2 = preds = EXCEPTIONS_preds(fit2, Boston[, -ncol(Boston)], T)
fit3 = e1071::svm(medv~., Boston)
pred3 = EXCEPTIONS_preds(fit3, Boston[, -ncol(Boston)], T)
fit4 = LiblineaR::LiblineaR(X, y, type = 11)
pred4 = EXCEPTIONS_preds(fit4, Boston[, -ncol(Boston)], T)
#lst = list(pred1, pred2, pred3, pred4)
lst = list(pred1, pred3, pred4)
res = unlist(lapply(lst, function(x) is.vector(x) && length(x) == nrow(Boston)))
testthat::expect_true(sum(res) == length(lst))
})
testthat::test_that("the output of the predictions produced by the selected BINARY CLASSIFICATION algorithms is in the correct form", {
X = ionosphere[, -ncol(ionosphere)]
y = ionosphere[, ncol(ionosphere)]
dat_gbm = ionosphere
dat_gbm$class = as.numeric(dat_gbm$class) - 1
fit1 = adabag::boosting(class~., ionosphere, mfinal = 5)
pred1 = EXCEPTIONS_preds(fit1, ionosphere[, -ncol(ionosphere)], F)
fit2 = MASS::lda(class~., ionosphere)
pred2 = EXCEPTIONS_preds(fit2, ionosphere[, -ncol(ionosphere)], F)
fit3 = e1071::naiveBayes(class~., ionosphere)
pred3 = EXCEPTIONS_preds(fit3, ionosphere[, -ncol(ionosphere)], F)
fit4 = extraTrees::extraTrees(X, y, ntree = 5)
pred4 = EXCEPTIONS_preds(fit4, ionosphere[, -ncol(ionosphere)], F)
fit5 = gbm::gbm(class~., data = dat_gbm, distribution = "bernoulli", n.trees = 5) # gbm-bernoulli requires the data to be in c(0,1)
pred5 = EXCEPTIONS_preds(fit5, dat_gbm[, -ncol(dat_gbm)], F)
fit6 = glmnet::cv.glmnet(as.matrix(X), y, type.measure = "class", family = 'binomial', nfolds = 3) # with type = 'response' returns NA's in the glass-dataset
pred6 = EXCEPTIONS_preds(fit6, as.matrix(ionosphere[, -ncol(ionosphere)]), F)
fit7 = LiblineaR::LiblineaR(X, y, type = 0) # Computing probabilities is only supported for Logistic Regressions (LiblineaR 'type' 0, 6 or 7)
pred7 = EXCEPTIONS_preds(fit7, ionosphere[, -ncol(ionosphere)], F)
fit8 = e1071::svm(class~., ionosphere, probability = T)
pred8 = EXCEPTIONS_preds(fit8, ionosphere[, -ncol(ionosphere)], F)
fit9 = nnet::nnet(class~., ionosphere, size = 5, trace = F)
pred9 = EXCEPTIONS_preds(fit9, ionosphere[, -ncol(ionosphere)], F)
# fit10 = ranger::ranger(class~., ionosphere, num.trees = 10, write.forest = T, probability = T)
# pred10 = preds = EXCEPTIONS_preds(fit10, ionosphere[, -ncol(ionosphere)], F)
#lst = list(pred1, pred2, pred3, pred4, pred5, pred6, pred7, pred8, pred9, pred10)
lst = list(pred1, pred2, pred3, pred4, pred5, pred6, pred7, pred8, pred9)
res = unlist(lapply(lst, function(x) is.matrix(x) && ncol(x) == length(unique(ionosphere[, 'class']))))
testthat::expect_true(sum(res) == length(lst))
})
testthat::test_that("the output of the predictions produced by the selected MULTICLASS CLASSIFICATION algorithms is in the correct form", {
X = iris[, -ncol(iris)]
y = iris[, ncol(iris)]
fit1 = adabag::boosting(Species~., iris, mfinal = 5)
pred1 = EXCEPTIONS_preds(fit1, iris[, -ncol(iris)], F)
fit2 = MASS::lda(Species~., iris)
pred2 = EXCEPTIONS_preds(fit2, iris[, -ncol(iris)], F)
fit3 = e1071::naiveBayes(Species~., iris)
pred3 = EXCEPTIONS_preds(fit3, iris[, -ncol(iris)], F)
fit4 = extraTrees::extraTrees(X, y, ntree = 5)
pred4 = EXCEPTIONS_preds(fit4, iris[, -ncol(iris)], F)
fit5 = gbm::gbm(Species~., data = iris, distribution = "multinomial", n.trees = 5)
pred5 = EXCEPTIONS_preds(fit5, iris[, -ncol(iris)], F)
fit6 = glmnet::cv.glmnet(as.matrix(X), y, type.measure="class", family = 'multinomial', nfolds = 3)
pred6 = EXCEPTIONS_preds(fit6, as.matrix(iris[, -ncol(iris)]), F)
fit7 = LiblineaR::LiblineaR(X, y, type = 0) # Computing probabilities is only supported for Logistic Regressions (LiblineaR 'type' 0, 6 or 7)
pred7 = EXCEPTIONS_preds(fit7, iris[, -ncol(iris)], F)
fit8 = e1071::svm(Species~., iris, probability = T)
pred8 = EXCEPTIONS_preds(fit8, iris[, -ncol(iris)], F)
fit9 = nnet::nnet(Species~., iris, size = 5, trace = F)
pred9 = EXCEPTIONS_preds(fit9, iris[, -ncol(iris)], F)
# fit10 = ranger::ranger(Species~., iris, num.trees = 10, write.forest = T, probability = T)
# pred10 = preds = EXCEPTIONS_preds(fit10, iris[, -ncol(iris)], F)
#lst = list(pred1, pred2, pred3, pred4, pred5, pred6, pred7, pred8, pred9, pred10)
lst = list(pred1, pred2, pred3, pred4, pred5, pred6, pred7, pred8, pred9)
res = unlist(lapply(lst, function(x) is.matrix(x) && ncol(x) == length(unique(iris[, 'Species']))))
testthat::expect_true(sum(res) == length(lst))
})
# repeated_resampling function
testthat::test_that("the repeated_resampling function gives an error if the method is invalid", {
y = Boston$medv
testthat::expect_error(repeated_resampling(y, NULL, REPEATS = 2, sample_rate = 0.75, FOLDS = NULL))
})
testthat::test_that("the repeated_resampling function gives an error if the method is invalid", {
y = Boston$medv
testthat::expect_error(repeated_resampling(y, 'invalid', REPEATS = 2, sample_rate = 0.75, FOLDS = NULL))
})
testthat::test_that("the repeated_resampling function gives an error if the method is invalid", {
y = Boston$medv
testthat::expect_error(repeated_resampling(y, 1, REPEATS = 2, sample_rate = 0.75, FOLDS = NULL))
})
testthat::test_that("the repeated_resampling function gives an error if the method is invalid", {
y = Boston$medv
testthat::expect_error(repeated_resampling(y, 'invalid', REPEATS = 2, sample_rate = 0.75, FOLDS = NULL))
})
testthat::test_that("the bootstrap method works for a continuous response variable", {
y = Boston$medv
samp_rate = 0.75
repeats = 2
train_length = round(length(y) * samp_rate)
# test_length = length(y) - train_length # in bootstraps the test-set lengths are not equal
res = repeated_resampling(y, 'bootstrap', REPEATS = repeats, sample_rate = samp_rate, FOLDS = NULL)
testthat::expect_true(is.list(res) && length(res) == repeats && mean(unlist(lapply(1:length(res), function(x) length(res$idx_train[[x]])))) == train_length)
})
testthat::test_that("the repeated_resampling function works for a factor response variable", {
y = iris$Species
samp_rate = 0.75
repeats = 2
train_length = round(length(y) * samp_rate)
# test_length = length(y) - train_length # in bootstraps the test-set lengths are not equal
res = repeated_resampling(y, 'bootstrap', REPEATS = repeats, sample_rate = samp_rate, FOLDS = NULL)
testthat::expect_true(is.list(res) && length(res) == repeats && mean(unlist(lapply(1:length(res), function(x) length(res$idx_train[[x]])))) == train_length)
})
testthat::test_that("the train-test-split method works for a continuous response variable", {
y = Boston$medv
samp_rate = 0.75
repeats = 2
train_length = round(length(y) * samp_rate)
test_length = length(y) - train_length
res = repeated_resampling(y, 'train_test_split', REPEATS = repeats, sample_rate = samp_rate, FOLDS = NULL)
testthat::expect_true(is.list(res) && length(res) == repeats && mean(unlist(lapply(1:length(res), function(x) length(res$idx_train[[x]])))) == train_length
&& mean(unlist(lapply(1:length(res), function(x) length(res$idx_test[[x]])))) == test_length)
})
testthat::test_that("the train-test-split method works for a factor response variable", {
y = iris$Species
samp_rate = 0.75
repeats = 2
train_length = round(length(y) * samp_rate)
test_length = length(y) - train_length
res = repeated_resampling(y, 'train_test_split', REPEATS = repeats, sample_rate = samp_rate, FOLDS = NULL)
testthat::expect_true(is.list(res) && length(res) == repeats && mean(unlist(lapply(1:length(res), function(x) length(res$idx_train[[x]])))) == train_length
&& mean(unlist(lapply(1:length(res), function(x) length(res$idx_test[[x]])))) == test_length)
})
testthat::test_that("the cross_validation method works for a continuous response variable", {
y = Boston$medv
folds = 3
repeats = 2
res = repeated_resampling(y, 'cross_validation', REPEATS = repeats, sample_rate = NULL, FOLDS = folds)
tmp_train = unlist(res$idx_train, recursive = F)
tmp_test = unlist(res$idx_test, recursive = F)
tmp = unlist(lapply(1:length(tmp_train), function(x) (length(tmp_train[[x]]) + length(tmp_test[[x]]))))
testthat::expect_true(is.list(res) && length(res) == repeats && sum(sapply(tmp, function(x) identical(length(y), x))) == length(tmp))
})
testthat::test_that("the cross_validation method works for a factor response variable", {
y = iris$Species
folds = 3
repeats = 2
res = repeated_resampling(y, 'cross_validation', REPEATS = repeats, sample_rate = NULL, FOLDS = folds)
tmp_train = unlist(res$idx_train, recursive = F)
tmp_test = unlist(res$idx_test, recursive = F)
tmp = unlist(lapply(1:length(tmp_train), function(x) (length(tmp_train[[x]]) + length(tmp_test[[x]]))))
testthat::expect_true(is.list(res) && length(res) == repeats && sum(sapply(tmp, function(x) identical(length(y), x))) == length(tmp))
})
# shuffle data
testthat::test_that("shuffle data takes a vector as input and returns a vector as output", {
y = c(1:50)
testthat::expect_true(is.vector(FeatureSelection::func_shuffle(y, times = 10)))
})
testthat::test_that("the length of the input vector equals the length of the output vector", {
y = c(1:50)
output = FeatureSelection::func_shuffle(y, times = 10)
testthat::expect_true(length(y) == length(output))
})
# classification folds
testthat::test_that("throws an error if the RESP is not a factor", {
y = c(1:10)
testthat::expect_error(FeatureSelection::class_folds(5, y, shuffle = T), "RESP must be a factor")
})
testthat::test_that("returns a warning if the folds are not equally split", {
y = as.factor(sample(1:5, 99, replace = T))
testthat::expect_warning(FeatureSelection::class_folds(5, y, shuffle = T), 'the folds are not equally split')
})
testthat::test_that("the number of folds equals the number of the resulted sublist indices", {
y = as.factor(sample(1:5, 100, replace = T))
testthat::expect_length(FeatureSelection::class_folds(5, y, shuffle = T), 5)
})
# return object with shuffle = F
testthat::test_that("the number of folds equals the number of the resulted sublist indices", {
y = as.factor(sample(1:5, 100, replace = T))
testthat::expect_length(FeatureSelection::class_folds(5, y, shuffle = F), 5)
})
# regression folds
testthat::test_that("throws an error if the RESP is not a factor", {
y = as.factor(c(1:50))
testthat::expect_error(FeatureSelection::regr_folds(5, y, stratified = F), "this function is meant for regression for classification use the 'class_folds' function")
})
testthat::test_that("returns a warning if the folds are not equally split", {
y = sample(1:5, 99, replace = T)
testthat::expect_warning(FeatureSelection::regr_folds(5, y, stratified = F), 'the folds are not equally split')
})
testthat::test_that("the number of folds equals the number of the resulted sublist indices", {
y = sample(1:5, 100, replace = T)
testthat::expect_length(FeatureSelection::regr_folds(5, y, stratified = F), 5)
})
# object with stratified = T
testthat::test_that("the number of folds equals the number of the resulted sublist indices", {
y = sample(1:5, 100, replace = T)
testthat::expect_length(FeatureSelection::regr_folds(5, y, stratified = T), 5)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.