Nothing
######################
# Information theory #
######################
context("findviews_to_predict - information theory")
test_that("mutual information function does the job", {
check_MI <- function(s1, s2){
f1 <- as.factor(s1)
f2 <- as.factor(s2)
m <- mutual_information(f1, f2)
expect_true(is.numeric(m))
}
check_MI(c(1,2,3), c(1,1,2))
check_MI(c(1,1,3), c(2,2,2))
check_MI(c(1,1,NA), c(2,2,2))
check_MI(c(1,NA), c(2,NA))
check_MI(c(), c())
check_MI(c(NA), c(NA))
check_MI(numeric(0), numeric(0))
f1 <- factor(c(1,1,2,3))
f2 <- factor(c(1,2,2,3))
expect_equal(mutual_information(f1, f2), 1.0)
f1 <- factor(c(1,1,2,1))
f2 <- factor(c(1,2,2,1))
expect_equal(mutual_information(f1, f2), 0.311, tolerance = .001)
})
#################
# Preprocessors #
#################
context("findviews_to_predict - preprocessing")
test_that("preprocessor does the job", {
check_pp <- function(s)
expect_is(preprocess_target(s), "factor")
check_pp(c(1,2,3,4,5))
check_pp(c(1, NA))
check_pp(c('2', '1'))
check_pp(c())
})
###########
# Ranking #
###########
context("findviews_to_predict - ranking")
test_that("ranking works for cat data", {
check_rk <- function(data, one_view = NULL){
set.seed(55555)
target <- sample(factor(c('1', '2', '3')), nrow(data), replace = T)
views <- if (is.null(one_view)) list(names(data))
else views <- list(one_view)
out <- score_predictive_cat(views, data, target)
expect_is(out, "numeric")
expect_length(out, length(views))
}
check_rk(df_cat)
check_rk(df_cat, names(df_cat)[1])
check_rk(df_cat, names(df_cat)[1==2])
check_rk(df_cat_NA, names(df_cat_NA))
check_rk(df_onecol_cat, names(df_onecol_cat))
})
test_that("ranking works for num data", {
check_rk <- function(data, one_view = NULL){
set.seed(55555)
target <- sample(factor(c('1', '2', '3')), nrow(data), replace = T)
views <- if (is.null(one_view)) list(names(data))
else views <- list(one_view)
if (length(unlist(views)) == 0) views <- list()
out <- score_predictive_num(views, data, target)
expect_is(out, "numeric")
expect_length(out, length(views))
}
check_rk(df_num)
check_rk(df_num, names(df_num)[1])
check_rk(df_num, names(df_num)[1==2])
check_rk(df_num_NA, names(df_num_NA))
check_rk(df_onecol, names(df_onecol))
})
###########
# Ranking #
###########
context("findviews_to_predict - main function")
check_output <- function(df, num, ...){
# Generates a phony target column
target <- sample(factor(c('1', '2', '3')), nrow(df), replace = T)
df <- cbind(df, target)
names(df)[[length(names(df))]] <- 'target'
# Runs Ziggy
out <- findviews_to_predict_core('target', df, view_size_max = num, ...)
# Structure checks
expect_is(out, "list")
expect_named(out, c('views_cat',
'views_num',
'scores_cat',
'scores_num',
'details_num',
'details_cat',
'excluded',
'target_data'), ignore.order=T)
# Content checks
expect_is(out$views_num, "list")
expect_is(out$views_cat, "list")
expect_true(all(names(df) %in% unlist(c(out$views_num,
out$views_cat,
out$excluded,
'target'))))
if (length(out$views_num) > 0){
expect_true(all(sapply(out$views_num, is.character)))
expect_true(all(sapply(out$views_num, function(v) length(v) <= num)))
expect_true(all(sapply(out$views_num, function(v) length(v) > 0)))
}
if (length(out$views_cat) > 0){
expect_true(all(sapply(out$views_cat, is.character)))
expect_true(all(sapply(out$views_cat, function(v) length(v) <= num)))
expect_true(all(sapply(out$views_cat, function(v) length(v) > 0)))
}
expect_named(out$excluded, c('unknown_type',
'flat_num', 'flat_cat',
'sparse_num', 'sparse_cat'),
ignore.order = T)
expect_is(out$scores_num, "numeric")
expect_is(out$scores_cat, "numeric")
expect_length(out$scores_num, length(out$views_num))
expect_length(out$scores_cat, length(out$views_cat))
}
test_that("findviews_to_predict returns properly", {
check_output(df_mix, 3)
check_output(df_num, 3)
check_output(df_cat, 2)
check_output(df_zerocol, 3)
check_output(df_onecol, 3)
check_output(df_onecol_cat, 3)
})
test_that("findviews_to_predict can deal with flat columns", {
check_output(df_flat1, 2)
check_output(df_flat2, 2)
check_output(df_flat3, 2)
})
test_that("findviews_to_predict can deal with NAs", {
check_output(df_num_NA, 3)
check_output(df_cat_NA, 3)
})
test_that("findviews_to_predict can handle 0 length strings", {
check_output(df_cat_emptystring, 3)
})
# # Checks sampling
# test_that("sampling works properly", {
# # Generates a phony target column
# target <- sample(factor(c('1', '2', '3')), nrow(df_num), replace = T)
# df <- cbind(df_num, target)
# names(df)[[length(names(df))]] <- 'target'
#
# OLD <- SAMPLE_SIZE
# SAMPLE_SIZE <<- 4
# expect_warning(findviews_to_predict_core('target', df))
# SAMPLE_SIZE <<- OLD
# })
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.