require(modellingTools, quietly = TRUE,warn.conflicts = FALSE)
require(stringr, quietly = TRUE,warn.conflicts = FALSE)
require(dplyr, quietly = TRUE,warn.conflicts = FALSE)
require(magrittr, quietly = TRUE,warn.conflicts = FALSE)
context("Information Value")
dat <- dplyr::data_frame(x = c(1,2,1,2,3,4,3,5,6,7,8),
x2 = c(4,3,2,5,5,5,6,6,3,2,1),
x3 = c(2,2,2,3,3,3,4,4,4,5,6),
y = c(0,0,0,1,0,1,1,0,1,1,1),
ym = c(0,0,0,0,0,1,0,1,0,1,1))
bins <- c(1,3,5,8)
dat_b <- modellingTools::simple_bin(dat,bins = bins,exclude_vars = "y") %>% select(-ym)
dat_f <- iris %>%
dplyr::mutate(resp = c(rep(0,40),rep(1,40),rep(0,40),rep(1,30)))
var_groups <- dplyr::data_frame(var = c("x","x2","x3"),
group = c(1,1,2))
var_groups_df <- var_groups %>% as.data.frame()
var_groups_badname <- var_groups %>% dplyr::rename("rav" = var)
woe_f_test <- modellingTools::woe_single(dat_f,var = "Species",response = "resp")
woe_expected <- data_frame(x = factor(c("[1,3]",
"[3,5]",
"[5,8]")),
good_capture = c(3/5,1/5,1/5),
bad_capture = c(1/6,2/6,3/6),
woe = c(log((1/6) / (3/5)),
log((2/6) / (1/5)),
log((3/6) / (1/5)))
)
woe_test <- woe_single(dat = dat,
bins = bins,
var = "x",
response = "y",
warn = TRUE,
auto_merge = FALSE)
woe_merged_expected <- data_frame(var = factor(c("[1,5]",
"[5,8]")),
good_capture = c(6/7,1/7),
bad_capture = c(1/4,3/4),
woe = c(log((1/4) / (6/7)),
log((3/4) / (1/7)))
)
woe_merged_test <- woe_single(dat = dat,
bins = bins,
var = "x",
response = "ym",
warn = FALSE,
auto_merge = TRUE)
IV_expected <- (1/6 - 3/5) * log((1/6) / (3/5)) +
(2/6 - 1/5) * log((2/6) / (1/5)) +
(3/6 - 1/5) * log((3/6) / (1/5))
IV_test <- information_value(dat = dat,
bins = bins,
var = "x",
response = "y",
warn = TRUE,
auto_merge = FALSE)
iv_sort_expected <- dplyr::data_frame(var = c("x","x2","x3"),
iv = c(information_value(dat_b,"x",response = "y",warn = FALSE,auto_merge = TRUE),
information_value(dat_b,"x2",response = "y",warn = FALSE,auto_merge = TRUE),
information_value(dat_b,"x3",response = "y",warn = FALSE,auto_merge = TRUE)
))
iv_sort_test <- modellingTools::iv_sort(dat_b,response = "y")
iv_sort_test_g <- modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups)
opt_bin_test <- modellingTools::optimal_bin(train = dat_f,
response = "resp")
iv_cutpoints_test <- modellingTools::information_value(dat = dat,
bins = bins,
var = "x",
response = "y",
warn = FALSE,
auto_merge = TRUE,
output_cutpoints = TRUE)
test_that("woe works as expected", {
expect_equal(woe_test,woe_expected)
#expect_equal(woe_merged_test,woe_merged_expected)
})
test_that("woe works on factor variables", {
expect_is(woe_f_test,c("tbl_df","tbl","data.frame"))
})
test_that("woe throws appropriate warnings and errors", {
expect_warning(woe_single(dat = dat,
bins = bins,
var = "x",
response = "ym",
warn = TRUE,
auto_merge = FALSE)
)
expect_error(woe_single(dat = dat,
bins = 4,
var = "x",
response = "ym")
)
expect_error(woe_single(dat = dat,
bins = bins,
var = "y",
response = "x")
)
expect_error(woe_single(dat = dat,
bins = 0,
var = "x",
response = "ym")
)
})
test_that("IV gives correct result", {
expect_equal(IV_expected,IV_test)
expect_equal(iv_sort_expected,iv_sort_test)
expect_is(iv_cutpoints_test,"list")
expect_length(iv_cutpoints_test,2)
expect_is(iv_cutpoints_test$iv,"numeric")
expect_is(iv_cutpoints_test$cutpoints,"numeric")
})
test_that("IV Sort gives correct warnings", {
# Response not in dataset
expect_error(modellingTools::iv_sort(dat_b,response = "yy"))
# Columns not factored
expect_error(modellingTools::iv_sort(dat,response = "y"))
# Non-numeric response
expect_error(modellingTools::iv_sort(dat_b %>% mutate(yf = factor(y)),response = "yf"))
# Non-binary response
expect_error(modellingTools::iv_sort(dat_b,response = "x3"))
expect_warning(modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups_df))
expect_warning(modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups_badname))
})
test_that("IV Sort preserves grouping", {
expect_equal(colnames(iv_sort_test_g),c("var","iv","group"))
})
test_that("Optimal binning returns an object of the correct class and names", {
expect_is(opt_bin_test,"list")
expect_equal(names(opt_bin_test),c("sepal_length","sepal_width","petal_length","petal_width"))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.