tests/testthat/test_informationvalue.R

require(modellingTools, quietly = TRUE,warn.conflicts = FALSE)
require(stringr, quietly = TRUE,warn.conflicts = FALSE)
require(dplyr, quietly = TRUE,warn.conflicts = FALSE)
require(magrittr, quietly = TRUE,warn.conflicts = FALSE)



context("Information Value")

dat <- dplyr::data_frame(x = c(1,2,1,2,3,4,3,5,6,7,8),
                         x2 = c(4,3,2,5,5,5,6,6,3,2,1),
                         x3 = c(2,2,2,3,3,3,4,4,4,5,6),
                  y = c(0,0,0,1,0,1,1,0,1,1,1),
                  ym = c(0,0,0,0,0,1,0,1,0,1,1))
bins <- c(1,3,5,8)
dat_b <- modellingTools::simple_bin(dat,bins = bins,exclude_vars = "y") %>% select(-ym)

dat_f <- iris %>%
  dplyr::mutate(resp = c(rep(0,40),rep(1,40),rep(0,40),rep(1,30)))

var_groups <- dplyr::data_frame(var = c("x","x2","x3"),
                                group = c(1,1,2))
var_groups_df <- var_groups %>% as.data.frame()
var_groups_badname <- var_groups %>% dplyr::rename("rav" = var)



woe_f_test <- modellingTools::woe_single(dat_f,var = "Species",response = "resp")

woe_expected <- data_frame(x = factor(c("[1,3]",
                                   "[3,5]",
                                   "[5,8]")),
                           good_capture = c(3/5,1/5,1/5),
                           bad_capture = c(1/6,2/6,3/6),
                           woe = c(log((1/6) / (3/5)),
                                   log((2/6) / (1/5)),
                                   log((3/6) / (1/5)))
)

woe_test <- woe_single(dat = dat,
                       bins = bins,
                       var = "x",
                       response = "y",
                       warn = TRUE,
                       auto_merge = FALSE)

woe_merged_expected <- data_frame(var = factor(c("[1,5]",
                                          "[5,8]")),
                                  good_capture = c(6/7,1/7),
                                  bad_capture = c(1/4,3/4),
                                  woe = c(log((1/4) / (6/7)),
                                          log((3/4) / (1/7)))
)

woe_merged_test <- woe_single(dat = dat,
                       bins = bins,
                       var = "x",
                       response = "ym",
                       warn = FALSE,
                       auto_merge = TRUE)

IV_expected <- (1/6 - 3/5) * log((1/6) / (3/5)) +
               (2/6 - 1/5) * log((2/6) / (1/5)) +
               (3/6 - 1/5) * log((3/6) / (1/5))
IV_test <- information_value(dat = dat,
                             bins = bins,
                             var = "x",
                             response = "y",
                             warn = TRUE,
                             auto_merge = FALSE)

iv_sort_expected <- dplyr::data_frame(var = c("x","x2","x3"),
                                      iv = c(information_value(dat_b,"x",response = "y",warn = FALSE,auto_merge = TRUE),
                                             information_value(dat_b,"x2",response = "y",warn = FALSE,auto_merge = TRUE),
                                             information_value(dat_b,"x3",response = "y",warn = FALSE,auto_merge = TRUE)
                                      ))
iv_sort_test <- modellingTools::iv_sort(dat_b,response = "y")
iv_sort_test_g <- modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups)

opt_bin_test <- modellingTools::optimal_bin(train = dat_f,
                                            response = "resp")

iv_cutpoints_test <- modellingTools::information_value(dat = dat,
                                                         bins = bins,
                                                         var = "x",
                                                         response = "y",
                                                         warn = FALSE,
                                                         auto_merge = TRUE,
                                                         output_cutpoints = TRUE)

test_that("woe works as expected", {
  expect_equal(woe_test,woe_expected)
  #expect_equal(woe_merged_test,woe_merged_expected)
})

test_that("woe works on factor variables", {
  expect_is(woe_f_test,c("tbl_df","tbl","data.frame"))
})

test_that("woe throws appropriate warnings and errors", {
  expect_warning(woe_single(dat = dat,
                            bins = bins,
                            var = "x",
                            response = "ym",
                            warn = TRUE,
                            auto_merge = FALSE)
  )
  expect_error(woe_single(dat = dat,
                          bins = 4,
                          var = "x",
                          response = "ym")
  )
  expect_error(woe_single(dat = dat,
                          bins = bins,
                          var = "y",
                          response = "x")
  )
  expect_error(woe_single(dat = dat,
                          bins = 0,
                          var = "x",
                          response = "ym")
  )
})

test_that("IV gives correct result", {
  expect_equal(IV_expected,IV_test)
  expect_equal(iv_sort_expected,iv_sort_test)
  expect_is(iv_cutpoints_test,"list")
  expect_length(iv_cutpoints_test,2)
  expect_is(iv_cutpoints_test$iv,"numeric")
  expect_is(iv_cutpoints_test$cutpoints,"numeric")
})

test_that("IV Sort gives correct warnings", {
  # Response not in dataset
  expect_error(modellingTools::iv_sort(dat_b,response = "yy"))
  # Columns not factored
  expect_error(modellingTools::iv_sort(dat,response = "y"))
  # Non-numeric response
  expect_error(modellingTools::iv_sort(dat_b %>% mutate(yf = factor(y)),response = "yf"))
  # Non-binary response
  expect_error(modellingTools::iv_sort(dat_b,response = "x3"))
  expect_warning(modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups_df))
  expect_warning(modellingTools::iv_sort(dat_b,response = "y",var_grouping = var_groups_badname))
})

test_that("IV Sort preserves grouping", {
  expect_equal(colnames(iv_sort_test_g),c("var","iv","group"))
})

test_that("Optimal binning returns an object of the correct class and names", {
  expect_is(opt_bin_test,"list")
  expect_equal(names(opt_bin_test),c("sepal_length","sepal_width","petal_length","petal_width"))
})
awstringer/modellingTools documentation built on May 11, 2019, 4:11 p.m.