# innR2binnR: Advanced Variable Discretization

require(modellingTools, quietly = TRUE,warn.conflicts = FALSE)
require(innR2binnR, quietly = TRUE,warn.conflicts = FALSE)
require(stringr, quietly = TRUE,warn.conflicts = FALSE)
require(dplyr, quietly = TRUE,warn.conflicts = FALSE)
require(magrittr, quietly = TRUE,warn.conflicts = FALSE)



context("Information Value")

# ---- Shared fixtures --------------------------------------------------------
# Everything below is built once at file level and read by the test_that()
# blocks further down.

# Toy data: three numeric predictors and two 0/1 responses.
# `ym` has no 1s among the rows with x < 3; presumably that empty cell is what
# the warn / auto_merge arguments are exercised against below.
dat <- dplyr::tibble(
  x  = c(1, 2, 1, 2, 3, 4, 3, 5, 6, 7, 8),
  x2 = c(4, 3, 2, 5, 5, 5, 6, 6, 3, 2, 1),
  x3 = c(2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6),
  y  = c(0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1),
  ym = c(0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1)
)
bins <- c(1, 3, 5, 8)

# Binned copy of `dat`: `y` is excluded from binning and `ym` is dropped.
dat_b <- modellingTools::simple_bin(dat, bins = bins, exclude_vars = "y") %>%
  select(-ym)

# Hand-computed expectation for x against y. Per bin, the rows with y == 0
# number 3, 1, 1 (of 5) and the rows with y == 1 number 1, 2, 3 (of 6), with
# x = 3 and x = 5 counted in the upper of their two neighbouring bins.
# woe is log(bad_capture / good_capture).
woe_expected <- dplyr::tibble(
  var = factor(c("[1,3]", "[3,5]", "[5,8]")),
  good_capture = c(3 / 5, 1 / 5, 1 / 5),
  bad_capture = c(1 / 6, 2 / 6, 3 / 6),
  woe = c(
    log((1 / 6) / (3 / 5)),
    log((2 / 6) / (1 / 5)),
    log((3 / 6) / (1 / 5))
  )
)

woe_test <- woe_single(
  dat = dat,
  bins = bins,
  var = "x",
  response = "y",
  warn = TRUE,
  auto_merge = FALSE
)

# Hand-computed expectation for x against ym once the two lowest bins are
# combined: ym == 0 rows are 6, 1 (of 7) and ym == 1 rows are 1, 3 (of 4).
woe_merged_expected <- dplyr::tibble(
  var = factor(c("[1,5]", "[5,8]")),
  good_capture = c(6 / 7, 1 / 7),
  bad_capture = c(1 / 4, 3 / 4),
  woe = c(
    log((1 / 4) / (6 / 7)),
    log((3 / 4) / (1 / 7))
  )
)

woe_merged_test <- woe_single(
  dat = dat,
  bins = bins,
  var = "x",
  response = "ym",
  warn = FALSE,
  auto_merge = TRUE
)

# Information value for x against y: the sum over bins of
# (bad_capture - good_capture) * woe, using the same figures as woe_expected.
IV_expected <- (1 / 6 - 3 / 5) * log((1 / 6) / (3 / 5)) +
  (2 / 6 - 1 / 5) * log((2 / 6) / (1 / 5)) +
  (3 / 6 - 1 / 5) * log((3 / 6) / (1 / 5))

IV_test <- information_value(
  dat = dat,
  bins = bins,
  var = "x",
  response = "y",
  warn = TRUE,
  auto_merge = FALSE
)

# One information value per binned predictor, listed in column order.
# NOTE(review): the row order iv_sort() itself returns is not visible from
# this file -- confirm it matches the order used here.
iv_sort_expected <- dplyr::tibble(
  var = c("x", "x2", "x3"),
  iv = c(
    information_value(dat_b, "x", response = "y", warn = FALSE,
                      auto_merge = TRUE),
    information_value(dat_b, "x2", response = "y", warn = FALSE,
                      auto_merge = TRUE),
    information_value(dat_b, "x3", response = "y", warn = FALSE,
                      auto_merge = TRUE)
  )
)
iv_sort_test <- innR2binnR::iv_sort(dat_b, response = "y")

test_that("woe works as expected", {
  # Both tables carry doubles produced by log() of ratios, so they are compared
  # with a numeric tolerance rather than bit-for-bit.
  expect_equal(woe_test, woe_expected)
  expect_equal(woe_merged_test, woe_merged_expected)
})

test_that("woe throws appropriate warnings and errors", {
  # Response `ym` with warnings switched on and merging switched off must warn.
  expect_warning(
    woe_single(
      dat = dat, bins = bins, var = "x", response = "ym",
      warn = TRUE, auto_merge = FALSE
    )
  )

  # A single number passed as `bins` must be rejected.
  expect_error(
    woe_single(dat = dat, bins = 4, var = "x", response = "ym")
  )

  # Variable and response swapped, so the response column is `x`.
  expect_error(
    woe_single(dat = dat, bins = bins, var = "y", response = "x")
  )

  # Zero passed as `bins` must be rejected.
  expect_error(
    woe_single(dat = dat, bins = 0, var = "x", response = "ym")
  )
})

test_that("IV gives correct result", {
  # Computed value first, reference value second, so that failure messages
  # label the two sides correctly.
  expect_equal(IV_test, IV_expected)
  expect_equal(iv_sort_test, iv_sort_expected)
})

test_that("IV Sort gives correct warnings", {
  # The named response column does not exist in the data.
  expect_error(
    innR2binnR::iv_sort(dat_b, response = "yy")
  )

  # Raw `dat` is passed instead of the binned `dat_b`.
  expect_error(
    innR2binnR::iv_sort(dat, response = "y")
  )

  # The response is a factor copy of `y` rather than a numeric column.
  expect_error(
    innR2binnR::iv_sort(mutate(dat_b, yf = factor(y)), response = "yf")
  )

  # The response column `x3` is not a two-valued column.
  expect_error(
    innR2binnR::iv_sort(dat_b, response = "x3")
  )
})

awstringer/innR2binnR documentation built on May 11, 2019, 4:11 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

awstringer/innR2binnR
Advanced Variable Discretization

tests/testthat/test_informationvalue.R
In awstringer/innR2binnR: Advanced Variable Discretization

R Package Documentation

Browse R Packages

We want your feedback!

awstringer/innR2binnR Advanced Variable Discretization

tests/testthat/test_informationvalue.R In awstringer/innR2binnR: Advanced Variable Discretization

R Package Documentation

Browse R Packages

We want your feedback!

awstringer/innR2binnR
Advanced Variable Discretization

tests/testthat/test_informationvalue.R
In awstringer/innR2binnR: Advanced Variable Discretization