rbin: Tools for binning data
# Install the released version of rbin from CRAN
install.packages("rbin")
# Or install the development version from GitHub instead.
# The call below goes through devtools; uncomment the next line if devtools
# is not installed yet.
# install.packages("devtools")
devtools::install_github("rsquaredacademy/rbin")
rbin includes two addins for manually binning data:
# Launch rbin's two addins for binning data manually.
# NOTE(review): rbinFactorAddin() is presumably the variant for factor
# (categorical) predictors - confirm against the package documentation.
rbinAddin()
rbinFactorAddin()
# Manual binning ----
# Bin the predictor `age` from `mbank` against the response `y` using nine
# user-supplied cut points; the printed summary below reports ten bins.
bins <- rbin_manual(mbank, y, age, c(29, 31, 34, 36, 39, 42, 46, 51, 56))
# Print the binning summary and the per-bin table
bins
#> Binning Summary
#> ---------------------------
#> Method Manual
#> Response y
#> Predictor age
#> Bins 10
#> Count 4521
#> Goods 517
#> Bads 4004
#> Entropy 0.5
#> Information Value 0.12
#>
#>
#> cut_point bin_count good bad woe iv entropy
#> 1 < 29 410 71 339 -0.483686036 2.547353e-02 0.6649069
#> 2 < 31 313 41 272 -0.154776266 1.760055e-03 0.5601482
#> 3 < 34 567 55 512 0.183985174 3.953685e-03 0.4594187
#> 4 < 36 396 45 351 0.007117468 4.425063e-06 0.5107878
#> 5 < 39 519 47 472 0.259825118 7.008270e-03 0.4383322
#> 6 < 42 431 33 398 0.442938178 1.575567e-02 0.3899626
#> 7 < 46 449 47 402 0.099298221 9.423907e-04 0.4836486
#> 8 < 51 521 40 481 0.439981550 1.881380e-02 0.3907140
#> 9 < 56 445 49 396 0.042587647 1.756117e-04 0.5002548
#> 10 >= 56 470 89 381 -0.592843261 4.564428e-02 0.7001343
# Plot the binning result stored in `bins`
plot(bins)
# Factor binning ----
# Combine levels: merge the "secondary" and "tertiary" levels of `education`
# into a single level named "upper".
upper <- c("secondary", "tertiary")
out <- rbin_factor_combine(mbank, education, upper, "upper")
# Check the level counts after combining
table(out$education)
#>
#> upper unknown primary
#> 3651 179 691
# Bin the combined `education` factor against the response `y`
bins <- rbin_factor(out, y, education)
# Print the binning summary and the per-level table
bins
#> Binning Summary
#> ---------------------------
#> Method Custom
#> Response y
#> Predictor education
#> Levels 3
#> Count 4521
#> Goods 517
#> Bads 4004
#> Entropy 0.51
#> Information Value 0.01
#>
#>
#> level bin_count good bad woe iv entropy
#> 1 upper 3651 426 3225 -0.02275738 0.0004219212 0.5197428
#> 2 primary 691 66 625 0.20109064 0.0057178780 0.4546110
#> 3 unknown 179 25 154 -0.22892949 0.0022651110 0.5833603
# Plot the binning result stored in `bins`
plot(bins)
# Quantile binning ----
# Bin `age` against the response `y` into 10 bins (Method: Quantile).
# The resulting cut points (29, 31, ..., 56) are the same values passed by
# hand in the rbin_manual() example, so the per-bin table is identical.
bins <- rbin_quantiles(mbank, y, age, 10)
# Print the binning summary and the per-bin table
bins
#> Binning Summary
#> -----------------------------
#> Method Quantile
#> Response y
#> Predictor age
#> Bins 10
#> Count 4521
#> Goods 517
#> Bads 4004
#> Entropy 0.5
#> Information Value 0.12
#>
#>
#> cut_point bin_count good bad woe iv entropy
#> 1 < 29 410 71 339 -0.483686036 2.547353e-02 0.6649069
#> 2 < 31 313 41 272 -0.154776266 1.760055e-03 0.5601482
#> 3 < 34 567 55 512 0.183985174 3.953685e-03 0.4594187
#> 4 < 36 396 45 351 0.007117468 4.425063e-06 0.5107878
#> 5 < 39 519 47 472 0.259825118 7.008270e-03 0.4383322
#> 6 < 42 431 33 398 0.442938178 1.575567e-02 0.3899626
#> 7 < 46 449 47 402 0.099298221 9.423907e-04 0.4836486
#> 8 < 51 521 40 481 0.439981550 1.881380e-02 0.3907140
#> 9 < 56 445 49 396 0.042587647 1.756117e-04 0.5002548
#> 10 >= 56 470 89 381 -0.592843261 4.564428e-02 0.7001343
# Plot the binning result stored in `bins`
plot(bins)
# Winsorized binning ----
# Bin `age` against the response `y` into 10 bins (Method: Winsorize).
# In the output below the cut points are evenly spaced, 3.2 apart.
# NOTE(review): winsor_rate = 0.05 is presumably the fraction of extreme
# values winsorized before binning - confirm in the rbin_winsorize() docs.
bins <- rbin_winsorize(mbank, y, age, 10, winsor_rate = 0.05)
# Print the binning summary and the per-bin table
bins
#> Binning Summary
#> ------------------------------
#> Method Winsorize
#> Response y
#> Predictor age
#> Bins 10
#> Count 4521
#> Goods 517
#> Bads 4004
#> Entropy 0.51
#> Information Value 0.1
#>
#>
#> cut_point bin_count good bad woe iv entropy
#> 1 < 30.2 723 112 611 -0.3504082 0.0224390979 0.6219926
#> 2 < 33.4 567 55 512 0.1839852 0.0039536848 0.4594187
#> 3 < 36.6 573 58 515 0.1367176 0.0022470488 0.4728562
#> 4 < 39.8 497 44 453 0.2846962 0.0079801719 0.4315480
#> 5 < 43 396 37 359 0.2253982 0.0040782670 0.4478305
#> 6 < 46.2 461 43 418 0.2272751 0.0048235624 0.4473095
#> 7 < 49.4 281 22 259 0.4187793 0.0092684760 0.3961315
#> 8 < 52.6 309 32 277 0.1112753 0.0008106706 0.4801796
#> 9 < 55.8 244 25 219 0.1231896 0.0007809490 0.4767424
#> 10 >= 55.8 470 89 381 -0.5928433 0.0456442813 0.7001343
# Plot the binning result stored in `bins`
plot(bins)
# Equal length binning ----
# Bin `age` against the response `y` into 10 bins (Method: Equal Length).
# In the output below the cut points are evenly spaced, 6.6 apart, so bin
# counts vary widely (from 15 to 1133 observations).
bins <- rbin_equal_length(mbank, y, age, 10)
# Print the binning summary and the per-bin table
bins
#> Binning Summary
#> ---------------------------------
#> Method Equal Length
#> Response y
#> Predictor age
#> Bins 10
#> Count 4521
#> Goods 517
#> Bads 4004
#> Entropy 0.5
#> Information Value 0.17
#>
#>
#> cut_point bin_count good bad woe iv entropy
#> 1 < 24.6 85 24 61 -1.11418623 0.0347480126 0.8586371
#> 2 < 31.2 822 106 716 -0.13676519 0.0035843196 0.5545619
#> 3 < 37.8 1133 115 1018 0.13365680 0.0042514380 0.4737339
#> 4 < 44.4 943 82 861 0.30436899 0.0171748162 0.4262287
#> 5 < 51 623 52 571 0.34913923 0.0146733167 0.4142794
#> 6 < 57.6 612 66 546 0.06595797 0.0005741022 0.4933757
#> 7 < 64.2 229 43 186 -0.58245971 0.0213871054 0.6967893
#> 8 < 70.8 34 12 22 -1.44087046 0.0255269312 0.9366674
#> 9 < 77.4 25 13 12 -2.12704897 0.0471100183 0.9988455
#> 10 >= 77.4 15 4 11 -1.03540535 0.0051663529 0.8366407
# Plot the binning result stored in `bins`
plot(bins)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.