Nothing
library(optbin)
### Helper Functions
# Short for cat(sprintf(fmt, ...)). Automatically appends \n to fmt.
catf <- function(fmt, ...) {
cat(sprintf(sprintf("%s\n", fmt), ...))
}
# Nearly-equal floating point test of two numbers, within the absolute
# accuracy acc.
fpnear <- function(x1, x2, acc=1e-7) {
return(abs(x1-x2) < acc)
}
### optbin Functionality Tests
# Test the optimal binning result res with nbin bins against the expected
# endpoints expend, threshold values expthr, the total, best (M)SE expse,
# and the average value per bin expavg. Returns TRUE if all tests pass,
# FALSE if any fail.
verify_binning <- function(nbin, res, expend, expthr, expse, expavg) {
passed <- TRUE
if (nbin != res$numbins) {
catf(" expected %d bins but result has %d", nbin, res$numbins)
passed <- FALSE
}
for (b in 1:nbin) {
if (expend[b] != res$breaks[b]) {
catf(" expected bin %d endpoint at %d but got %d",
b, expend[b], res$breaks[b])
passed <- FALSE
}
}
for (b in 1:nbin) {
if (!fpnear(expthr[b], res$thr[b])) {
catf(" expected bin %d threshold %.3f but got %.3f",
b, expthr[b], res$thr[b])
passed <- FALSE
}
}
if (!fpnear(expse, res$minse)) {
catf(" expected bin %d metric %.5f but got %.5f",
b, expse, res$minse)
passed <- FALSE
}
for (b in 1:nbin) {
if (!fpnear(expavg[b], res$binavg[b])) {
catf(" expected bin %d average %.3f but got %.3f",
b, expavg[b], res$binavg[b])
passed <- FALSE
}
}
return(passed)
}
# Compare two binning results, resA and resB. Any difference causes the
# test to fail. Returns TRUE if the two are the same, FALSE if there
# are differences.
compare_binning <- function(resA, resB) {
passed <- TRUE
if (resA$numbins != resB$numbins) {
catf(" binning A has %d bins, B %d", resA$numbins, resB$numbins)
passed <- FALSE
}
if (resA$metric != resB$metric) {
catf(" binning A used %s, B %s", resA$metric, resB$metric)
passed <- FALSE
}
if (!fpnear(resA$minse, resB$minse)) {
catf(" binning A best %s %.5f, B %.5f", toupper(resA$metric),
resA$minse, resB$minse)
passed <- FALSE
}
for (b in 1:resA$numbins) {
if (!fpnear(resA$thr[b], resB$thr[b])) {
catf(" bin %d threshold in A is %.5f, B %.5f",
b, resA$thr[b], resB$thr[b])
passed <- FALSE
}
}
for (b in 1:resA$numbins) {
if (!fpnear(resA$binavg[b], resB$binavg[b])) {
catf(" bin %d average in A is %.5f, B %.5f",
b, resA$binavg[b], resB$binavg[b])
passed <- FALSE
}
}
for (b in 1:resA$numbins) {
if (!fpnear(resA$thr[b], resB$thr[b])) {
catf(" bin %d (M)SE in A is %.5f, B %.5f",
b, resA$binse[b], resB$binse[b])
passed <- FALSE
}
}
for (b in 1:resA$numbins) {
if (resA$breaks[b] != resB$breaks[b]) {
catf(" bin %d endpoint in A is %d, B %d",
b, resA$breaks[b], resB$breaks[b])
passed <- FALSE
}
}
return(passed)
}
# Test 1: Small linear sequence. Check with 2, 3, and 4 bins.
optbin_test1 <- function() {
passed <- TRUE
data <- c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
catf(" running test set 1 with cache")
b <- optbin(data, 2)
if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) {
passed <- FALSE
}
b <- optbin(data, 3)
if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0),
9.0, c(2.0, 5.0, 8.5))) {
passed <- FALSE
}
b <- optbin(data, 4)
if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0),
5.0, c(1.5, 3.5, 6.0, 9.0))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 1 without cache")
b <- optbin(data, 2, max.cache=0)
if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) {
passed <- FALSE
}
b <- optbin(data, 3, max.cache=0)
if (!verify_binning(3, b, c(3, 6, 10), c(3.0, 6.0, 10.0),
9.0, c(2.0, 5.0, 8.5))) {
passed <- FALSE
}
b <- optbin(data, 4, max.cache=0)
if (!verify_binning(4, b, c(2, 4, 7, 10), c(2.0, 4.0, 7.0, 10.0),
5.0, c(1.5, 3.5, 6.0, 9.0))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
# Test 2: Two normals, N(1, 0.25), N(2, 0.25). Check with 2, 3, and 4 bins.
optbin_test2 <- function() {
passed <- TRUE
data <- c(0.86, 0.91, 0.92, 1.04, 1.23, 1.23, 1.30, 1.36, 1.79, 1.89,
1.89, 2.34, 2.41, 2.58, 2.79, 2.93, 2.96, 3.21, 3.41, 3.55)
catf(" running test set 2 with cache")
b <- optbin(data, 2)
if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55),
2.956779797980, c(1.31090909, 2.90888888))) {
passed <- FALSE
}
b <- optbin(data, 3)
if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55),
1.269070833333, c(1.10625, 2.15, 3.14166666))) {
passed <- FALSE
}
b <- optbin(data, 4)
if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55),
0.686537500000,
c(1.10625, 1.85666666, 2.668333333, 3.39))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 2 without cache")
b <- optbin(data, 2, max.cache=0)
if (!verify_binning(2, b, c(11, 20), c(1.89, 3.55),
2.956779797980, c(1.31090909, 2.90888888))) {
passed <- FALSE
}
b <- optbin(data, 3, max.cache=0)
if (!verify_binning(3, b, c(8, 14, 20), c(1.36, 2.58, 3.55),
1.269070833333, c(1.10625, 2.15, 3.14166666))) {
passed <- FALSE
}
b <- optbin(data, 4, max.cache=0)
if (!verify_binning(4, b, c(8, 11, 17, 20), c(1.36, 1.89, 2.96, 3.55),
0.686537500000,
c(1.10625, 1.85666666, 2.668333333, 3.39))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
# Test 3: Four constant bins. Check with 2 t/m 6 bins (tie resolution 5, 6).
optbin_test3 <- function() {
passed <- TRUE
data <- c(1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0,
5.0, 5.0, 5.0, 5.0, 7.0, 7.0, 7.0, 7.0)
catf(" running test set 3 with cache")
b <- optbin(data, 2)
if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) {
passed <- FALSE
}
b <- optbin(data, 3)
if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0,
c(1.0, 3.0, 6.0))) {
passed <- FALSE
}
b <- optbin(data, 4)
if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0),
0.0, c(1.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
b <- optbin(data, 5)
if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0),
0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
b <- optbin(data, 6)
if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16),
c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0,
c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 3 without cache")
b <- optbin(data, 2, max.cache=0)
if (!verify_binning(2, b, c(8, 16), c(3.0, 7.0), 16.0, c(2.0, 6.0))) {
passed <- FALSE
}
b <- optbin(data, 3, max.cache=0)
if (!verify_binning(3, b, c(4, 8, 16), c(1.0, 3.0, 7.0), 8.0,
c(1.0, 3.0, 6.0))) {
passed <- FALSE
}
b <- optbin(data, 4, max.cache=0)
if (!verify_binning(4, b, c(4, 8, 12, 16), c(1.0, 3.0, 5.0, 7.0),
0.0, c(1.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
b <- optbin(data, 5, max.cache=0)
if (!verify_binning(5, b, c(2, 4, 8, 12, 16), c(1.0, 1.0, 3.0, 5.0, 7.0),
0.0, c(1.0, 1.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
b <- optbin(data, 6, max.cache=0)
if (!verify_binning(6, b, c(2, 4, 6, 8, 12, 16),
c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0), 0.0,
c(1.0, 1.0, 3.0, 3.0, 5.0, 7.0))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
# Test 4: Data where SE and MSE bins differ. Check with 2 and 3 bins.
optbin_test4 <- function() {
passed <- TRUE
data <- c(1.00, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00,
5.00, 8.00, 8.25, 8.50)
catf(" running test set 2 with cache and using SE")
b <- optbin(data, 2, metric='se')
if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5),
11.796875000000, c(2.0, 7.4375))) {
passed <- FALSE
}
b <- optbin(data, 3, metric='se')
if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50),
4.750000000000, c(1.875, 4.0, 8.25))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 2 with cache and using MSE")
b <- optbin(data, 2, metric='mse')
if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5),
1.226666666667, c(2.30, 8.25))) {
passed <- FALSE
}
b <- optbin(data, 3, metric='mse')
if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5),
1.08333333333, c(1.25, 2.75, 8.25))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 2 without cache and using SE")
b <- optbin(data, 2, metric='se', max.cache=0)
if (!verify_binning(2, b, c(9, 13), c(3.0, 8.5),
11.796875000000, c(2.0, 7.4375))) {
passed <- FALSE
}
b <- optbin(data, 3, metric='se', max.cache=0)
if (!verify_binning(3, b, c(8, 10, 13), c(2.75, 5.0, 8.50),
4.750000000000, c(1.875, 4.0, 8.25))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
catf(" running test set 2 without cache and using MSE")
b <- optbin(data, 2, metric='mse', max.cache=0)
if (!verify_binning(2, b, c(10, 13), c(5.0, 8.5),
1.226666666667, c(2.30, 8.25))) {
passed <- FALSE
}
b <- optbin(data, 3, metric='mse', max.cache=0)
if (!verify_binning(3, b, c(3, 10, 13), c(1.5, 5.0, 8.5),
1.08333333333, c(1.25, 2.75, 8.25))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
# Fill a large array with random data and compare the results with and
# without caching.
optbin_caching <- function() {
passed <- TRUE
catf(" running random data with and without caching using SE")
# 2520 is a multiple of 5, 7, 8, and 9 so all bins fall on endpoints.
set.seed(17)
data <- runif(2520)
for (b in 2:10) {
bcache <- optbin(data, b)
bnocache <- optbin(data, b, max.cache=0)
if (!compare_binning(bcache, bnocache)) {
passed <- FALSE
}
}
if (passed) {
catf(" tests passed")
}
catf(" running random data with and without caching using MSE")
for (b in 2:10) {
bcache <- optbin(data, b, metric='mse')
bnocache <- optbin(data, b, metric='mse', max.cache=0)
if (!compare_binning(bcache, bnocache)) {
passed <- FALSE
}
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
### assign.optbin Functionality Tests
# Check that the count of values assigned to each bin match the bin size
# in points. Returns TRUE if all tests pass, FALSE if any fail.
assign_test1 <- function() {
passed <- TRUE
catf(" running assign.optbin tests")
set.seed(19)
data <- rnorm(1000, mean=2, sd=0.5)
binned <- optbin(data, 10)
cnt <- table(assign.optbin(data, binned))
stpt <- 1
for (b in 1:10) {
expcnt <- binned$breaks[b] - stpt + 1
if (cnt[b] != expcnt) {
catf(" %d values assigned to bin %d but range has %d",
cnt[b], b, expcnt)
passed <- FALSE
}
stpt <- binned$breaks[b] + 1
}
if (passed) {
catf(" tests passed")
}
catf(" running assign.optbin tests without extending upper bound")
dmax <- max(data)
data2 <- c(data, dmax+1:10)
cnt <- table(assign.optbin(data2, binned))
stpt <- 1
for (b in 1:10) {
expcnt <- binned$breaks[b] - stpt + 1
if (cnt[b] != expcnt) {
catf(" %d values assigned to bin %d but range has %d",
cnt[b], b, expcnt)
passed <- FALSE
}
stpt <- binned$breaks[b] + 1
}
if (passed) {
catf(" tests passed")
}
catf(" running assign.optbin tests while extending upper bound")
cnt <- table(assign.optbin(data2, binned, extend.upper=TRUE))
stpt <- 1
for (b in 1:9) {
expcnt <- binned$breaks[b] - stpt + 1
if (cnt[b] != expcnt) {
catf(" %d values assigned to bin %d but range has %d",
cnt[b], b, expcnt)
passed <- FALSE
}
stpt <- binned$breaks[b] + 1
}
b <- 10
expcnt <- binned$breaks[b] - stpt + 1
if (cnt[b] != (expcnt + 10)) {
catf(" %d values assigned to bin %d but expected %d",
cnt[b], b, expcnt+10)
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
### Break The Part tests
# Various error checks on input data to optbin. Returns TRUE if all tests
# pass, FALSE if any fail.
BTP_optbin <- function() {
passed <- TRUE
catf(" running optbin BTP tests")
b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), -1),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" negative number of bins did not raise error")
passed <- FALSE
}
b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 1),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" too few bins did not raise error")
passed <- FALSE
}
b <- tryCatch(optbin(c(1, 2, 3, 4, 5, "blah", 6), 2),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" non-numeric entry in data without na.rm did not raise error")
passed <- FALSE
}
b <- tryCatch(b <- optbin(c(1, 2, 3, 4, 5, "blah", 6), 2, na.rm=TRUE),
error=function(e) { NULL })
if (is.null(b)) {
catf(" non-numeric entry in data with na.rm did raise error")
passed <- FALSE
} else if (!verify_binning(2, b, c(3,6), c(3.0, 6.0), 4, c(2.0, 5.0))) {
passed <- FALSE
}
b <- tryCatch(optbin(c(1, 2, 3, 4, 5, 6), 10),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" too few values for number of bins did not raise error")
passed <- FALSE
}
# This is test1 data.
data <- c(1.0, 2.0, 3.0, 4.0, NA, 5.0, 6.0, 7.0, NA, 8.0, 9.0, NA, 10.0)
b <- tryCatch(optbin(data, 2),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" NAs in data without removing did not raise error")
passed <- FALSE
}
b <- tryCatch(optbin(data, 2, na.rm=TRUE),
error=function(e) { NULL })
if (is.null(b)) {
catf(" removed NAs still caused error")
passed <- FALSE
} else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) {
passed <- FALSE
}
b <- tryCatch(optbin(reverse(data), 2, is.sorted=TRUE),
error=function(e) { NULL })
if (!is.null(b)) {
catf(" unsorted data after setting is.sorted TRUE did not raise error")
passed <- FALSE
}
b <- tryCatch(optbin(data, 2, is.sorted=T, na.rm=TRUE),
error=function(e) { NULL })
if (is.null(b)) {
catf(" pre-sorted data raised error")
passed <- FALSE
} else if (!verify_binning(2, b, c(5, 10), c(5.0, 10.0), 20.0, c(3.0, 8.0))) {
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
# Error checks on the arguments to plot.optbin. Returns TRUE if all tests
# pass, FALSE if any fail.
BTP_plot <- function() {
passed <- TRUE
catf(" running plot.optbin BTP tests")
data <- 1:10
b <- optbin(data, 2)
# These generate an error because we use them in the basic plot.
# plot doesn't return anything so our error function will generate an NA.
res <- tryCatch(plot(b2, ann=TRUE), error=function(e) { NA })
if (!is.na(res)) {
catf(" passing ann to plot.optbin did not raise error")
passed <- FALSE
}
res <- tryCatch(plot(b2, xaxt='s'), error=function(e) { NA })
if (!is.na(res)) {
catf(" passing xaxt to plot.optbin did not raise error")
passed <- FALSE
}
if (passed) {
catf(" tests passed")
}
return(passed)
}
### Top Level
allpass <- TRUE
cat('\nStarting optbin verification\n')
if (!optbin_test1()) {
allpass <- FALSE
}
if (!optbin_test2()) {
allpass <- FALSE
}
if (!optbin_test3()) {
allpass <- FALSE
}
if (!optbin_test4()) {
allpass <- FALSE
}
if (!optbin_caching()) {
allpass <- FALSE
}
if (!suppressWarnings(BTP_optbin())) {
allpass <- FALSE
}
if (!assign_test1()) {
allpass <- FALSE
}
if (!BTP_plot()) {
allpass <- FALSE
}
if (allpass) {
cat('\nAll tests PASSED\n\n')
} else {
cat('\nSome test FAILED\n\n')
# This should cause the build check to fail.
error("test failure")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.