tests/testthat/test-discretize.R

library("arules")
library("testthat")


options(digits=2)

context("discretize")

nums1 <- sample(rep(0:10, time = 5))

nums2 <- 
    c(rep(1,7), 
	  rep(2,3),
	  rep(3,4),
	  rep(4,5),
	  rep(5,9),
	  rep(6,10),
	  rep(7,8),
	  rep(8,1),
	  rep(9,9),
	  rep(10,4)
  )


# interval
d <- discretize(nums1, method = "interval", breaks = 2)
expect_equal(attr(d, 'discretized:breaks'), c(0, 5, 10))

d <- discretize(nums2, method = "interval", breaks = 2)
expect_equal(attr(d, 'discretized:breaks'), c(1, 5.5, 10))
expect_equal(as.numeric(table(d)), c(28, 32))

d <- discretize(nums2, method = "interval", breaks = 9)
expect_equal(attr(d, 'discretized:breaks'), as.numeric(1:10))

# fixed
expect_error(discretize(nums2, method = "fixed", breaks = 1)) 
  ### needs at least 2 values for breaks
d <- discretize(nums2, method = "fixed", breaks = c(0,5,10))
expect_equal(length(levels(d)), 2L)
expect_equal(as.numeric(table(d)), c(19, 41))

# frequency
d <- discretize(nums1, method = "frequency", breaks = 2)
expect_equal(length(levels(d)), 2L)
expect_equal(as.numeric(table(d)), c(25, 30))

d <- discretize(nums1, method = "frequency", breaks = 11)
expect_equal(as.numeric(table(d)), rep(5, 11))

d <- discretize(nums2, method = "frequency", breaks = 2)
expect_equal(length(levels(d)), 2L)
expect_equal(as.numeric(table(d)), c(28, 32))

d <- discretize(nums2, method = "frequency", breaks = 6)
expect_equal(as.numeric(table(d)), c(10, 9, 9, 10, 9, 13))

# missing values
nums1[3:5] <- NA
d <- discretize(nums1, method = "interval")
expect_equal(sum(is.na(d)), 3L)
d <- discretize(nums1, method = "frequency")
expect_equal(sum(is.na(d)), 3L)
d <- discretize(nums1, method = "cluster")
expect_equal(sum(is.na(d)), 3L)
mhahsler/arules documentation built on March 19, 2024, 5:45 p.m.