tests/testthat/test-data-statistics.R

context("Statistics")

test_that("Conditional mutual information", {  
# With Incomplete data.   
  a <- cmi('physician_fee_freeze', 'Class', 
               dataset = voting, z = 'water_project_cost_sharing')  
  b <- cmi('physician_fee_freeze', 'Class', dataset = voting)
  expect_equal(a, b, tolerance = 0.001)
  # Mutual information
  a <- cmi('physician_fee_freeze', 'Class', 
           dataset = voting, z = 'water_project_cost_sharing')  
  b <- cmi('physician_fee_freeze', 'Class', voting)
  expect_equal(a, b, tolerance = 0.001)
  c <- cmi('buying', 'maint', dataset = car)
  expect_equal(c, 0)
# the same as when removing missing directly.
  a <- cmi('physician_fee_freeze', 'Class', 
           dataset = voting, z = 'water_project_cost_sharing')  
  v <- voting[,c('physician_fee_freeze', 'water_project_cost_sharing', 'Class')]
  b <- cmi('physician_fee_freeze', 'Class', v, z = 'water_project_cost_sharing')
  expect_equal(a, b, tolerance = 1e-10)
# Log base
  a <- cmi('physician_fee_freeze', 'Class', dataset = voting, unit = "log2")
  b <- cmi('physician_fee_freeze', 'Class', dataset = voting)
  expect_true(abs(a - b) > 0.1)
})

test_that("Contingency table", {
	t <- extract_ctgt('doors', car)
	expect_identical(dim(t), 4L) 
	t <- extract_ctgt(c('buying', c('doors', 'maint')), car)
	expect_identical(dim(t), c(4L, 4L ,4L)) 
	d <- dimnames(t)
	vars <- c('buying', 'doors', 'maint')
	expect_identical(names(d), vars)
	levs <- lapply(car[, vars], levels)
	expect_true(all(mapply(identical, levs, d, SIMPLIFY = TRUE)))
	t <- extract_ctgt('crime', voting)
	expect_identical(sum(t), 418L) 
})

test_that("Degrees freedom", {
  tbl <- extract_ctgt(c('persons', 'doors', 'class'), car)
  df <- cmi_degrees_freedom(freqs_table = tbl)
  expect_equal(df, 24L)

  tbl <- extract_ctgt(c('doors', 'class', 'persons'), car)
  df <- cmi_degrees_freedom(freqs_table = tbl)
  expect_equal(df, 27L)
})

test_that("Contingency table to CPT", {  
  # Nominal
  suppressWarnings(RNGversion("3.5.0"))
  set.seed(0) 
  tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20))
  tcpt <- ctgt2cpt(tc, 1)
  expect_equal(colnames(tcpt), letters[1:4])
  expect_equal(tcpt[, 1], setNames(c(1, 3, 4) / 8, letters[1:3]))
  # No smooth  
  suppressWarnings(RNGversion("3.5.0"))
  set.seed(0) 
  tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20))
  tcpt <- ctgt2cpt(tc, 0)
  expect_equal(colnames(tcpt), letters[1:4])
  expect_equal(tcpt[, 1], setNames(c(0, 2, 3) / 5, letters[1:3]))
  # 1D table 
  suppressWarnings(RNGversion("3.5.0"))
  set.seed(0)
  tc <- table(random_letters_vector(4, 200))
  tcpt <- ctgt2cpt(tc, 0)
  expect_equal(names(tcpt), letters[1:4])
  expect_equal(tcpt, tc / 200)
})
bmihaljevic/bnclassify documentation built on March 18, 2024, 8:34 a.m.