# tests/testthat/test-permutations.R

# Label the tests in this file as "Text permutations" for testthat reporters.
# NOTE(review): context() is deprecated in the testthat 3rd edition — confirm
# which edition this package targets before removing it.
context("Text permutations")

test_that("properties of permutations are as expected", {
  # Fixed seed so the sampled document (and any sampling done while
  # permuting) is reproducible between runs.
  set.seed(2000)
  random_draws <- runif(20, 0, 30)
  source_indices <- unique(as.integer(random_draws))
  n_requested <- 1e5

  index_sets <- lime:::get_index_permutations(source_indices, n_requested)
  set_sizes <- lengths(index_sets)

  # Exactly as many permutations as were requested.
  expect_length(index_sets, n_requested)

  # The first permutation reproduces the original document unchanged.
  expect_equal(index_sets[[1]], source_indices)

  # A permutation never holds more elements than the original document.
  expect_true(all(set_sizes <= length(source_indices)))

  # Permutations do not contain duplicated elements.
  has_duplicates <- vapply(
    index_sets,
    function(idx) anyDuplicated(idx) > 0,
    logical(1)
  )
  expect_true(!any(has_duplicates))

  # No permutation is empty.
  expect_true(!any(set_sizes == 0))
})

test_that("Cosine computation is correct", {
  query <- 5:13
  # 9 x 9 sparse matrix filled column-wise with 1..81.
  candidates <- Matrix::Matrix(1:81, ncol = 9, sparse = TRUE)

  # Reference value for one row: <query, row> / sqrt(<query, query> * <row, row>).
  # Given its own name so that it does not mask base::cos().
  reference_cosine <- function(row) {
    dot_product <- crossprod(query, row)
    squared_norms <- crossprod(query) * crossprod(row)
    dot_product / sqrt(squared_norms)
  }

  actual <- lime:::cosine_distance_vector_to_matrix_rows(query, candidates)
  expected <- apply(candidates, MARGIN = 1, reference_cosine)

  expect_equal(actual, expected)
})

test_that("there is no empty generated text", {
  # Permute a single short document (note the trailing space in the input)
  # many times, without keeping word positions.
  generated_documents <- lime:::permute_cases.character(
    cases = "this is a test ",
    n_permutations = 5e3,
    tokenization = default_tokenize,
    keep_word_position = FALSE
  )

  # vapply() pins the result to exactly one integer per permutation, so an
  # unexpectedly shaped `permutations` element fails loudly here instead of
  # silently changing the type that is compared against 0 (as sapply() would).
  permutation_nchar <- vapply(
    generated_documents$permutations,
    nchar,
    integer(1),
    USE.NAMES = FALSE
  )

  # There is no empty permutation
  expect_true(all(permutation_nchar > 0))
})

test_that("Default tokenizer works for multiple documents", {
  documents <- c("    this is a test.", "this is another       test.")
  expected_tokens <- c(
    "this", "is", "a", "test",
    "this", "is", "another", "test"
  )

  # The tokens of both documents are expected back as one flat character
  # vector, without the padding whitespace or the trailing periods.
  tokens <- default_tokenize(documents)
  expect_equal(tokens, expected_tokens)
})
# thomasp85/lime documentation built on Aug. 19, 2022, 5 p.m.