tests/testthat/test-FuzzyTokenSet.R

context("Fuzzy Token Set distance")

test_that("Fuzzy Token Set distance is correct when using default settings", {
  comparator <- FuzzyTokenSet()
  x <- strsplit("The quick brown fox", "\\s+")
  y <- strsplit("The quicker browner fox", "\\s+")
  z <- strsplit("The quicker fox", "\\s+")
  expect_equal(comparator(x, y), (0 + 2/7 + 2/7 + 0)/4)
  expect_equal(comparator(x, z), (0 + 2/7 + 1 + 0)/4)
  expect_equal(comparator(z, x), (0 + 2/7 + 1 + 0)/4)
  x <- strsplit("University of Melbourne", "\\s+")
  y <- strsplit("Melbourne University", "\\s+")
  expect_equal(comparator(x, y), (0 + 1 + 0)/3)
})

test_that("Fuzzy Token Set distance is correct when using a non-default inner comparator", {
  comparator <- FuzzyTokenSet(inner_comparator = BinaryComp())
  x <- strsplit("The quick brown fox", "\\s+")
  y <- strsplit("The quicker browner fox", "\\s+")
  z <- strsplit("The quicker fox", "\\s+")
  expect_equal(comparator(x, y), 2/4)
  expect_equal(comparator(x, z), 2/4)
  expect_equal(comparator(z, x), 2/4)
  x <- strsplit("University of Melbourne", "\\s+")
  y <- strsplit("Melbourne University", "\\s+")
  expect_equal(comparator(x, y), 1/3)
})

test_that("Fuzzy Token Set distance is correct when using asymmetric weights", {
  comparator <- FuzzyTokenSet(deletion = 0.5)
  x <- strsplit("The quick brown fox", "\\s+")
  y <- strsplit("The quicker browner fox", "\\s+")
  z <- strsplit("The quicker fox", "\\s+")
  expect_equal(comparator(x, y), (0 + 2/7 + 2/7 + 0)/4)
  expect_equal(comparator(x, z), (0 + 2/7 + 1/2 + 0)/4)
  expect_equal(comparator(z, x), (0 + 2/7 + 1 + 0)/4)
  x <- strsplit("University of Melbourne", "\\s+")
  y <- strsplit("Melbourne University", "\\s+")
  expect_equal(comparator(x, y), (0 + 1/2 + 0)/3)
})

test_that("Fuzzy Token Set distance is correct when using a non-default aggregation function", {
  comparator <- FuzzyTokenSet(agg_function = base::sum)
  x <- strsplit("The quick brown fox", "\\s+")
  y <- strsplit("The quicker browner fox", "\\s+")
  z <- strsplit("The quicker fox", "\\s+")
  expect_equal(comparator(x, y), 0 + 2/7 + 2/7 + 0)
  expect_equal(comparator(x, z), 0 + 2/7 + 1 + 0)
  expect_equal(comparator(z, x), 0 + 2/7 + 1 + 0)
  x <- strsplit("University of Melbourne", "\\s+")
  y <- strsplit("Melbourne University", "\\s+")
  expect_equal(comparator(x, y), 0 + 1 + 0)
})

Try the comparator package in your browser

Any scripts or data that you put into this service are public.

comparator documentation built on March 18, 2022, 6:15 p.m.