# tests/testthat/test-test_string_group.R

# Checks that jaccard_string_group() assigns near-duplicate strings to the
# expected number of distinct groups. Each scenario is run 30 times and the
# median group count is asserted rather than a single run's result
# (presumably because the grouping is randomized -- TODO confirm).
test_that("string_group dedups string correctly", {
  skip_if_not_installed("igraph")

  # Scenario 1: spelling variants of two names. The three "benjamin"-like
  # strings and the two "jack"-like strings are expected to form one group
  # each, i.e. two groups in total.
  n_groups_names <- purrr::map_dbl(seq_len(30), function(x) {
    string <- c("beniamino", "jack", "benjamin", "beniamin", "jacky")
    dplyr::n_distinct(
      jaccard_string_group(string, n_bands = 190, threshold = .2)
    )
  })
  # Assert this scenario explicitly; without it the result would be computed
  # and then discarded, leaving the scenario untested.
  expect_equal(median(n_groups_names), 2)

  # Scenario 2: city names. Only "new york" / "newy york" are expected to be
  # merged, leaving three groups.
  n_groups_cities <- purrr::map_dbl(seq_len(30), function(x) {
    string <- c("new haven", "new york", "chicago", "newy york")
    dplyr::n_distinct(
      jaccard_string_group(string, n_bands = 190, threshold = .2)
    )
  })
  expect_equal(median(n_groups_cities), 3)
})

# Runs jaccard_string_group() with nthread = 2 and checks that the user CPU
# time reported by system.time() is at most 2.5 times the elapsed time.
test_that("nthread works for jaccard string group", {
  skip_if_not_installed("igraph")
  # Timing-based check, so it is not run on CRAN.
  testthat::skip_on_cran()

  cities <- c("new haven", "new york", "chicago", "newy york")

  timing <- system.time(
    jaccard_string_group(cities, n_bands = 190, threshold = .2, nthread = 2)
  )

  testthat::expect_lte(timing["user.self"], 2.5 * timing["elapsed"])
})

# Try the zoomerjoin package in your browser
#
# Any scripts or data that you put into this service are public.
#
# zoomerjoin documentation built on March 14, 2026, 5:07 p.m.