# tests/testthat/test_single_jaccard.R

# Labels the tests in this file as the "Weighted Jaccard Testing" group.
# NOTE(review): context() is deprecated in testthat 3rd edition — confirm the
# package's testthat edition before removing this call.
context("Weighted Jaccard Testing")
test_that("single jaccard works", {
  # Take private copies of the example tables shipped with fedmatch.
  left_tbl <- copy(fedmatch::corp_data1)
  right_tbl <- copy(fedmatch::corp_data2)

  # Clean each name column once; the same cleaned vectors feed both the
  # corpus construction and the distance computation.
  left_names <- clean_strings(left_tbl$Company)
  right_names <- clean_strings(right_tbl$Name)

  word_corpus <- fedmatch:::build_corpus(left_names, right_names)

  distances <- wgt_jaccard_distance(
    left_names,
    right_names,
    corpus = word_corpus
  )

  # Only the type of the result is checked here, not its values.
  expect_true(is.numeric(distances))
})
test_that("single jaccard breaks when non-corpus item passed", {
  # Build the corpus from the cleaned example company names.
  left_tbl <- copy(fedmatch::corp_data1)
  right_tbl <- copy(fedmatch::corp_data2)
  left_names <- clean_strings(left_tbl$Company)
  right_names <- clean_strings(right_tbl$Name)
  word_corpus <- fedmatch:::build_corpus(left_names, right_names)

  # Per the test name, these strings are meant to be absent from the corpus,
  # and the distance call is expected to raise an error for them.
  unknown_left <- "abcd"
  unknown_right <- "efgh"
  expect_error(
    wgt_jaccard_distance(unknown_left, unknown_right, corpus = word_corpus)
  )
})
test_that("single jaccard breaks when strings are different lengths", {
  # Build the corpus from the cleaned example company names.
  left_tbl <- copy(fedmatch::corp_data1)
  right_tbl <- copy(fedmatch::corp_data2)
  left_names <- clean_strings(left_tbl$Company)
  right_names <- clean_strings(right_tbl$Name)
  word_corpus <- fedmatch:::build_corpus(left_names, right_names)

  # Two strings on one side versus one on the other: the length mismatch is
  # expected to make the distance call raise an error.
  two_names <- c("walmart", "walmart abc")
  one_name <- c("walmart")
  expect_error(
    wgt_jaccard_distance(two_names, one_name, corpus = word_corpus)
  )
})
# seunglee98/fedmatch documentation built on April 26, 2024, 10:24 a.m.