# tests/testthat/test_evaluate.R

# Runs a three-tier match (exact, fuzzy, multivar) on the packaged example
# data and checks one cell of the resulting evaluation table.
test_that("match_evaluate returns matches that are correct number", {
  # Copies of the example tables shipped with the package.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # Tiers are named a, b, c and passed to tier_match() in this order.
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(match_type = "multivar", multivar_settings = list(
      logit = NULL, missing = FALSE, wgts = 1,
      compare_type = "stringdist", blocks = NULL, blocks.x = NULL, blocks.y = NULL,
      top = 1, threshold = NULL
    ))
  )

  result <- tier_match(corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_key_1", unique_key_2 = "unique_key_2",
    tiers = tier_list, takeout = "neither"
  )

  # Position-based lookup: row 2, column 5 of the evaluation table.
  # NOTE(review): presumably the percent-matched figure for the second tier;
  # confirm against the column order produced by match_evaluate.
  # `[[1]]` pulls the single value out of the 1x1 table so the comparison
  # below is a plain scalar rather than a table-vs-scalar comparison.
  pct_matched <- result$match_evaluation[2, 5, with = FALSE][[1]]
  expect_false(pct_matched == 1)
})
# Same evaluation check as the default-key test, but with the unique key
# columns renamed so tier_match() has to honour the names it is given.
test_that("match_evaluate returns matches that are correct, different unique key names", {
  # Copies, because the `:=` calls below modify the tables by reference.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # Tiers are named a, b, c, d and passed to tier_match() in this order.
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(match_type = "multivar", multivar_settings = list(
      logit = NULL, missing = FALSE, wgts = c(1),
      compare_type = "stringdist", blocks = NULL, blocks.x = NULL, blocks.y = NULL,
      top = 1, threshold = NULL
    )),
    d = list(match_type = "exact", clean_settings = list(remove_words = TRUE))
  )

  # Move each key into a differently named column and drop the original.
  corp_data1[, unique_k_1 := unique_key_1][, unique_key_1 := NULL]
  corp_data2[, unique_k_2 := unique_key_2][, unique_key_2 := NULL]

  result <- tier_match(corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_k_1", unique_key_2 = "unique_k_2",
    tiers = tier_list, takeout = "neither"
  )

  # Position-based lookup: row 2, column 5 of the evaluation table.
  # NOTE(review): presumably the percent-matched figure for the second tier;
  # confirm against the column order produced by match_evaluate.
  # `[[1]]` pulls the single value out of the 1x1 table so the comparison
  # below is a plain scalar rather than a table-vs-scalar comparison.
  pct_matched <- result$match_evaluation[2, 5, with = FALSE][[1]]
  expect_false(pct_matched == 1)
})
# Runs a four-tier match with renamed unique key columns and checks one cell
# in the first row of the resulting evaluation table.
test_that("match_evaluate returns new unique matches", {
  # Copies, because the `:=` calls below modify the tables by reference.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # Tiers are named a, b, c, d and passed to tier_match() in this order.
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(match_type = "multivar", multivar_settings = list(
      logit = NULL, missing = FALSE, wgts = c(1),
      compare_type = "stringdist", blocks = NULL, blocks.x = NULL, blocks.y = NULL,
      top = 1, threshold = NULL
    )),
    d = list(match_type = "exact", clean_settings = list(remove_words = TRUE))
  )

  # Move each key into a differently named column and drop the original.
  corp_data1[, unique_k_1 := unique_key_1][, unique_key_1 := NULL]
  corp_data2[, unique_k_2 := unique_key_2][, unique_key_2 := NULL]

  result <- tier_match(corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_k_1", unique_key_2 = "unique_k_2",
    tiers = tier_list, takeout = "neither"
  )

  # Position-based lookup: row 1, column 7 of the evaluation table.
  # NOTE(review): presumably the count of new unique matches for the first
  # tier; confirm against the column order produced by match_evaluate.
  # `[[1]]` pulls the single value out of the 1x1 table so the comparison
  # below is a plain scalar rather than a table-vs-scalar comparison.
  new_matches <- result$match_evaluation[1, 7, with = FALSE][[1]]
  expect_false(new_matches == 1)
})
# seunglee98/fedmatch documentation built on April 26, 2024, 10:24 a.m.