# Runs a three-tier match (exact, fuzzy, multivar) on the fedmatch example
# corporate data and checks a single cell of the evaluation table.
test_that("match_evaluate returns matches that are correct number", {
  # copy() gives this test its own data.table objects, separate from the
  # datasets shipped with the package.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # One entry per tier; names a/b/c label the tiers passed to tier_match().
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(
      match_type = "multivar",
      multivar_settings = list(
        logit = NULL, missing = FALSE, wgts = 1,
        compare_type = "stringdist",
        blocks = NULL, blocks.x = NULL, blocks.y = NULL,
        top = 1, threshold = NULL
      )
    )
  )

  result <- tier_match(
    corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_key_1", unique_key_2 = "unique_key_2",
    tiers = tier_list, takeout = "neither"
  )

  # Row 2, column 5 of the evaluation table, pulled out as a scalar so the
  # expectation compares a single value instead of a 1x1 data.table.
  # NOTE(review): presumably this is the second tier's percent-matched
  # figure -- confirm against the column layout of match_evaluation.
  pct_matched <- result$match_evaluation[[5]][2]
  expect_false(pct_matched == 1)
})
# Same style of check as a plain tier_match() run, but with the unique key
# columns renamed so that non-default key names are exercised.
test_that("match_evaluate returns matches that are correct, different unique key names", {
  # copy() gives this test its own data.table objects; the := calls below
  # modify them by reference.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # One entry per tier; tier d is an exact match with remove_words cleaning.
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(
      match_type = "multivar",
      multivar_settings = list(
        logit = NULL, missing = FALSE, wgts = c(1),
        compare_type = "stringdist",
        blocks = NULL, blocks.x = NULL, blocks.y = NULL,
        top = 1, threshold = NULL
      )
    ),
    d = list(match_type = "exact", clean_settings = list(remove_words = TRUE))
  )

  # Replace the default key columns with differently named copies
  # (unique_k_1 / unique_k_2) and drop the originals.
  corp_data1[, unique_k_1 := unique_key_1][, unique_key_1 := NULL]
  corp_data2[, unique_k_2 := unique_key_2][, unique_key_2 := NULL]

  result <- tier_match(
    corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_k_1", unique_key_2 = "unique_k_2",
    tiers = tier_list, takeout = "neither"
  )

  # Row 2, column 5 of the evaluation table, pulled out as a scalar so the
  # expectation compares a single value instead of a 1x1 data.table.
  # NOTE(review): presumably this is the second tier's percent-matched
  # figure -- confirm against the column layout of match_evaluation.
  pct_matched <- result$match_evaluation[[5]][2]
  expect_false(pct_matched == 1)
})
# Runs a four-tier match with renamed unique key columns and checks the value
# in row 1, column 7 of the evaluation table.
test_that("match_evaluate returns new unique matches", {
  # copy() gives this test its own data.table objects; the := calls below
  # modify them by reference.
  corp_data1 <- copy(fedmatch::corp_data1)
  corp_data2 <- copy(fedmatch::corp_data2)

  # One entry per tier; tier d is an exact match with remove_words cleaning.
  tier_list <- list(
    a = list(match_type = "exact"),
    b = list(match_type = "fuzzy"),
    c = list(
      match_type = "multivar",
      multivar_settings = list(
        logit = NULL, missing = FALSE, wgts = c(1),
        compare_type = "stringdist",
        blocks = NULL, blocks.x = NULL, blocks.y = NULL,
        top = 1, threshold = NULL
      )
    ),
    d = list(match_type = "exact", clean_settings = list(remove_words = TRUE))
  )

  # Replace the default key columns with differently named copies
  # (unique_k_1 / unique_k_2) and drop the originals.
  corp_data1[, unique_k_1 := unique_key_1][, unique_key_1 := NULL]
  corp_data2[, unique_k_2 := unique_key_2][, unique_key_2 := NULL]

  result <- tier_match(
    corp_data1, corp_data2,
    by.x = "Company", by.y = "Name",
    unique_key_1 = "unique_k_1", unique_key_2 = "unique_k_2",
    tiers = tier_list, takeout = "neither"
  )

  # Row 1, column 7 of the evaluation table, pulled out as a scalar so the
  # expectation compares a single value instead of a 1x1 data.table.
  # NOTE(review): presumably this is the first tier's count of new unique
  # matches -- confirm against the column layout of match_evaluation.
  new_matches <- result$match_evaluation[[7]][1]
  expect_false(new_matches == 1)
})
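# NOTE: the next two lines are website embed boilerplate, not part of the
# test suite; they are kept as comments so the file still parses as R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.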