## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options: merge each chunk's source and output into one
# block and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup--------------------------------------------------------------------
library(fedmatch)
library(data.table)
## -----------------------------------------------------------------------------
# Load both example corporate data sets that ship with fedmatch.
data(list = c("corp_data1", "corp_data2"), package = "fedmatch")
## -----------------------------------------------------------------------------
# Multivar match with hand-picked weights.
#
# Work on copies so the packaged example data sets are never modified by
# reference, and make sure both copies are data.tables.
corp_data1_test <- copy(corp_data1)
data.table::setDT(corp_data1_test)
corp_data2_test <- copy(corp_data2)
data.table::setDT(corp_data2_test)

# Row-number keys. seq_len(.N) is used instead of seq(1, .N) because the
# latter returns c(1, 0) on an empty table and would fail the assignment.
corp_data1_test[, id_1 := seq_len(.N)]

# Besides its key, the second data set gets `Country` and `Company` columns
# (copies of `country` and `Name`) so both tables share the `by` column names.
corp_data2_test[, `:=`(
  id_2 = seq_len(.N),
  Country = country,
  Company = Name
)]

# compare_type and wgts are listed in the same order as `by`
# (per the fedmatch documentation): an indicator comparison for Country and
# a string-distance comparison for Company, each weighted one half.
result <- merge_plus(
  data1 = corp_data1_test,
  data2 = corp_data2_test,
  match_type = "multivar",
  by = c("Country", "Company"),
  suffixes = c("_1", "_2"),
  unique_key_1 = "id_1",
  unique_key_2 = "id_2",
  multivar_settings = build_multivar_settings(
    compare_type = c("indicator", "stringdist"),
    wgts = c(.5, .5),
    nthread = 1
  )
)
result
## -----------------------------------------------------------------------------
# Show each matched variable from both sides next to its comparison column.
print(result[["matches"]][, list(Company_1, Company_2, Company_compare)])
print(result[["matches"]][, list(Country_1, Country_2, Country_compare)])
## -----------------------------------------------------------------------------
# Show the per-variable comparison columns alongside the combined score.
print(
  result[["matches"]][
    ,
    list(Company_compare, Country_compare, multivar_score)
  ]
)
## -----------------------------------------------------------------------------
# Estimate weights from a small hand-made table of candidate pairs.
set.seed(111)

# Four candidate pairs; each variable appears once per side with the
# "_1" / "_2" suffix.
fake_result_table <- data.table::data.table(
  Company_1 = c("ABC Corp", "XYZ Corporation", "Apple Corp", "Banana Corp"),
  Company_2 = c(
    "ABC Corporation", "XYZ Inc", "Apple Incorporated", "Banana Stand"
  ),
  Country_1 = c("USA", "USA", "TUR", "USA"),
  Country_2 = c("MEX", "USA", "TUR", "USA")
)

# Variables are passed without suffixes; compare_type is listed in the same
# order as the variables.
calculated_weights <- calculate_weights(
  fake_result_table,
  c("Company", "Country"),
  compare_type = c("stringdist", "indicator"),
  suffixes = c("_1", "_2")
)
calculated_weights$w
## -----------------------------------------------------------------------------
# Multivar match scored by a logit model instead of fixed weights.
#
# Fresh copies of the example data, converted to data.tables, so nothing
# done here touches the packaged data sets by reference.
corp_data1_test <- copy(corp_data1)
data.table::setDT(corp_data1_test)
corp_data2_test <- copy(corp_data2)
data.table::setDT(corp_data2_test)

# Row-number keys. seq_len(.N) is used instead of seq(1, .N) because the
# latter returns c(1, 0) on an empty table and would fail the assignment.
corp_data1_test[, id_1 := seq_len(.N)]

# Besides its key, the second data set gets `Country` and `Company` columns
# (copies of `country` and `Name`) so both tables share the `by` column names.
corp_data2_test[, `:=`(
  id_2 = seq_len(.N),
  Country = country,
  Company = Name
)]

# Seed once, immediately before the only random draws in this section, so the
# simulated training table is reproducible.
set.seed(111)

# Simulated training data. `match` is drawn uniformly from c(1, 0, 1), so a 1
# is twice as likely as a 0. The predictor names mirror the *_compare columns
# found in the match output.
fake_result_table <- data.table::data.table(
  match = sample(c(1, 0, 1), 1e5, replace = TRUE),
  Company_compare = runif(1e5),
  Country_compare = sample(c(1, 0), 1e5, replace = TRUE)
)

# Logistic regression of the match flag on the two comparison metrics.
logit_model <- glm(match ~ Company_compare + Country_compare,
  family = "binomial",
  data = fake_result_table
)
summary(logit_model)

# Pass the fitted model through `logit`; no manual weights are supplied
# (wgts = NULL).
result <- merge_plus(
  corp_data1_test,
  corp_data2_test,
  match_type = "multivar",
  multivar_settings = build_multivar_settings(
    logit = logit_model,
    compare_type = c("indicator", "stringdist"),
    wgts = NULL,
    nthread = 1
  ),
  by = c("Country", "Company"),
  unique_key_1 = "id_1",
  unique_key_2 = "id_2",
  suffixes = c("_1", "_2")
)
result
## -----------------------------------------------------------------------------
# Multivar match on Company only, with `blocks = "Country"` passed to the
# multivar settings.
#
# Fresh copies of the example data, converted to data.tables, so nothing
# done here touches the packaged data sets by reference.
corp_data1_test <- copy(corp_data1)
data.table::setDT(corp_data1_test)
corp_data2_test <- copy(corp_data2)
data.table::setDT(corp_data2_test)

# Row-number keys. seq_len(.N) is used instead of seq(1, .N) because the
# latter returns c(1, 0) on an empty table and would fail the assignment.
corp_data1_test[, id_1 := seq_len(.N)]

# Besides its key, the second data set gets `Country` and `Company` columns
# (copies of `country` and `Name`) so both tables share the column names used
# for matching and blocking.
corp_data2_test[, `:=`(
  id_2 = seq_len(.N),
  Country = country,
  Company = Name
)]

# A single string-distance comparison on Company carries the whole weight.
result <- merge_plus(
  data1 = corp_data1_test,
  data2 = corp_data2_test,
  match_type = "multivar",
  by = "Company",
  suffixes = c("_1", "_2"),
  unique_key_1 = "id_1",
  unique_key_2 = "id_2",
  multivar_settings = build_multivar_settings(
    compare_type = "stringdist",
    wgts = 1,
    nthread = 1,
    blocks = "Country"
  )
)
result$matches
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.