# Nothing
## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for the rendered document: collapse source and output into a
# single block where possible, and prefix each line of text output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----message=FALSE,results='hide',warning=FALSE,echo=TRUE---------------------
library(reclin)
library(dplyr)
## -----------------------------------------------------------------------------
# Load the two example data sets and display them before any linking is done.
data(list = c("linkexample1", "linkexample2"))
print(linkexample1)
print(linkexample2)
## -----------------------------------------------------------------------------
# Build the candidate pairs, using "postcode" as the blocking variable.
p <- pair_blocking(
  x = linkexample1,
  y = linkexample2,
  blocking_var = "postcode",
  large = FALSE
)
print(p)
## -----------------------------------------------------------------------------
# First pass: compare the pairs on the four linkage keys with the default
# comparator.
p <- compare_pairs(
  p,
  by = c("lastname", "firstname", "address", "sex")
)
print(p)
## -----------------------------------------------------------------------------
# Second pass: same keys, now compared with a Jaro-Winkler comparator.
# overwrite = TRUE lets this call replace the results of the first pass.
p <- compare_pairs(
  p,
  by = c("lastname", "firstname", "address", "sex"),
  default_comparator = jaro_winkler(0.9),
  overwrite = TRUE
)
print(p)
## -----------------------------------------------------------------------------
# Simple score, stored in the "simsum" column of the pairs.
p <- score_simsum(
  p,
  var = "simsum"
)
print(p)
## -----------------------------------------------------------------------------
# Estimate the probabilistic linkage model from the compared pairs.
m <- problink_em(p)
print(m)
## -----------------------------------------------------------------------------
# Score the pairs with the model estimated above; the result goes into the
# "weight" column.
p <- score_problink(
  p,
  model = m,
  var = "weight"
)
print(p)
## -----------------------------------------------------------------------------
# Selection by cut-off on "weight" (threshold 8); the flag is stored in the
# "threshold" column.
p <- select_threshold(
  p,
  "weight",
  var = "threshold",
  threshold = 8
)
print(p)
## -----------------------------------------------------------------------------
# Bring the "id" column of the first data set into the pairs as id_x.
p <- add_from_x(p, id_x = "id")
print(p)
## -----------------------------------------------------------------------------
# Same for the second data set (id_y). A pair is a true match when both ids
# agree; cross-tabulate that against the threshold selection.
p <- add_from_y(p, id_y = "id")
p$true <- p$id_x == p$id_y
p[c("true", "threshold")] %>%
  as.data.frame() %>%
  table()
## -----------------------------------------------------------------------------
# Greedy selection on "weight" (threshold 0), compared against the truth.
p <- select_greedy(
  p,
  "weight",
  var = "greedy",
  threshold = 0
)
p[c("true", "greedy")] %>%
  as.data.frame() %>%
  table()
## -----------------------------------------------------------------------------
# n-to-m selection on "weight" (threshold 0), compared against the truth.
p <- select_n_to_m(
  p,
  "weight",
  var = "ntom",
  threshold = 0
)
p[c("true", "ntom")] %>%
  as.data.frame() %>%
  table()
## -----------------------------------------------------------------------------
# Build the final linked data set from the pairs and display it.
linked_data_set <- link(p)
print(linked_data_set)
## ---- message=FALSE-----------------------------------------------------------
# The complete workflow written as one pipeline: block on postcode, compare
# with Jaro-Winkler, score, select n-to-m, and link.
# NOTE(review): no model is passed to score_problink() here, so it relies on
# its default behaviour for obtaining one -- confirm against the reclin docs.
library(dplyr)
linked_data_set <-
  pair_blocking(linkexample1, linkexample2, "postcode") %>%
  compare_pairs(
    by = c("lastname", "firstname", "address", "sex"),
    default_comparator = jaro_winkler(0.9)
  ) %>%
  score_problink(var = "weight") %>%
  select_n_to_m("weight", var = "ntom", threshold = 0) %>%
  link()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.