inst/doc/v3-integration.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(blocking)
library(reclin2)

## -----------------------------------------------------------------------------
data(census)
data(cis)
census[, x:=1:.N]
cis[, y:=1:.N]

## -----------------------------------------------------------------------------
pair_ann(x = census[1:1000], 
         y = cis[1:1000], 
         on = c("pername1", "pername2", "sex", "dob_day", "dob_mon", "dob_year", "enumcap", "enumpc"), 
         deduplication = FALSE) |>
  head()

## -----------------------------------------------------------------------------
pair_ann(x = census[1:1000], 
         y = cis[1:1000], 
         on = c("pername1", "pername2", "sex", "dob_day", "dob_mon", "dob_year", "enumcap", "enumpc"), 
         deduplication = FALSE,
         ann = "hnsw") |>
  compare_pairs(on = c("pername1", "pername2", "sex", "dob_day", "dob_mon", "dob_year", "enumcap", "enumpc"),
                comparators = list(cmp_jarowinkler())) |>
  score_simple("score",
               on = c("pername1", "pername2", "sex", "dob_day", "dob_mon", "dob_year", "enumcap", "enumpc")) |>
  select_threshold("threshold", score = "score", threshold = 6) |>
  link(selection = "threshold") |>
  head()

Try the blocking package in your browser

Any scripts or data that you put into this service are public.

blocking documentation built on June 18, 2025, 9:16 a.m.