Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
warning = FALSE,
message = FALSE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(blocking)
library(data.table)
## -----------------------------------------------------------------------------
data(RLdata500)
head(RLdata500)
## -----------------------------------------------------------------------------
RLdata500[, id_count :=.N, ent_id] ## how many times given unit occurs
RLdata500[, bm:=sprintf("%02d", bm)] ## add leading zeros to month
RLdata500[, bd:=sprintf("%02d", bd)] ## add leading zeros to day
RLdata500[, txt:=tolower(paste0(fname_c1,fname_c2,lname_c1,lname_c2,by,bm,bd))]
head(RLdata500)
## -----------------------------------------------------------------------------
df_blocks <- blocking(x = RLdata500$txt, ann = "nnd", verbose = 1, graph = TRUE, seed = 2024)
## -----------------------------------------------------------------------------
df_blocks
## -----------------------------------------------------------------------------
str(df_blocks,1)
## -----------------------------------------------------------------------------
plot(df_blocks$graph, vertex.size=1, vertex.label = NA)
## -----------------------------------------------------------------------------
head(df_blocks$result)
## -----------------------------------------------------------------------------
df_block_melted <- melt(df_blocks$result, id.vars = c("block", "dist"))
df_block_melted_rec_block <- unique(df_block_melted[, .(rec_id=value, block)])
head(df_block_melted_rec_block)
## -----------------------------------------------------------------------------
RLdata500[df_block_melted_rec_block, on = "rec_id", block_id := i.block]
head(RLdata500)
## -----------------------------------------------------------------------------
RLdata500[, .(uniq_blocks = uniqueN(block_id)), .(ent_id)][, .N, uniq_blocks]
## -----------------------------------------------------------------------------
hist(df_blocks$result$dist, xlab = "Distances", ylab = "Frequency", breaks = "fd",
main = "Distances calculated between units")
## -----------------------------------------------------------------------------
df_for_density <- copy(df_block_melted[block %in% RLdata500$block_id])
df_for_density[, match:= block %in% RLdata500[id_count == 2]$block_id]
plot(density(df_for_density[match==FALSE]$dist), col = "blue", xlim = c(0, 0.8),
main = "Distribution of distances between\nclusters type (match=red, non-match=blue)")
lines(density(df_for_density[match==TRUE]$dist), col = "red", xlim = c(0, 0.8))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.