# Nothing
## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options: merge each chunk's source and output into one
# block, and prefix every line of printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup--------------------------------------------------------------------
library(zoomerjoin)
## -----------------------------------------------------------------------------
# Seed the RNG before simulating so that both datasets are identical on every
# run; without this the draws below change each time the script is executed.
set.seed(1)

n <- 10^5 # number of data points
d <- 10^2 # dimension

# Create a matrix of 10^5 observations in R^100 (n rows, d columns) with
# entries drawn uniformly from [0, 1].
X <- matrix(runif(n * d), n, d)

# The second dataset is a copy of the first with every coordinate shifted by a
# very small amount of Gaussian noise (mean 0, sd 1e-4), so row i of X_2 lies
# very close to row i of X.
X_2 <- as.data.frame(X + matrix(rnorm(n * d, 0, .0001), n, d))

# Convert the original matrix last: the perturbation above needs it in matrix
# form. Both datasets end up as data frames, which is how they are passed to
# the join further down.
X <- as.data.frame(X)
## -----------------------------------------------------------------------------
# Evaluate three candidate hyperparameter settings (n_bands, band_width, r).
# Each setting is evaluated twice, with a first argument of .01 and of .1;
# .01 is the same value used as `threshold` in the join further down.
# NOTE(review): presumably the first argument is a Euclidean distance and the
# result is the probability that a pair of points at that distance is compared
# by the hashing scheme -- confirm against the zoomerjoin documentation.

# Setting 1: 5 bands of width 8, r = .25
euclidean_probability(.01, n_bands = 5, band_width = 8, r = .25)
euclidean_probability(.1, n_bands = 5, band_width = 8, r = .25)
# Setting 2: 10 bands of width 4, r = .15
euclidean_probability(.01, n_bands = 10, band_width = 4, r = .15)
euclidean_probability(.1, n_bands = 10, band_width = 4, r = .15)
# Setting 3: 40 bands of width 8, r = .15 (the setting used for the join below)
euclidean_probability(.01, n_bands = 40, band_width = 8, r = .15)
euclidean_probability(.1, n_bands = 40, band_width = 8, r = .15)
## -----------------------------------------------------------------------------
# Fix the seed immediately before the join.
# NOTE(review): presumably the join's hashing draws on R's RNG, so seeding
# here makes the set of matches repeatable -- confirm in the zoomerjoin docs.
set.seed(1)

# Time the join of X against its slightly perturbed copy X_2, using the
# 40-band / width-8 / r = .15 setting with a distance threshold of .01.
start <- Sys.time()
joined_out <- euclidean_inner_join(
  X,
  X_2,
  threshold = .01,
  n_bands = 40,
  band_width = 8,
  r = .15
)
n_matches <- nrow(joined_out)

# Pin the units to seconds. `Sys.time() - start` chooses its units
# automatically (secs, mins, hours, ...), so a run longer than a minute would
# be reported in minutes while the message below still says "seconds".
time_taken <- as.numeric(difftime(Sys.time(), start, units = "secs"))
print(paste("found", n_matches, "matches in", round(time_taken), "seconds"))
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.