# Nothing
# Ten rows of the de-duplicated iris measurements, chosen so that the
# 4 nearest neighbors are distinct.
uiris <- unique(iris)
uirism <- as.matrix(uiris[, names(uiris) != "Species"])
ui10 <- uirism[seq(6, 15), ]

# Variant of ui10 with 10 randomly picked cells set to zero. The seed is fixed
# so the same cells are zeroed on every run.
set.seed(1337)
ui10z <- replace(ui10, sample(prod(dim(ui10)), 10), 0)
ui10z6 <- head(ui10z, 6)
ui10z4 <- tail(ui10z, 4)

# Sparse versions: the full ui10, the zeroed data, and the zeroed data split
# into its first 6 and last 4 rows.
ui10sp_full <- Matrix::drop0(ui10)
ui10sp <- Matrix::drop0(ui10z)
ui10sp6 <- head(ui10sp, 6)
ui10sp4 <- tail(ui10sp, 4)
# The sum of neighbor distances is treated as an objective function. Each
# expected value below is listed with the command that produced it.

# Euclidean, FNN
# sum(FNN::get.knn(uirism, 14)$nn.dist)
ui_edsum <- 1016.834
# sum(FNN::get.knn(ui10, 3)$nn.dist)
ui10_edsum <- 13.28425

# First 6 / last 4 rows of ui10 and the full Euclidean distance matrix
ui6 <- ui10[seq_len(6), ]
ui4 <- ui10[seq(7, 10), ]
ui10_eucd <- as.matrix(dist(ui10, method = "euclidean"))

# Euclidean, HNSW
# sum(RcppHNSW::hnsw_search(ui4, RcppHNSW::hnsw_build(ui6), k = 4)$dist)
ui4q_edsum <- 9.310494
# sum(RcppHNSW::hnsw_search(ui6, RcppHNSW::hnsw_build(ui4), k = 4)$dist)
ui6q_edsum <- 18.98666

# Cosine, HNSW
# NB Annoy and HNSW don't agree to more than this number of decimal places
# sum(RcppHNSW::hnsw_search(ui4, RcppHNSW::hnsw_build(ui6, distance = "cosine"), k = 4)$dist)
ui4q_cdsum <- 0.02072
# sum(RcppHNSW::hnsw_search(ui6, RcppHNSW::hnsw_build(ui4, distance = "cosine"), k = 4)$dist)
ui6q_cdsum <- 0.04220

# Manhattan: values taken from RcppAnnoy
ui4q_mdsum <- 15.4
ui6q_mdsum <- 31.6
# Hamming
# Generate a random binary matrix.
#
# nrow: number of rows.
# ncol: number of columns.
# prob: probability that any single entry is 1 (forwarded to rbinom()).
#
# Returns an nrow x ncol integer matrix of 0s and 1s. Entries are drawn with
# rbinom(), so the result depends on the current RNG state; call set.seed()
# first for reproducible data.
bitm <- function(nrow, ncol, prob = 0.5) {
  # Both dimensions are passed explicitly. With only `ncol` given, matrix()
  # infers the row count from the data length, so asking for zero columns
  # returned a 0 x 0 matrix instead of an nrow x 0 one.
  matrix(
    stats::rbinom(n = nrow * ncol, size = 1, prob = prob),
    nrow = nrow,
    ncol = ncol
  )
}
# Random fixtures: a 10 x 160 binary matrix and a 10-row matrix of integers
# drawn from 1-5. Both come from the same seeded RNG stream, so bitdata has to
# be generated before intdata.
set.seed(1337)
bitdata <- bitm(nrow = 10, ncol = 160)
intdata <- matrix(sample.int(5, 40, replace = TRUE), nrow = 10)

# Sparse and logical versions of the full binary data
bitdatasp <- Matrix::drop0(bitdata)
lbitdata <- bitdata != 0L

# First 6 / last 4 rows, as 0/1 values and as logicals
bit6 <- bitdata[seq_len(6), ]
bit4 <- bitdata[seq(7, 10), ]
lbit6 <- bit6 != 0L
lbit4 <- bit4 != 0L
# Hamming
# Expected neighbor indices, from Annoy. One row per item; the first column is
# the item itself.
expected_hamm_idx <- rbind(
  c(1, 7, 4, 5),
  c(2, 10, 3, 9),
  c(3, 4, 2, 7),
  c(4, 3, 1, 7),
  c(5, 6, 7, 1),
  c(6, 5, 10, 3),
  c(7, 1, 10, 5),
  c(8, 9, 10, 7),
  c(9, 8, 10, 4),
  c(10, 2, 9, 7)
)
# Expected Hamming distances, one row per item. The raw counts are divided by
# the number of columns in bitdata (i.e. normalized wrt ndim) for consistency
# with PyNNDescent.
expected_hamm_dist <- rbind(
  c(0, 72, 74, 77),
  c(0, 69, 78, 79),
  c(0, 65, 78, 79),
  c(0, 65, 74, 76),
  c(0, 67, 75, 77),
  c(0, 67, 80, 81),
  c(0, 72, 74, 75),
  c(0, 69, 77, 81),
  c(0, 69, 72, 78),
  c(0, 69, 72, 74)
) / ncol(bitdata)
# First 6 / last 4 rows of the integer data
int6 <- intdata[seq_len(6), ]
int4 <- intdata[seq(7, 10), ]

# 6 x 6 matrix of counts divided by the number of columns in int6.
# NOTE(review): presumably the normalized Hamming distances between the rows
# of int6 -- confirm against the tests that use it.
# Each c(...) is one column of the result.
int6hd <- cbind(
  c(0, 2, 3, 3, 3, 2),
  c(2, 0, 2, 2, 4, 4),
  c(3, 2, 0, 4, 4, 4),
  c(3, 2, 4, 0, 3, 3),
  c(3, 4, 4, 3, 0, 2),
  c(2, 4, 4, 3, 2, 0)
) / ncol(int6)

# Expected Hamming distance sums: taken from RcppAnnoy, then normalized wrt
# the number of features for consistency with PyNNDescent.
bit4q_hdsum <- 1275 / ncol(bitdata)
bit6q_hdsum <- 1986 / ncol(bitdata)
# Distance matrices generated with Annoy
# ui10_cosd: 10 x 10, written one column per c(...). The values are only
# symmetric up to rounding, so the column order matters.
ui10_cosd <- cbind(
  c(
    0.0000000, 0.0001315, 0.0007371, 1.131e-03, 2.628e-03,
    0.0009933, 0.0004830, 2.966e-03, 1.830e-03, 0.0039657
  ),
  c(
    0.0001315, 0.0000000, 0.0009095, 1.648e-03, 3.132e-03,
    0.0009375, 0.0006808, 3.429e-03, 1.426e-03, 0.0033142
  ),
  c(
    0.0007371, 0.0009095, 0.0000000, 2.441e-04, 6.824e-04,
    0.0001688, 0.0004221, 8.124e-04, 8.191e-04, 0.0027981
  ),
  c(
    0.0011313, 0.0016477, 0.0002441, 1.372e-09, 3.832e-04,
    0.0007671, 0.0005494, 6.114e-04, 1.888e-03, 0.0043782
  ),
  c(
    0.0026276, 0.0031324, 0.0006824, 3.832e-04, 0.000e+00,
    0.0011068, 0.0014791, 7.131e-05, 2.038e-03, 0.0043755
  ),
  c(
    0.0009933, 0.0009375, 0.0001688, 7.671e-04, 1.107e-03,
    0.0000000, 0.0009307, 1.066e-03, 2.660e-04, 0.0016037
  ),
  c(
    0.0004830, 0.0006808, 0.0004221, 5.494e-04, 1.479e-03,
    0.0009307, 0.0000000, 1.947e-03, 1.850e-03, 0.0047715
  ),
  c(
    0.0029658, 0.0034286, 0.0008124, 6.114e-04, 7.131e-05,
    0.0010661, 0.0019470, 0.000e+00, 1.851e-03, 0.0037512
  ),
  c(
    0.0018304, 0.0014256, 0.0008191, 1.888e-03, 2.038e-03,
    0.0002660, 0.0018498, 1.851e-03, 1.945e-09, 0.0007827
  ),
  c(
    0.0039657, 0.0033142, 0.0027981, 4.378e-03, 4.376e-03,
    0.0016037, 0.0047715, 3.751e-03, 7.827e-04, 0.0000000
  )
)
# ui10_mand: 10 x 10 symmetric distance matrix with a zero diagonal, written
# one column per c(...).
# NOTE(review): presumably the Manhattan distances between the rows of ui10,
# judging by the name -- confirm.
ui10_mand <- cbind(
  c(0.0, 1.7, 1.3, 2.5, 1.8, 0.6, 1.4, 2.1, 2.9, 1.2),
  c(1.7, 0.0, 0.6, 0.8, 0.9, 1.3, 0.5, 0.8, 1.2, 2.1),
  c(1.3, 0.6, 0.0, 1.2, 0.5, 0.7, 0.3, 0.8, 1.6, 1.7),
  c(2.5, 0.8, 1.2, 0.0, 0.9, 1.9, 1.1, 0.6, 0.6, 2.7),
  c(1.8, 0.9, 0.5, 0.9, 0.0, 1.2, 0.6, 0.3, 1.1, 2.2),
  c(0.6, 1.3, 0.7, 1.9, 1.2, 0.0, 1.0, 1.5, 2.3, 1.0),
  c(1.4, 0.5, 0.3, 1.1, 0.6, 1.0, 0.0, 0.7, 1.5, 2.0),
  c(2.1, 0.8, 0.8, 0.6, 0.3, 1.5, 0.7, 0.0, 0.8, 2.3),
  c(2.9, 1.2, 1.6, 0.6, 1.1, 2.3, 1.5, 0.8, 0.0, 2.7),
  c(1.2, 2.1, 1.7, 2.7, 2.2, 1.0, 2.0, 2.3, 2.7, 0.0)
)
# bit10_hamd: 10 x 10 symmetric matrix of counts with a zero diagonal, divided
# by the number of columns in bitdata. Written one column per c(...).
# NOTE(review): presumably the normalized Hamming distances between the rows
# of bitdata, judging by the name -- confirm.
bit10_hamd <- cbind(
  c(0, 85, 81, 74, 77, 90, 72, 93, 90, 90),
  c(85, 0, 78, 83, 82, 83, 89, 84, 79, 69),
  c(81, 78, 0, 65, 86, 81, 79, 82, 81, 89),
  c(74, 83, 65, 0, 83, 90, 76, 85, 78, 92),
  c(77, 82, 86, 83, 0, 67, 75, 82, 83, 81),
  c(90, 83, 81, 90, 67, 0, 82, 87, 88, 80),
  c(72, 89, 79, 76, 75, 82, 0, 81, 92, 74),
  c(93, 84, 82, 85, 82, 87, 81, 0, 69, 77),
  c(90, 79, 81, 78, 83, 88, 92, 69, 0, 72),
  c(90, 69, 89, 92, 81, 80, 74, 77, 72, 0)
) / ncol(bitdata)
# for uirism[1:10, ]
# 10 x 10 symmetric distance matrix, written one column per c(...).
# NOTE(review): presumably correlation distances, judging by the name --
# confirm.
uirism10_cord <- cbind(
  c(
    0, 0.00400133876, 2.60889537e-05, 0.00183154822, 0.0006526685,
    0.000413946853, 0.00118880691, 0.000461852891, 0.00192335771,
    0.00344803882
  ),
  c(
    0.00400133876, 0, 0.00339291433, 0.00260336976, 0.00776732119,
    0.00640810993, 0.00927944638, 0.00288194472, 0.00145365862,
    0.00096714455
  ),
  c(
    2.60889537e-05, 0.00339291433, 0, 0.00166652079, 0.000938868047,
    0.000622703492, 0.00156232107, 0.000395491414, 0.00164390757,
    0.00301440442
  ),
  c(
    0.00183154822, 0.00260336976, 0.00166652079, 0, 0.00328117923,
    0.00216742062, 0.00386063303, 0.000454440488, 0.000166690505,
    0.000693152364
  ),
  c(
    0.0006526685, 0.00776732119, 0.000938868047, 0.00328117923, 0,
    0.000116680316, 8.60443273e-05, 0.00149684289, 0.00396913822,
    0.00623882524
  ),
  c(
    0.000413946853, 0.00640810993, 0.000622703492, 0.00216742062,
    0.000116680316, 0, 0.000277394147, 0.000821174669, 0.00278434617,
    0.00473942264
  ),
  c(
    0.00118880691, 0.00927944638, 0.00156232107, 0.00386063303,
    8.60443273e-05, 0.000277394147, 1.11022302e-16, 0.00204787836,
    0.00478602797, 0.0072727783
  ),
  c(
    0.000461852891, 0.00288194472, 0.000395491414, 0.000454440488,
    0.00149684289, 0.000821174669, 0.00204787836, 0, 0.000593789371,
    0.00162634825
  ),
  c(
    0.00192335771, 0.00145365862, 0.00164390757, 0.000166690505,
    0.00396913822, 0.00278434617, 0.00478602797, 0.000593789371, 0,
    0.000260225275
  ),
  c(
    0.00344803882, 0.00096714455, 0.00301440442, 0.000693152364,
    0.00623882524, 0.00473942264, 0.0072727783, 0.00162634825,
    0.000260225275, 0
  )
)
# 4-nearest-neighbor reference data for ten items: `idx` holds the neighbor
# indices and `dist` the matching distances. One row per item; the first
# column is the item itself at distance 0.
ui10_nn4 <- list(
  idx = rbind(
    c(1, 6, 10, 3),
    c(2, 7, 3, 5),
    c(3, 7, 5, 2),
    c(4, 9, 8, 2),
    c(5, 8, 3, 7),
    c(6, 1, 3, 10),
    c(7, 3, 2, 5),
    c(8, 5, 4, 7),
    c(9, 4, 8, 2),
    c(10, 6, 1, 3)
  ),
  dist = rbind(
    c(0, 0.3464, 0.6782, 0.7),
    c(0, 0.3, 0.4243, 0.4796),
    c(0, 0.2236, 0.3317, 0.4243),
    c(0, 0.3464, 0.4243, 0.5477),
    c(0, 0.1732, 0.3317, 0.3464),
    c(0, 0.3464, 0.5, 0.5831),
    c(0, 0.2236, 0.3, 0.3464),
    c(0, 0.1732, 0.4243, 0.4583),
    c(0, 0.3464, 0.5831, 0.6164),
    c(0, 0.5831, 0.6782, 1.044)
  )
)
# Hard-coded random projection forest index with margin = "explicit".
# According to the two commented-out lines below, it records the output of
# rpf_build() on ui10 (Euclidean metric, leaf_size = 4) after set.seed(1337).
# The list holds 7 trees, each with `hyperplanes`, `offsets`, `children`,
# `indices` and `leaf_size`, followed by index-level metadata: margin,
# actual/original metric names, version, use_alt_metric, sparse flag and type.
# NOTE(review): `indices` values run from 0 to 9, presumably 0-based row
# indices into ui10, and NaN offsets presumably mark leaf nodes -- confirm
# against rpf_build().
# NOTE(review): some trees store leaf_size = 3 although the command asks for 4;
# presumably the stored value is the largest leaf actually built -- confirm.
# set.seed(1337)
# rpf_build(ui10, metric = "euclidean", leaf_size = 4, margin = "explicit")
rpf_index_ls4e <-
list(
trees = list(
list(hyperplanes = structure(c(
-0.5, 0.300000190734863,
0, 0, 0, -0.800000190734863, -0.300000190734863, 0, 0, 0, -0.200000047683716,
0.100000023841858, 0, 0, 0, -0.300000011920929, -0.200000017881393,
0, 0, 0
), dim = 5:4), offsets = c(
5.77000093460083, -0.555000305175781,
NaN, NaN, NaN
), children = structure(c(
1L, 2L, 0L, 3L, 7L, 4L,
3L, 3L, 7L, 10L
), dim = c(5L, 2L)), indices = c(
2L, 4L, 7L, 1L,
3L, 6L, 8L, 0L, 5L, 9L
), leaf_size = 4), list(
hyperplanes = structure(c(
0.599999904632568,
0.0999999046325684, 0.400000095367432, -0.400000095367432, 0,
0, 0, 0, 0, 0.0999999046325684, 0.300000190734863, 0, -0.299999952316284,
0, 0, 0, 0, 0, 0.399999976158142, 0, 0.100000023841858, 0, 0,
0, 0, 0, 0, 0, 0.100000001490116, -0.100000008940697, 0, 0, 0,
0, 0, 0
), dim = c(9L, 4L)), offsets = c(
-3.58499956130981, -1.4850001335144,
-2.04000043869019, 3.14500045776367, NaN, NaN, NaN, NaN, NaN
),
children = structure(c(
1L, 2L, 3L, 4L, 0L, 2L, 5L, 6L, 8L,
8L, 7L, 6L, 5L, 2L, 5L, 6L, 8L, 10L
), dim = c(9L, 2L)), indices = c(
2L,
6L, 0L, 5L, 9L, 1L, 4L, 7L, 3L, 8L
), leaf_size = 3
), list(
hyperplanes = structure(c(
1.09999990463257, -0.599999904632568,
0, 0, 0.5, 0, 0, 0.700000047683716, -0.5, 0, 0, 0.199999809265137,
0, 0, 0.399999976158142, -0.100000023841858, 0, 0, 0.100000023841858,
0, 0, 0.100000001490116, -0.200000002980232, 0, 0, -0.100000001490116,
0, 0
), dim = c(7L, 4L)), offsets = c(
-8.21500015258789, 5.10999965667725,
NaN, NaN, -3.05499935150146, NaN, NaN
), children = structure(c(
1L,
2L, 0L, 2L, 5L, 5L, 8L, 4L, 3L, 2L, 5L, 6L, 8L, 10L
), dim = c(
7L,
2L
)), indices = c(2L, 6L, 0L, 5L, 9L, 1L, 4L, 7L, 3L, 8L),
leaf_size = 3
), list(hyperplanes = structure(c(
-0.0999999046325684,
0, 0.800000190734863, 0, 0, -0.0999999046325684, 0, 0.299999952316284,
0, 0, -0.100000023841858, 0, 0.100000023841858, 0, 0, 0, 0, -0.100000008940697,
0, 0
), dim = 5:4), offsets = c(
0.934999287128448, NaN, -5.18500089645386,
NaN, NaN
), children = structure(c(
1L, 0L, 3L, 3L, 6L, 2L, 3L,
4L, 6L, 10L
), dim = c(5L, 2L)), indices = c(
3L, 7L, 8L, 0L, 5L,
9L, 1L, 2L, 4L, 6L
), leaf_size = 4), list(hyperplanes = structure(c(
0.300000190734863,
0.599999904632568, 0, 0, 0.400000095367432, 0, 0, -0.300000190734863,
0.700000047683716, 0, 0, 0.5, 0, 0, 0.100000023841858, 0.100000023841858,
0, 0, 0.200000047683716, 0, 0, -0.200000017881393, 0.100000001490116,
0, 0, 0, 0, 0
), dim = c(7L, 4L)), offsets = c(
-0.555000305175781,
-5.5649995803833, NaN, NaN, -3.71500062942505, NaN, NaN
), children = structure(c(
1L,
2L, 0L, 2L, 5L, 5L, 8L, 4L, 3L, 2L, 5L, 6L, 8L, 10L
), dim = c(
7L,
2L
)), indices = c(5L, 9L, 2L, 4L, 7L, 0L, 1L, 6L, 3L, 8L), leaf_size = 3),
list(hyperplanes = structure(c(
-0.599999904632568, -0.0999999046325684,
0, 0, 0, -0.5, -0.0999999046325684, 0, 0, 0, -0.100000023841858,
-0.100000023841858, 0, 0, 0, -0.200000002980232, 0, 0, 0,
0
), dim = 5:4), offsets = c(
5.10999965667725, 0.934999287128448,
NaN, NaN, NaN
), children = structure(c(
1L, 2L, 0L, 3L, 7L,
4L, 3L, 3L, 7L, 10L
), dim = c(5L, 2L)), indices = c(
3L, 7L,
8L, 1L, 2L, 4L, 6L, 0L, 5L, 9L
), leaf_size = 4), list(hyperplanes = structure(c(
0.900000095367432,
0, -0.599999904632568, 0, 0, 0.900000095367432, 0, -0.5,
0, 0, -0.299999952316284, 0, -0.100000023841858, 0, 0, 0.100000001490116,
0, 0, 0, 0
), dim = 5:4), offsets = c(
-7.62000131607056, NaN,
4.53999948501587, NaN, NaN
), children = structure(c(
1L, 0L,
3L, 3L, 6L, 2L, 3L, 4L, 6L, 10L
), dim = c(5L, 2L)), indices = c(
0L,
5L, 9L, 3L, 7L, 8L, 1L, 2L, 4L, 6L
), leaf_size = 4)
), margin = "explicit",
actual_metric = "sqeuclidean", version = "0.0.12", use_alt_metric = TRUE,
original_metric = "euclidean", sparse = FALSE, type = "rnndescent:rpforest"
)
# Hard-coded random projection forest index with margin = "implicit".
# According to the two commented-out lines below, it records the output of
# rpf_build() on ui10 (Euclidean metric, leaf_size = 4) after set.seed(1337).
# The list holds 7 trees, each with `normal_indices`, `children`, `indices`
# and `leaf_size`, followed by index-level metadata: margin, actual/original
# metric names, version, use_alt_metric, sparse flag and type.
# NOTE(review): `normal_indices` is a two-column integer matrix where -1
# appears in both columns of some rows; presumably each non-negative pair
# names the two data points defining a split and -1 marks a leaf -- confirm
# against rpf_build().
# NOTE(review): `indices` values run from 0 to 9, presumably 0-based row
# indices into ui10 -- confirm.
# set.seed(1337)
# rpf_build(ui10, metric = "euclidean", leaf_size = 4, margin = "implicit")
rpf_index_ls4i <-
list(
trees = list(list(normal_indices = structure(c(
4L, 4L, -1L,
-1L, -1L, 0L, 1L, -1L, -1L, -1L
), dim = c(5L, 2L)), children = structure(c(
1L,
2L, 0L, 3L, 7L, 4L, 3L, 3L, 7L, 10L
), dim = c(5L, 2L)), indices = c(
2L,
4L, 7L, 1L, 3L, 6L, 8L, 0L, 5L, 9L
), leaf_size = 4), list(normal_indices = structure(c(
4L,
2L, 2L, 2L, -1L, -1L, -1L, -1L, -1L, 8L, 4L, 1L, 5L, -1L, -1L,
-1L, -1L, -1L
), dim = c(9L, 2L)), children = structure(c(
1L,
2L, 3L, 4L, 0L, 2L, 5L, 6L, 8L, 8L, 7L, 6L, 5L, 2L, 5L, 6L, 8L,
10L
), dim = c(9L, 2L)), indices = c(
2L, 6L, 0L, 5L, 9L, 1L, 4L,
7L, 3L, 8L
), leaf_size = 3), list(normal_indices = structure(c(
5L,
6L, -1L, -1L, 4L, -1L, -1L, 8L, 0L, -1L, -1L, 3L, -1L, -1L
), dim = c(
7L,
2L
)), children = structure(c(
1L, 2L, 0L, 2L, 5L, 5L, 8L, 4L,
3L, 2L, 5L, 6L, 8L, 10L
), dim = c(7L, 2L)), indices = c(
2L, 6L,
0L, 5L, 9L, 1L, 4L, 7L, 3L, 8L
), leaf_size = 3), list(
normal_indices = structure(c(
7L,
-1L, 5L, -1L, -1L, 4L, -1L, 1L, -1L, -1L
), dim = c(5L, 2L)),
children = structure(c(
1L, 0L, 3L, 3L, 6L, 2L, 3L, 4L, 6L,
10L
), dim = c(5L, 2L)), indices = c(
3L, 7L, 8L, 0L, 5L, 9L,
1L, 2L, 4L, 6L
), leaf_size = 4
), list(normal_indices = structure(c(
4L,
5L, -1L, -1L, 6L, -1L, -1L, 1L, 7L, -1L, -1L, 3L, -1L, -1L
), dim = c(
7L,
2L
)), children = structure(c(
1L, 2L, 0L, 2L, 5L, 5L, 8L, 4L,
3L, 2L, 5L, 6L, 8L, 10L
), dim = c(7L, 2L)), indices = c(
5L, 9L,
2L, 4L, 7L, 0L, 1L, 6L, 3L, 8L
), leaf_size = 3), list(
normal_indices = structure(c(
6L,
7L, -1L, -1L, -1L, 0L, 4L, -1L, -1L, -1L
), dim = c(5L, 2L)),
children = structure(c(
1L, 2L, 0L, 3L, 7L, 4L, 3L, 3L, 7L,
10L
), dim = c(5L, 2L)), indices = c(
3L, 7L, 8L, 1L, 2L, 4L,
6L, 0L, 5L, 9L
), leaf_size = 4
), list(
normal_indices = structure(c(
9L,
-1L, 3L, -1L, -1L, 4L, -1L, 2L, -1L, -1L
), dim = c(5L, 2L)),
children = structure(c(
1L, 0L, 3L, 3L, 6L, 2L, 3L, 4L, 6L,
10L
), dim = c(5L, 2L)), indices = c(
0L, 5L, 9L, 3L, 7L, 8L,
1L, 2L, 4L, 6L
), leaf_size = 4
)), margin = "implicit", actual_metric = "sqeuclidean",
version = "0.0.12", use_alt_metric = TRUE, original_metric = "euclidean",
sparse = FALSE, type = "rnndescent:rpforest"
)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.