library(rnndescent)
context("RP Tree")
# just one tree
expected_rpt_knn <- list(
idx = matrix(
c(
1, 6, 10, 0,
2, 7, 4, 9,
3, 5, 8, 0,
4, 9, 2, 7,
5, 8, 3, 0,
6, 1, 10, 0,
7, 2, 4, 9,
8, 5, 3, 0,
9, 4, 2, 7,
10, 6, 1, 0
),
nrow = 10,
byrow = TRUE
),
dist = matrix(
c(
0, 0.3464102, 0.6782330, NA,
0, 0.3000002, 0.5477225, 0.6164413,
0, 0.3316626, 0.4690416, NA,
0, 0.3464101, 0.5477225, 0.6708205,
0, 0.1732050, 0.3316626, NA,
0, 0.3464102, 0.5830952, NA,
0, 0.3000002, 0.6708205, 0.8185353,
0, 0.1732050, 0.4690416, NA,
0, 0.3464101, 0.6164413, 0.8185353,
0, 0.5830952, 0.6782330, NA
),
nrow = 10,
byrow = TRUE
)
)
set.seed(1337)
res <- rpf_knn(ui10, k = 4, leaf_size = 4, n_trees = 1)
expect_equal(res, expected_rpt_knn, tol = 1e-7)
set.seed(1337)
res <- rpf_knn(ui10, k = 4)
expect_equal(res$idx, ui10_nn4$idx, check.attributes = FALSE)
expect_equal(res$dist, ui10_nn4$dist, check.attributes = FALSE, tol = 1e-4)
set.seed(1337)
res <- rpf_knn(ui10, k = 4, include_self = FALSE)
expect_equal(res$idx[, 1:3], ui10_nn4$idx[, 2:4], check.attributes = FALSE)
expect_equal(res$dist[, 1:3], ui10_nn4$dist[, 2:4], check.attributes = FALSE, tol = 1e-4)
# euclidean
set.seed(1337)
res <- rpf_knn(ui10, k = 4)
expect_equal(res$idx, ui10_nn4$idx, check.attributes = FALSE)
expect_equal(res$dist, ui10_nn4$dist, check.attributes = FALSE, tol = 1e-4)
# cosine
set.seed(1337)
uiris_rnn <- rpf_knn(uirism, 15, metric = "cosine", n_trees = 40)
expect_equal(sum(uiris_rnn$dist), 1.347357, tol = 1e-3)
# multi-threading
set.seed(1337)
res <- rpf_knn(ui10, k = 4, leaf_size = 3, n_threads = 2, n_trees = 1)
expect_in(c(NA), res$dist)
expect_in(c(0), res$idx)
set.seed(1337)
res <- rpf_knn(ui10, k = 4, n_threads = 2)
expect_equal(res$idx, ui10_nn4$idx, check.attributes = FALSE)
expect_equal(res$dist, ui10_nn4$dist, check.attributes = FALSE, tol = 1e-4)
# euclidean converges
set.seed(1337)
uiris_rnn <- rpf_knn(uiris, 15, n_trees = 40, n_threads = 2)
expect_equal(sum(uiris_rnn$dist), ui_edsum, tol = 1e-3)
# cosine
set.seed(1337)
uiris_rnn <- rpf_knn(uirism, 15, metric = "cosine", n_trees = 40, n_threads = 2)
expect_equal(sum(uiris_rnn$dist), 1.347357, tol = 1e-3)
# R index
expected_rpf_index <- list(
trees = list(
list(
hyperplanes = matrix(c(
-0.5000000, -0.8000002, -0.2, -0.3,
0.3000002, -0.3000002, 0.1, -0.2,
0.0000000, 0.0000000, 0.0, 0.0,
0.0000000, 0.0000000, 0.0, 0.0,
0.0000000, 0.0000000, 0.0, 0.0
), nrow = 5, byrow = TRUE),
offsets = c(5.7700009, -0.5550003, NA, NA, NA),
children = matrix(c(
1, 4,
2, 3,
0, 3,
3, 7,
7, 10
), nrow = 5, byrow = TRUE),
indices = c(2, 4, 7, 1, 3, 6, 8, 0, 5, 9),
leaf_size = 4
)
),
margin = "explicit",
actual_metric = "sqeuclidean",
version = "0.0.12",
use_alt_metric = TRUE,
original_metric = "euclidean",
sparse = FALSE,
type = "rnndescent:rpforest"
)
set.seed(1337)
rpf_index <- rpf_build(ui10, metric = "euclidean", n_trees = 1, leaf_size = 4)
expect_equal(rpf_index, expected_rpf_index, tol = 1e-7)
# query data against itself to reproduce knn (just more slowly)
set.seed(1337)
rpf_query_res <-
rpf_knn_query(
ui10,
ui10,
rpf_index,
k = 4,
n_threads = 0,
cache = TRUE
)
expect_equal(rpf_query_res, expected_rpt_knn, tol = 1e-7)
set.seed(1337)
rpf_query_res <-
rpf_knn_query(
ui10,
ui10,
rpf_index,
k = 4,
n_threads = 0,
cache = FALSE
)
expect_equal(rpf_query_res, expected_rpt_knn, tol = 1e-7)
# return forest with knn
set.seed(1337)
rpf_knnf <-
rpf_knn(
ui10,
k = 4,
metric = "euclidean",
n_trees = 1,
ret_forest = TRUE,
leaf_size = 4
)
expect_equal(rpf_knnf$forest, expected_rpf_index, tol = 1e-7)
expect_equal(
list(idx = rpf_knnf$idx, dist = rpf_knnf$dist),
expected_rpt_knn,
check.attributes = FALSE,
tol = 1e-7
)
set.seed(1337)
nnd_with_tree <-
nnd_knn(
ui10,
k = 4,
ret_forest = TRUE,
init = "tree",
init_args = list(n_trees = 1, leaf_size = 4)
)
expect_equal(nnd_with_tree$forest, expected_rpf_index, tol = 1e-7)
# handle alt metric
set.seed(1337)
nnd_with_tree <-
nnd_knn(
ui10,
k = 4,
ret_forest = TRUE,
init = "tree",
use_alt_metric = FALSE,
init_args = list(n_trees = 1, leaf_size = 4)
)
rpf_index_no_alt <- expected_rpf_index
rpf_index_no_alt$use_alt_metric <- FALSE
rpf_index_no_alt$actual_metric <- "euclidean"
expect_equal(nnd_with_tree$forest, rpf_index_no_alt, tol = 1e-7)
# filtering
set.seed(1337)
rpf_knnf3 <-
rpf_knn(
ui10,
k = 4,
metric = "euclidean",
n_trees = 3,
ret_forest = TRUE,
leaf_size = 4
)
expect_equal(length(rpf_knnf3$forest$trees), 3)
rpf_f3f <- rpf_filter(rpf_knnf3, n_trees = 1)
expect_equal(length(rpf_f3f$trees), 1)
expect_equal(rpf_f3f$trees[[1]], rpf_knnf3$forest$trees[[1]])
test_that("can't pass mismatched forest and nn to filter", {
iris_knn_with_forest <-
rpf_knn(iris[1:50, ], k = 15, ret_forest = TRUE)
iris_query_virginica <-
rpf_knn_query(
query = iris[51:150, ],
reference = iris[1:50, ],
forest = iris_knn_with_forest$forest,
k = 15
)
iris_forest <- rpf_build(iris, leaf_size = 15)
expect_error(
rpf_filter(
nn = iris_query_virginica,
forest = iris_forest,
n_trees = 1
), "Mismatched"
)
})
set.seed(1337)
expect_equal(
rpf_build(ui10, metric = "euclidean", leaf_size = 4, n_threads = 0),
rpf_index_ls4e,
tol = 1e-7
)
# implicit margin
set.seed(1337)
rpi_knn <- rpf_knn(uirism[1:20, ], k = 4, verbose = FALSE, n_threads = 0, n_trees = 2, margin = "implicit")
set.seed(1337)
rpe_knn <- rpf_knn(uirism[1:20, ], k = 4, verbose = FALSE, n_threads = 0, n_trees = 2, margin = "explicit")
expect_equal(rpe_knn$dist, rpi_knn$dist)
expected_rpfi_index <- list(
trees = list(list(
normal_indices = matrix(c(
4, 0,
4, 1,
-1, -1,
-1, -1,
-1, -1
), nrow = 5, byrow = TRUE),
children = matrix(c(
1, 4,
2, 3,
0, 3,
3, 7,
7, 10
), nrow = 5, byrow = TRUE),
indices = c(2, 4, 7, 1, 3, 6, 8, 0, 5, 9),
leaf_size = 4
)),
margin = "implicit",
actual_metric = "sqeuclidean",
version = "0.0.12",
use_alt_metric = TRUE,
original_metric = "euclidean",
sparse = FALSE,
type = "rnndescent:rpforest"
)
set.seed(1337)
rpf_knn2df <- rpf_knn(
ui10,
k = 4,
metric = "euclidean",
n_trees = 1,
ret_forest = TRUE,
leaf_size = 4,
margin = "implicit"
)
expect_equal(list(idx = rpf_knn2df$idx, dist = rpf_knn2df$dist), expected_rpt_knn, tol = 1e-7)
expect_equal(rpf_knn2df$forest, expected_rpfi_index)
set.seed(1337)
rpfi_query_res <-
rpf_knn_query(
ui10,
ui10,
rpf_knn2df$forest,
k = 4,
n_threads = 0,
cache = TRUE
)
expect_equal(rpfi_query_res, expected_rpt_knn, tol = 1e-7)
set.seed(1337)
rpf_knnfi3 <-
rpf_knn(
ui10,
k = 4,
metric = "euclidean",
n_trees = 3,
ret_forest = TRUE,
leaf_size = 4,
margin = "implicit"
)
expect_equal(length(rpf_knnfi3$forest$trees), 3)
rpf_fi3f <- rpf_filter(rpf_knnfi3, n_trees = 1)
expect_equal(length(rpf_fi3f$trees), 1)
expect_equal(rpf_fi3f$trees[[1]], rpf_knnfi3$forest$trees[[1]])
expect_equal(rpf_fi3f$margin, rpf_knnfi3$forest$margin)
expect_equal(rpf_fi3f$actual_metric, rpf_knnfi3$forest$actual_metric)
expect_equal(rpf_fi3f$version, rpf_knnfi3$forest$version)
expect_equal(rpf_fi3f$use_alt_metric, rpf_knnfi3$forest$use_alt_metric)
expect_equal(rpf_fi3f$original_metric, rpf_knnfi3$forest$original_metric)
set.seed(1337)
rpf_knnff3 <-
rpf_knn(
ui10,
k = 4,
metric = "euclidean",
n_trees = 3,
ret_forest = TRUE,
leaf_size = 4,
margin = "explicit"
)
expect_equal(length(rpf_knnff3$forest$trees), 3)
rpf_ff3f <- rpf_filter(rpf_knnff3, n_trees = 1)
expect_equal(length(rpf_ff3f$trees), 1)
expect_equal(rpf_ff3f$trees[[1]], rpf_knnff3$forest$trees[[1]])
expect_equal(rpf_ff3f$margin, rpf_knnff3$forest$margin)
expect_equal(rpf_ff3f$actual_metric, rpf_knnff3$forest$actual_metric)
expect_equal(rpf_ff3f$version, rpf_knnff3$forest$version)
expect_equal(rpf_ff3f$use_alt_metric, rpf_knnff3$forest$use_alt_metric)
expect_equal(rpf_ff3f$original_metric, rpf_knnff3$forest$original_metric)
set.seed(1337)
expect_equal(
rpf_build(ui10, metric = "euclidean", leaf_size = 4, margin = "implicit", n_threads = 0),
rpf_index_ls4i,
tol = 1e-7
)
set.seed(1337)
rpf_index_ls4i_no_alt <- rpf_index_ls4i
rpf_index_ls4i_no_alt$use_alt_metric <- FALSE
rpf_index_ls4i_no_alt$actual_metric <- "euclidean"
expect_equal(
rpf_build(ui10, metric = "euclidean", use_alt_metric = FALSE, leaf_size = 4, margin = "implicit", n_threads = 0),
rpf_index_ls4i_no_alt,
tol = 1e-7
)
# cosine test
set.seed(1337)
uiriscos <-
rpf_knn(
uirism,
k = 15,
metric = "cosine",
n_threads = 0,
ret_forest = TRUE,
n_trees = 1
)
set.seed(1337)
uiriscosq <-
rpf_knn_query(
uirism,
uirism,
uiriscos$forest,
k = 15,
n_threads = 0,
)
# handle ties where indices swap places
expect_equal(sum(uiriscos$idx - uiriscosq$idx), 0)
# test uncached
uiriscosq_nocache <- rpf_knn_query(
uirism,
uirism,
uiriscos$forest,
k = 15,
n_threads = 2,
cache = FALSE
)
expect_equal(sum(uiriscos$idx - uiriscosq_nocache$idx), 0)
set.seed(1337)
uiriscosi <-
rpf_knn(
uirism,
k = 15,
metric = "cosine",
n_threads = 0,
ret_forest = TRUE,
margin = "implicit",
n_trees = 1
)
set.seed(1337)
uiriscosiq <-
rpf_knn_query(
uirism,
uirism,
uiriscosi$forest,
k = 15,
n_threads = 0,
)
expect_equal(sum(uiriscosi$idx - uiriscosq$idx), 0)
expect_equal(sum(uiriscosi$idx - uiriscosiq$idx), 0)
set.seed(1337)
ui6f <- rpf_knn(
ui6,
k = 4,
leaf_size = 3,
ret_forest = TRUE
)
qnbrs4 <- graph_knn_query(reference = ui6, reference_graph = ui6f, query = ui4, init = ui6f$forest, k = 4)
expect_equal(sum(qnbrs4$dist), ui4q_edsum, tol = 1e-6)
test_that("binary data", {
# euclidean forces conversion to float data
set.seed(1337)
bin_euc_imp <- rpf_knn(lbitdata, k = 4, margin = "implicit")
set.seed(1337)
bin_euc_exp <- rpf_knn(lbitdata, k = 4, margin = "explicit")
expect_equal(bin_euc_imp, bin_euc_exp)
set.seed(1337)
bin_jac_imp <- rpf_knn(lbitdata, k = 4, margin = "implicit", metric = "jaccard")
set.seed(1337)
bin_jac_exp <- rpf_knn(lbitdata, k = 4, margin = "explicit", metric = "jaccard")
expect_equal(bin_jac_imp, bin_jac_exp)
set.seed(1337)
bin_jac_aut <- rpf_knn(lbitdata, k = 4, margin = "auto", metric = "jaccard")
expect_equal(bin_jac_aut, bin_jac_imp)
set.seed(1337)
euc_forest_i <- rpf_build(lbitdata, leaf_size = 10, margin = "implicit")
bin_euc_impq <-
rpf_knn_query(
query = lbitdata,
reference = lbitdata,
forest = euc_forest_i,
k = 4
)
expect_equal(bin_euc_impq, bin_euc_imp)
set.seed(1337)
euc_forest_e <- rpf_build(lbitdata, leaf_size = 10, margin = "explicit")
bin_euc_expq <-
rpf_knn_query(
query = lbitdata,
reference = lbitdata,
forest = euc_forest_e,
k = 4
)
expect_equal(bin_euc_expq, bin_euc_exp)
set.seed(1337)
euc_forest_a <- rpf_build(lbitdata, leaf_size = 10, margin = "auto")
bin_euc_autq <-
rpf_knn_query(
query = lbitdata,
reference = lbitdata,
forest = euc_forest_a,
k = 4
)
expect_equal(bin_euc_autq, bin_euc_exp)
})
test_that("sparse implicit margin", {
set.seed(1337)
dknn <- rpf_knn(ui10z, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit")
set.seed(1337)
sknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit")
expect_equal(sknn, dknn)
set.seed(1337)
dknn <- rpf_knn(ui10z, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit", ret_forest = TRUE)
set.seed(1337)
sknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit", ret_forest = TRUE)
expect_equal(list(idx = sknn$idx, dist = sknn$dist), list(idx = dknn$idx, dist = dknn$dist))
dknn$forest$sparse <- TRUE
expect_equal(sknn$forest, dknn$forest)
set.seed(1337)
dforest <- rpf_build(ui10z, leaf_size = 3, n_trees = 2, margin = "implicit", metric = "cosine")
set.seed(1337)
sforest <- rpf_build(ui10sp, leaf_size = 3, n_trees = 2, margin = "implicit", metric = "cosine")
expect_equal(sforest$actual_metric, "alternative-cosine")
expect_true(sforest$sparse)
sforest$sparse <- FALSE
expect_equal(sforest, dforest)
set.seed(1337)
dforest6 <- rpf_build(ui10z6, leaf_size = 3, n_trees = 2, margin = "implicit", metric = "cosine")
set.seed(1337)
dquery4 <- rpf_knn_query(query = ui10z4, reference = ui10z6, forest = dforest6, k = 4)
expect_error(squery4 <- rpf_knn_query(query = ui10sp4, reference = ui10sp6, forest = dforest6, k = 4), "sparse forest")
# hack the forest to force it to work with sparse
dforest6$sparse <- TRUE
set.seed(1337)
squery4 <- rpf_knn_query(query = ui10sp4, reference = ui10sp6, forest = dforest6, k = 4)
expect_equal(squery4, dquery4, tol = 1e-4)
set.seed(1337)
sforest6 <- rpf_build(ui10sp6, leaf_size = 3, n_trees = 2, margin = "implicit", metric = "cosine")
set.seed(1337)
squery4b <- rpf_knn_query(query = ui10sp4, reference = ui10sp6, forest = sforest6, k = 4)
expect_equal(squery4b, squery4, tol = 1e-5)
set.seed(1337)
squery4b <- rpf_knn_query(query = ui10sp4, reference = ui10sp6, forest = sforest6, k = 4, cache = FALSE)
expect_equal(squery4b, squery4, tol = 1e-5)
})
test_that("sparse explicit margin", {
set.seed(1337)
dknn <- rpf_knn(ui10z, k = 4, leaf_size = 3, n_trees = 2, margin = "explicit")
set.seed(1337)
sknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "explicit")
expect_equal(sknn, dknn)
# implict and explicit should give the same results for euclidean
set.seed(1337)
siknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit")
expect_equal(siknn, sknn)
set.seed(1337)
sknn6 <- rpf_knn(ui10sp6, k = 4, leaf_size = 2, n_trees = 2, ret_forest = TRUE)
set.seed(1337)
sforest6 <- rpf_build(ui10sp6, leaf_size = 2, n_trees = 2)
expect_equal(sforest6$margin, "explicit")
expect_equal(sknn6$forest$margin, "explicit")
expect_equal(sforest6, sknn6$forest)
s6_ff <- rpf_filter(sknn6)
expect_equal(length(s6_ff$trees), 1)
expect_equal(s6_ff$trees[[1]], sknn6$forest$trees[[1]])
set.seed(1337)
res_forest <- rpf_knn_query(ui10sp4, ui10sp6, forest = sforest6, k = 4)
set.seed(1337)
res_knnforest <- rpf_knn_query(ui10sp4, ui10sp6, forest = sknn6$forest, k = 4)
expect_equal(res_forest, res_knnforest)
set.seed(1337)
dknn6 <- rpf_knn(ui10z6, k = 4, leaf_size = 2, n_trees = 2, margin = "explicit", ret_forest = TRUE)
set.seed(1337)
res_dknn <- rpf_knn_query(ui10z4, ui10z6, forest = dknn6$forest, k = 4)
expect_equal(res_forest, res_dknn)
# uncached
set.seed(1337)
res_dknn_nocache <- rpf_knn_query(ui10z4, ui10z6, forest = dknn6$forest, k = 4, cache = FALSE)
expect_equal(res_forest, res_dknn_nocache)
# implict and explicit should give the same results for cosine also
set.seed(1337)
secknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "explicit", metric = "cosine")
set.seed(1337)
sicknn <- rpf_knn(ui10sp, k = 4, leaf_size = 3, n_trees = 2, margin = "implicit", metric = "cosine")
expect_equal(secknn, sicknn)
set.seed(1337)
sacknn <- rpf_knn(ui10sp,
k = 4, leaf_size = 3, n_trees = 2,
margin = "explicit", metric = "cosine", use_alt_metric = FALSE
)
expect_equal(sacknn, secknn, tol = 1e-4)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.