# inst/doc/bigannoy-vs-bigknn.R

# Chunk defaults for the rendered vignette: source and output are collapsed
# into one block and every output line is prefixed with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

# Fix the RNG seed so the simulated data is reproducible, and set the
# bigANNOY.progress option to FALSE (presumably silences progress output).
set.seed(20260326)
options(bigANNOY.progress = FALSE)

library(bigANNOY)
library(bigmemory)

# Scratch directory (a fresh tempfile() path) used for files this script
# creates.
compare_dir <- tempfile(pattern = "bigannoy-vs-bigknn-")
dir.create(compare_dir, showWarnings = FALSE, recursive = TRUE)

# Simulated Gaussian data: a 120 x 6 reference matrix and a 15 x 6 query
# matrix. Order matters here: both draws consume the same seeded RNG stream.
ref_dense <- matrix(rnorm(6 * 120), ncol = 6, nrow = 120)
query_dense <- matrix(rnorm(6 * 15), ncol = 6, nrow = 15)

# Convert the reference data with as.big.matrix(); the queries stay dense.
ref_big <- as.big.matrix(ref_dense)

# Show the dimensions of both inputs.
dim(ref_big)
dim(query_dense)

# Build the Annoy index from the big.matrix reference data. The index path
# is "ref.ann" inside the scratch directory; metric, tree count, seed and
# load mode are all fixed explicitly.
index_path <- file.path(compare_dir, "ref.ann")

annoy_index <- annoy_build_bigmatrix(
  ref_big,
  path = index_path,
  metric = "euclidean",
  load_mode = "eager",
  n_trees = 20L,
  seed = 123L
)

# Search the index using query_dense as the query, with k = 5L and
# search_k = 100L.
approx_result <- annoy_search_bigmatrix(
  annoy_index,
  query = query_dense,
  search_k = 100L,
  k = 5L
)

# Inspect the result: element names, the `exact` and `backend` fields, and
# the first three rows of the index and (rounded) distance matrices.
preview_rows <- seq_len(3L)

names(approx_result)
approx_result$exact
approx_result$backend
approx_result$index[preview_rows, ]
round(approx_result$distance[preview_rows, ], digits = 3)

# Exact comparison using the optional bigKNN package. The chunk evaluates to
# a notice string when bigKNN is not installed, and to a summary list of the
# exact result otherwise.
bigknn_installed <- length(find.package("bigKNN", quiet = TRUE)) > 0L

if (!bigknn_installed) {
  "bigKNN is not installed in this session, so the exact comparison example is skipped."
} else {
  # Look the function up in the bigKNN namespace at run time.
  knn_bigmatrix <- get("knn_bigmatrix", envir = asNamespace("bigKNN"))

  exact_result <- knn_bigmatrix(
    ref_big,
    query = query_dense,
    k = 5L,
    metric = "euclidean",
    block_size = 64L,
    exclude_self = FALSE
  )

  # Same fields as shown for the approximate result, limited to three rows.
  shown_rows <- seq_len(3L)
  exact_index_head <- exact_result$index[shown_rows, ]
  exact_distance_head <- round(exact_result$distance[shown_rows, ], digits = 3)

  list(
    names = names(exact_result),
    exact = exact_result$exact,
    backend = exact_result$backend,
    index_head = exact_index_head,
    distance_head = exact_distance_head
  )
}

# Mean recall of approximate neighbour sets against exact ones.
#
# approx_index, exact_index: matrices of neighbour ids with one row per query.
# Returns the average, over rows, of the share of ids in the exact row that
# also appear in the approximate row. The denominator is the width of the
# exact result rather than a hard-coded constant, so it follows whatever k
# was used for the search.
mean_neighbor_recall <- function(approx_index, exact_index) {
  stopifnot(nrow(approx_index) == nrow(exact_index))
  n_neighbors <- ncol(exact_index)
  per_query <- vapply(seq_len(nrow(exact_index)), function(i) {
    length(intersect(approx_index[i, ], exact_index[i, ])) / n_neighbors
  }, numeric(1L))
  mean(per_query)
}

# Recall of the approximate search against the exact bigKNN result. The chunk
# evaluates to a notice string when bigKNN is not installed.
if (length(find.package("bigKNN", quiet = TRUE)) > 0L) {
  # Look the function up in the bigKNN namespace at run time.
  knn_bigmatrix <- get("knn_bigmatrix", envir = asNamespace("bigKNN"))

  exact_result <- knn_bigmatrix(
    ref_big,
    query = query_dense,
    k = 5L,
    metric = "euclidean",
    block_size = 64L,
    exclude_self = FALSE
  )

  recall_at_5 <- mean_neighbor_recall(approx_result$index, exact_result$index)

  recall_at_5
} else {
  "Recall example skipped because bigKNN is not installed."
}

# Run benchmark_annoy_bigmatrix() with fixed sizes and search settings; the
# `exact` argument is TRUE only when bigKNN is installed.
run_exact <- length(find.package("bigKNN", quiet = TRUE)) > 0L

bench <- benchmark_annoy_bigmatrix(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 5L,
  n_trees = 20L,
  search_k = 100L,
  metric = "euclidean",
  exact = run_exact,
  path_dir = compare_dir,
  load_mode = "eager"
)

# Columns of the benchmark summary to display.
summary_columns <- c(
  "metric",
  "n_trees",
  "search_k",
  "build_elapsed",
  "search_elapsed",
  "exact_elapsed",
  "recall_at_k"
)

bench$summary[, summary_columns]

# Try the bigANNOY package in your browser
#
# Any scripts or data that you put into this service are public.
#
# bigANNOY documentation built on April 1, 2026, 9:07 a.m.