# inst/doc/metrics-and-tuning.R

# Chunk defaults for the rendered vignette: collapsed output, with printed
# results prefixed by "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

# Fix the RNG seed and switch the bigANNOY progress option off.
set.seed(20260326)
options(bigANNOY.progress = FALSE)

library(bigANNOY)
library(bigmemory)

# Scratch directory under the session temp dir; the index files written
# below are all placed inside it.
tune_dir <- tempfile(pattern = "bigannoy-tuning-")
dir.create(path = tune_dir, showWarnings = FALSE, recursive = TRUE)

# Human-readable names for the reference rows, in row order.
ref_labels <- c(
  "unit_x", "double_x", "unit_y",
  "tilted_x", "unit_z", "diag_xy"
)

# Six reference points in three dimensions, one point per row. The trailing
# comments repeat the matching entry of `ref_labels`.
ref_dense <- rbind(
  c(1.0, 0.0, 0.0), # unit_x
  c(2.0, 0.0, 0.0), # double_x
  c(0.0, 1.0, 0.0), # unit_y
  c(0.8, 0.2, 0.0), # tilted_x
  c(0.0, 0.0, 1.0), # unit_z
  c(1.0, 1.0, 0.0)  # diag_xy
)

# Two query points, one per row.
query_dense <- rbind(
  c(1.0, 0.0, 0.0),
  c(0.9, 0.1, 0.0)
)

# big.matrix version of the reference points; this is the object handed to
# the index builders further down.
ref_big <- as.big.matrix(x = ref_dense)

# Display the reference rows next to their row number and label.
data.frame(
  row.names = NULL,
  index = seq_len(length(ref_labels)),
  label = ref_labels,
  ref_dense
)

# For each metric: build an index over `ref_big` in its own file, run both
# queries, and keep only the first-ranked neighbour (label and rounded
# distance) per query. The per-metric rows are stacked into one data frame.
metric_table <- do.call(
  rbind,
  lapply(c("euclidean", "angular", "manhattan", "dot"), function(metric) {
    built <- annoy_build_bigmatrix(
      ref_big,
      path = file.path(tune_dir, paste0(metric, ".ann")),
      metric = metric,
      n_trees = 20L,
      seed = 123L,
      load_mode = "eager"
    )

    hits <- annoy_search_bigmatrix(
      built,
      query = query_dense,
      k = 2L,
      search_k = 100L
    )

    # Column 1 of the result matrices is read as the top-ranked neighbour.
    top_label <- function(query_row) ref_labels[hits$index[query_row, 1]]
    top_distance <- function(query_row) round(hits$distance[query_row, 1], 3)

    data.frame(
      metric = metric,
      q1_top1 = top_label(1),
      q1_distance = top_distance(1),
      q2_top1 = top_label(2),
      q2_distance = top_distance(2),
      stringsAsFactors = FALSE
    )
  })
)

metric_table

# Euclidean index built with load_mode = "lazy" and 8 trees.
lazy_index <- annoy_build_bigmatrix(
  ref_big,
  load_mode = "lazy",
  metric = "euclidean",
  n_trees = 8L,
  seed = 123L,
  path = file.path(tune_dir, "lazy.ann")
)

# Euclidean index built with load_mode = "eager" and 25 trees.
eager_index <- annoy_build_bigmatrix(
  ref_big,
  load_mode = "eager",
  metric = "euclidean",
  n_trees = 25L,
  seed = 123L,
  path = file.path(tune_dir, "eager.ann")
)

# Report, per index, what annoy_is_loaded() says right after the build.
vapply(
  list(lazy_loaded = lazy_index, eager_loaded = eager_index),
  annoy_is_loaded,
  logical(1)
)

# Open the file written for `eager_index` again, requesting eager loading
# with prefault switched on.
reopened <- annoy_open_index(
  eager_index$path,
  load_mode = "eager",
  prefault = TRUE
)

# Search the reopened index with both query rows (k = 2, search_k = 100),
# again with prefault switched on.
result <- annoy_search_bigmatrix(
  reopened,
  prefault = TRUE,
  search_k = 100L,
  k = 2L,
  query = query_dense
)

# The `exact` flag and the recall_at_k column both depend on whether the
# optional bigKNN package is installed, so check once and reuse the answer
# rather than repeating the whole benchmark call in two branches.
has_bigknn <- length(find.package("bigKNN", quiet = TRUE)) > 0L

# Benchmark grid: every combination of n_trees and search_k on a generated
# 200 x 6 reference set with 20 queries; index files go into `tune_dir`.
tuning_suite <- benchmark_annoy_recall_suite(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 3L,
  n_trees = c(5L, 20L),
  search_k = c(-1L, 50L, 200L),
  metric = "euclidean",
  exact = has_bigknn,
  path_dir = tune_dir
)

# Show the timing columns; recall_at_k is only selected when the exact
# comparison was requested (`if` without `else` yields NULL, which c() drops).
tuning_suite$summary[, c(
  "n_trees",
  "search_k",
  "build_elapsed",
  "search_elapsed",
  if (has_bigknn) "recall_at_k"
)]

# Report the current value of each bigANNOY option, substituting the paired
# fallback when the option is unset. The result is a list named after the
# first argument's names.
Map(
  getOption,
  c(
    block_size_default = "bigANNOY.block_size",
    progress_default = "bigANNOY.progress",
    backend_default = "bigANNOY.backend"
  ),
  list(1024L, FALSE, "cpp")
)

# Try the bigANNOY package in your browser
#
# Any scripts or data that you put into this service are public.
#
# bigANNOY documentation built on April 1, 2026, 9:07 a.m.