# Nothing
# ---- Setup ----
# knitr chunk defaults: fold printed output into the source block and prefix
# each output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Set the `bigANNOY.progress` option to FALSE (presumably silences progress
# output -- confirm against the package docs) and fix the RNG seed.
options(bigANNOY.progress = FALSE)
set.seed(20260326)

library(bigANNOY)

# Scratch directory under the session temp dir; it is handed to the benchmark
# calls in this script as `path_dir`.
bench_dir <- tempfile(pattern = "bigannoy-benchmark-")
dir.create(bench_dir, showWarnings = FALSE, recursive = TRUE)
bench_dir
# ---- Single benchmark run ----
# One run with exact = FALSE; the CSV path is passed as `output_path`.
single_csv <- file.path(bench_dir, "single.csv")
single <- benchmark_annoy_bigmatrix(
  n_ref = 200L, n_query = 20L, n_dim = 6L,
  k = 3L, n_trees = 10L, search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir, output_path = single_csv,
  load_mode = "eager"
)

# Inspect the returned object: summary table, element names, parameters,
# exact-baseline flag, and the validation result with its per-check table.
single$summary
names(single)
single$params
single$exact_available
single$validation$valid
single$validation$checks[, c("check", "passed", "severity")]

# Read the CSV at `single_csv` back in.
read.csv(single_csv, stringsAsFactors = FALSE)
# ---- External-query run vs. self-search run ----
# Same reference size and index settings in both calls; the first passes
# `n_query`, the second passes `query = NULL` instead (presumably switching
# to self-search -- see the `self_search` column selected below).
external_run <- benchmark_annoy_bigmatrix(
  n_ref = 120L, n_query = 12L, n_dim = 5L,
  k = 3L, n_trees = 8L,
  exact = FALSE,
  path_dir = bench_dir
)
self_run <- benchmark_annoy_bigmatrix(
  n_ref = 120L, query = NULL, n_dim = 5L,
  k = 3L, n_trees = 8L,
  exact = FALSE,
  path_dir = bench_dir
)

# Stack the shape-related summary columns of both runs for comparison.
shape_cols <- c("self_search", "n_ref", "n_query", "k")
rbind(
  external = external_run$summary[, shape_cols],
  self = self_run$summary[, shape_cols]
)
# ---- Recall suite ----
# Suite call that receives vectors for `n_trees` and `search_k`; the CSV path
# is passed as `output_path`.
suite_csv <- file.path(bench_dir, "suite.csv")
suite <- benchmark_annoy_recall_suite(
  n_ref = 200L, n_query = 20L, n_dim = 6L,
  k = 3L,
  n_trees = c(5L, 10L),
  search_k = c(-1L, 50L),
  exact = FALSE,
  path_dir = bench_dir, output_path = suite_csv,
  load_mode = "eager"
)

# Show the in-memory summary, then the CSV at `suite_csv` read back in.
suite$summary
read.csv(suite_csv, stringsAsFactors = FALSE)
# ---- Optional exact baseline ----
# Run the exact comparison only when the bigKNN package can be located.
# find.package(quiet = TRUE) yields character(0) rather than an error when
# the package is missing, so the length test is a safe availability check.
if (length(find.package("bigKNN", quiet = TRUE)) > 0L) {
  exact_run <- benchmark_annoy_bigmatrix(
    n_ref = 150L,
    n_query = 15L,
    n_dim = 5L,
    k = 3L,
    n_trees = 10L,
    search_k = 50L,
    metric = "euclidean",
    exact = TRUE,
    path_dir = bench_dir
  )
  # Only the final value of a top-level `if` expression is auto-printed, so
  # this intermediate result has to be printed explicitly to be shown at all.
  print(exact_run$exact_available)
  # Final expression of the branch: becomes the visible value of the `if`.
  exact_run$summary[, c(
    "build_elapsed",
    "search_elapsed",
    "exact_elapsed",
    "recall_at_k"
  )]
} else {
  # Visible value of the `if` when the optional dependency is absent.
  "Exact baseline example skipped because bigKNN is not installed."
}
# ---- Benchmark on user-supplied matrices ----
# Random 80 x 4 reference and 12 x 4 query matrices. The reference is drawn
# first so the RNG stream is consumed in a fixed order.
ref <- matrix(rnorm(80 * 4), nrow = 80, ncol = 4)
query <- matrix(rnorm(12 * 4), nrow = 12, ncol = 4)

# Pass the matrices directly as `x` / `query`, with filebacked = TRUE.
user_run <- benchmark_annoy_bigmatrix(
  x = ref, query = query,
  k = 3L, n_trees = 12L, search_k = 40L,
  exact = FALSE,
  filebacked = TRUE,
  path_dir = bench_dir,
  load_mode = "eager"
)

# Storage, shape, and timing columns of the summary.
user_run$summary[, c(
  "filebacked", "self_search",
  "n_ref", "n_query", "n_dim",
  "build_elapsed", "search_elapsed"
)]
# ---- Comparison against RcppAnnoy ----
# Comparison benchmark; the CSV path is passed as `output_path`.
compare_csv <- file.path(bench_dir, "compare.csv")
compare_run <- benchmark_annoy_vs_rcppannoy(
  n_ref = 200L, n_query = 20L, n_dim = 6L,
  k = 3L, n_trees = 10L, search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir, output_path = compare_csv,
  load_mode = "eager"
)

# Per-implementation shape, size, and timing columns from the summary.
compare_run$summary[, c(
  "implementation", "reference_storage",
  "n_ref", "n_query", "n_dim",
  "total_data_bytes", "index_bytes",
  "build_elapsed", "search_elapsed"
)]

# Byte-size columns from the CSV at `compare_csv`, read back in.
read.csv(compare_csv, stringsAsFactors = FALSE)[, c(
  "implementation",
  "ref_bytes", "query_bytes",
  "index_bytes", "metadata_bytes", "artifact_bytes"
)]
# ---- Volume suite ----
# Suite call that receives vectors for `n_ref` and `n_dim`; the CSV path is
# passed as `output_path`.
volume_csv <- file.path(bench_dir, "volume.csv")
volume_run <- benchmark_annoy_volume_suite(
  n_ref = c(200L, 500L),
  n_query = 20L,
  n_dim = c(6L, 12L),
  k = 3L, n_trees = 10L, search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir, output_path = volume_csv,
  load_mode = "eager"
)

# Size and timing columns of the summary, alongside the data dimensions.
volume_run$summary[, c(
  "implementation",
  "n_ref", "n_dim",
  "total_data_bytes", "index_bytes",
  "build_elapsed", "search_elapsed"
)]
# Look up benchmarks/benchmark_annoy.R inside the installed bigANNOY package;
# system.file() returns "" when the file cannot be found.
system.file(file.path("benchmarks", "benchmark_annoy.R"), package = "bigANNOY")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.