inst/benchmarks/benchmark_annoy.R

#!/usr/bin/env Rscript

# Convert one command-line value (a string) to the requested scalar kind.
#
# value: the raw text taken from the right-hand side of `--name=value`.
# kind:  "integer" (the default), "logical" or "character".
#
# Returns as.integer(value) for "integer"; for "logical", TRUE when the
# lower-cased value is one of "true", "1", "yes" or "y" and FALSE for
# anything else; for "character", the value unchanged.
parse_scalar <- function(value, kind = c("integer", "logical", "character")) {
    kind <- match.arg(kind)

    if (identical(kind, "integer")) {
        return(as.integer(value))
    }

    if (identical(kind, "logical")) {
        truthy <- c("true", "1", "yes", "y")
        return(is.element(tolower(value), truthy))
    }

    # Only "character" is left once match.arg() has validated `kind`.
    value
}

# Parse a comma-separated string such as "10,50,100" into an integer vector.
#
# Tokens that do not convert to an integer (including empty tokens) become
# NA and are then dropped by stats::na.omit(), so the result may be shorter
# than the number of comma-separated fields.
parse_vector <- function(value) {
    tokens <- strsplit(value, ",", fixed = TRUE)[[1L]]
    numbers <- as.integer(tokens)
    stats::na.omit(numbers)
}

# Build the benchmark option list from `--name=value` command-line arguments.
#
# args: character vector of arguments, typically
#       commandArgs(trailingOnly = TRUE).
#
# Returns a named list holding the built-in defaults, with every option that
# was supplied on the command line overriding its default. With no arguments
# the defaults are returned unchanged.
#
# Stops with "Arguments must use the form --name=value" when an argument
# lacks the leading "--", the "=", a name or a value, and with
# "Unknown benchmark argument: <name>" when the name is not recognised.
parse_args <- function(args) {
    defaults <- list(
        mode = "single",
        n_ref = 2000L,
        n_query = 200L,
        n_dim = 20L,
        k = 10L,
        n_trees = 50L,
        suite_n_ref = c(2000L, 5000L, 10000L),
        suite_n_query = 200L,
        suite_n_dim = c(20L, 50L),
        suite_trees = c(10L, 50L, 100L),
        search_k = -1L,
        suite_search_k = c(-1L, 1000L, 5000L),
        metric = "euclidean",
        seed = 42L,
        build_seed = 42L,
        build_threads = -1L,
        backend = getOption("bigANNOY.backend", "cpp"),
        exact = TRUE,
        self_search = FALSE,
        filebacked = FALSE,
        keep_files = FALSE,
        path_dir = tempdir(),
        output_path = NULL,
        load_mode = "eager"
    )

    # Option names grouped by how their value is converted.
    integer_keys <- c(
        "n_ref", "n_query", "n_dim", "k", "n_trees", "search_k",
        "seed", "build_seed", "build_threads"
    )
    vector_keys <- c(
        "suite_n_ref", "suite_n_query", "suite_n_dim",
        "suite_trees", "suite_search_k"
    )
    logical_keys <- c("exact", "self_search", "filebacked", "keep_files")
    character_keys <- c(
        "mode", "metric", "backend", "path_dir", "output_path", "load_mode"
    )

    for (arg in args) {
        # Split on the FIRST "=" only, so that values which themselves contain
        # "=" (for example a directory named "run=1") are accepted instead of
        # being rejected as malformed.
        eq_pos <- as.integer(regexpr("=", arg, fixed = TRUE))
        well_formed <- startsWith(arg, "--") &&
            eq_pos >= 4L &&          # "--" plus at least one name character
            eq_pos < nchar(arg)      # at least one value character
        if (!well_formed) {
            stop("Arguments must use the form --name=value", call. = FALSE)
        }
        key <- substr(arg, 3L, eq_pos - 1L)
        value <- substr(arg, eq_pos + 1L, nchar(arg))

        if (key %in% integer_keys) {
            defaults[[key]] <- parse_scalar(value, "integer")
        } else if (key %in% vector_keys) {
            defaults[[key]] <- parse_vector(value)
        } else if (key %in% logical_keys) {
            defaults[[key]] <- parse_scalar(value, "logical")
        } else if (key %in% character_keys) {
            defaults[[key]] <- parse_scalar(value, "character")
        } else {
            stop(sprintf("Unknown benchmark argument: %s", key), call. = FALSE)
        }
    }

    defaults
}

# Print `x` beneath an underlined heading.
#
# x:     the object to print; it is printed with row.names = FALSE.
# title: heading text, written on its own line and followed by a line of "="
#        characters of the same width.
#
# Returns whatever print() returns for `x`.
print_summary <- function(x, title) {
    emit_line <- function(text) cat(text, "\n", sep = "")

    emit_line(title)
    emit_line(strrep("=", nchar(title)))
    print(x, row.names = FALSE)
}

# ---- Script entry point ----
# Parse the command line, make sure bigANNOY is available, run the benchmark
# selected by --mode and print its summary table.
args <- parse_args(commandArgs(trailingOnly = TRUE))

if (!requireNamespace("bigANNOY", quietly = TRUE)) {
    stop("The benchmark requires the 'bigANNOY' package.", call. = FALSE)
}

library(bigANNOY)

# Options forwarded under the same name to every benchmark entry point.
# Subsetting with `[` keeps NULL-valued entries (such as the default
# output_path), so they are still passed explicitly.
shared_args <- args[c(
    "k", "metric", "seed", "build_seed", "build_threads", "backend",
    "exact", "filebacked", "path_dir", "keep_files", "output_path",
    "load_mode"
)]

# Problem size and index settings used by the single-configuration modes.
scalar_shape <- args[c("n_ref", "n_query", "n_dim")]
scalar_index <- args[c("n_trees", "search_k")]

run_mode <- tolower(args$mode)

if (identical(run_mode, "suite")) {
    # Recall suite: one problem size, several n_trees / search_k settings.
    runner <- benchmark_annoy_recall_suite
    heading <- "bigANNOY recall suite"
    call_args <- c(
        scalar_shape,
        list(n_trees = args$suite_trees, search_k = args$suite_search_k),
        shared_args
    )
} else if (identical(run_mode, "compare")) {
    runner <- benchmark_annoy_vs_rcppannoy
    heading <- "bigANNOY vs RcppAnnoy benchmark"
    call_args <- c(scalar_shape, scalar_index, shared_args)
} else if (identical(run_mode, "volume")) {
    # Volume suite: several problem sizes, one n_trees / search_k setting.
    runner <- benchmark_annoy_volume_suite
    heading <- "bigANNOY volume suite"
    call_args <- c(
        list(
            n_ref = args$suite_n_ref,
            n_query = args$suite_n_query,
            n_dim = args$suite_n_dim
        ),
        scalar_index,
        shared_args
    )
} else {
    # Any other --mode value (including the default "single") runs the
    # single benchmark.
    runner <- benchmark_annoy_bigmatrix
    heading <- "bigANNOY benchmark"
    call_args <- c(scalar_shape, scalar_index, shared_args)
}

# --self_search=true asks for `query = NULL` to be passed explicitly. This
# must be written as `[<-` with list(NULL): `call_args$query <- NULL` would
# delete the element rather than store NULL, so the flag would silently do
# nothing. The volume suite is never given a `query` argument.
# NOTE(review): assumes the three non-volume benchmark functions accept a
# `query` argument -- confirm against the bigANNOY documentation.
if (isTRUE(args$self_search) && !identical(run_mode, "volume")) {
    call_args["query"] <- list(NULL)
}

result <- do.call(runner, call_args)
print_summary(result$summary, heading)

cat("\nInstalled benchmark path:\n")
cat(system.file("benchmarks", "benchmark_annoy.R", package = "bigANNOY"), "\n")

Try the bigANNOY package in your browser

Any scripts or data that you put into this service are public.

bigANNOY documentation built on April 1, 2026, 9:07 a.m.