# Source: inst/doc/file-backed-bigmemory-workflows.R

# Vignette setup: knitr chunk defaults, package option, RNG seed, packages,
# and a scratch directory for the files created further down.

# knitr chunk defaults: collapse = TRUE and "#>" as the output comment prefix.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Sets the bigANNOY.progress option to FALSE.
# NOTE(review): presumably silences progress output -- confirm in bigANNOY.
options(bigANNOY.progress = FALSE)
# Fix R's RNG seed.
set.seed(20260326)

library(bigANNOY)
library(bigmemory)

# Scratch directory with a unique name generated by tempfile(). It is used
# below as the backingpath for the file-backed matrices and as the parent
# directory of the index file.
workspace_dir <- tempfile("bigannoy-filebacked-")
dir.create(workspace_dir, recursive = TRUE, showWarnings = FALSE)

# Create a file-backed big.matrix and fill it from an in-memory matrix.
#
# Arguments:
#   values      Source matrix; nrow(values) and ncol(values) give the shape
#               of the new matrix, and its contents are copied in.
#   type        Passed through as the `type` of filebacked.big.matrix().
#   backingpath Passed through as the `backingpath`.
#   name        File stem: "<name>.bin" is requested as the backing file and
#               "<name>.desc" as the descriptor file.
#
# Returns the matrix object produced by filebacked.big.matrix() after
# `values` has been assigned into it.
make_filebacked_matrix <- function(values, type, backingpath, name) {
  n_rows <- nrow(values)
  n_cols <- ncol(values)
  data_file <- sprintf("%s.bin", name)
  desc_file <- sprintf("%s.desc", name)

  fb <- filebacked.big.matrix(
    nrow = n_rows,
    ncol = n_cols,
    type = type,
    backingfile = data_file,
    descriptorfile = desc_file,
    backingpath = backingpath
  )

  # Whole-matrix assignment copies every cell of `values` into `fb`.
  fb[, ] <- values
  fb
}

# Reference data: five 2-column rows, filled row by row (byrow = TRUE).
ref_dense <- matrix(
  c(
    0.0, 0.0,
    5.0, 0.0,
    0.0, 5.0,
    5.0, 5.0,
    9.0, 9.0
  ),
  ncol = 2,
  byrow = TRUE
)

# Copy the reference rows into a file-backed "double" matrix. With
# name = "ref" the helper requests "ref.bin" and "ref.desc" under
# workspace_dir.
ref_fb <- make_filebacked_matrix(
  values = ref_dense,
  type = "double",
  backingpath = workspace_dir,
  name = "ref"
)

# Descriptor object for ref_fb (not referenced again in the visible script)
# and the path of the descriptor file requested above.
ref_desc <- describe(ref_fb)
ref_desc_path <- file.path(workspace_dir, "ref.desc")

# Top-level expressions: their values are auto-printed when the script runs.
file.exists(ref_desc_path)
dim(ref_fb)

index_path <- file.path(workspace_dir, "ref.ann")

# Build the index, passing the reference data as a descriptor file path
# rather than as the big.matrix object.
# NOTE(review): presumably `path` is where the index is written and
# load_mode = "lazy" defers loading it -- confirm against the bigANNOY docs.
index <- annoy_build_bigmatrix(
  x = ref_desc_path,
  path = index_path,
  n_trees = 25L,
  metric = "euclidean",
  seed = 99L,
  load_mode = "lazy"
)

# Auto-print the returned index object.
index

# Query data: two 2-column rows, filled row by row (byrow = TRUE).
query_dense <- matrix(
  c(
    0.2, 0.1,
    4.7, 5.1
  ),
  ncol = 2,
  byrow = TRUE
)

# File-backed copy of the queries; the helper requests "query.bin" and
# "query.desc" under workspace_dir.
query_fb <- make_filebacked_matrix(
  values = query_dense,
  type = "double",
  backingpath = workspace_dir,
  name = "query"
)

# Search with the query supplied as the big.matrix object itself,
# using k = 2L and search_k = 100L.
query_result_big <- annoy_search_bigmatrix(
  index,
  query = query_fb,
  k = 2L,
  search_k = 100L
)

# Print the `index` element and the `distance` element (rounded to three
# decimals) of the result.
query_result_big$index
round(query_result_big$distance, 3)

# Two further ways of referring to the same query matrix: a descriptor
# object from describe(), and the path of the "query.desc" file requested
# when query_fb was created.
query_desc <- describe(query_fb)
query_desc_path <- file.path(workspace_dir, "query.desc")

# Same search parameters as before, query passed as a descriptor object.
query_result_desc <- annoy_search_bigmatrix(
  index,
  query = query_desc,
  k = 2L,
  search_k = 100L
)

# Same search parameters, query passed as a descriptor file path.
query_result_path <- annoy_search_bigmatrix(
  index,
  query = query_desc_path,
  k = 2L,
  search_k = 100L
)

query_result_desc$index
query_result_path$index

# Compare against the result obtained from the big.matrix object:
# identical() for the `index` elements, all.equal() for `distance`
# (descriptor-object result only; the path result's distances are not
# compared here).
identical(query_result_big$index, query_result_desc$index)
identical(query_result_big$index, query_result_path$index)
all.equal(query_result_big$distance, query_result_desc$distance)

# Preallocated file-backed output matrices: one row per query row and two
# columns (the search below uses k = 2L). Integer storage for index_store,
# double storage for distance_store.
index_store <- filebacked.big.matrix(
  nrow = nrow(query_dense),
  ncol = 2L,
  type = "integer",
  backingfile = "nn_index.bin",
  descriptorfile = "nn_index.desc",
  backingpath = workspace_dir
)

distance_store <- filebacked.big.matrix(
  nrow = nrow(query_dense),
  ncol = 2L,
  type = "double",
  backingfile = "nn_distance.bin",
  descriptorfile = "nn_distance.desc",
  backingpath = workspace_dir
)

# Search with the output matrices supplied: xpIndex as a descriptor object,
# xpDistance as a descriptor file path.
# NOTE(review): presumably the results are written into these matrices --
# confirm against the bigANNOY docs.
# NOTE(review): unlike the other searches in this script, no search_k is
# passed here -- confirm that relying on the default is intended.
streamed_result <- annoy_search_bigmatrix(
  index,
  query = query_desc,
  k = 2L,
  xpIndex = describe(index_store),
  xpDistance = file.path(workspace_dir, "nn_distance.desc")
)

# Read the output matrices back as ordinary R matrices and print them.
bigmemory::as.matrix(index_store)
round(bigmemory::as.matrix(distance_store), 3)

# Attach the same outputs again through their descriptor files and print
# their contents the same way.
index_store_again <- attach.big.matrix(file.path(workspace_dir, "nn_index.desc"))
distance_store_again <- attach.big.matrix(file.path(workspace_dir, "nn_distance.desc"))

bigmemory::as.matrix(index_store_again)
round(bigmemory::as.matrix(distance_store_again), 3)

# Non-file-backed big.matrix created with separated = TRUE, with the same
# shape as query_dense, then filled with the same query values.
query_sep <- big.matrix(
  nrow = nrow(query_dense),
  ncol = ncol(query_dense),
  type = "double",
  separated = TRUE
)
query_sep[,] <- query_dense

# Search with the separated matrix passed via its descriptor object.
sep_result <- annoy_search_bigmatrix(
  index,
  query = describe(query_sep),
  k = 2L,
  search_k = 100L
)

sep_result$index
round(sep_result$distance, 3)

# Compare with the result from the file-backed query matrix: identical()
# for the `index` elements, all.equal() for `distance`.
identical(sep_result$index, query_result_big$index)
all.equal(sep_result$distance, query_result_big$distance)

# Try the bigANNOY package in your browser
#
# Any scripts or data that you put into this service are public.
#
# bigANNOY documentation built on April 1, 2026, 9:07 a.m.