# tests/testthat/test-hnsw.R

# Tests for hnsw.R - HNSW Index for Approximate Nearest Neighbor Search

test_that("DistanceMetric contains correct values", {
  # Every metric constant must resolve to its lowercase string identifier.
  expect_equal(object = DistanceMetric$MANHATTAN, expected = "manhattan")
  expect_equal(object = DistanceMetric$DOT, expected = "dot")
  expect_equal(object = DistanceMetric$EUCLIDEAN, expected = "euclidean")
  expect_equal(object = DistanceMetric$COSINE, expected = "cosine")
})

test_that("HNSWIndex initializes correctly", {
  # Only dimension and metric are supplied here; n_trees is not passed, so
  # the value checked below is whatever the constructor falls back to.
  idx <- HNSWIndex$new(metric = "angular", dimension = 128)

  # A freshly constructed index holds no items.
  expect_equal(idx$size(), 0)

  expect_equal(idx$n_trees, 50)
  expect_equal(idx$metric, "angular")
  expect_equal(idx$dimension, 128)
})

test_that("HNSWIndex initializes with custom parameters", {
  # Every constructor argument is given explicitly and must be readable
  # back from the corresponding field.
  idx <- HNSWIndex$new(dimension = 64, metric = "euclidean",
                       n_trees = 100, search_k = 500)

  expect_equal(idx$search_k, 500)
  expect_equal(idx$n_trees, 100)
  expect_equal(idx$metric, "euclidean")
  expect_equal(idx$dimension, 64)
})

test_that("HNSWIndex add_items works with single vector", {
  idx <- HNSWIndex$new(dimension = 4)

  # A plain numeric vector (not a matrix) is passed for a single item.
  idx$add_items(ids = "a", vectors = c(1, 2, 3, 4))

  expect_equal(idx$get_ids(), "a")
  expect_equal(idx$size(), 1)
})

test_that("HNSWIndex add_items works with matrix", {
  idx <- HNSWIndex$new(dimension = 3)

  # One row per item: the three unit vectors, i.e. the 3x3 identity matrix.
  unit_rows <- diag(3)
  item_ids <- c("x", "y", "z")

  idx$add_items(ids = item_ids, vectors = unit_rows)

  expect_equal(idx$size(), 3)
  # Order of the returned ids is not asserted, only membership.
  expect_true(all(c("x", "y", "z") %in% idx$get_ids()))
})

test_that("HNSWIndex add_items validates dimensions", {
  idx <- HNSWIndex$new(dimension = 4)

  # A single row with three columns, one short of the index dimension.
  too_narrow <- matrix(c(1, 2, 3), ncol = 3)

  expect_error(
    idx$add_items(ids = "a", vectors = too_narrow),
    regexp = "dimension mismatch"
  )
})

test_that("HNSWIndex add_items validates ID count", {
  idx <- HNSWIndex$new(dimension = 3)

  # Two rows of data but only one id: the counts disagree.
  two_rows <- matrix(rnorm(6), ncol = 3)

  expect_error(
    idx$add_items(ids = "a", vectors = two_rows),
    regexp = "must match"
  )
})

test_that("HNSWIndex get_vector returns correct vector", {
  idx <- HNSWIndex$new(dimension = 3)

  # Store one vector and read it back by id.
  stored <- c(1.5, 2.5, 3.5)
  idx$add_items(ids = "test", vectors = stored)

  expect_equal(idx$get_vector("test"), stored)
})

test_that("HNSWIndex get_vector returns NULL for missing ID", {
  # Nothing has been added, so any lookup is for an unknown id.
  empty_idx <- HNSWIndex$new(dimension = 3)
  missing <- empty_idx$get_vector("nonexistent")
  expect_null(missing)
})

test_that("HNSWIndex search returns results", {
  idx <- HNSWIndex$new(dimension = 3)

  # Index the three unit vectors (rows of the identity matrix), then build.
  idx$add_items(ids = c("x", "y", "z"), vectors = diag(3))
  idx$build()

  # The query coincides with item "x".
  hits <- idx$search(c(1, 0, 0), k = 2)

  # Shape of the result: a data frame with id and distance columns,
  # truncated to the k requested rows.
  expect_true(is.data.frame(hits))
  for (column in c("id", "distance")) {
    expect_true(column %in% names(hits))
  }
  expect_equal(nrow(hits), 2)

  # The nearest neighbour comes first.
  expect_equal(hits$id[1], "x")
})

test_that("HNSWIndex search returns empty for empty index", {
  # Searching before any item is added must still yield a data frame,
  # just one without rows.
  empty_idx <- HNSWIndex$new(dimension = 3)
  hits <- empty_idx$search(c(1, 0, 0), k = 5)

  expect_equal(nrow(hits), 0)
  expect_true(is.data.frame(hits))
})

test_that("HNSWIndex search limits k to available items", {
  idx <- HNSWIndex$new(dimension = 3)

  # Two random items are indexed ...
  two_items <- matrix(rnorm(6), nrow = 2)
  idx$add_items(ids = c("a", "b"), vectors = two_items)
  idx$build()

  # ... but ten neighbours are requested; only two can come back.
  random_query <- rnorm(3)
  hits <- idx$search(random_query, k = 10)
  expect_equal(nrow(hits), 2)
})

test_that("HNSWIndex remove_items works", {
  idx <- HNSWIndex$new(dimension = 3)

  three_items <- matrix(rnorm(9), nrow = 3)
  idx$add_items(ids = c("a", "b", "c"), vectors = three_items)
  expect_equal(idx$size(), 3)

  # Drop the middle item; the size shrinks and only "b" becomes unknown.
  idx$remove_items("b")
  expect_equal(idx$size(), 2)
  expect_null(idx$get_vector("b"))
  expect_false(is.null(idx$get_vector("a")))
})

test_that("HNSWIndex clear works", {
  idx <- HNSWIndex$new(dimension = 3)

  two_items <- matrix(rnorm(6), nrow = 2)
  idx$add_items(ids = c("a", "b"), vectors = two_items)
  expect_equal(idx$size(), 2)

  # After clearing, both the size and the id listing must be empty.
  idx$clear()
  expect_equal(idx$size(), 0)
  expect_length(idx$get_ids(), 0)
})

test_that("HNSWIndex save and load works", {
  # Round trip: persist a built index, restore it into a fresh instance and
  # check that configuration and stored vectors survive.
  index <- HNSWIndex$new(dimension = 4, metric = "euclidean")

  vectors <- matrix(rnorm(12), nrow = 3)
  index$add_items(ids = c("a", "b", "c"), vectors = vectors)
  index$build()

  # Use a unique temp file so repeated or parallel runs cannot collide on a
  # fixed name, and register cleanup up front so the files are removed even
  # if save() or load() throws. The ".annoy" sidecar is cleaned up as well;
  # presumably save() may write it next to the main file -- verify against
  # the implementation.
  temp_path <- tempfile(pattern = "test_hnsw_", fileext = ".rds")
  on.exit(
    unlink(c(temp_path, paste0(temp_path, ".annoy"))),
    add = TRUE
  )

  # Save
  index$save(temp_path)
  expect_true(file.exists(temp_path))

  # Load into a new index constructed with a deliberately different
  # dimension; the assertions below expect load() to replace it.
  new_index <- HNSWIndex$new(dimension = 1)
  new_index$load(temp_path)

  expect_equal(new_index$dimension, 4)
  expect_equal(new_index$metric, "euclidean")
  expect_equal(new_index$size(), 3)
  expect_equal(new_index$get_vector("a"), vectors[1, ])
})

test_that("HNSWIndex brute force search works with cosine distance", {
  idx <- HNSWIndex$new(dimension = 3, metric = "cosine")

  # Three (approximately) unit-length vectors in the x-y plane:
  # the x axis, the diagonal, and the y axis.
  v1 <- c(1, 0, 0)
  v2 <- c(0.7071, 0.7071, 0)
  v3 <- c(0, 1, 0)
  idx$add_items(vectors = rbind(v1, v2, v3), ids = c("a", "b", "c"))

  # The query leans strongly towards v1; build() is never called here.
  near_v1 <- c(0.9, 0.1, 0)
  hits <- idx$search(near_v1, k = 3)

  # All items come back, with "a" ranked as the closest.
  expect_equal(nrow(hits), 3)
  expect_equal(hits$id[1], "a")
})

test_that("HNSWIndex brute force search works with euclidean distance", {
  idx <- HNSWIndex$new(dimension = 2, metric = "euclidean")

  # Two 2D points, one per row: (0, 0) and (10, 10).
  points <- matrix(c(0, 0, 10, 10), nrow = 2, byrow = TRUE)
  idx$add_items(ids = c("origin", "far"), vectors = points)

  # The query sits right next to the origin.
  hits <- idx$search(c(0.1, 0.1), k = 2)

  expect_equal(hits$id[1], "origin")
  # Results must be ordered by strictly increasing distance here.
  expect_lt(hits$distance[1], hits$distance[2])
})

test_that("HNSWIndex brute force search works with manhattan distance", {
  idx <- HNSWIndex$new(dimension = 2, metric = "manhattan")

  # Two 2D points, one per row: (0, 0) and (1, 1).
  points <- matrix(c(0, 0, 1, 1), nrow = 2, byrow = TRUE)
  idx$add_items(ids = c("a", "b"), vectors = points)

  # From (0.5, 0): |0.5| + |0| = 0.5 to "a", |0.5| + |1| = 1.5 to "b".
  hits <- idx$search(c(0.5, 0), k = 2)

  expect_equal(hits$id[1], "a")
})

test_that("create_hnsw_index factory works", {
  # The factory must hand back an HNSWIndex carrying the requested settings.
  made <- create_hnsw_index(
    dimension = 128,
    metric = "euclidean",
    n_trees = 25
  )

  expect_true(inherits(made, "HNSWIndex"))
  expect_equal(made$n_trees, 25)
  expect_equal(made$metric, "euclidean")
  expect_equal(made$dimension, 128)
})

test_that("load_hnsw_index factory works", {
  # Create a one-item index, persist it, then restore it through the
  # load_hnsw_index() factory rather than the $load() method.
  index <- create_hnsw_index(dimension = 8)
  index$add_items(ids = c("test"), vectors = rnorm(8))
  index$build()

  # Use a unique temp file so repeated or parallel runs cannot collide on a
  # fixed name, and register cleanup up front so the files are removed even
  # if save() or the factory throws. The ".annoy" sidecar is cleaned up as
  # well; presumably save() may write it next to the main file -- verify
  # against the implementation.
  temp_path <- tempfile(pattern = "test_load_hnsw_", fileext = ".rds")
  on.exit(
    unlink(c(temp_path, paste0(temp_path, ".annoy"))),
    add = TRUE
  )

  index$save(temp_path)

  # Load via factory
  loaded <- load_hnsw_index(temp_path)

  expect_true(inherits(loaded, "HNSWIndex"))
  expect_equal(loaded$dimension, 8)
  expect_equal(loaded$size(), 1)
})

test_that("HNSWIndex auto-builds on search if not built", {
  idx <- HNSWIndex$new(dimension = 3)

  # Add one item and deliberately skip the explicit build() step.
  only_item <- c(1, 2, 3)
  idx$add_items(ids = "a", vectors = only_item)

  # Searching must still succeed and return the single stored item.
  hits <- idx$search(only_item, k = 1)
  expect_equal(nrow(hits), 1)
})

# Try the VectrixDB package in your browser
#
# Any scripts or data that you put into this service are public.
#
# VectrixDB documentation built on Feb. 20, 2026, 5:09 p.m.