# tests/testthat/test-query.R

# Bail out of this file unless the NOT_CRAN environment variable is exactly
# "true". The early return() sits before helper.R is loaded and before any
# test_that() block below is reached.
if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
  return()
}

# Load the shared test helpers (their contents are not shown in this file).
source("helper.R")

# Integration test: an embedding query returns the expected fields, honours
# n_results, ranks the closest documents first, and errors for a collection
# that does not exist.
test_that("query works", {
  client <- chroma_connect()
  test_id <- basename(tempfile("test")) # Generate unique test ID
  collection_name <- paste0("test_collection_", test_id)

  # Create collection; register its removal right away so it is cleaned up
  # even if a later call errors, instead of accumulating across test runs.
  create_collection(client, collection_name)
  on.exit(delete_collection(client, collection_name), add = TRUE)

  # Add documents with embeddings
  docs <- c("apple fruit", "banana fruit", "carrot vegetable")
  ids <- c("id1", "id2", "id3")
  embeddings <- list(
    c(1.0, 0.0, 0.0), # apple
    c(0.8, 0.2, 0.0), # banana (similar to apple)
    c(0.0, 0.0, 1.0) # carrot (different)
  )
  add_documents(
    client,
    collection_name,
    documents = docs,
    ids = ids,
    embeddings = embeddings
  )

  # Query with embeddings
  result <- query(
    client,
    collection_name,
    query_embeddings = list(c(1.0, 0.0, 0.0)), # should match apple best
    n_results = 2
  )
  expect_type(result, "list")
  expect_true(all(c("documents", "metadatas", "distances") %in% names(result)))
  expect_length(result$documents[[1]], 2)
  expect_equal(result$documents[[1]][[1]], "apple fruit") # should be closest match
  # The banana embedding is the next nearest to the query vector, so it must
  # be the second of the two requested results.
  expect_equal(result$documents[[1]][[2]], "banana fruit")

  # Query non-existent collection should fail
  expect_error(
    query(
      client,
      "nonexistent_collection",
      query_embeddings = list(c(1.0, 0.0, 0.0))
    ),
    "Collection nonexistent_collection does not exist|HTTP 400"
  )
})

# Integration test: `where` (metadata) and `where_document` (document text)
# filters each narrow the query result down to the single matching document.
test_that("query with filters works", {
  client <- chroma_connect()
  test_id <- basename(tempfile("test")) # Generate unique test ID
  collection_name <- paste0("test_collection_", test_id)

  # Create collection; register its removal right away so it is cleaned up
  # even if a later call errors, instead of accumulating across test runs.
  create_collection(client, collection_name)
  on.exit(delete_collection(client, collection_name), add = TRUE)

  # Add documents with embeddings and metadata
  docs <- c("doc1", "doc2", "doc3")
  ids <- c("id1", "id2", "id3")
  embeddings <- list(
    c(1.0, 0.0, 0.0),
    c(0.0, 1.0, 0.0),
    c(0.0, 0.0, 1.0)
  )
  metadatas <- list(
    list(category = "A", year = 2024),
    list(category = "B", year = 2023),
    list(category = "A", year = 2022)
  )
  add_documents(
    client,
    collection_name,
    documents = docs,
    ids = ids,
    embeddings = embeddings,
    metadatas = metadatas
  )

  # Query with where filter: only doc1 has year >= 2024
  result <- query(
    client,
    collection_name,
    query_embeddings = list(c(1.0, 0.0, 0.0)),
    where = list(year = list("$gte" = 2024))
  )
  expect_type(result, "list")
  expect_length(result$documents[[1]], 1)
  expect_equal(result$documents[[1]][[1]], "doc1")

  # Query with where_document filter: only doc1 contains the text "doc1"
  result <- query(
    client,
    collection_name,
    query_embeddings = list(c(1.0, 0.0, 0.0)),
    where_document = list("$contains" = "doc1")
  )
  expect_type(result, "list")
  expect_length(result$documents[[1]], 1)
  expect_equal(result$documents[[1]][[1]], "doc1")
})

# Integration test: every requested `include` field shows up in the query
# result, and an unknown `include` value is rejected with an error.
test_that("query include parameter works", {
  client <- chroma_connect()
  test_id <- basename(tempfile("test")) # Generate unique test ID
  collection_name <- paste0("test_collection_", test_id)

  # Create collection; register its removal right away so it is cleaned up
  # even if a later call errors, instead of accumulating across test runs.
  create_collection(client, collection_name)
  on.exit(delete_collection(client, collection_name), add = TRUE)

  # Add documents with all possible attributes
  docs <- c("doc1", "doc2")
  ids <- c("id1", "id2")
  embeddings <- list(c(1.0, 0.0), c(0.0, 1.0))
  metadatas <- list(
    list(source = "test1"),
    list(source = "test2")
  )
  uris <- c("http://example.com/1", "http://example.com/2")
  add_documents(
    client,
    collection_name,
    documents = docs,
    ids = ids,
    embeddings = embeddings,
    metadatas = metadatas,
    uris = uris
  )

  # Test different include combinations
  includes <- list(
    c("documents", "embeddings"),
    c("metadatas", "distances"),
    c("uris", "data"),
    c("documents", "embeddings", "metadatas", "distances", "uris", "data")
  )

  for (include in includes) {
    result <- query(
      client,
      collection_name,
      query_embeddings = list(c(1.0, 0.0)),
      include = include
    )
    expect_type(result, "list")
    # `info` names the combination under test so a failure inside the loop
    # can be traced back to the offending `include` value.
    expect_true(
      all(include %in% names(result)),
      info = paste("include:", paste(include, collapse = ", "))
    )
  }

  # Test invalid include parameter
  expect_error(
    query(
      client,
      collection_name,
      query_embeddings = list(c(1.0, 0.0)),
      include = c("invalid")
    ),
    "Input should be 'documents', 'embeddings'|HTTP"
  )
})

# Try the rchroma package in your browser
#
# Any scripts or data that you put into this service are public.
#
# rchroma documentation built on April 4, 2025, 1:37 a.m.