# tests/testthat/test-advanced-search.R

# Tests for advanced_search.R - Facets, ACL, Text Analyzers

test_that("FacetConfig works correctly", {
  # Construct a facet configuration with four explicit options;
  # include_zero is deliberately not supplied.
  facet_cfg <- FacetConfig$new(
    field = "category", limit = 5,
    min_count = 2, sort_by = "count"
  )

  # include_zero was not passed to the constructor and is expected to be FALSE.
  expect_false(facet_cfg$include_zero)

  # Each supplied option must be readable back from the object unchanged.
  expect_equal(facet_cfg$sort_by, "count")
  expect_equal(facet_cfg$min_count, 2)
  expect_equal(facet_cfg$limit, 5)
  expect_equal(facet_cfg$field, "category")
})

test_that("FacetAggregator aggregates correctly", {
  # Return the count stored for `target` among a facet's values.
  # Starts at 0 (no match); if several entries match, the last one wins.
  count_of <- function(facet, target) {
    found <- 0
    for (entry in facet$values) {
      if (entry$value == target) {
        found <- entry$count
      }
    }
    found
  }

  # Five documents: "tech" appears three times as a category and
  # "Alice" appears three times as an author.
  docs <- list(
    list(category = "tech", author = "Alice"),
    list(category = "tech", author = "Bob"),
    list(category = "science", author = "Alice"),
    list(category = "tech", author = "Charlie"),
    list(category = "health", author = "Alice")
  )

  by_field <- FacetAggregator$new()$aggregate(docs, c("category", "author"))

  # Category facet: present, covers all five documents, "tech" counted 3x.
  expect_true("category" %in% names(by_field))
  by_category <- by_field$category
  expect_equal(by_category$total_count, 5)
  expect_equal(count_of(by_category, "tech"), 3)

  # Author facet: present, "Alice" counted 3x.
  expect_true("author" %in% names(by_field))
  by_author <- by_field$author
  expect_equal(count_of(by_author, "Alice"), 3)
})

test_that("FacetAggregator handles nested fields", {
  # Three documents whose facet value lives one level down, under meta$type.
  docs <- lapply(
    c("A", "A", "B"),
    function(kind) list(meta = list(type = kind))
  )

  # The nested field is addressed with a dotted path.
  nested_key <- "meta.type"
  by_field <- FacetAggregator$new()$aggregate(docs, nested_key)

  # The result is keyed by the dotted path and accounts for every document.
  expect_true("meta.type" %in% names(by_field))
  nested_facet <- by_field$`meta.type`
  expect_equal(nested_facet$total_count, 3)
})

test_that("ACLPrincipal parsing works", {
  # Each case pairs an ACL string with the type/value the parsed principal
  # is expected to expose. The last case has no "type:" prefix and is
  # expected to come back as a user principal.
  cases <- list(
    list(spec = "user:alice", type = "user", value = "alice"),
    list(spec = "group:engineering", type = "group", value = "engineering"),
    list(spec = "role:admin", type = "role", value = "admin"),
    list(spec = "bob", type = "user", value = "bob")
  )

  for (tc in cases) {
    principal <- parse_acl(tc$spec)
    expect_equal(principal$type, tc$type, info = tc$spec)
    expect_equal(principal$value, tc$value, info = tc$spec)
  }
})

test_that("ACLPrincipal matching works", {
  alice <- ACLPrincipal$new("user", "alice")
  alice_again <- ACLPrincipal$new("user", "alice")
  bob <- ACLPrincipal$new("user", "bob")
  alice_as_group <- ACLPrincipal$new("group", "alice")

  # Equal type and value: match.
  expect_true(alice$matches(alice_again))

  # Same type, different value: no match.
  expect_false(alice$matches(bob))

  # Same value, different type: no match.
  expect_false(alice$matches(alice_as_group))

  # A "*" value on the receiving principal matches a concrete user.
  any_user <- ACLPrincipal$new("user", "*")
  expect_true(any_user$matches(alice))
})

test_that("ACLConfig from list works", {
  # "public" is expected to set the public flag rather than add a principal,
  # leaving two read principals.
  open_entries <- c("user:alice", "group:engineering", "public")
  open_cfg <- acl_config_from_list(open_entries)

  expect_true(open_cfg$is_public)
  expect_length(open_cfg$read_principals, 2)

  # A "deny:" prefixed entry is expected to land in deny_principals,
  # parsed down to the principal it names.
  deny_cfg <- acl_config_from_list(c("user:alice", "deny:user:bob"))
  denied <- deny_cfg$deny_principals
  expect_length(denied, 1)
  expect_equal(denied[[1]]$value, "bob")
})

test_that("ACLFilter filters documents correctly", {
  # Named acl_filter (not `filter`) to avoid shadowing stats::filter.
  acl_filter <- ACLFilter$new()

  # Pull the ids out of a filtered document list. vapply() guarantees a
  # character vector (character(0) for an empty result) and fails loudly if
  # a document has a missing or non-scalar id, unlike sapply(), whose
  # return type depends on its input.
  doc_ids <- function(docs) {
    vapply(docs, function(d) d$id, character(1))
  }

  documents <- list(
    list(
      id = "doc1",
      metadata = list(`_acl` = c("user:alice", "group:engineering"))
    ),
    list(id = "doc2", metadata = list(`_acl` = c("user:bob"))),
    list(id = "doc3", metadata = list(`_acl` = c("public"))),
    list(id = "doc4", metadata = list())  # No ACL
  )

  # Alice can see doc1 (listed explicitly) and doc3 (public), but not doc2.
  filtered <- acl_filter$filter(
    documents, c("user:alice"),
    default_allow = FALSE
  )
  ids <- doc_ids(filtered)
  expect_true("doc1" %in% ids)
  expect_true("doc3" %in% ids)
  expect_false("doc2" %in% ids)

  # With default_allow, the document without any ACL (doc4) is included.
  filtered2 <- acl_filter$filter(
    documents, c("user:alice"),
    default_allow = TRUE
  )
  ids2 <- doc_ids(filtered2)
  expect_true("doc4" %in% ids2)
})

test_that("SimpleStemmer works correctly", {
  stemmer <- SimpleStemmer$new()

  # Input word -> expected stem. The final entry is a short word that is
  # expected to come back unchanged.
  expected_stems <- c(
    running = "runn",
    happiness = "happi",
    beautiful = "beauti",
    quickly = "quick",
    go = "go"
  )

  for (word in names(expected_stems)) {
    expect_equal(stemmer$stem(word), expected_stems[[word]])
  }
})

test_that("TextAnalyzer tokenizes correctly", {
  # Lowercasing on, stopword removal off: every token should survive,
  # lowercased and in input order, including the numeric one.
  plain_analyzer <- TextAnalyzer$new(lowercase = TRUE, remove_stopwords = FALSE)

  expected <- c("hello", "world", "123")
  actual <- plain_analyzer$analyze("Hello World 123")

  expect_equal(actual, expected)
})

test_that("TextAnalyzer removes stopwords", {
  stop_analyzer <- TextAnalyzer$new(lowercase = TRUE, remove_stopwords = TRUE)
  kept <- stop_analyzer$analyze("The quick brown fox is fast")

  # Stopwords must be gone from the output...
  expect_false("the" %in% kept)
  expect_false("is" %in% kept)

  # ...while content words remain.
  expect_true("quick" %in% kept)
  expect_true("brown" %in% kept)
})

test_that("TextAnalyzer applies stemming", {
  stemming_analyzer <- TextAnalyzer$new(
    lowercase = TRUE, remove_stopwords = TRUE, use_stemmer = TRUE
  )

  stems <- stemming_analyzer$analyze("running jumps quickly")

  # Accept either the stemmed form or the original word.
  # NOTE(review): because "running" is also accepted, this passes even when
  # no stemming happens -- confirm whether the check should require "runn".
  acceptable <- c("runn", "running")
  expect_true(any(acceptable %in% stems))
})

test_that("text_analyzer_english works", {
  english <- text_analyzer_english()
  out <- english$analyze("The quick brown foxes are jumping")

  # The English preset must drop these stopwords...
  expect_false("the" %in% out)
  expect_false("are" %in% out)

  # ...but still leave at least one token behind.
  expect_gt(length(out), 0)
})

test_that("KeywordAnalyzer treats input as single token", {
  keyword <- text_analyzer_keyword()
  out <- keyword$analyze("Hello World")

  # The whole input is expected back as exactly one lowercased token,
  # inner space included.
  expect_length(out, 1)
  expect_equal(out[1], "hello world")
})

test_that("AnalyzerChain chains analyzers", {
  # Two stages: one configured for lowercasing, one for stopword removal.
  stages <- list(
    TextAnalyzer$new(lowercase = TRUE),
    TextAnalyzer$new(remove_stopwords = TRUE)
  )
  pipeline <- AnalyzerChain$new(stages)

  # "Quick" must come out of the chain lowercased.
  out <- pipeline$analyze("THE Quick")
  expect_true("quick" %in% out)
})

test_that("EnhancedSearchResults works correctly", {
  # One hit, no facets, plus the summary counters and timing.
  single_hit <- list(list(id = "1", score = 0.9))
  enhanced <- EnhancedSearchResults$new(
    results = single_hit,
    facets = list(),
    total_count = 100,
    filtered_count = 50,
    query_time_ms = 15.5
  )

  # Constructor arguments are exposed as fields.
  expect_equal(enhanced$total_count, 100)
  expect_equal(enhanced$filtered_count, 50)
  expect_equal(enhanced$query_time_ms, 15.5)

  # to_list() yields a list that carries total_count through.
  as_plain <- enhanced$to_list()
  expect_true(is.list(as_plain))
  expect_equal(as_plain$total_count, 100)
})

# Try the VectrixDB package in your browser
#
# Any scripts or data that you put into this service are public.
#
# VectrixDB documentation built on Feb. 20, 2026, 5:09 p.m.