# tests/testthat/test-index.R

# Verifies that has_index() is FALSE for a column before create_index() is
# called on it and TRUE afterwards.
test_that("create_index and has_index work", {
  f <- tempfile(fileext = ".vtr")
  # Remove the data file and the index sidecar when the test exits.
  # add = TRUE appends to, rather than replaces, exit handlers that are
  # already registered in this frame.
  # NOTE(review): the "<file>.<column>.vtri" sidecar path is taken from this
  # cleanup code only -- confirm against create_index().
  on.exit(unlink(c(f, paste0(f, ".name.vtri"))), add = TRUE)

  # Small fixture: one character column to index, one integer column.
  df <- data.frame(
    name = c("alice", "bob", "charlie", "diana", "eve"),
    val = 1:5,
    stringsAsFactors = FALSE
  )
  write_vtr(df, f)

  # No index exists until one is explicitly created.
  expect_false(has_index(f, "name"))
  create_index(f, "name")
  expect_true(has_index(f, "name"))
})

# Checks that an equality filter on an indexed character column returns
# exactly the matching rows. Only correctness is asserted here; lookup speed
# is not measured.
test_that("hash index accelerates equality lookups on strings", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".genus.vtri"))), add = TRUE)

  # Five genera, 100 consecutive rows each (500 rows in total).
  df <- data.frame(
    genus = rep(c("Quercus", "Pinus", "Fagus", "Betula", "Acer"), each = 100),
    val = seq_len(500),
    stringsAsFactors = FALSE
  )
  # presumably batch_size = 100L places each genus in its own row group --
  # TODO confirm against write_vtr()
  write_vtr(df, f, batch_size = 100L)
  create_index(f, "genus")

  result <- tbl(f) |> filter(genus == "Pinus") |> collect()
  expect_equal(nrow(result), 100L)
  expect_true(all(result$genus == "Pinus"))
})

# Checks that an equality filter on an indexed integer column returns the
# single matching row.
test_that("hash index works on integer columns", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".id.vtri"))), add = TRUE)

  # `id` is a unique integer key. `val` is a double payload column whose
  # values are never asserted on; a fixed sequence is used instead of random
  # draws so the fixture is reproducible and the global RNG stream is left
  # untouched.
  df <- data.frame(id = 1:500, val = seq(0, 1, length.out = 500))
  write_vtr(df, f, batch_size = 50L)
  create_index(f, "id")

  # The literal 250 is a double compared against an integer column.
  result <- tbl(f) |> filter(id == 250) |> collect()
  expect_equal(nrow(result), 1L)
  expect_equal(result$id, 250)
})

# Checks that create_index() accepts ci = TRUE and that the resulting index is
# reported by has_index().
test_that("hash index with case-insensitive flag", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".name.vtri"))), add = TRUE)

  # Mixed-case names so a case-insensitive index has something to fold.
  df <- data.frame(
    name = c("Alice", "BOB", "Charlie"),
    val = 1:3,
    stringsAsFactors = FALSE
  )
  write_vtr(df, f)
  create_index(f, "name", ci = TRUE)

  # Only the existence of the index is asserted; no lookup is performed.
  # NOTE(review): the original author's note says a CI index hashes "alice"
  # to the same bucket as "Alice" to select row groups, while the filter
  # itself still matches exactly. That behavior is not exercised by this test.
  expect_true(has_index(f, "name"))
})

# Checks that filtering an indexed column for a value that is not present
# yields a zero-row result.
test_that("hash index handles empty result", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".name.vtri"))), add = TRUE)

  df <- data.frame(
    name = c("a", "b", "c"),
    val = 1:3,
    stringsAsFactors = FALSE
  )
  write_vtr(df, f)
  create_index(f, "name")

  # "zzz" does not occur in the `name` column.
  result <- tbl(f) |> filter(name == "zzz") |> collect()
  expect_equal(nrow(result), 0L)
})

# Checks that an indexed equality filter returns every matching row when the
# matches are scattered throughout the file.
test_that("hash index with multiple row groups returns correct results", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".key.vtri"))), add = TRUE)

  # The ten keys cycle a, b, ..., j fifty times, so each key recurs every ten
  # rows and appears 50 times in total.
  df <- data.frame(
    key = rep(letters[1:10], times = 50),
    val = seq_len(500),
    stringsAsFactors = FALSE
  )
  # presumably batch_size = 100L yields five row groups, each containing every
  # key -- TODO confirm against write_vtr()
  write_vtr(df, f, batch_size = 100L)
  create_index(f, "key")

  result <- tbl(f) |> filter(key == "e") |> collect()
  expect_equal(nrow(result), 50L)
  expect_true(all(result$key == "e"))
})

# Checks that create_index() signals an error whose message matches
# "not found" when the requested column is not in the file.
test_that("create_index errors on non-existent column", {
  f <- tempfile(fileext = ".vtr")
  # Only the data file is cleaned up; this test does not expect an index to be
  # created. add = TRUE keeps any exit handlers registered earlier.
  on.exit(unlink(f), add = TRUE)

  # The file has a single column `x`, so "nonexistent" cannot be resolved.
  write_vtr(data.frame(x = 1:10), f)
  expect_error(create_index(f, "nonexistent"), "not found")
})

# Checks that after the data file is overwritten, calling create_index() again
# produces an index that returns correct results for the new data.
test_that("index survives re-creation after data change", {
  f <- tempfile(fileext = ".vtr")
  # Clean up the data file and the index sidecar on exit; add = TRUE keeps any
  # exit handlers registered earlier in this frame.
  on.exit(unlink(c(f, paste0(f, ".name.vtri"))), add = TRUE)

  # First version of the file, indexed on `name`.
  df1 <- data.frame(name = c("a", "b"), val = 1:2, stringsAsFactors = FALSE)
  write_vtr(df1, f)
  create_index(f, "name")

  # Overwrite the same path with different rows and different key values.
  df2 <- data.frame(name = c("x", "y", "z"), val = 1:3, stringsAsFactors = FALSE)
  write_vtr(df2, f)

  # The index built from df1 no longer describes the file, so rebuild it.
  create_index(f, "name")

  # "y" exists only in the second version of the data.
  result <- tbl(f) |> filter(name == "y") |> collect()
  expect_equal(nrow(result), 1L)
  expect_equal(result$name, "y")
})

# Try the vectra package in your browser
#
# Any scripts or data that you put into this service are public.
#
# vectra documentation built on May 8, 2026, 9:06 a.m.