tests/testthat/test-topn.R

# --- TopN heap-based optimization tests ---

test_that("slice_min basic correctness", {
  df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0),
                   y = c("e", "a", "c", "b", "d"),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
  expect_equal(nrow(result), 3)
  expect_equal(result$x, c(1, 2, 3))
  expect_equal(result$y, c("a", "b", "c"))
})

test_that("slice_max basic correctness", {
  df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0),
                   y = c("e", "a", "c", "b", "d"),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_max(order_by = x, n = 3) |> collect()
  expect_equal(nrow(result), 3)
  expect_equal(result$x, c(5, 4, 3))
  expect_equal(result$y, c("e", "d", "c"))
})

test_that("topn with n > nrow returns all rows sorted", {
  df <- data.frame(x = c(3.0, 1.0, 2.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 10) |> collect()
  expect_equal(nrow(result), 3)
  expect_equal(result$x, c(1, 2, 3))
})

test_that("topn with n = 1", {
  df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result_min <- tbl(f) |> slice_min(order_by = x, n = 1) |> collect()
  expect_equal(result_min$x, 1)
  result_max <- tbl(f) |> slice_max(order_by = x, n = 1) |> collect()
  expect_equal(result_max$x, 5)
})

test_that("topn with n = nrow returns all rows sorted", {
  df <- data.frame(x = c(3.0, 1.0, 2.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
  expect_equal(result$x, c(1, 2, 3))
})

test_that("topn with NAs sorts NAs last", {
  df <- data.frame(x = c(5.0, NA, 3.0, NA, 1.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
  expect_equal(result$x, c(1, 3))
})

test_that("topn with all NAs returns NAs", {
  df <- data.frame(x = c(NA_real_, NA_real_, NA_real_))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
  expect_equal(nrow(result), 2)
  expect_true(all(is.na(result$x)))
})

test_that("topn on integer column", {
  df <- data.frame(x = c(50L, 10L, 30L, 20L, 40L))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
  expect_equal(result$x, c(10, 20, 30))
})

test_that("topn on string column", {
  df <- data.frame(x = c("cherry", "apple", "banana", "date", "elderberry"),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
  expect_equal(result$x, c("apple", "banana", "cherry"))
})

test_that("topn on boolean column", {
  df <- data.frame(x = c(TRUE, FALSE, TRUE, FALSE, TRUE),
                   y = c(1.0, 2.0, 3.0, 4.0, 5.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
  expect_equal(nrow(result), 2)
  expect_equal(result$x, c(FALSE, FALSE))
})

test_that("topn preserves all columns", {
  df <- data.frame(a = c(3.0, 1.0, 2.0), b = c("x", "y", "z"),
                   c = c(TRUE, FALSE, TRUE), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = a, n = 2) |> collect()
  expect_equal(result$a, c(1, 2))
  expect_equal(result$b, c("y", "z"))
  expect_equal(result$c, c(FALSE, TRUE))
})

test_that("topn after filter", {
  df <- data.frame(x = c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> filter(x > 3) |> slice_min(order_by = x, n = 2) |> collect()
  expect_equal(result$x, c(4, 5))
})

test_that("topn after mutate", {
  df <- data.frame(x = c(3.0, 1.0, 2.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> mutate(y = x * 2) |> slice_min(order_by = y, n = 2) |> collect()
  expect_equal(result$x, c(1, 2))
  expect_equal(result$y, c(2, 4))
})

test_that("topn with duplicate values includes ties by default", {
  df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 3.0, 3.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  # with_ties = TRUE (default): boundary is 3.0 (4th smallest), keep all <= 3
  result <- tbl(f) |> slice_min(order_by = x, n = 4) |> collect()
  expect_equal(nrow(result), 6)
  expect_equal(result$x, c(1, 2, 2, 3, 3, 3))
})

test_that("topn with duplicate values without ties returns exactly n", {
  df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 3.0, 3.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 4, with_ties = FALSE) |> collect()
  expect_equal(nrow(result), 4)
})

test_that("topn single row dataset", {
  df <- data.frame(x = 42.0)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 1) |> collect()
  expect_equal(result$x, 42)
})

test_that("topn large dataset correctness", {
  set.seed(42)
  n <- 10000
  df <- data.frame(x = runif(n), y = sample(letters, n, replace = TRUE),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)

  k <- 50
  result <- tbl(f) |> slice_min(order_by = x, n = k) |> collect()
  expected <- head(df[order(df$x), ], k)
  expect_equal(result$x, expected$x)
})

Try the vectra package in your browser

Any scripts or data that you put into this service are public.

vectra documentation built on May 8, 2026, 9:06 a.m.