Nothing
# --- TopN heap-based optimization tests ---
test_that("slice_min basic correctness", {
df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0),
y = c("e", "a", "c", "b", "d"),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
expect_equal(nrow(result), 3)
expect_equal(result$x, c(1, 2, 3))
expect_equal(result$y, c("a", "b", "c"))
})
test_that("slice_max basic correctness", {
df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0),
y = c("e", "a", "c", "b", "d"),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_max(order_by = x, n = 3) |> collect()
expect_equal(nrow(result), 3)
expect_equal(result$x, c(5, 4, 3))
expect_equal(result$y, c("e", "d", "c"))
})
test_that("topn with n > nrow returns all rows sorted", {
df <- data.frame(x = c(3.0, 1.0, 2.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 10) |> collect()
expect_equal(nrow(result), 3)
expect_equal(result$x, c(1, 2, 3))
})
test_that("topn with n = 1", {
df <- data.frame(x = c(5.0, 1.0, 3.0, 2.0, 4.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result_min <- tbl(f) |> slice_min(order_by = x, n = 1) |> collect()
expect_equal(result_min$x, 1)
result_max <- tbl(f) |> slice_max(order_by = x, n = 1) |> collect()
expect_equal(result_max$x, 5)
})
test_that("topn with n = nrow returns all rows sorted", {
df <- data.frame(x = c(3.0, 1.0, 2.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
expect_equal(result$x, c(1, 2, 3))
})
test_that("topn with NAs sorts NAs last", {
df <- data.frame(x = c(5.0, NA, 3.0, NA, 1.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
expect_equal(result$x, c(1, 3))
})
test_that("topn with all NAs returns NAs", {
df <- data.frame(x = c(NA_real_, NA_real_, NA_real_))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
expect_equal(nrow(result), 2)
expect_true(all(is.na(result$x)))
})
test_that("topn on integer column", {
df <- data.frame(x = c(50L, 10L, 30L, 20L, 40L))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
expect_equal(result$x, c(10, 20, 30))
})
test_that("topn on string column", {
df <- data.frame(x = c("cherry", "apple", "banana", "date", "elderberry"),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 3) |> collect()
expect_equal(result$x, c("apple", "banana", "cherry"))
})
test_that("topn on boolean column", {
df <- data.frame(x = c(TRUE, FALSE, TRUE, FALSE, TRUE),
y = c(1.0, 2.0, 3.0, 4.0, 5.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 2) |> collect()
expect_equal(nrow(result), 2)
expect_equal(result$x, c(FALSE, FALSE))
})
test_that("topn preserves all columns", {
df <- data.frame(a = c(3.0, 1.0, 2.0), b = c("x", "y", "z"),
c = c(TRUE, FALSE, TRUE), stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = a, n = 2) |> collect()
expect_equal(result$a, c(1, 2))
expect_equal(result$b, c("y", "z"))
expect_equal(result$c, c(FALSE, TRUE))
})
test_that("topn after filter", {
df <- data.frame(x = c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> filter(x > 3) |> slice_min(order_by = x, n = 2) |> collect()
expect_equal(result$x, c(4, 5))
})
test_that("topn after mutate", {
df <- data.frame(x = c(3.0, 1.0, 2.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> mutate(y = x * 2) |> slice_min(order_by = y, n = 2) |> collect()
expect_equal(result$x, c(1, 2))
expect_equal(result$y, c(2, 4))
})
test_that("topn with duplicate values includes ties by default", {
df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 3.0, 3.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
# with_ties = TRUE (default): boundary is 3.0 (4th smallest), keep all <= 3
result <- tbl(f) |> slice_min(order_by = x, n = 4) |> collect()
expect_equal(nrow(result), 6)
expect_equal(result$x, c(1, 2, 2, 3, 3, 3))
})
test_that("topn with duplicate values without ties returns exactly n", {
df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 3.0, 3.0))
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 4, with_ties = FALSE) |> collect()
expect_equal(nrow(result), 4)
})
test_that("topn single row dataset", {
df <- data.frame(x = 42.0)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
result <- tbl(f) |> slice_min(order_by = x, n = 1) |> collect()
expect_equal(result$x, 42)
})
test_that("topn large dataset correctness", {
set.seed(42)
n <- 10000
df <- data.frame(x = runif(n), y = sample(letters, n, replace = TRUE),
stringsAsFactors = FALSE)
f <- tempfile(fileext = ".vtr")
on.exit(unlink(f))
write_vtr(df, f)
k <- 50
result <- tbl(f) |> slice_min(order_by = x, n = k) |> collect()
expected <- head(df[order(df$x), ], k)
expect_equal(result$x, expected$x)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.