tests/testthat/test-new-aggs.R

# --- sd ---

test_that("sd computes sample standard deviation", {
  df <- data.frame(g = c("a", "a", "a"), x = c(2.0, 4.0, 6.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(s = sd(x)) |> collect()
  expect_equal(result$s, sd(c(2, 4, 6)))
})

test_that("sd of single value returns NA", {
  df <- data.frame(g = c("a"), x = c(5.0), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(s = sd(x)) |> collect()
  expect_true(is.na(result$s))
})

test_that("sd with na.rm = TRUE skips NAs", {
  df <- data.frame(g = c("a", "a", "a"), x = c(2.0, NA, 6.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(s = sd(x, na.rm = TRUE)) |> collect()
  expect_equal(result$s, sd(c(2, 6)))
})

test_that("sd without na.rm returns NA when group has NAs", {
  df <- data.frame(g = c("a", "a", "a"), x = c(2.0, NA, 6.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(s = sd(x)) |> collect()
  expect_true(is.na(result$s))
})

# --- var ---

test_that("var computes sample variance", {
  df <- data.frame(g = c("a", "a", "a"), x = c(2.0, 4.0, 6.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(v = var(x)) |> collect()
  expect_equal(result$v, var(c(2, 4, 6)))
})

test_that("var of single value returns NA", {
  df <- data.frame(g = c("a"), x = c(5.0), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(v = var(x)) |> collect()
  expect_true(is.na(result$v))
})

# --- first ---

test_that("first returns the first value per group", {
  df <- data.frame(g = c("a", "a", "b", "b"),
                   x = c(10.0, 20.0, 30.0, 40.0), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(f = first(x)) |> collect()
  expect_equal(result$f[result$g == "a"], 10)
  expect_equal(result$f[result$g == "b"], 30)
})

test_that("first with na.rm = TRUE skips leading NAs", {
  df <- data.frame(g = c("a", "a", "a"), x = c(NA, 2.0, 3.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(f = first(x, na.rm = TRUE)) |> collect()
  expect_equal(result$f, 2)
})

test_that("first without na.rm returns NA when first value is NA", {
  df <- data.frame(g = c("a", "a"), x = c(NA, 2.0), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(f = first(x)) |> collect()
  expect_true(is.na(result$f))
})

# --- last ---

test_that("last returns the last value per group", {
  df <- data.frame(g = c("a", "a", "b", "b"),
                   x = c(10.0, 20.0, 30.0, 40.0), stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(l = last(x)) |> collect()
  expect_equal(result$l[result$g == "a"], 20)
  expect_equal(result$l[result$g == "b"], 40)
})

test_that("last with na.rm = TRUE skips trailing NAs", {
  df <- data.frame(g = c("a", "a", "a"), x = c(1.0, 2.0, NA),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(l = last(x, na.rm = TRUE)) |> collect()
  expect_equal(result$l, 2)
})

# --- sd/var with multiple groups ---

test_that("sd works across multiple groups", {
  df <- data.frame(g = c("a", "a", "a", "b", "b", "b"),
                   x = c(1.0, 2.0, 3.0, 10.0, 20.0, 30.0),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(s = sd(x)) |> collect()
  expect_equal(result$s[result$g == "a"], sd(c(1, 2, 3)))
  expect_equal(result$s[result$g == "b"], sd(c(10, 20, 30)))
})

# --- slice_min/max with_ties ---

test_that("slice_min with_ties = TRUE includes tied rows", {
  df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 4.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 2, with_ties = TRUE)
  # n=2 -> boundary is 2.0, so include 1, 2, 2 = 3 rows
  expect_equal(nrow(result), 3)
  expect_true(all(result$x <= 2))
})

test_that("slice_min with_ties = FALSE returns exactly n rows", {
  df <- data.frame(x = c(1.0, 2.0, 2.0, 3.0, 4.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_min(order_by = x, n = 2, with_ties = FALSE) |> collect()
  expect_equal(nrow(result), 2)
})

test_that("slice_max with_ties = TRUE includes tied rows", {
  df <- data.frame(x = c(1.0, 2.0, 3.0, 3.0, 4.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_max(order_by = x, n = 2, with_ties = TRUE)
  # n=2 -> boundary is 3.0, so include 3, 3, 4 = 3 rows
  expect_equal(nrow(result), 3)
  expect_true(all(result$x >= 3))
})

test_that("slice_max with_ties = FALSE returns exactly n rows", {
  df <- data.frame(x = c(1.0, 2.0, 3.0, 3.0, 4.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> slice_max(order_by = x, n = 2, with_ties = FALSE) |> collect()
  expect_equal(nrow(result), 2)
})

test_that("slice_min with no ties behaves same either way", {
  df <- data.frame(x = c(1.0, 2.0, 3.0, 4.0, 5.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result_ties <- tbl(f) |> slice_min(order_by = x, n = 2, with_ties = TRUE)
  result_no <- tbl(f) |> slice_min(order_by = x, n = 2, with_ties = FALSE) |> collect()
  expect_equal(nrow(result_ties), 2)
  expect_equal(nrow(result_no), 2)
})

# --- first/last with all NAs ---

test_that("first with na.rm = TRUE on all-NA group returns NA", {
  df <- data.frame(g = c("a", "a"), x = c(NA_real_, NA_real_),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(f = first(x, na.rm = TRUE)) |> collect()
  expect_true(is.na(result$f))
})

test_that("last with na.rm = TRUE on all-NA group returns NA", {
  df <- data.frame(g = c("a", "a"), x = c(NA_real_, NA_real_),
                   stringsAsFactors = FALSE)
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> group_by(g) |> summarise(l = last(x, na.rm = TRUE)) |> collect()
  expect_true(is.na(result$l))
})

# --- transmute with across ---

test_that("transmute supports across() in summarise context", {
  # across() with aggregation functions works in summarise, not transmute
  # Test that across() expands correctly in summarise
  df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(10.0, 20.0, 30.0))
  f <- tempfile(fileext = ".vtr")
  on.exit(unlink(f))
  write_vtr(df, f)
  result <- tbl(f) |> summarise(across(c(x, y), mean)) |> collect()
  expect_equal(names(result), c("x", "y"))
  expect_equal(result$x, 2)
  expect_equal(result$y, 20)
})

Try the vectra package in your browser

Any scripts or data that you put into this service are public.

vectra documentation built on May 8, 2026, 9:06 a.m.