Nothing
# Counts rows per group with n() and checks both the keys and the counts.
test_that("group_by + summarise with count", {
  input <- data.frame(
    g = c("a", "b", "a", "b", "a"),
    x = 1:5,
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  counts <- tbl(path) |>
    group_by(g) |>
    summarise(cnt = n()) |>
    collect()

  # First-seen order: "a" first, "b" second
  expect_equal(counts$g, c("a", "b"))
  expect_equal(counts$cnt, c(3, 2))
})
# Sums x within each group and checks the keys and the per-group totals.
test_that("group_by + summarise with sum", {
  input <- data.frame(
    g = c("a", "a", "b", "b"),
    x = c(1.0, 2.0, 3.0, 4.0),
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  totals <- tbl(path) |>
    group_by(g) |>
    summarise(total = sum(x)) |>
    collect()

  expect_equal(totals$g, c("a", "b"))
  expect_equal(totals$total, c(3, 7))
})
# Averages x within each group and checks the keys and the per-group means.
test_that("group_by + summarise with mean", {
  df <- data.frame(
    g = c("a", "a", "b", "b"),
    x = c(10.0, 20.0, 30.0, 40.0),
    stringsAsFactors = FALSE
  )
  f <- tempfile(fileext = ".vtr")
  # add = TRUE so this cleanup never replaces a handler registered earlier.
  on.exit(unlink(f), add = TRUE)
  write_vtr(df, f)

  result <- tbl(f) |>
    group_by(g) |>
    summarise(avg = mean(x)) |>
    collect()

  # The positional comparison of `avg` below is only meaningful if the keys
  # come back as "a" then "b", so pin the key order first (as the count and
  # sum tests do).
  expect_equal(result$g, c("a", "b"))
  expect_equal(result$avg, c(15, 35))
})
# Computes two aggregates (min and max) in one summarise() call and checks
# the keys and both result columns.
test_that("group_by + summarise with min and max", {
  df <- data.frame(
    g = c("a", "a", "b", "b"),
    x = c(5.0, 1.0, 8.0, 3.0),
    stringsAsFactors = FALSE
  )
  f <- tempfile(fileext = ".vtr")
  # add = TRUE so this cleanup never replaces a handler registered earlier.
  on.exit(unlink(f), add = TRUE)
  write_vtr(df, f)

  result <- tbl(f) |>
    group_by(g) |>
    summarise(lo = min(x), hi = max(x)) |>
    collect()

  # `lo` and `hi` are compared by position, which presumes keys arrive as
  # "a" then "b"; assert that explicitly so a reordering is reported as such.
  expect_equal(result$g, c("a", "b"))
  expect_equal(result$lo, c(1, 3))
  expect_equal(result$hi, c(5, 8))
})
# A missing grouping key must form its own group next to the "a" group.
test_that("group_by with NA key values", {
  input <- data.frame(
    g = c("a", NA, "a", NA),
    x = c(1.0, 2.0, 3.0, 4.0),
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  out <- tbl(path) |>
    group_by(g) |>
    summarise(total = sum(x)) |>
    collect()

  expect_equal(nrow(out), 2)

  # "a" group and NA group
  key_missing <- is.na(out$g)
  # `==` yields NA for the missing key, so mask it out to keep a clean
  # logical index.
  key_is_a <- out$g == "a" & !key_missing
  expect_equal(out$total[key_is_a], 4)
  expect_equal(out$total[key_missing], 6)
})
# Groups by two columns at once. Every (a, b) pair in the input is unique, so
# each row is its own group and each sum equals the row's own v.
test_that("multiple grouping columns", {
  df <- data.frame(
    a = c("x", "x", "y", "y"),
    b = c(1L, 2L, 1L, 2L),
    v = c(10.0, 20.0, 30.0, 40.0),
    stringsAsFactors = FALSE
  )
  f <- tempfile(fileext = ".vtr")
  # add = TRUE so this cleanup never replaces a handler registered earlier.
  on.exit(unlink(f), add = TRUE)
  write_vtr(df, f)

  result <- tbl(f) |>
    group_by(a, b) |>
    summarise(s = sum(v)) |>
    collect()

  expect_equal(nrow(result), 4)

  # Check the keys and the aggregated values, not just the row count. Sort by
  # the keys first so the assertions do not depend on group output order.
  ord <- order(result$a, result$b)
  expect_equal(result$a[ord], c("x", "x", "y", "y"))
  expect_equal(result$b[ord], c(1, 2, 1, 2))
  expect_equal(result$s[ord], c(10, 20, 30, 40))
})
# With na.rm = TRUE the missing x in group "a" is skipped rather than
# turning the group's total into NA.
test_that("summarise with na.rm", {
  input <- data.frame(
    g = c("a", "a", "b"),
    x = c(1.0, NA, 3.0),
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  out <- tbl(path) |>
    group_by(g) |>
    summarise(total = sum(x, na.rm = TRUE)) |>
    collect()

  # Look totals up by key so the checks do not depend on group order.
  total_for <- function(key) out$total[out$g == key]
  expect_equal(total_for("a"), 1)
  expect_equal(total_for("b"), 3)
})
# A single group whose values are all missing: sum() without na.rm must
# report NA for that group.
test_that("summarise without na.rm gives NA for all-NA group", {
  input <- data.frame(
    g = c("a", "a"),
    x = c(NA_real_, NA_real_),
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  out <- tbl(path) |>
    group_by(g) |>
    summarise(total = sum(x)) |>
    collect()

  total_is_missing <- is.na(out$total)
  expect_true(total_is_missing)
})
test_that("string key arena survives multiple resizes", {
  # Initial arena capacity is 64. 200 unique groups forces resizes at 65 and 129.
  # Regression test for UAF in arena_ensure when string data was aliased.
  set.seed(1)
  # Named n_rows rather than n so the local cannot be confused with the `n`
  # column created by summarise() and referenced in filter() below.
  n_rows <- 2000
  n_groups <- 200
  labels <- paste0("grp_", seq_len(n_groups))
  # Include every label once, draw the remainder at random, then shuffle.
  # Sampling all rows with replacement does not guarantee that each label is
  # drawn, which would make the group-count assertion depend on the RNG
  # stream.
  keys <- c(labels, sample(labels, n_rows - n_groups, replace = TRUE))
  df <- data.frame(
    g = sample(keys),
    x = rnorm(n_rows),
    stringsAsFactors = FALSE
  )
  f <- tempfile(fileext = ".vtr")
  # add = TRUE on both handlers so neither cleanup replaces the other.
  on.exit(unlink(f), add = TRUE)
  write_vtr(df, f)

  result <- tbl(f) |>
    group_by(g) |>
    summarise(sx = sum(x), n = n()) |>
    collect()

  expect_equal(nrow(result), n_groups)
  expect_equal(sum(result$n), n_rows)

  # Verify against R base
  ref <- aggregate(x ~ g, data = df, FUN = sum)
  # Align the reference rows with the order the groups came back in.
  ref <- ref[match(result$g, ref$g), ]
  expect_equal(result$sx, ref$x, tolerance = 1e-10)

  # Round-trip the grouped result through a second file and filter it, to
  # confirm the keys and counts produced after the resizes are still usable
  # downstream.
  f2 <- tempfile(fileext = ".vtr")
  on.exit(unlink(f2), add = TRUE)
  write_vtr(result, f2)
  result2 <- tbl(f2) |>
    filter(n > 5) |>
    collect()
  expect_true(all(result2$n > 5))
  expect_true(nrow(result2) > 0)
})
# Filtering before grouping: only rows with x > 2 may contribute to the
# per-group sums.
test_that("filter then group_by then summarise", {
  input <- data.frame(
    g = c("a", "b", "a", "b", "a"),
    x = c(1.0, 2.0, 3.0, 4.0, 5.0),
    stringsAsFactors = FALSE
  )
  path <- tempfile(fileext = ".vtr")
  on.exit(unlink(path))
  write_vtr(input, path)

  out <- tbl(path) |>
    filter(x > 2) |>
    group_by(g) |>
    summarise(s = sum(x)) |>
    collect()

  # Look sums up by key so the checks do not depend on group order.
  sum_for <- function(key) out$s[out$g == key]
  expect_equal(sum_for("a"), 8)
  expect_equal(sum_for("b"), 4)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.