df <- data.frame(x = rep(1:3, each = 10), y = rep(1:6, each = 5))
test_that("group_by() with .add = TRUE adds groups", {
add_groups1 <- function(tbl) group_by(tbl, x, y, .add = TRUE)
add_groups2 <- function(tbl) group_by(group_by(tbl, x, .add = TRUE), y, .add = TRUE)
expect_equal(group_vars(add_groups1(df)), c("x", "y"))
expect_equal(group_vars(add_groups2(df)), c("x", "y"))
})
test_that("group_by(<grouped df>, <computation>) computes the expressions on the ungrouped data frame (#5938)", {
df <- data.frame(
x = 1:4,
g = rep(1:2, each = 2)
)
count <- 0
out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) })
expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE))
expect_equal(count, 1L)
expect_equal(group_vars(out), c("big"))
count <- 0
out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) }, .add = TRUE)
expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE))
expect_equal(count, 1L)
expect_equal(group_vars(out), c("g", "big"))
count <- 0
out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big)
expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE))
expect_equal(count, 2L)
expect_equal(group_vars(out), c("big"))
count <- 0
out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big, .add = TRUE)
expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE))
expect_equal(count, 2L)
expect_equal(group_vars(out), c("g", "big"))
})
test_that("add = TRUE is deprecated", {
rlang::local_options(lifecycle_verbosity = "warning")
df <- tibble(x = 1, y = 2)
expect_warning(
out <- df %>% group_by(x) %>% group_by(y, add = TRUE),
"deprecated"
)
expect_equal(group_vars(out), c("x", "y"))
})
test_that("joins preserve grouping", {
df <- data.frame(x = rep(1:2, each = 4), y = rep(1:4, each = 2))
g <- group_by(df, x)
expect_equal(group_vars(inner_join(g, g, by = c("x", "y"), relationship = "many-to-many")), "x")
expect_equal(group_vars(left_join(g, g, by = c("x", "y"), relationship = "many-to-many")), "x")
expect_equal(group_vars(semi_join(g, g, by = c("x", "y"))), "x")
expect_equal(group_vars(anti_join(g, g, by = c("x", "y"))), "x")
})
test_that("constructors drops groups", {
df <- data.frame(x = 1:3) %>% group_by(x)
expect_equal(group_vars(as_tibble(df)), character())
})
test_that("grouping by constant adds column (#410)", {
grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n())
expect_equal(names(grouped), c('"cyl"', "foo"))
expect_equal(nrow(grouped), 1L)
})
test_that("can partially `ungroup()` (#6606)", {
df <- tibble(g1 = 1:2, g2 = 3:4, x = 5:6)
gdf <- group_by(df, g1, g2)
expect_identical(ungroup(gdf, g1), group_by(df, g2))
expect_identical(ungroup(gdf, g1, g2), df)
})
test_that("can't rename while partially `ungroup()`-ing (#6606)", {
df <- tibble(g = 1:2, x = 3:4)
gdf <- group_by(df, g)
expect_snapshot(error = TRUE, {
ungroup(gdf, g2 = g)
})
})
test_that(".dots is soft deprecated", {
rlang::local_options(lifecycle_verbosity = "warning")
df <- tibble(x = 1, y = 1)
expect_warning(gf <- group_by(df, .dots = "x"), "deprecated")
})
# Test full range of variable types --------------------------------------------
test_that("local group_by preserves variable types", {
df_var <- tibble(
l = c(T, F),
i = 1:2,
d = Sys.Date() + 1:2,
f = factor(letters[1:2]),
num = 1:2 + 0.5,
t = Sys.time() + 1:2,
c = letters[1:2]
)
attr(df_var$t, "tzone") <- ""
for (var in names(df_var)) {
expected <- tibble(!!var := sort(unique(df_var[[var]])), n = 1L)
summarised <- df_var %>% group_by(!!sym(var)) %>% summarise(n = n())
expect_equal(summarised, expected)
}
})
test_that("mutate does not lose variables (#144)", {
df <- tibble(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8))
by_ab <- group_by(df, a, b)
by_a <- summarise(by_ab, x = sum(x), .groups = "drop_last")
by_a_quartile <- group_by(by_a, quartile = ntile(x, 4))
expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile"))
})
test_that("group_by uses shallow copy", {
skip_if_not_installed("lobstr")
m1 <- group_by(mtcars, cyl)
expect_equal(group_vars(mtcars), character())
expect_equal(
lobstr::obj_addrs(mtcars),
lobstr::obj_addrs(m1)
)
})
test_that("group_by orders by groups. #242", {
df <- data.frame(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a)
expect_equal(group_data(df)$a, 1:10)
df <- data.frame(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a)
expect_equal(group_data(df)$a, letters[1:10])
df <- data.frame(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a)
expect_equal(group_data(df)$a, sqrt(1:10))
})
test_that("Can group_by() a POSIXlt", {
skip_if_not_installed("tibble", "2.99.99")
df <- tibble(x = 1:5, times = as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day")))
g <- group_by(df, times)
expect_equal(nrow(group_data(g)), 5L)
})
test_that("group_by() handles list as grouping variables", {
df <- tibble(x = 1:3, y = list(1:2, 1:3, 1:2))
gdata <- group_data(group_by(df, y))
expect_equal(nrow(gdata), 2L)
expect_equal(gdata$y, list(1:2, 1:3))
expect_equal(gdata$.rows, list_of(c(1L, 3L), 2L))
})
test_that("select(group_by(.)) implicitly adds grouping variables (#170)", {
expect_snapshot(
res <- mtcars %>% group_by(vs) %>% select(mpg)
)
expect_equal(names(res), c("vs", "mpg"))
})
test_that("group_by only creates one group for NA (#401)", {
x <- as.numeric(c(NA, NA, NA, 10:1, 10:1))
w <- c(20, 30, 40, 1:10, 1:10) * 10
n_distinct(x) # 11 OK
res <- data.frame(x = x, w = w) %>% group_by(x) %>% summarise(n = n())
expect_equal(nrow(res), 11L)
})
test_that("there can be 0 groups (#486)", {
data <- tibble(a = numeric(0), g = character(0)) %>% group_by(g)
expect_equal(length(data$a), 0L)
expect_equal(length(data$g), 0L)
expect_equal(map_int(group_rows(data), length), integer(0))
})
test_that("group_by works with zero-row data frames (#486)", {
df <- data.frame(a = numeric(0), b = numeric(0), g = character(0))
dfg <- group_by(df, g, .drop = FALSE)
expect_equal(dim(dfg), c(0, 3))
expect_equal(group_vars(dfg), "g")
expect_equal(group_size(dfg), integer(0))
x <- summarise(dfg, n = n())
expect_equal(dim(x), c(0, 2))
expect_equal(group_vars(x), character())
x <- mutate(dfg, c = b + 1)
expect_equal(dim(x), c(0, 4))
expect_equal(group_vars(x), "g")
expect_equal(group_size(x), integer(0))
x <- filter(dfg, a == 100)
expect_equal(dim(x), c(0, 3))
expect_equal(group_vars(x), "g")
expect_equal(group_size(x), integer(0))
x <- arrange(dfg, a, g)
expect_equal(dim(x), c(0, 3))
expect_equal(group_vars(x), "g")
expect_equal(group_size(x), integer(0))
expect_snapshot(
x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a'
)
expect_equal(dim(x), c(0, 2))
expect_equal(group_vars(x), "g")
expect_equal(group_size(x), integer(0))
})
test_that("[ on grouped_df preserves grouping if subset includes grouping vars", {
df <- tibble(x = 1:5, ` ` = 6:10)
by_x <- df %>% group_by(x)
expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups())
# non-syntactic name
by_ns <- df %>% group_by(` `)
expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups())
})
test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", {
by_cyl <- mtcars %>% group_by(cyl)
no_cyl <- by_cyl %>% `[`(c(1, 3))
expect_equal(group_vars(no_cyl), character())
expect_s3_class(no_cyl, "tbl_df")
})
test_that("group_by works after arrange (#959)", {
df <- tibble(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11))
res <- df %>%
arrange(Time) %>%
group_by(Log) %>%
mutate(Diff = Time - lag(Time))
expect_true(all(is.na(res$Diff[c(1, 3)])))
expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5))
})
test_that("group_by keeps attributes", {
d <- data.frame(x = structure(1:10, foo = "bar"))
gd <- group_by(d)
expect_equal(attr(gd$x, "foo"), "bar")
})
test_that("ungroup.rowwise_df gives a tbl_df (#936)", {
res <- mtcars %>% rowwise() %>% ungroup() %>% class()
expect_equal(res, c("tbl_df", "tbl", "data.frame"))
})
test_that("group_by handles encodings for native strings (#1507)", {
local_non_utf8_encoding()
special <- get_native_lang_string()
df <- data.frame(x = 1:3, Eng = 2:4)
for (names_converter in c(enc2native, enc2utf8)) {
for (dots_converter in c(enc2native, enc2utf8)) {
names(df) <- names_converter(c(special, "Eng"))
res <- group_by(df, !!!syms(dots_converter(special)))
expect_equal(names(res), names(df))
expect_equal(group_vars(res), special)
}
}
for (names_converter in c(enc2native, enc2utf8)) {
names(df) <- names_converter(c(special, "Eng"))
res <- group_by(df, !!!special)
expect_equal(names(res), c(names(df), deparse(special)))
expect_equal(groups(res), list(as.name(enc2native(deparse(special)))))
}
})
test_that("group_by handles raw columns (#1803)", {
df <- tibble(a = 1:3, b = as.raw(1:3))
expect_identical(ungroup(group_by(df, a)), df)
expect_identical(ungroup(group_by(df, b)), df)
})
test_that("rowwise handles raw columns (#1803)", {
df <- tibble(a = 1:3, b = as.raw(1:3))
expect_s3_class(rowwise(df), "rowwise_df")
})
test_that("group_by() names pronouns correctly (#2686)", {
expect_named(group_by(tibble(x = 1), .data$x), "x")
expect_named(group_by(tibble(x = 1), .data[["x"]]), "x")
})
test_that("group_by() does not affect input data (#3028)", {
x <-
data.frame(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>%
group_by(old1)
y <-
x %>%
select(new1 = old1, new2 = old2)
expect_identical(groups(x), syms(quote(old1)))
})
test_that("group_by() does not mutate for nothing when using the .data pronoun (#2752, #3533)", {
expect_identical(
iris %>% group_by(Species) %>% group_by(.data$Species),
iris %>% group_by(Species)
)
expect_identical(
iris %>% group_by(Species) %>% group_by(.data[["Species"]]),
iris %>% group_by(Species)
)
df <- tibble(x = 1:5)
attr(df, "y") <- 1
expect_equal( df %>% group_by(.data$x) %>% attr("y"), 1 )
expect_equal( df %>% group_by(.data[["x"]]) %>% attr("y"), 1 )
})
test_that("tbl_sum gets the right number of groups", {
res <- data.frame(x=c(1,1,2,2)) %>% group_by(x) %>% pillar::tbl_sum()
expect_equal(res, c("A tibble" = "4 x 1", "Groups" = "x [2]"))
})
test_that("group_by ignores empty quosures (3780)", {
empty <- quo()
expect_equal(group_by(mtcars, cyl), group_by(mtcars, cyl, !!empty))
})
# Zero groups ---------------------------------------------------
test_that("mutate handles grouped tibble with 0 groups (#3935)", {
df <- tibble(x=integer()) %>% group_by(x)
res <- mutate(df, y = mean(x), z = +mean(x), n = n())
expect_equal(names(res), c("x", "y", "z", "n"))
expect_equal(nrow(res), 0L)
expect_equal(res$y, double())
expect_equal(res$z, double())
expect_equal(res$n, integer())
})
test_that("summarise handles grouped tibble with 0 groups (#3935)", {
df <- tibble(x=integer()) %>% group_by(x)
res <- summarise(df, y = mean(x), z = +mean(x), n = n())
expect_equal(names(res), c("x", "y", "z", "n"))
expect_equal(nrow(res), 0L)
expect_equal(res$y, double())
expect_equal(res$n, integer())
expect_equal(res$z, double())
})
test_that("filter handles grouped tibble with 0 groups (#3935)", {
df <- tibble(x=integer()) %>% group_by(x)
res <- filter(df, x > 3L)
expect_identical(df, res)
})
test_that("select handles grouped tibble with 0 groups (#3935)", {
df <- tibble(x=integer()) %>% group_by(x)
res <- select(df, x)
expect_identical(df, res)
})
test_that("arrange handles grouped tibble with 0 groups (#3935)", {
df <- tibble(x=integer()) %>% group_by(x)
res <- arrange(df, x)
expect_identical(df, res)
})
test_that("group_by() with empty spec produces a grouped data frame with 0 grouping variables", {
gdata <- group_data(group_by(iris))
expect_equal(names(gdata), ".rows")
expect_equal(gdata$.rows, list_of(1:nrow(iris)))
gdata <- group_data(group_by(iris, !!!list()))
expect_equal(names(gdata), ".rows")
expect_equal(gdata$.rows, list_of(1:nrow(iris)))
})
# .drop = TRUE ---------------------------------------------------
test_that("group_by(.drop = TRUE) drops empty groups (4061)", {
res <- iris %>%
filter(Species == "setosa") %>%
group_by(Species, .drop = TRUE)
expect_identical(
group_data(res),
structure(
tibble(Species = factor("setosa", levels = levels(iris$Species)), .rows := list_of(1:50)),
.drop = TRUE
)
)
expect_true(group_by_drop_default(res))
})
test_that("grouped data frames remember their .drop (#4061)", {
res <- iris %>%
filter(Species == "setosa") %>%
group_by(Species, .drop = TRUE)
res2 <- res %>%
filter(Sepal.Length > 5)
expect_true(group_by_drop_default(res2))
res3 <- res %>%
filter(Sepal.Length > 5, .preserve = FALSE)
expect_true(group_by_drop_default(res3))
res4 <- res3 %>%
group_by(Species)
expect_true(group_by_drop_default(res4))
expect_equal(nrow(group_data(res4)), 1L)
})
test_that("grouped data frames remember their .drop = FALSE (#4337)", {
res <- iris %>%
filter(Species == "setosa") %>%
group_by(Species, .drop = FALSE)
expect_false(group_by_drop_default(res))
res2 <- res %>%
group_by(Species)
expect_false(group_by_drop_default(res2))
})
test_that("group_by(.drop = FALSE) preserve ordered factors (#5455)", {
df <- tibble(x = ordered("x"))
drop <- df %>% group_by(x) %>% group_data()
nodrop <- df %>% group_by(x, .drop = FALSE) %>% group_data()
expect_equal(is.ordered(drop$x), is.ordered(nodrop$x))
expect_true(is.ordered(nodrop$x))
})
test_that("summarise maintains the .drop attribute (#4061)", {
df <- tibble(
f1 = factor("a", levels = c("a", "b", "c")),
f2 = factor("d", levels = c("d", "e", "f", "g")),
x = 42
)
res <- df %>%
group_by(f1, f2, .drop = TRUE)
expect_equal(n_groups(res), 1L)
res2 <- summarise(res, x = sum(x), .groups = "drop_last")
expect_equal(n_groups(res2), 1L)
expect_true(group_by_drop_default(res2))
})
test_that("joins maintain the .drop attribute (#4061)", {
df1 <- group_by(tibble(
f1 = factor(c("a", "b"), levels = c("a", "b", "c")),
x = 42:43
), f1, .drop = TRUE)
df2 <- group_by(tibble(
f1 = factor(c("a"), levels = c("a", "b", "c")),
y = 1
), f1, .drop = TRUE)
res <- left_join(df1, df2, by = "f1")
expect_equal(n_groups(res), 2L)
df2 <- group_by(tibble(
f1 = factor(c("a", "c"), levels = c("a", "b", "c")),
y = 1:2
), f1, .drop = TRUE)
res <- full_join(df1, df2, by = "f1")
expect_equal(n_groups(res), 3L)
})
test_that("group_by(add = TRUE) sets .drop if the origonal data was .drop", {
d <- tibble(
f1 = factor("b", levels = c("a", "b", "c")),
f2 = factor("g", levels = c("e", "f", "g")),
x = 48
)
res <- group_by(group_by(d, f1, .drop = TRUE), f2, .add = TRUE)
expect_equal(n_groups(res), 1L)
expect_true(group_by_drop_default(res))
})
test_that("group_by_drop_default() is forgiving about corrupt grouped df (#4306)",{
df <- tibble(x = 1:2, y = 1:2) %>%
structure(class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
expect_true(group_by_drop_default(df))
})
test_that("group_by() puts NA groups last in STRSXP (#4227)", {
res <- tibble(x = c("apple", NA, "banana"), y = 1:3) %>%
group_by(x) %>%
group_data()
expect_identical(res$x, c("apple", "banana", NA_character_))
expect_identical(res$.rows, list_of(1L, 3L, 2L))
})
test_that("group_by() does not create arbitrary NA groups for factors when drop = TRUE (#4460)", {
res <- expect_warning(group_data(group_by(iris, Species)[0, ]), NA)
expect_equal(nrow(res), 0L)
res <- expect_warning(group_data(group_by(iris[0, ], Species)), NA)
expect_equal(nrow(res), 0L)
})
test_that("group_by() can handle auto splicing in the mutate() step", {
expect_identical(
iris %>% group_by(Species),
iris %>% group_by(data.frame(Species = Species))
)
expect_identical(
iris %>% group_by(Species),
iris %>% group_by(pick(Species))
)
expect_identical(
iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width),
iris %>% group_by(across(starts_with("Sepal"), round))
)
})
test_that("group_by() can combine usual spec and auto-splicing-mutate() step", {
expect_identical(
iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width, Species),
iris %>% group_by(across(starts_with("Sepal"), round), Species)
)
expect_identical(
iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Species, Sepal.Length, Sepal.Width),
iris %>% group_by(Species, across(starts_with("Sepal"), round))
)
})
# mutate() semantics
test_that("group_by() has mutate() semantics (#4984)", {
expect_equal(
tibble(a = 1, b = 2) %>% group_by(c = a * b, d = c + 1),
tibble(a = 1, b = 2) %>% mutate(c = a * b, d = c + 1) %>% group_by(c, d)
)
})
test_that("implicit mutate() operates on ungrouped data (#5598)", {
vars <- tibble(x = c(1,2), y = c(3,4), z = c(5,6)) %>%
dplyr::group_by(y) %>%
dplyr::group_by(pick(any_of(c('y','z')))) %>%
dplyr::group_vars()
expect_equal(vars, c("y", "z"))
})
test_that("grouped_df() does not break row.names (#5745)", {
groups <- compute_groups(data.frame(x = 1:10), "x")
expect_equal(.row_names_info(groups, type = 0), c(NA, -10L))
})
test_that("group_by() keeps attributes unrelated to the grouping (#5760)", {
d <- data.frame(x = 453, y = 642)
attr(d, "foo") <- "bar"
d2 <- group_by(d, x)
expect_equal(attr(d2, "foo"), "bar")
d3 <- group_by(d2, y, .add = TRUE)
expect_equal(attr(d2, "foo"), "bar")
d4 <- group_by(d2, y2 = y * 2, .add = TRUE)
expect_equal(attr(d2, "foo"), "bar")
})
test_that("group_by() works with quosures (tidyverse/lubridate#959)", {
ignore <- function(...) NA
f <- function(var) {
tibble(x = 1) %>% group_by(g = ignore({{ var }}))
}
g <- function(var) {
# This used to fail with the extra argument
tibble(x = 1) %>% group_by(g = ignore({{ var }}, 1))
}
expect_equal(f(), tibble(x = 1, g = NA) %>% group_by(g))
expect_equal(g(), tibble(x = 1, g = NA) %>% group_by(g))
})
# Errors ------------------------------------------------------------------
test_that("group_by() and ungroup() give meaningful error messages", {
expect_snapshot({
df <- tibble(x = 1, y = 2)
(expect_error(df %>% group_by(unknown)))
(expect_error(df %>% ungroup(x)))
(expect_error(df %>% group_by(x, y) %>% ungroup(z)))
(expect_error(df %>% group_by(z = a + 1)))
})
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.