context("Summarise")
test_that("repeated outputs applied progressively", {
df <- data.frame(x = 5)
out <- summarise(df, x = mean(x), x = x + 1)
expect_equal(nrow(out), 1)
expect_equal(ncol(out), 1)
expect_equal(out$x, 6)
})
test_that("repeated outputs applied progressively (grouped_df)", {
df <- data.frame(x = c(1, 1), y = 1:2)
ds <- group_by(df, y)
out <- summarise(ds, z = mean(x), z = z + 1)
expect_equal(nrow(out), 2)
expect_equal(ncol(out), 2)
expect_equal(out$z, c(2L, 2L))
})
test_that("summarise peels off a single layer of grouping", {
df <- tibble(x = rep(1:4, each = 4), y = rep(1:2, each = 8), z = runif(16))
grouped <- df %>% group_by(x, y, z)
expect_equal(group_vars(grouped), c("x", "y", "z"))
expect_equal(group_vars(grouped %>% summarise(n = n())), c("x", "y"))
})
test_that("summarise can refer to variables that were just created (#138)", {
res <- summarise(tbl_df(mtcars), cyl1 = mean(cyl), cyl2 = cyl1 + 1)
expect_equal(res$cyl2, mean(mtcars$cyl) + 1)
gmtcars <- group_by(tbl_df(mtcars), am)
res <- summarise(gmtcars, cyl1 = mean(cyl), cyl2 = cyl1 + 1)
res_direct <- summarise(gmtcars, cyl2 = mean(cyl) + 1)
expect_equal(res$cyl2, res_direct$cyl2)
})
test_that("summarise can refer to factor variables that were just created (#2217)", {
df <- data_frame(a = 1:3) %>%
group_by(a)
res <- df %>%
summarise(f = factor(if_else(a <= 1, "a", "b")), g = (f == "a"))
expect_equal(
res,
data_frame(a = 1:3, f = factor(c("a", "b", "b")), g = c(TRUE, FALSE, FALSE))
)
})
test_that("summarise refuses to modify grouping variable (#143)", {
df <- data.frame(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2), x = 1:4)
ds <- group_by(tbl_df(df), a, b)
expect_error(
summarise(ds, a = mean(x), a = b + 1),
"Column `a` can't be modified because it's a grouping variable"
)
})
test_that("summarise gives proper errors (#153)", {
df <- data_frame(
x = 1,
y = c(1, 2, 2),
z = runif(3)
)
expect_error(
summarise(df, identity(NULL)),
"Column `identity(NULL)` must be length 1 (a summary value), not 0",
fixed = TRUE
)
expect_error(
summarise(df, log(z)),
"Column `log(z)` must be length 1 (a summary value), not 3",
fixed = TRUE
)
expect_error(
summarise(df, y[1:2]),
"Column `y[1:2]` must be length 1 (a summary value), not 2",
fixed = TRUE
)
gdf <- group_by(df, x, y)
expect_error(
summarise(gdf, identity(NULL)),
"Column `identity(NULL)` must be length 1 (a summary value), not 0",
fixed = TRUE
)
expect_error(
summarise(gdf, z),
"Column `z` must be length 1 (a summary value), not 2",
fixed = TRUE
)
expect_error(
summarise(gdf, log(z)),
"Column `log(z)` must be length 1 (a summary value), not 2",
fixed = TRUE
)
expect_error(
summarise(gdf, y[1:2]),
"Column `y[1:2]` must be length 1 (a summary value), not 2",
fixed = TRUE
)
})
test_that("summarise handles constants (#153)", {
df <- data.frame(a = 1:4)
today <- Sys.Date()
now <- Sys.time()
res <- summarise(
tbl_df(df),
int = 1L, num = 1, str = "foo", bool = TRUE, date = today, time = now
)
expect_equal(res$int, 1L)
expect_equal(res$num, 1.0)
expect_equal(res$str, "foo")
expect_equal(res$bool, TRUE)
expect_equal(res$date, today)
expect_equal(res$time, now)
res <- summarise(
group_by(df, a),
int = 1L, num = 1, str = "foo", bool = TRUE, date = today, time = now
)
expect_equal(res$int, rep(1L, 4))
expect_equal(res$num, rep(1.0, 4))
expect_equal(res$str, rep("foo", 4))
expect_equal(res$bool, rep(TRUE, 4))
expect_equal(res$date, rep(today, 4))
expect_equal(res$time, rep(now, 4))
})
test_that("summarise handles passing ...", {
df <- data.frame(x = 1:4)
f <- function(...) {
x1 <- 1
f1 <- function(x) x
summarise(df, ..., x1 = f1(x1))
}
g <- function(...) {
x2 <- 2
f(x2 = x2, ...)
}
h <- function(before = "before", ..., after = "after") {
g(before = before, ..., after = after)
}
res <- h(x3 = 3)
expect_equal(res$x1, 1)
expect_equal(res$x2, 2)
expect_equal(res$before, "before")
expect_equal(res$after, "after")
df <- tbl_df(df)
res <- h(x3 = 3)
expect_equal(res$x1, 1)
expect_equal(res$x2, 2)
expect_equal(res$before, "before")
expect_equal(res$after, "after")
df <- group_by(df, x)
res <- h(x3 = 3)
expect_equal(res$x1, rep(1, 4))
expect_equal(res$x2, rep(2, 4))
expect_equal(res$before, rep("before", 4))
expect_equal(res$after, rep("after", 4))
})
test_that("summarise propagate attributes (#194)", {
df <- data.frame(
b = rep(1:2, 2),
f = Sys.Date() + 1:4,
g = Sys.time() + 1:4,
stringsAsFactors = FALSE
) %>%
group_by(b)
min_ <- min
res <- summarise(df,
min_f = min(f),
max_f = max(f),
min_g = min(g),
max_g = max(g),
min__f = min_(f),
min__g = min_(g)
)
expect_equal(class(res$min_f) , "Date")
expect_equal(class(res$max_f) , "Date")
expect_equal(class(res$min__f), "Date")
expect_equal(class(res$min_g) , c("POSIXct", "POSIXt"))
expect_equal(class(res$max_g) , c("POSIXct", "POSIXt"))
expect_equal(class(res$min__g), c("POSIXct", "POSIXt"))
})
test_that("summarise strips names, but only if grouped (#2231, #2675)", {
data <- data_frame(a = 1:3) %>% summarise(b = setNames(nm = a[[1]]))
expect_equal(names(data$b), "1")
data <- data_frame(a = 1:3) %>% rowwise %>% summarise(b = setNames(nm = a))
expect_null(names(data$b))
data <- data_frame(a = c(1, 1, 2)) %>% group_by(a) %>% summarise(b = setNames(nm = a[[1]]))
expect_null(names(data$b))
})
test_that("summarise fails on missing variables", {
# error messages from rlang
expect_error(summarise(mtcars, a = mean(notthear)))
})
test_that("summarise fails on missing variables when grouping (#2223)", {
# error messages from rlang
expect_error(summarise(group_by(mtcars, cyl), a = mean(notthear)))
})
test_that("n() does not accept arguments", {
expect_error(
summarise(group_by(mtcars, cyl), n(hp)),
"`n()` does not take arguments",
fixed = TRUE
)
})
test_that("hybrid nests correctly", {
res <- group_by(mtcars, cyl) %>%
summarise(a = if (n() > 10) 1 else 2)
expect_equal(res$a, c(1, 2, 1))
res <- mtcars %>% summarise(a = if (n() > 10) 1 else 2)
expect_equal(res$a, 1)
})
test_that("hybrid min and max propagate attributes (#246)", {
x <- data.frame(
id = c(rep("a", 2), rep("b", 2)),
date = as.POSIXct(c("2014-01-13", "2014-01-14", "2014-01-15", "2014-01-16"), tz = "GMT")
)
y <- x %>% group_by(id) %>% summarise(max_date = max(date), min_date = min(date))
expect_true("tzone" %in% names(attributes(y$min_date)))
expect_true("tzone" %in% names(attributes(y$max_date)))
})
test_that("summarise can use newly created variable more than once", {
df <- data.frame(id = c(1, 1, 2, 2, 3, 3), a = 1:6) %>% group_by(id)
for (i in 1:10) {
res <- summarise(
df,
biggest = max(a),
smallest = min(a),
diff1 = biggest - smallest,
diff2 = smallest - biggest
)
expect_equal(res$diff1, -res$diff2)
}
})
test_that("summarise creates an empty data frame when no parameters are used", {
res <- summarise(mtcars)
expect_equal(res, data.frame())
})
test_that("integer overflow (#304)", {
groups <- rep(c("A", "B"), each = 3)
values <- rep(1e9, 6)
dat <- data.frame(groups, X1 = as.integer(values), X2 = values)
# now group and summarise
expect_warning(
res <- group_by(dat, groups) %>%
summarise(sum_integer = sum(X1), sum_numeric = sum(X2)),
"integer overflow"
)
expect_true(all(is.na(res$sum_integer)))
expect_equal(res$sum_numeric, rep(3e9, 2L))
})
test_that("summarise checks outputs (#300)", {
expect_error(
summarise(mtcars, mpg, cyl),
"Column `mpg` must be length 1 (a summary value), not 32",
fixed = TRUE
)
expect_error(
summarise(mtcars, mpg + cyl),
"Column `mpg + cyl` must be length 1 (a summary value), not 32",
fixed = TRUE
)
})
test_that("comment attribute is white listed (#346)", {
test <- data.frame(A = c(1, 1, 0, 0), B = c(2, 2, 3, 3))
comment(test$B) <- "2nd Var"
res <- group_by(test, A)
expect_equal(comment(res$B), "2nd Var")
})
test_that("AsIs class is white listed (#453)", {
test <- data.frame(A = c(1, 1, 0, 0), B = I(c(2, 2, 3, 3)))
res <- group_by(test, B)
expect_equal(res$B, test$B)
})
test_that("names attribute is not retained (#357)", {
df <- data.frame(x = c(1:3), y = letters[1:3])
df <- group_by(df, y)
m <- df %>% summarise(
a = length(x),
b = quantile(x, 0.5)
)
expect_equal(m$b, c(1, 2, 3))
expect_null(names(m$b))
})
test_that("na.rm is supported (#168)", {
df <- data.frame(
x = c(1:5, NA, 7:10),
y = rep(1:2, each = 5),
z = c(rnorm(5), NA, rnorm(4))
)
res <- df %>%
group_by(y) %>%
summarise(
mean_x = mean(x, na.rm = FALSE),
mean_z = mean(z, na.rm = FALSE),
min_x = min(x, na.rm = FALSE),
min_z = min(z, na.rm = FALSE)
)
expect_equal(res$mean_x[1], 3)
expect_true(is.na(res$mean_x[2]))
expect_equal(res$mean_z[1], mean(df$z[1:5]))
expect_true(is.na(res$mean_z[2]))
expect_equal(res$min_x[1], 1)
expect_true(is.na(res$min_x[2]))
expect_equal(res$min_z[1], min(df$z[1:5]))
expect_true(is.na(res$min_z[2]))
res <- df %>%
group_by(y) %>%
summarise(
mean_x = mean(x, na.rm = TRUE),
mean_z = mean(z, na.rm = TRUE),
min_x = min(x, na.rm = TRUE),
min_z = min(z, na.rm = TRUE)
)
expect_equal(res$mean_x[1], 3)
expect_equal(res$mean_x[2], 8.5)
expect_equal(res$mean_z[1], mean(df$z[1:5]))
expect_equal(res$mean_z[2], mean(df$z[7:10]))
expect_equal(res$min_x[1], 1)
expect_equal(res$min_x[2], 7)
expect_equal(res$min_z[1], min(df$z[1:5]))
expect_equal(res$min_z[2], min(df$z[7:10]))
})
test_that("summarise hybrid functions can use summarized variables", {
df <- data.frame(x = c(1:5, NA, 7:10), y = rep(1:2, each = 5)) %>% group_by(y)
res <- summarise(
df,
x = mean(x),
min = min(x),
max = max(x),
mean = mean(x),
var = var(x)
)
expect_identical(res$x, res$min)
expect_identical(res$x, res$max)
expect_identical(res$x, res$mean)
expect_identical(res$var, rep(NA_real_, 2))
})
test_that("LazySubset is not confused about input data size (#452)", {
res <- data.frame(a = c(10, 100)) %>% summarise(b = sum(a), c = sum(a) * 2)
expect_equal(res$b, 110)
expect_equal(res$c, 220)
})
test_that("nth, first, last promote dates and times (#509)", {
data <- data_frame(
ID = rep(letters[1:4], each = 5),
date = Sys.Date() + 1:20,
time = Sys.time() + 1:20,
number = rnorm(20)
)
res <- data %>%
group_by(ID) %>%
summarise(
date2 = nth(date, 2),
time2 = nth(time, 2),
first_date = first(date),
last_date = last(date),
first_time = first(time),
last_time = last(time)
)
expect_is(res$date2, "Date")
expect_is(res$first_date, "Date")
expect_is(res$last_date, "Date")
expect_is(res$time2, "POSIXct")
expect_is(res$first_time, "POSIXct")
expect_is(res$last_time, "POSIXct")
# error messages from rlang
expect_error(data %>% group_by(ID) %>% summarise(time2 = nth(times, 2)))
})
test_that("nth, first, last preserves factor data (#509)", {
dat <- data_frame(a = rep(seq(1, 20, 2), 3), b = as.ordered(a))
dat1 <- dat %>%
group_by(a) %>%
summarise(
der = nth(b, 2),
first = first(b),
last = last(b)
)
expect_is(dat1$der, "ordered")
expect_is(dat1$first, "ordered")
expect_is(dat1$last, "ordered")
expect_equal(levels(dat1$der), levels(dat$b))
})
test_that("nth handle negative value (#1584) ", {
df <- data.frame(
a = 1:10, b = 10:1,
g = rep(c(1, 2), c(4, 6))
) %>%
group_by(g)
res <- summarise(
df,
x1 = nth(a, -1L),
x2 = nth(a, -1L, order_by = b),
x3 = nth(a, -5L),
x4 = nth(a, -5L, order_by = b),
x5 = nth(a, -5L, default = 99),
x6 = nth(a, -5L, order_by = b, default = 99)
)
expect_equal(res$x1, c(4, 10))
expect_equal(res$x2, c(1, 5))
expect_true(is.na(res$x3[1]))
expect_equal(res$x3[2], 6)
expect_true(is.na(res$x4[1]))
expect_equal(res$x4[2], 9)
expect_equal(res$x5, c(99, 6))
expect_equal(res$x6, c(99, 9))
})
test_that("LazyGroupSubsets is robust about columns not from the data (#600)", {
foo <- data_frame(x = 1:10, y = 1:10)
# error messages from rlang
expect_error(foo %>% group_by(x) %>% summarise(first_y = first(z)))
})
test_that("can summarise first(x[-1]) (#1980)", {
expect_equal(
tibble(x = 1:3) %>% summarise(f = first(x[-1])),
tibble(f = 2L)
)
})
test_that("hybrid eval handles $ and @ (#645)", {
tmp <- expand.grid(a = 1:3, b = 0:1, i = 1:10)
g <- tmp %>% group_by(a)
f <- function(a, b) {
list(x = 1:10)
}
res <- g %>% summarise(
r = sum(b),
n = length(b),
p = f(r, n)$x[1]
)
expect_equal(names(res), c("a", "r", "n", "p"))
res <- tmp %>% summarise(
r = sum(b),
n = length(b),
p = f(r, n)$x[1]
)
expect_equal(names(res), c("r", "n", "p"))
})
test_that("argument order_by in last is flexible enough to handle more than just a symbol (#626)", {
res1 <- group_by(mtcars, cyl) %>%
summarise(
big = last(mpg[drat > 3], order_by = wt[drat > 3]),
small = first(mpg[drat > 3], order_by = wt[drat > 3]),
second = nth(mpg[drat > 3], 2, order_by = wt[drat > 3])
)
# turning off lazy eval
last. <- last
first. <- first
nth. <- nth
res2 <- group_by(mtcars, cyl) %>%
summarise(
big = last.(mpg[drat > 3], order_by = wt[drat > 3]),
small = first.(mpg[drat > 3], order_by = wt[drat > 3]),
second = nth.(mpg[drat > 3], 2, order_by = wt[drat > 3])
)
expect_equal(res1, res2)
})
test_that("min(., na.rm=TRUE) correctly handles Dates that are coded as REALSXP (#755)", {
dates <- as.Date(c("2014-01-01", "2013-01-01"))
dd <- data.frame(Dates = dates)
res <- summarise(dd, Dates = min(Dates, na.rm = TRUE))
expect_is(res$Dates, "Date")
expect_equal(res$Dates, as.Date("2013-01-01"))
})
test_that("nth handles expressions for n argument (#734)", {
df <- data.frame(x = c(1:4, 7:9, 13:19), y = sample(100:999, 14))
idx <- which(df$x == 16)
res <- df %>% summarize(abc = nth(y, n = which(x == 16)))
expect_equal(res$abc, df$y[idx])
})
test_that("summarise is not polluted by logical NA (#599)", {
dat <- data.frame(grp = rep(1:4, each = 2), val = c(NA, 2, 3:8))
Mean <- function(x, thresh = 2) {
res <- mean(x, na.rm = TRUE)
if (res > thresh) res else NA
}
res <- dat %>% group_by(grp) %>% summarise(val = Mean(val, thresh = 2))
expect_is(res$val, "numeric")
expect_true(is.na(res$val[1]))
})
test_that("summarise handles list output columns (#832)", {
df <- data_frame(x = 1:10, g = rep(1:2, each = 5))
res <- df %>% group_by(g) %>% summarise(y = list(x))
expect_equal(res$y[[1]], 1:5)
expect_equal(res$y[[2]], 6:10)
# just checking objects are not messed up internally
expect_equal(gp(res$y[[1]]), 0L)
expect_equal(gp(res$y[[2]]), 0L)
res <- df %>% group_by(g) %>% summarise(y = list(x + 1))
expect_equal(res$y[[1]], 1:5 + 1)
expect_equal(res$y[[2]], 6:10 + 1)
# just checking objects are not messed up internally
expect_equal(gp(res$y[[1]]), 0L)
expect_equal(gp(res$y[[2]]), 0L)
df <- data_frame(x = 1:10, g = rep(1:2, each = 5))
res <- df %>% summarise(y = list(x))
expect_equal(res$y[[1]], 1:10)
res <- df %>% summarise(y = list(x + 1))
expect_equal(res$y[[1]], 1:10 + 1)
})
test_that("summarise works with empty data frame (#1142)", {
df <- data.frame()
res <- df %>% summarise
expect_equal(nrow(res), 0L)
expect_equal(length(res), 0L)
})
test_that("n_distint uses na.rm argument", {
df <- data.frame(x = c(1:3, NA), g = rep(1:2, 2))
res <- summarise(df, n = n_distinct(x, na.rm = TRUE))
expect_equal(res$n, 3L)
res <- group_by(df, g) %>% summarise(n = n_distinct(x, na.rm = TRUE))
expect_equal(res$n, c(2L, 1L))
})
test_that("n_distinct front end supports na.rm argument (#1052)", {
x <- c(1:3, NA)
expect_equal(n_distinct(x, na.rm = TRUE), 3L)
})
test_that("n_distinct without arguments stops (#1957)", {
expect_error(
n_distinct(),
"Need at least one column for `n_distinct()`",
fixed = TRUE
)
})
test_that("hybrid evaluation does not take place for objects with a class (#1237)", {
mean.foo <- function(x) 42
df <- data_frame(x = structure(1:10, class = "foo"))
expect_equal(summarise(df, m = mean(x))$m[1], 42)
env <- environment()
Foo <- suppressWarnings(setClass("Foo", contains = "numeric", where = env))
suppressMessages(setMethod("mean", "Foo", function(x, ...) 42, where = env))
on.exit(removeClass("Foo", where = env))
df <- data.frame(x = Foo(c(1, 2, 3)))
expect_equal(summarise(df, m = mean(x))$m[1], 42)
})
test_that("summarise handles promotion of results (#893)", {
df <- structure(list(
price = c(580L, 650L, 630L, 706L, 1080L, 3082L, 3328L, 4229L, 1895L,
3546L, 752L, 13003L, 814L, 6115L, 645L, 3749L, 2926L, 765L,
1140L, 1158L),
cut = structure(c(2L, 4L, 4L, 2L, 3L, 2L, 2L, 3L, 4L, 1L, 1L, 3L, 2L,
4L, 3L, 3L, 1L, 2L, 2L, 2L),
.Label = c("Good", "Ideal", "Premium", "Very Good"),
class = "factor")),
row.names = c(NA, -20L),
.Names = c("price", "cut"),
class = "data.frame"
)
res <- df %>%
group_by(cut) %>%
select(price) %>%
summarise(price = median(price))
expect_is(res$price, "numeric")
})
test_that("summarise correctly handles logical (#1291)", {
test <- expand.grid(id = 1:2, type = letters[1:2], sample = 1:2) %>%
mutate(var = c(1, 0, 1, 1, 0, 0, 0, 1)) %>%
mutate(var_l = as.logical(var)) %>%
mutate(var_ch = as.character(var_l)) %>%
arrange(id, type, sample) %>%
group_by(id, type)
test_sum <- test %>%
ungroup() %>%
group_by(id, type) %>%
summarise(
anyvar = any(var == 1),
anyvar_l = any(var_l),
anyvar_ch = any(var_ch == "TRUE")
)
expect_equal(test_sum$anyvar, c(TRUE, TRUE, FALSE, TRUE))
})
test_that("summarise correctly handles NA groups (#1261)", {
tmp <- data_frame(
a = c(1, 1, 1, 2, 2),
b1 = NA_integer_,
b2 = NA_character_
)
res <- tmp %>% group_by(a, b1) %>% summarise(n())
expect_equal(nrow(res), 2L)
res <- tmp %>% group_by(a, b2) %>% summarise(n())
expect_equal(nrow(res), 2L)
})
test_that("n_distinct handles multiple columns (#1084)", {
df <- data.frame(
x = rep(1:4, each = 2),
y = rep(1:2, each = 4),
g = rep(1:2, 4)
)
res <- summarise(df, n = n_distinct(x, y))
expect_equal(res$n, 4L)
res <- group_by(df, g) %>% summarise(n = n_distinct(x, y))
expect_equal(res$n, c(4L, 4L))
df$x[3] <- df$y[7] <- NA
res <- summarise(df, n = n_distinct(x, y))
expect_equal(res$n, 6L)
res <- summarise(df, n = n_distinct(x, y, na.rm = TRUE))
expect_equal(res$n, 4L)
res <- group_by(df, g) %>% summarise(n = n_distinct(x, y))
expect_equal(res$n, c(4L, 4L))
res <- group_by(df, g) %>% summarise(n = n_distinct(x, y, na.rm = TRUE))
expect_equal(res$n, c(2L, 4L))
})
test_that("hybrid max works when not used on columns (#1369)", {
df <- data_frame(x = 1:1000)
y <- 1:10
expect_equal(summarise(df, z = max(y))$z, 10)
expect_equal(summarise(df, z = max(10))$z, 10)
})
test_that("min and max handle empty sets in summarise (#1481)", {
df <- data_frame(A = numeric())
res <- df %>% summarise(Min = min(A, na.rm = TRUE), Max = max(A, na.rm = TRUE))
expect_equal(res$Min, Inf)
expect_equal(res$Max, -Inf)
})
test_that("lead and lag behave correctly in summarise (#1434)", {
res <- mtcars %>%
group_by(cyl) %>%
summarise(
n = n(),
leadn = lead(n),
lagn = lag(n),
leadn10 = lead(n, default = 10),
lagn10 = lag(n, default = 10)
)
expect_true(all(is.na(res$lagn)))
expect_true(all(is.na(res$leadn)))
expect_true(all(res$lagn10 == 10))
expect_true(all(res$leadn10 == 10))
res <- mtcars %>%
rowwise() %>%
summarise(
n = n(),
leadn = lead(n),
lagn = lag(n),
leadn10 = lead(n, default = 10),
lagn10 = lag(n, default = 10)
)
expect_true(all(is.na(res$lagn)))
expect_true(all(is.na(res$leadn)))
expect_true(all(res$lagn10 == 10))
expect_true(all(res$leadn10 == 10))
})
# .data and .env tests now in test-hybrid-traverse.R
test_that("data.frame columns are supported in summarise (#1425)", {
df <- data.frame(x1 = rep(1:3, times = 3), x2 = 1:9)
df$x3 <- df %>% mutate(x3 = x2)
res <- df %>% group_by(x1) %>% summarise(nr = nrow(x3))
expect_true(all(res$nr == 3))
})
test_that("summarise handles min/max of already summarised variable (#1622)", {
df <- data.frame(
FIRST_DAY = rep(seq(as.POSIXct("2015-12-01", tz = "UTC"), length.out = 2, by = "days"), 2),
event = c("a", "a", "b", "b")
)
df_summary <- df %>%
group_by(event) %>%
summarise(FIRST_DAY = min(FIRST_DAY), LAST_DAY = max(FIRST_DAY))
expect_equal(df_summary$FIRST_DAY, df_summary$LAST_DAY)
})
test_that("group_by keeps classes (#1631)", {
df <- data.frame(a = 1, b = as.Date(NA)) %>%
group_by(a) %>%
summarize(c = min(b))
expect_equal(class(df$c), "Date")
df <- data.frame(a = 1, b = as.POSIXct(NA)) %>%
group_by(a) %>%
summarize(c = min(b))
expect_equal(class(df$c), c("POSIXct", "POSIXt"))
})
test_that("hybrid n_distinct falls back to R evaluation when needed (#1657)", {
dat3 <- data.frame(id = c(2, 6, 7, 10, 10))
res <- dat3 %>% summarise(n_unique = n_distinct(id[id > 6]))
expect_equal(res$n_unique, 2)
})
test_that("summarise() correctly coerces factors with different levels (#1678)", {
res <- data_frame(x = 1:3) %>%
group_by(x) %>%
summarise(
y = if (x == 1) "a" else "b",
z = factor(y)
)
expect_is(res$z, "factor")
expect_equal(levels(res$z), c("a", "b"))
expect_equal(as.character(res$z), c("a", "b", "b"))
})
test_that("summarise works if raw columns exist but are not involved (#1803)", {
df <- data_frame(a = 1:3, b = as.raw(1:3))
expect_equal(summarise(df, c = sum(a)), data_frame(c = 6L))
})
test_that("summarise fails gracefully on raw columns (#1803)", {
df <- data_frame(a = 1:3, b = as.raw(1:3))
expect_error(
summarise(df, c = b[[1]]),
"Column `c` is of unsupported type raw vector",
fixed = TRUE
)
})
test_that("dim attribute is stripped from grouped summarise (#1918)", {
df <- data.frame(a = 1:3, b = 1:3)
df_regular <- summarise(df, b = scale(b)[1, 1])
df_grouped <- summarise(group_by(df, a), b = scale(b))
df_rowwise <- summarise(rowwise(df), b = scale(b))
expect_null(dim(df$b))
expect_null(dim(df_grouped$b))
expect_null(dim(df_rowwise$b))
})
test_that("typing and NAs for grouped summarise (#1839)", {
expect_identical(
data_frame(id = 1L, a = NA_character_) %>%
group_by(id) %>%
summarise(a = a[[1]]) %>%
.$a,
NA_character_)
expect_identical(
data_frame(id = 1:2, a = c(NA, "a")) %>%
group_by(id) %>%
summarise(a = a[[1]]) %>%
.$a,
c(NA, "a"))
# Properly upgrade NA (logical) to character
expect_identical(
data_frame(id = 1:2, a = 1:2) %>%
group_by(id) %>%
summarise(a = ifelse(all(a < 2), NA, "yes")) %>%
.$a,
c(NA, "yes"))
expect_error(
data_frame(id = 1:2, a = list(1, "2")) %>%
group_by(id) %>%
summarise(a = a[[1]]) %>%
.$a,
"Column `a` can't promote group 1 to numeric",
fixed = TRUE
)
expect_identical(
data_frame(id = 1:2, a = list(1, "2")) %>%
group_by(id) %>%
summarise(a = a[1]) %>%
.$a,
list(1, "2"))
})
test_that("typing and NAs for rowwise summarise (#1839)", {
expect_identical(
data_frame(id = 1L, a = NA_character_) %>%
rowwise %>%
summarise(a = a[[1]]) %>%
.$a,
NA_character_)
expect_identical(
data_frame(id = 1:2, a = c(NA, "a")) %>%
rowwise %>%
summarise(a = a[[1]]) %>%
.$a,
c(NA, "a"))
# Properly promote NA (logical) to character
expect_identical(
data_frame(id = 1:2, a = 1:2) %>%
group_by(id) %>%
summarise(a = ifelse(all(a < 2), NA, "yes")) %>%
.$a,
c(NA, "yes"))
expect_error(
data_frame(id = 1:2, a = list(1, "2")) %>%
rowwise %>%
summarise(a = a[[1]]) %>%
.$a,
"Column `a` can't promote group 1 to numeric",
fixed = TRUE
)
expect_error(
data_frame(id = 1:2, a = list(1, "2")) %>%
rowwise %>%
summarise(a = a[1]) %>%
.$a,
"Column `a` can't promote group 1 to numeric",
fixed = TRUE
)
})
test_that("calculating an ordered factor preserves order (#2200)", {
test_df <- tibble(
id = c("a", "b"),
val = 1:2
)
ret <- group_by(test_df, id) %>%
summarize(level = ordered(val))
expect_s3_class(ret$level, "ordered")
expect_equal(levels(ret$level), c("1", "2"))
})
test_that("min, max preserves ordered factor data (#2200)", {
test_df <- tibble(
id = rep(c("a", "b"), 2),
ord = ordered(c("A", "B", "B", "A"), levels = c("A", "B"))
)
ret <- group_by(test_df, id) %>%
summarize(
min_ord = min(ord),
max_ord = max(ord)
)
expect_s3_class(ret$min_ord, "ordered")
expect_s3_class(ret$max_ord, "ordered")
expect_equal(levels(ret$min_ord), levels(test_df$ord))
expect_equal(levels(ret$max_ord), levels(test_df$ord))
})
test_that("ungrouped summarise() uses summary variables correctly (#2404)", {
df <- tibble::as_tibble(seq(1:10))
out <- df %>% summarise(value = mean(value), sd = sd(value))
expect_equal(out$value, 5.5)
expect_equal(out$sd, NA_real_)
})
test_that("proper handling of names in summarised list columns (#2231)", {
d <- data_frame(x = rep(1:3, 1:3), y = 1:6, names = letters[1:6])
res <- d %>% group_by(x) %>% summarise(y = list(setNames(y, names)))
expect_equal(names(res$y[[1]]), letters[[1]])
expect_equal(names(res$y[[2]]), letters[2:3])
expect_equal(names(res$y[[3]]), letters[4:6])
})
test_that("proper handling of NA factors (#2588)", {
df <- tibble(
x = c(1, 1, 2, 2, 3, 3),
y = factor(c(NA, NA, NA, "2", "3", "3"))
)
ret <- df %>% group_by(x) %>% summarise(y = y[1])
expect_identical(as.character(ret$y), c(NA, NA, "3"))
})
test_that("can refer to previously summarised symbols", {
expect_identical(summarise(group_by(mtcars, cyl), x = 1, z = x)[2:3], tibble(x = c(1, 1, 1), z = x))
expect_identical(summarise(group_by(mtcars, cyl), x = n(), z = x)[2:3], tibble(x = c(11L, 7L, 14L), z = x))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.