tests/testthat/test-summarise.R

test_that("can use freshly create variables (#138)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- tibble(x = 1:10)
  out <- duckplyr_summarise(df, y = mean(x), z = y + 1)
  expect_equal(out$y, 5.5)
  expect_equal(out$z, 6.5)
})

test_that("inputs are recycled (deprecated in 1.1.0)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  local_options(lifecycle_verbosity = "quiet")

  expect_equal(
    tibble() %>% duckplyr_summarise(x = 1, y = 1:3, z = 1),
    tibble(x = 1, y = 1:3, z = 1)
  )

  gf <- duckplyr_group_by(tibble(a = 1:2), a)
  expect_equal(
    gf %>% duckplyr_summarise(x = 1, y = 1:3, z = 1),
    tibble(a = rep(1:2, each = 3), x = 1, y = c(1:3, 1:3), z = 1) %>% duckplyr_group_by(a)
  )
  expect_equal(
    gf %>% duckplyr_summarise(x = seq_len(a), y = 1),
    tibble(a = c(1L, 2L, 2L), x = c(1L, 1L, 2L), y = 1) %>% duckplyr_group_by(a)
  )
})

test_that("works with empty data frames", {
  skip("TODO duckdb")
  # 0 rows
  df <- tibble(x = integer())
  expect_equal(duckplyr_summarise(df), tibble(.rows = 1))
  expect_equal(duckplyr_summarise(df, n = n(), sum = sum(x)), tibble(n = 0, sum = 0))

  # 0 cols
  df <- tibble(.rows = 10)
  expect_equal(duckplyr_summarise(df), tibble(.rows = 1))
  expect_equal(duckplyr_summarise(df, n = n()), tibble(n = 10))
})

test_that("works with grouped empty data frames", {
  df <- tibble(x = integer())

  expect_equal(
    df %>% duckplyr_group_by(x) %>% duckplyr_summarise(y = 1L),
    tibble(x = integer(), y = integer())
  )
  expect_equal(
    df %>% duckplyr_rowwise(x) %>% duckplyr_summarise(y = 1L),
    duckplyr_group_by(tibble(x = integer(), y = integer()), x)
  )
})

test_that("no expressions yields grouping data", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- tibble(x = 1:2, y = 1:2)
  gf <- duckplyr_group_by(df, x)

  expect_equal(duckplyr_summarise(df), tibble(.rows = 1))
  expect_equal(duckplyr_summarise(gf), tibble(x = 1:2))

  expect_equal(duckplyr_summarise(df, !!!list()), tibble(.rows = 1))
  expect_equal(duckplyr_summarise(gf, !!!list()), tibble(x = 1:2))
})

test_that("doesn't preserve attributes", {
  df <- structure(
    data.frame(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)),
    meta = "this is important"
  )

  out <- df %>% duckplyr_summarise(n = n())
  expect_null(attr(out, "res"))

  out <- df %>% duckplyr_group_by(g1) %>% duckplyr_summarise(n = n())
  expect_null(attr(out, "res"))
})

test_that("strips off subclass", {
  # We consider the data frame returned by `duckplyr_summarise()` to be
  # "fundamentally a new data frame"

  df <- new_data_frame(list(a = 1), class = "myclass")
  out <- df %>% duckplyr_summarise(n = n())
  expect_s3_class(out, "data.frame", exact = TRUE)
  out <- df %>% duckplyr_summarise(.by = a, n = n())
  expect_s3_class(out, "data.frame", exact = TRUE)

  df <- new_tibble(list(a = 1), class = "myclass")
  out <- df %>% duckplyr_summarise(n = n())
  expect_s3_class(out, class(tibble()), exact = TRUE)
  out <- df %>% duckplyr_summarise(.by = a, n = n())
  expect_s3_class(out, class(tibble()), exact = TRUE)

  gdf <- duckplyr_group_by(tibble(a = 1), a)
  df <- gdf
  class(df) <- c("myclass", class(gdf))
  out <- df %>% duckplyr_summarise(n = n(), .groups = "drop")
  expect_s3_class(out, class(tibble()), exact = TRUE)
  out <- df %>% duckplyr_summarise(n = n(), .groups = "keep")
  expect_s3_class(out, class(gdf), exact = TRUE)
})

test_that("works with unquoted values", {
  df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5)
  expect_equal(duckplyr_summarise(df, out = !!1), tibble(out = 1))
  expect_equal(duckplyr_summarise(df, out = !!quo(1)), tibble(out = 1))
})

test_that("formulas are evaluated in the right environment (#3019)", {
  out <- mtcars %>% duckplyr_summarise(fn = list(rlang::as_function(~ list(~foo, environment()))))
  out <- out$fn[[1]]()
  expect_identical(environment(out[[1]]), out[[2]])
})

test_that("unnamed data frame results with 0 columns are ignored (#5084)", {
  df1 <- tibble(x = 1:2)
  expect_equal(df1 %>% duckplyr_group_by(x) %>% duckplyr_summarise(data.frame()), df1)
  expect_equal(df1 %>% duckplyr_group_by(x) %>% duckplyr_summarise(data.frame(), y = 65), duckplyr_mutate(df1, y = 65))
  expect_equal(df1 %>% duckplyr_group_by(x) %>% duckplyr_summarise(y = 65, data.frame()), duckplyr_mutate(df1, y = 65))

  df2 <- tibble(x = 1:2, y = 3:4)
  expect_equal(df2 %>% duckplyr_group_by(x) %>% duckplyr_summarise(data.frame()), df1)
  expect_equal(df2 %>% duckplyr_group_by(x) %>% duckplyr_summarise(data.frame(), z = 98), duckplyr_mutate(df1, z = 98))
  expect_equal(df2 %>% duckplyr_group_by(x) %>% duckplyr_summarise(z = 98, data.frame()), duckplyr_mutate(df1, z = 98))

  # This includes unnamed data frames that have 0 columns but >0 rows.
  # Noted when working on (#6509).
  empty3 <- new_tibble(list(), nrow = 3L)
  expect_equal(df1 %>% duckplyr_summarise(empty3), new_tibble(list(), nrow = 1L))
  expect_equal(df1 %>% duckplyr_summarise(empty3, y = mean(x)), df1 %>% duckplyr_summarise(y = mean(x)))
  expect_equal(df1 %>% duckplyr_group_by(x) %>% duckplyr_summarise(empty3), df1)
  expect_equal(df1 %>% duckplyr_group_by(x) %>% duckplyr_summarise(empty3, y = x + 1), duckplyr_mutate(df1, y = x + 1))
})

test_that("named data frame results with 0 columns participate in recycling (#6509)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  local_options(lifecycle_verbosity = "quiet")

  df <- tibble(x = 1:3)
  gdf <- duckplyr_group_by(df, x)

  empty <- tibble()
  expect_identical(duckplyr_summarise(df, empty = empty), tibble(empty = empty))
  expect_identical(duckplyr_summarise(df, x = sum(x), empty = empty), tibble(x = integer(), empty = empty))
  expect_identical(duckplyr_summarise(df, empty = empty, x = sum(x)), tibble(empty = empty, x = integer()))

  empty3 <- new_tibble(list(), nrow = 3L)
  expect_identical(duckplyr_summarise(df, empty = empty3), tibble(empty = empty3))
  expect_identical(duckplyr_summarise(df, x = sum(x), empty = empty3), tibble(x = c(6L, 6L, 6L), empty = empty3))
  expect_identical(duckplyr_summarise(df, empty = empty3, x = sum(x)), tibble(empty = empty3, x = c(6L, 6L, 6L)))

  expect_identical(
    duckplyr_summarise(gdf, empty = empty, .groups = "drop"),
    tibble(x = integer(), empty = empty)
  )
  expect_identical(
    duckplyr_summarise(gdf, y = x + 1L, empty = empty, .groups = "drop"),
    tibble(x = integer(), y = integer(), empty = empty)
  )
  expect_identical(
    duckplyr_summarise(gdf, empty = empty3, .groups = "drop"),
    tibble(x = vec_rep_each(1:3, 3), empty = vec_rep(empty3, 3))
  )
  expect_identical(
    duckplyr_summarise(gdf, y = x + 1L, empty = empty3, .groups = "drop"),
    tibble(x = vec_rep_each(1:3, 3), y = vec_rep_each(2:4, 3), empty = vec_rep(empty3, 3))
  )
})

test_that("can't overwrite column active bindings (#6666)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  skip_if(getRversion() < "3.6.3", message = "Active binding error changed")

  df <- tibble(g = c(1, 1, 2, 2), x = 1:4)
  gdf <- duckplyr_group_by(df, g)

  # The error seen here comes from trying to `<-` to an active binding when
  # the active binding function has 0 arguments.
  expect_snapshot(error = TRUE, {
    duckplyr_summarise(df, y = {
      x <<- x + 2L
      mean(x)
    })
  })
  expect_snapshot(error = TRUE, {
    duckplyr_summarise(df, .by = g, y = {
      x <<- x + 2L
      mean(x)
    })
  })
  expect_snapshot(error = TRUE, {
    duckplyr_summarise(gdf, y = {
      x <<- x + 2L
      mean(x)
    })
  })
})

test_that("assigning with `<-` doesn't affect the mask (#6666)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- tibble(g = c(1, 1, 2, 2), x = 1:4)
  gdf <- duckplyr_group_by(df, g)

  out <- duckplyr_summarise(df, .by = g, y = {
    x <- x + 4L
    mean(x)
  })
  expect_identical(out$y, c(5.5, 7.5))

  out <- duckplyr_summarise(gdf, y = {
    x <- x + 4L
    mean(x)
  })
  expect_identical(out$y, c(5.5, 7.5))
})

test_that("duckplyr_summarise() correctly auto-names expressions (#6741)", {
  df <- tibble(a = 1:3)
  expect_identical(duckplyr_summarise(df, min(-a)), tibble("min(-a)" = -3L))
})

# grouping ----------------------------------------------------------------

test_that("peels off a single layer of grouping", {
  df <- tibble(x = rep(1:4, each = 4), y = rep(1:2, each = 8), z = runif(16))
  gf <- df %>% duckplyr_group_by(x, y)
  expect_equal(duckplyr_group_vars(duckplyr_summarise(gf)), "x")
  expect_equal(duckplyr_group_vars(duckplyr_summarise(duckplyr_summarise(gf))), character())
})

test_that("correctly reconstructs groups", {
  d <- tibble(x = 1:4, g1 = rep(1:2, 2), g2 = 1:4) %>%
    duckplyr_group_by(g1, g2) %>%
    duckplyr_summarise(x = x + 1)
  expect_equal(group_rows(d), list_of(1:2, 3:4))
})

test_that("can modify grouping variables", {
  df <- tibble(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2))
  gf <- duckplyr_group_by(df, a, b)

  i <- count_regroups(out <- duckplyr_summarise(gf, a = a + 1))
  expect_equal(i, 1)
  expect_equal(out$a, c(2, 2, 3, 3))
})

test_that("summarise returns a row for zero length groups", {
  df <- tibble(
    e = 1,
    f = factor(c(1, 1, 2, 2), levels = 1:3),
    g = c(1, 1, 2, 2),
    x = c(1, 2, 1, 4)
  )
  df <- duckplyr_group_by(df, e, f, g, .drop = FALSE)

  expect_equal( nrow(duckplyr_summarise(df, z = n())), 3L)
})

test_that("summarise respects zero-length groups (#341)", {
  df <- tibble(x = factor(rep(1:3, each = 10), levels = 1:4))

  out <- df %>%
    duckplyr_group_by(x, .drop = FALSE) %>%
    duckplyr_summarise(n = n())

  expect_equal(out$n, c(10L, 10L, 10L, 0L))
})

# vector types ----------------------------------------------------------

test_that("summarise allows names (#2675)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  data <- tibble(a = 1:3) %>% duckplyr_summarise(b = c("1" = a[[1]]))
  expect_equal(names(data$b), "1")

  data <- tibble(a = 1:3) %>% duckplyr_rowwise() %>% duckplyr_summarise(b = setNames(nm = a))
  expect_equal(names(data$b), c("1", "2", "3"))

  data <- tibble(a = c(1, 1, 2)) %>% duckplyr_group_by(a) %>% duckplyr_summarise(b = setNames(nm = a[[1]]))
  expect_equal(names(data$b), c("1", "2"))

  res <- data.frame(x = c(1:3), y = letters[1:3]) %>%
    duckplyr_group_by(y) %>%
    duckplyr_summarise(
      a = length(x),
      b = quantile(x, 0.5)
    )
  expect_equal(res$b, c("50%" = 1, "50%" = 2, "50%" = 3))
})

test_that("summarise handles list output columns (#832)", {
  df <- tibble(x = 1:10, g = rep(1:2, each = 5))
  res <- df %>% duckplyr_group_by(g) %>% duckplyr_summarise(y = list(x))
  expect_equal(res$y[[1]], 1:5)

  # preserving names
  d <- tibble(x = rep(1:3, 1:3), y = 1:6, names = letters[1:6])
  res <- d %>% duckplyr_group_by(x) %>% duckplyr_summarise(y = list(setNames(y, names)))
  expect_equal(names(res$y[[1]]), letters[[1]])
})

test_that("summarise coerces types across groups", {
  gf <- duckplyr_group_by(tibble(g = 1:2), g)

  out <- duckplyr_summarise(gf, x = if (g == 1) NA else "x")
  expect_type(out$x, "character")

  out <- duckplyr_summarise(gf, x = if (g == 1L) NA else 2.5)
  expect_type(out$x, "double")
})

test_that("unnamed tibbles are unpacked (#2326)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- tibble(x = 2)
  out <- duckplyr_summarise(df, tibble(y = x * 2, z = 3))
  expect_equal(out$y, 4)
  expect_equal(out$z, 3)
})

test_that("named tibbles are packed (#2326)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- tibble(x = 2)
  out <- duckplyr_summarise(df, df = tibble(y = x * 2, z = 3))
  expect_equal(out$df, tibble(y = 4, z = 3))
})

test_that("duckplyr_summarise(.groups=) in global environment", {
  skip("TODO duckdb")
  expect_message(eval_bare(
    expr(data.frame(x = 1, y = 2) %>% duckplyr_group_by(x, y) %>% duckplyr_summarise()),
    env(global_env())
  ))
  expect_message(eval_bare(
    expr(data.frame(x = 1, y = 2) %>% duckplyr_rowwise(x, y) %>% duckplyr_summarise()),
    env(global_env())
  ))
})

test_that("duckplyr_summarise(.groups=)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  df <- data.frame(x = 1, y = 2)
  expect_equal(df %>% duckplyr_summarise(z = 3, .groups= "rowwise"), duckplyr_rowwise(data.frame(z = 3)))

  gf <- df %>% duckplyr_group_by(x, y)
  expect_equal(gf %>% duckplyr_summarise() %>% duckplyr_group_vars(), "x")
  expect_equal(gf %>% duckplyr_summarise(.groups = "drop_last") %>% duckplyr_group_vars(), "x")
  expect_equal(gf %>% duckplyr_summarise(.groups = "drop") %>% duckplyr_group_vars(), character())
  expect_equal(gf %>% duckplyr_summarise(.groups = "keep") %>% duckplyr_group_vars(), c("x", "y"))

  rf <- df %>% duckplyr_rowwise(x, y)
  expect_equal(rf %>% duckplyr_summarise(.groups = "drop") %>% duckplyr_group_vars(), character())
  expect_equal(rf %>% duckplyr_summarise(.groups = "keep") %>% duckplyr_group_vars(), c("x", "y"))
})

test_that("duckplyr_summarise() casts data frame results to common type (#5646)", {
  df <- data.frame(x = 1:2, g = 1:2) %>% duckplyr_group_by(g)

  res <- df %>%
    duckplyr_summarise(if (g == 1) data.frame(y = 1) else data.frame(y = 1, z = 2), .groups = "drop")
  expect_equal(res$z, c(NA, 2))
})

test_that("duckplyr_summarise() silently skips when all results are NULL (#5708)", {
  df <- data.frame(x = 1:2, g = 1:2) %>% duckplyr_group_by(g)

  expect_equal(duckplyr_summarise(df, x = NULL), duckplyr_summarise(df))
  expect_error(duckplyr_summarise(df, x = if(g == 1) 42))
})

# .by ----------------------------------------------------------------------

test_that("can group transiently using `.by`", {
  df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3))

  out <- duckplyr_summarise(df, x = mean(x), .by = g)

  expect_identical(out$g, c(1, 2))
  expect_identical(out$x, c(3, 2))
  expect_s3_class(out, class(df), exact = TRUE)
})

test_that("transient grouping retains bare data.frame class", {
  df <- data.frame(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3))
  out <- duckplyr_summarise(df, x = mean(x), .by = g)
  expect_s3_class(out, class(df), exact = TRUE)
})

test_that("transient grouping drops data frame attributes", {
  # Because `duckplyr_summarise()` theoretically creates a "new" data frame

  # With data.frames or tibbles
  df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 1))
  tbl <- as_tibble(df)

  attr(df, "foo") <- "bar"
  attr(tbl, "foo") <- "bar"

  out <- duckplyr_summarise(df, x = mean(x), .by = g)
  expect_null(attr(out, "foo"))

  out <- duckplyr_summarise(tbl, x = mean(x), .by = g)
  expect_null(attr(out, "foo"))
})

test_that("transient grouping orders by first appearance", {
  df <- tibble(g = c(2, 1, 2, 0), x = c(4, 2, 8, 5))

  out <- duckplyr_summarise(df, x = mean(x), .by = g)

  expect_identical(out$g, c(2, 1, 0))
  expect_identical(out$x, c(6, 2, 5))
})

test_that("can't use `.by` with `.groups`", {
  df <- tibble(x = 1)

  expect_snapshot(error = TRUE, {
    duckplyr_summarise(df, .by = x, .groups = "drop")
  })
})

test_that("catches `.by` with grouped-df", {
  df <- tibble(x = 1)
  gdf <- duckplyr_group_by(df, x)

  expect_snapshot(error = TRUE, {
    duckplyr_summarise(gdf, .by = x)
  })
})

test_that("catches `.by` with rowwise-df", {
  df <- tibble(x = 1)
  rdf <- duckplyr_rowwise(df)

  expect_snapshot(error = TRUE, {
    duckplyr_summarise(rdf, .by = x)
  })
})

# errors -------------------------------------------------------------------

test_that("duckplyr_summarise() preserves the call stack on error (#5308)", {
  foobar <- function() stop("foo")

  stack <- NULL
  expect_error(
    withCallingHandlers(
      error = function(...) stack <<- sys.calls(),
      duckplyr_summarise(mtcars, foobar())
    )
  )

  expect_true(some(stack, is_call, "foobar"))
})

test_that("`duckplyr_summarise()` doesn't allow data frames with missing or empty names (#6758)", {
  df1 <- new_data_frame(set_names(list(1), ""))
  df2 <- new_data_frame(set_names(list(1), NA_character_))

  expect_snapshot(error = TRUE, {
    duckplyr_summarise(df1)
  })
  expect_snapshot(error = TRUE, {
    duckplyr_summarise(df2)
  })
})

test_that("duckplyr_summarise() gives meaningful errors", {
  skip("TODO duckdb")
  eval(envir = global_env(), expr({
    expect_snapshot({
      # Messages about .groups=
      tibble(x = 1, y = 2) %>% duckplyr_group_by(x, y) %>% duckplyr_summarise()
      tibble(x = 1, y = 2) %>% duckplyr_rowwise(x, y) %>% duckplyr_summarise()
      tibble(x = 1, y = 2) %>% duckplyr_rowwise() %>% duckplyr_summarise()
    })
  }))

  eval(envir = global_env(), expr({
    expect_snapshot({
      # unsupported type
      (expect_error(
                      tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>%
                        duckplyr_summarise(a = rlang::env(a = 1))
      ))
      (expect_error(
                      tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>%
                        duckplyr_group_by(x, y) %>%
                        duckplyr_summarise(a = rlang::env(a = 1))
      ))
      (expect_error(
                      tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>%
                        duckplyr_rowwise() %>%
                        duckplyr_summarise(a = lm(y ~ x))
      ))

      # mixed types
      (expect_error(
                      tibble(id = 1:2, a = list(1, "2")) %>%
                        duckplyr_group_by(id) %>%
                        duckplyr_summarise(a = a[[1]])
      ))
      (expect_error(
                      tibble(id = 1:2, a = list(1, "2")) %>%
                        duckplyr_rowwise() %>%
                        duckplyr_summarise(a = a[[1]])
      ))

      # incompatible size
      (expect_error(
                      tibble(z = 1) %>%
                        duckplyr_summarise(x = 1:3, y = 1:2)
      ))
      (expect_error(
                      tibble(z = 1:2) %>%
                        duckplyr_group_by(z) %>%
                        duckplyr_summarise(x = 1:3, y = 1:2)
      ))
      (expect_error(
                      tibble(z = c(1, 3)) %>%
                        duckplyr_group_by(z) %>%
                        duckplyr_summarise(x = seq_len(z), y = 1:2)
      ))

      # mixed nulls
      (expect_error(
                      data.frame(x = 1:2, g = 1:2) %>% duckplyr_group_by(g) %>% duckplyr_summarise(x = if(g == 1) 42)
      ))
      (expect_error(
                      data.frame(x = 1:2, g = 1:2) %>% duckplyr_group_by(g) %>% duckplyr_summarise(x = if(g == 2) 42)
      ))

      # .data pronoun
      (expect_error(duckplyr_summarise(tibble(a = 1), c = .data$b)))
      (expect_error(duckplyr_summarise(duckplyr_group_by(tibble(a = 1:3), a), c = .data$b)))

      # Duplicate column names
      (expect_error(
                      tibble(x = 1, x = 1, .name_repair = "minimal") %>% duckplyr_summarise(x)
      ))

      # Not glue()ing
      (expect_error(tibble() %>% duckplyr_summarise(stop("{"))))
      (expect_error(
                      tibble(a = 1, b="{value:1, unit:a}") %>% duckplyr_group_by(b) %>% duckplyr_summarise(a = stop("!"))
      ))
    })
  }))

})

test_that("non-summary results are deprecated in favor of `duckplyr_reframe()` (#6382)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  local_options(lifecycle_verbosity = "warning")

  df <- tibble(g = c(1, 1, 2), x = 1:3)
  gdf <- duckplyr_group_by(df, g)
  rdf <- duckplyr_rowwise(df)

  expect_snapshot({
    out <- duckplyr_summarise(df, x = which(x < 3))
  })
  expect_identical(out$x, 1:2)

  expect_snapshot({
    out <- duckplyr_summarise(df, x = which(x < 3), .by = g)
  })
  expect_identical(out$g, c(1, 1))
  expect_identical(out$x, 1:2)

  # First group returns size 2 summary
  expect_snapshot({
    out <- duckplyr_summarise(gdf, x = which(x < 3))
  })
  expect_identical(out$g, c(1, 1))
  expect_identical(out$x, 1:2)

  # Last row returns size 0 summary
  expect_snapshot({
    out <- duckplyr_summarise(rdf, x = which(x < 3))
  })
  expect_identical(out$x, c(1L, 1L))
})
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.