# ------------------------------------------------------------------------------
# pick() + duckplyr_mutate()
test_that("can pick columns from the data", {
df <- tibble(x1 = 1, y = 2, x2 = 3, z = 4)
expect <- df[c("z", "x1", "x2")]
out <- duckplyr_mutate(df, sel = pick(z, starts_with("x")))
expect_identical(out$sel, expect)
out <- duckplyr_mutate(df, sel = pick_wrapper(z, starts_with("x")))
expect_identical(out$sel, expect)
})
test_that("can use namespaced call to `pick()`", {
df <- tibble(x = 1, y = "y")
expect_identical(
duckplyr_mutate(df, z = dplyr::pick(where(is.character))),
duckplyr_mutate(df, z = pick(where(is.character)))
)
})
test_that("returns separate data frames for each group", {
fn <- function(x) {
x[["x"]] + mean(x[["z"]])
}
df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5, z = 11:15)
gdf <- duckplyr_group_by(df, g)
expect <- duckplyr_mutate(gdf, res = x + mean(z))
out <- duckplyr_mutate(gdf, res = fn(pick(x, z)))
expect_identical(out, expect)
out <- duckplyr_mutate(gdf, res = fn(pick_wrapper(x, z)))
expect_identical(out, expect)
})
test_that("returns a tibble", {
df <- data.frame(x = 1)
out <- duckplyr_mutate(df, y = pick(x))
expect_s3_class(out$y, "tbl_df")
out <- duckplyr_mutate(df, y = pick_wrapper(x))
expect_s3_class(out$y, "tbl_df")
})
test_that("with `duckplyr_rowwise()` data, leaves list-cols unwrapped (#5951, #6264)", {
# Because this most closely mimics macro expansion of:
# pick(x) -> tibble(x = x)
df <- tibble(x = list(1, 2:3, 4:5), y = 1:3)
rdf <- duckplyr_rowwise(df)
expect_snapshot(error = TRUE, {
duckplyr_mutate(rdf, z = pick(x, y))
})
expect_snapshot(error = TRUE, {
duckplyr_mutate(rdf, z = pick_wrapper(x, y))
})
})
test_that("selectors won't select grouping columns", {
df <- tibble(g = 1, x = 2)
gdf <- duckplyr_group_by(df, g)
out <- duckplyr_mutate(gdf, y = pick(everything()))
expect_named(out$y, "x")
out <- duckplyr_mutate(gdf, y = pick_wrapper(everything()))
expect_named(out$y, "x")
})
test_that("selectors won't select rowwise 'grouping' columns", {
df <- tibble(g = 1, x = 2)
rdf <- duckplyr_rowwise(df, g)
out <- duckplyr_mutate(rdf, y = pick(everything()))
expect_named(out$y, "x")
out <- duckplyr_mutate(rdf, y = pick_wrapper(everything()))
expect_named(out$y, "x")
})
test_that("can't explicitly select grouping columns (#5460)", {
# Related to removing the mask layer from the quosure environments
df <- tibble(g = 1, x = 2)
gdf <- duckplyr_group_by(df, g)
expect_snapshot(error = TRUE, {
duckplyr_mutate(gdf, y = pick(g))
})
expect_snapshot(error = TRUE, {
duckplyr_mutate(gdf, y = pick_wrapper(g))
})
})
test_that("`all_of()` is evaluated in the correct environment (#5460)", {
# Related to removing the mask layer from the quosure environments
df <- tibble(g = 1, x = 2, y = 3)
# We expect an "object not found" error, but we don't control that
# so we aren't going to snapshot it, especially since the call reported
# by those kinds of errors changed in R 4.3.
expect_error(duckplyr_mutate(df, z = pick(all_of(y))))
expect_error(duckplyr_mutate(df, z = pick_wrapper(all_of(y))))
y <- "x"
expect <- df["x"]
out <- duckplyr_mutate(df, z = pick(all_of(y)))
expect_identical(out$z, expect)
out <- duckplyr_mutate(df, z = pick_wrapper(all_of(y)))
expect_identical(out$z, expect)
})
test_that("empty selections create 1 row tibbles (#6685)", {
# This makes the result recyclable against other inputs, and ensures that
# a `pick(NULL)` call can be used in a `duckplyr_group_by()` wrapper to
# "group by nothing". It is a slight departure from viewing `pick()` as a
# pure macro expansion into `tibble()`. Instead it is more like an expansion
# into:
# size <- vctrs::vec_size_common(..., .absent = 1L)
# out <- vctrs::vec_recycle_common(..., .size = size)
# tibble::new_tibble(out, nrow = size)
df <- tibble(g = c(1, 1, 2), x = c(2, 3, 4))
gdf <- duckplyr_group_by(df, g)
out <- duckplyr_mutate(gdf, y = pick(starts_with("foo")))
expect_identical(out$y, new_tibble(list(), nrow = 3L))
out <- duckplyr_mutate(gdf, y = pick_wrapper(starts_with("foo")))
expect_identical(out$y, new_tibble(list(), nrow = 3L))
})
test_that("must supply at least one selector to `pick()`", {
df <- tibble(x = c(2, 3, 4))
expect_snapshot(error = TRUE, {
duckplyr_mutate(df, y = pick())
})
expect_snapshot(error = TRUE, {
duckplyr_mutate(df, y = pick_wrapper())
})
})
test_that("the tidyselection and column extraction are evaluated on the current data", {
# Because `pick()` is viewed as macro expansion, and the expansion inherits
# typical dplyr semantics
df <- tibble(g = c(1, 2, 2), x = 1:3)
gdf <- duckplyr_group_by(df, g)
expect_snapshot(error = TRUE, {
# Expands to `tibble(x = x)`
duckplyr_mutate(gdf, x = NULL, y = pick(x))
})
expect_snapshot(error = TRUE, {
# Does actual `eval_select()` call per group
duckplyr_mutate(gdf, x = NULL, y = pick_wrapper(x))
})
# Can select newly created columns
out <- duckplyr_mutate(gdf, y = x + 1L, z = pick(x, y))
expect_identical(out[c("x", "y")], out$z)
out <- duckplyr_mutate(gdf, y = x + 1L, z = pick_wrapper(x, y))
expect_identical(out[c("x", "y")], out$z)
df <- tibble(x = 1)
expect <- tibble(x = tibble(x = tibble(x = 1)), y = tibble(x = x))
out <- duckplyr_mutate(df, x = pick(x), x = pick(x), y = pick(x))
expect_identical(out, expect)
out <- duckplyr_mutate(df, x = pick_wrapper(x), x = pick_wrapper(x), y = pick_wrapper(x))
expect_identical(out, expect)
})
test_that("can call different `pick()` expressions in different groups", {
df <- tibble(g = c(1, 2), x = 1:2, y = 3:4)
gdf <- duckplyr_group_by(df, g)
expect <- tibble(x = c(1L, NA), y = c(NA, 4L))
out <- duckplyr_mutate(gdf, z = if (g == 1) pick(x) else pick(y))
expect_identical(out$z, expect)
out <- duckplyr_mutate(gdf, z = if (g == 1) pick_wrapper(x) else pick_wrapper(y))
expect_identical(out$z, expect)
})
test_that("can call `pick()` from a user defined function", {
df <- tibble(a = 1, b = 2, c = 3)
gdf <- duckplyr_group_by(df, a)
# Hardcoded variables in expression
my_pick <- function() pick(a, c)
out <- duckplyr_mutate(df, d = my_pick())
expect_identical(out$d, df[c("a", "c")])
# Hardcoded `all_of()` using a local variable
my_pick <- function() {
x <- c("a", "c")
pick(all_of(x))
}
out <- duckplyr_mutate(df, d = my_pick())
expect_identical(out$d, df[c("a", "c")])
expect_snapshot(error = TRUE, {
duckplyr_mutate(gdf, d = my_pick())
})
# Dynamic `all_of()` using user supplied variable
my_pick <- function(x) {
pick(all_of(x))
}
y <- c("a", "c")
out <- duckplyr_mutate(df, d = my_pick(y))
expect_identical(out$d, df[c("a", "c")])
expect_snapshot(error = TRUE, {
duckplyr_mutate(gdf, d = my_pick(y))
})
})
test_that("wrapped `all_of()` and `where()` selections work", {
df <- tibble(a = 1, b = "x", c = 3)
my_pick <- function(x) {
pick(all_of(x))
}
out <- duckplyr_mutate(df, x = my_pick("a"), y = my_pick("b"))
expect_identical(out$x, df["a"])
expect_identical(out$y, df["b"])
my_pick2 <- function(x) {
pick(all_of(x))
}
out <- duckplyr_mutate(df, x = my_pick("a"), y = my_pick2("b"))
expect_identical(out$x, df["a"])
expect_identical(out$y, df["b"])
my_where <- function(fn) {
pick(where(fn))
}
out <- duckplyr_mutate(df, x = my_where(is.numeric), y = my_where(is.character))
expect_identical(out$x, df[c("a", "c")])
expect_identical(out$y, df["b"])
})
test_that("`pick()` expansion evaluates on the full data", {
# To ensure tidyselection is consistent across groups
df <- tibble(g = c(1, 1, 2, 2), x = c(0, 0, 1, 1), y = c(1, 1, 0, 0))
gdf <- duckplyr_group_by(df, g)
# Doesn't select any columns. Returns a 1 row tibble per group (#6685).
out <- duckplyr_mutate(gdf, y = pick(where(~all(.x == 0))))
expect_identical(out$y, new_tibble(list(), nrow = 4L))
# `pick()` evaluation fallback evaluates on the group specific data,
# forcing potentially different results per group.
out <- duckplyr_mutate(gdf, z = pick_wrapper(where(~all(.x == 0))))
expect_named(out$z, c("x", "y"))
expect_identical(out$z$x, c(0, 0, NA, NA))
expect_identical(out$z$y, c(NA, NA, 0, 0))
})
test_that("`pick()` expansion/tidyselection happens outside the data mask", {
# `pick()` expressions are evaluated in the caller environment of the verb.
# This is intentional to avoid theoretical per-group differences in what
# `pick()` should return.
df <- tibble(x = 1, y = 2, z = 3)
a <- "z"
expect <- df["z"]
out <- duckplyr_mutate(df, foo = {
a <- "x"
pick(all_of(a))
})
expect_identical(out$foo, expect)
# `pick()`'s evaluation fallback also performs the tidy-selection
# in the calling environment of the verb
out <- duckplyr_mutate(df, foo = {
a <- "x"
pick_wrapper(all_of(a))
})
expect_identical(out$foo, expect)
})
test_that("errors correctly outside mutate context", {
expect_snapshot(error = TRUE, {
pick()
})
expect_snapshot(error = TRUE, {
pick(a, b)
})
})
test_that("can assign `pick()` to new function", {
# Will run the evaluation version of `pick()`
pick2 <- pick
df <- tibble(x = 1, y = 2)
out <- duckplyr_mutate(df, z = pick2(y))
expect_identical(out$z, df["y"])
})
test_that("selection on rowwise data frames uses full list-cols, but actual evaluation unwraps them", {
df <- tibble(x = list(1:2, 2:4, 5))
df <- duckplyr_rowwise(df)
# i.e. can select based on list-ness of the column.
# Expands to `y = list(tibble(x = x))` where `x` is `1:2`, `2:4`, `5` like it
# would be if you called that directly.
out <- duckplyr_mutate(df, y = list(pick(where(is.list))))
expect_identical(out$y, map(df$x, ~tibble(x = .x)))
})
test_that("when expansion occurs, error labels use the pre-expansion quosure", {
df <- tibble(g = c(1, 2, 2), x = c(1, 2, 3))
# Fails in common type casting of the group chunks,
# which references the auto-named column name
expect_snapshot(error = TRUE, {
duckplyr_mutate(df, if (cur_group_id() == 1L) pick(x) else "x", .by = g)
})
})
test_that("doesn't allow renaming", {
expect_snapshot(error = TRUE, {
duckplyr_mutate(data.frame(x = 1), pick(y = x))
})
expect_snapshot(error = TRUE, {
duckplyr_mutate(data.frame(x = 1), pick_wrapper(y = x))
})
})
# ------------------------------------------------------------------------------
# pick() + duckplyr_summarise()/duckplyr_reframe()
test_that("can `pick()` inside `duckplyr_reframe()`", {
df <- tibble(g = c(1, 1, 2, 1, 2), x = c(1, 1, 1, 2, 2), y = c(1, 1, 1, 2, 1))
gdf <- duckplyr_group_by(df, g)
expect_key <- df[c(1, 4, 5), c("x", "y")]
expect_count <- c(3L, 1L, 1L)
out <- duckplyr_reframe(df, vec_count(pick(x, y), sort = "count"))
expect_identical(out$key, expect_key)
expect_identical(out$count, expect_count)
out <- duckplyr_reframe(df, vec_count(pick_wrapper(x, y), sort = "count"))
expect_identical(out$key, expect_key)
expect_identical(out$count, expect_count)
expect_key <- df[c(1, 4, 3, 5), c("x", "y")]
expect_count <- c(2L, 1L, 1L, 1L)
out <- duckplyr_reframe(gdf, vec_count(pick(x, y), sort = "count"))
expect_identical(out$key, expect_key)
expect_identical(out$count, expect_count)
out <- duckplyr_reframe(gdf, vec_count(pick_wrapper(x, y), sort = "count"))
expect_identical(out$key, expect_key)
expect_identical(out$count, expect_count)
})
test_that("empty selections recycle to the size of any other column", {
df <- tibble(x = 1:5)
# Returns size 1 tibbles that stay the same size (#6685)
out <- duckplyr_summarise(df, sum = sum(x), y = pick(starts_with("foo")))
expect_identical(out$sum, 15L)
expect_identical(out$y, new_tibble(list(), nrow = 1L))
out <- duckplyr_summarise(df, sum = sum(x), y = pick_wrapper(starts_with("foo")))
expect_identical(out$sum, 15L)
expect_identical(out$y, new_tibble(list(), nrow = 1L))
# Returns size 1 tibbles that recycle to size 0 because of `empty` (#6685)
out <- duckplyr_reframe(df, empty = integer(), y = pick(starts_with("foo")))
expect_identical(out$empty, integer())
expect_identical(out$y, new_tibble(list(), nrow = 0L))
out <- duckplyr_reframe(df, empty = integer(), y = pick_wrapper(starts_with("foo")))
expect_identical(out$empty, integer())
expect_identical(out$y, new_tibble(list(), nrow = 0L))
})
test_that("uses 'current' columns of `summarize()` and `duckplyr_reframe()`", {
df <- tibble(x = 1:5, y = 6:10)
# Uses size of current version of `x`
expect_x <- 15L
expect_z <- tibble(x = 15L)
out <- duckplyr_summarise(df, x = sum(x), z = pick(x))
expect_identical(out$x, expect_x)
expect_identical(out$z, expect_z)
out <- duckplyr_summarise(df, x = sum(x), z = pick_wrapper(x))
expect_identical(out$x, expect_x)
expect_identical(out$z, expect_z)
# Adding in `y` forces recycling
expect_x <- vec_rep(15L, 5)
expect_z <- tibble(x = 15L, y = 6:10)
out <- duckplyr_reframe(df, x = sum(x), z = pick(x, y))
expect_identical(out$x, expect_x)
expect_identical(out$z, expect_z)
out <- duckplyr_reframe(df, x = sum(x), z = pick_wrapper(x, y))
expect_identical(out$x, expect_x)
expect_identical(out$z, expect_z)
})
test_that("can select completely new columns in `duckplyr_summarise()`", {
df <- tibble(x = 1:5)
out <- duckplyr_mutate(df, y = x + 1, z = pick(y))
expect_identical(out["y"], out$z)
out <- duckplyr_mutate(df, y = x + 1, z = pick_wrapper(y))
expect_identical(out["y"], out$z)
})
# ------------------------------------------------------------------------------
# pick() + duckplyr_arrange()
test_that("can `duckplyr_arrange()` with `pick()` selection", {
df <- tibble(x = c(2, 2, 1), y = c(3, 1, 3))
expect <- df[c(3, 2, 1),]
expect_identical(duckplyr_arrange(df, pick(x, y)), expect)
expect_identical(duckplyr_arrange(df, pick_wrapper(x, y)), expect)
expect_identical(duckplyr_arrange(df, pick(x), y), expect)
expect_identical(duckplyr_arrange(df, pick_wrapper(x), y), expect)
})
test_that("`pick()` errors in `duckplyr_arrange()` are useful", {
df <- tibble(x = 1)
expect_snapshot(error = TRUE, {
duckplyr_arrange(df, pick(y))
})
expect_snapshot(error = TRUE, {
duckplyr_arrange(df, foo(pick(x)))
})
})
# ------------------------------------------------------------------------------
# pick() + duckplyr_filter()
test_that("can `pick()` inside `duckplyr_filter()`", {
df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3))
out <- duckplyr_filter(df, vec_detect_complete(pick(x, y)))
expect_identical(out, df[c(1, 4),])
out <- duckplyr_filter(df, vec_detect_complete(pick_wrapper(x, y)))
expect_identical(out, df[c(1, 4),])
})
test_that("`duckplyr_filter()` with `pick()` that uses invalid tidy-selection errors", {
df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3))
expect_snapshot(error = TRUE, {
duckplyr_filter(df, pick(x, a))
})
expect_snapshot(error = TRUE, {
duckplyr_filter(df, pick_wrapper(x, a))
})
})
test_that("`duckplyr_filter()` that doesn't use `pick()` result correctly errors", {
df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3))
# TODO: Can we improve on the `In argument:` expression in the expansion case?
expect_snapshot(error = TRUE, {
duckplyr_filter(df, pick(x, y)$x)
})
expect_snapshot(error = TRUE, {
duckplyr_filter(df, pick_wrapper(x, y)$x)
})
})
# ------------------------------------------------------------------------------
# pick() + duckplyr_group_by()
test_that("`pick()` can be used inside `duckplyr_group_by()` wrappers", {
df <- tibble(a = 1:3, b = 2:4, c = 3:5)
tidyselect_group_by <- function(data, groups) {
duckplyr_group_by(data, pick({{ groups }}))
}
tidyselect_group_by_wrapper <- function(data, groups) {
duckplyr_group_by(data, pick_wrapper({{ groups }}))
}
expect_identical(
tidyselect_group_by(df, c(a, c)),
duckplyr_group_by(df, a, c)
)
expect_identical(
tidyselect_group_by_wrapper(df, c(a, c)),
duckplyr_group_by(df, a, c)
)
# Empty selections group by nothing (#6685)
expect_identical(
tidyselect_group_by(df, NULL),
df
)
expect_identical(
tidyselect_group_by_wrapper(df, NULL),
df
)
})
# ------------------------------------------------------------------------------
# expand_pick()
test_that("`pick()` doesn't expand across anonymous function boundaries", {
df <- tibble(x = 1, y = 2)
by <- compute_by(by = NULL, data = df, error_call = current_env())
mask <- DataMask$new(df, by, verb = "mutate", error_call = current_env())
# With inline `function() { }` calls (this also handles native R anonymous functions)
quo <- dplyr_quosures(z = function() pick(y, x))$z
expect_identical(expand_pick(quo, mask), quo)
# With `~` anonymous functions
quos <- dplyr_quosures(z = ~ pick(y, x))$z
expect_identical(expand_pick(quo, mask), quo)
})
test_that("`pick()` expands embedded quosures", {
df <- tibble(x = 1, y = 2)
by <- compute_by(by = NULL, data = df, error_call = current_env())
mask <- DataMask$new(df, by, verb = "mutate", error_call = current_env())
wrapper <- function(x) {
dplyr_quosures(z = dense_rank({{x}}))
}
quo <- wrapper(pick(x, y))$z
out <- expand_pick(quo, mask)
expect_identical(
quo_get_expr(quo_get_expr(out)[[2L]]),
expr(asNamespace("dplyr")$dplyr_pick_tibble(x = x, y = y))
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.