# Basic properties --------------------------------------------------------
test_that("mutating joins preserve row and column order of x", {
df1 <- data.frame(a = 1:3)
df2 <- data.frame(b = 1, c = 2, a = 4:1)
out <- duckplyr_inner_join(df1, df2, by = "a")
expect_named(out, c("a", "b", "c"))
expect_equal(out$a, 1:3)
out <- duckplyr_left_join(df1, df2, by = "a")
expect_named(out, c("a", "b", "c"))
expect_equal(out$a, 1:3)
out <- duckplyr_right_join(df1, df2, by = "a")
expect_named(out, c("a", "b", "c"))
expect_equal(out$a, 1:4)
out <- duckplyr_full_join(df1, df2, by = "a")
expect_named(out, c("a", "b", "c"))
expect_equal(out$a, 1:4)
})
test_that("even when column names change", {
df1 <- data.frame(x = c(1, 1, 2, 3), z = 1:4, a = 1)
df2 <- data.frame(z = 1:3, b = 1, x = c(1, 2, 4))
out <- duckplyr_inner_join(df1, df2, by = "x")
expect_named(out, c("x", "z.x", "a", "z.y", "b"))
})
test_that("filtering joins preserve row and column order of x (#2964)", {
df1 <- data.frame(a = 4:1, b = 1)
df2 <- data.frame(b = 1, c = 2, a = 2:3)
out <- duckplyr_semi_join(df1, df2, by = "a")
expect_named(out, c("a", "b"))
expect_equal(out$a, 3:2)
out <- duckplyr_anti_join(df1, df2, by = "a")
expect_named(out, c("a", "b"))
expect_equal(out$a, c(4L, 1L))
})
test_that("keys are coerced to symmetric type", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
foo <- tibble(id = 1:2, var1 = "foo")
bar <- tibble(id = as.numeric(1:2), var2 = "bar")
expect_type(duckplyr_inner_join(foo, bar, by = "id")$id, "double")
expect_type(duckplyr_inner_join(bar, foo, by = "id")$id, "double")
foo <- tibble(id = factor(c("a", "b")), var1 = "foo")
bar <- tibble(id = c("a", "b"), var2 = "bar")
expect_type(duckplyr_inner_join(foo, bar, by = "id")$id, "character")
expect_type(duckplyr_inner_join(bar, foo, by = "id")$id, "character")
})
test_that("factor keys are coerced to the union factor type", {
df1 <- tibble(x = 1, y = factor("a"))
df2 <- tibble(x = 2, y = factor("b"))
out <- duckplyr_full_join(df1, df2, by = c("x", "y"))
expect_equal(out$y, factor(c("a", "b")))
})
test_that("keys of non-equi conditions are not coerced if `keep = NULL`", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
foo <- tibble(id = factor(c("a", "b")), col1 = c(1, 2), var1 = "foo")
bar <- tibble(id = c("a", "b"), col2 = c(1L, 2L), var2 = "bar")
out <- duckplyr_inner_join(foo, bar, by = join_by(id, col1 >= col2))
expect_type(out$id, "character")
expect_type(out$col1, "double")
expect_type(out$col2, "integer")
out <- duckplyr_inner_join(bar, foo, by = join_by(id, col2 <= col1))
expect_type(out$id, "character")
expect_type(out$col1, "double")
expect_type(out$col2, "integer")
})
test_that("when keep = TRUE, duckplyr_left_join() preserves both sets of keys", {
# when keys have different names
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(x = c(3, 4), y = c(3, 4))
out <- duckplyr_left_join(df1, df2, by = c("a" = "x"), keep = TRUE)
expect_equal(out$a, c(2, 3))
expect_equal(out$x, c(NA, 3))
# when keys have same name
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(a = c(3, 4), y = c(3, 4))
out <- duckplyr_left_join(df1, df2, by = c("a"), keep = TRUE)
expect_equal(out$a.x, c(2, 3))
expect_equal(out$a.y, c(NA, 3))
})
test_that("when keep = TRUE, duckplyr_right_join() preserves both sets of keys", {
# when keys have different names
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(x = c(3, 4), y = c(3, 4))
out <- duckplyr_right_join(df1, df2, by = c("a" = "x"), keep = TRUE)
expect_equal(out$a, c(3, NA))
expect_equal(out$x, c(3, 4))
# when keys have same name
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(a = c(3, 4), y = c(3, 4))
out <- duckplyr_right_join(df1, df2, by = c("a"), keep = TRUE)
expect_equal(out$a.x, c(3, NA))
expect_equal(out$a.y, c(3, 4))
})
test_that("when keep = TRUE, duckplyr_full_join() preserves both sets of keys", {
# when keys have different names
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(x = c(3, 4), y = c(3, 4))
out <- duckplyr_full_join(df1, df2, by = c("a" = "x"), keep = TRUE)
expect_equal(out$a, c(2, 3, NA))
expect_equal(out$x, c(NA, 3, 4))
# when keys have same name
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(a = c(3, 4), y = c(3, 4))
out <- duckplyr_full_join(df1, df2, by = c("a"), keep = TRUE)
expect_equal(out$a.x, c(2, 3, NA))
expect_equal(out$a.y, c(NA, 3, 4))
})
test_that("when keep = TRUE, duckplyr_inner_join() preserves both sets of keys (#5581)", {
# when keys have different names
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(x = c(3, 4), y = c(3, 4))
out <- duckplyr_inner_join(df1, df2, by = c("a" = "x"), keep = TRUE)
expect_equal(out$a, c(3))
expect_equal(out$x, c(3))
# when keys have same name
df1 <- tibble(a = c(2, 3), b = c(1, 2))
df2 <- tibble(a = c(3, 4), y = c(3, 4))
out <- duckplyr_inner_join(df1, df2, by = c("a"), keep = TRUE)
expect_equal(out$a.x, c(3))
expect_equal(out$a.y, c(3))
})
test_that("can't use `keep = FALSE` with non-equi conditions (#6499)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(xl = c(1, 3), xu = c(4, 7))
df2 <- tibble(yl = c(2, 5, 8), yu = c(6, 8, 9))
expect_snapshot(error = TRUE, {
duckplyr_left_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE)
})
# Would never make sense here.
# Based on how the binary conditions are generated we'd merge:
# - `yu` into `xl`
# - `yl` into `xu`
# Which results in `xl` and `xu` columns that don't maintain `xl <= xu`.
expect_snapshot(error = TRUE, {
duckplyr_full_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE)
})
})
test_that("joins matches NAs by default (#892, #2033)", {
df1 <- tibble(x = c(NA_character_, 1))
df2 <- tibble(x = c(NA_character_, 2))
expect_equal(nrow(duckplyr_inner_join(df1, df2, by = "x")), 1)
expect_equal(nrow(duckplyr_semi_join(df1, df2, by = "x")), 1)
})
test_that("joins don't match NA when na_matches = 'never' (#2033)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(a = c(1, NA))
df2 <- tibble(a = c(1, NA), b = 1:2)
out <- duckplyr_left_join(df1, df2, by = "a", na_matches = "never")
expect_equal(out, tibble(a = c(1, NA), b = c(1, NA)))
out <- duckplyr_inner_join(df1, df2, by = "a", na_matches = "never")
expect_equal(out, tibble(a = 1, b = 1))
out <- duckplyr_semi_join(df1, df2, by = "a", na_matches = "never")
expect_equal(out, tibble(a = 1))
out <- duckplyr_anti_join(df1, df2, by = "a", na_matches = "never")
expect_equal(out, tibble(a = NA_integer_))
out <- duckplyr_nest_join(df1, df2, by = "a", na_matches = "never")
expect <- tibble(a = c(1, NA), df2 = list(tibble(b = 1L), tibble(b = integer())))
expect_equal(out, expect)
dat1 <- tibble(
name = c("a", "c"),
var1 = c(1, 2)
)
dat3 <- tibble(
name = c("a", NA_character_),
var3 = c(5, 6)
)
expect_equal(
duckplyr_full_join(dat1, dat3, by = "name", na_matches = "never"),
tibble(name = c("a", "c", NA), var1 = c(1, 2, NA), var3 = c(5, NA, 6))
)
})
test_that("`duckplyr_left_join(by = join_by(closest(...)))` works as expected", {
df1 <- tibble(x = 1:5)
df2 <- tibble(y = c(1, 2, 4))
out <- duckplyr_left_join(df1, df2, by = join_by(closest(x <= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 4, 4, NA))
out <- duckplyr_left_join(df1, df2, by = join_by(closest(x < y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(2, 4, 4, NA, NA))
out <- duckplyr_left_join(df1, df2, by = join_by(closest(x >= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 2, 4, 4))
out <- duckplyr_left_join(df1, df2, by = join_by(closest(x > y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(NA, 1, 2, 2, 4))
})
test_that("`duckplyr_full_join(by = join_by(closest(...)))` works as expected", {
df1 <- tibble(x = 1:5)
df2 <- tibble(y = c(1, 2, 4))
out <- duckplyr_full_join(df1, df2, by = join_by(closest(x <= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 4, 4, NA))
out <- duckplyr_full_join(df1, df2, by = join_by(closest(x < y)))
expect_identical(out$x, c(1:5, NA))
expect_identical(out$y, c(2, 4, 4, NA, NA, 1))
out <- duckplyr_full_join(df1, df2, by = join_by(closest(x >= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 2, 4, 4))
out <- duckplyr_full_join(df1, df2, by = join_by(closest(x > y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(NA, 1, 2, 2, 4))
})
test_that("`duckplyr_right_join(by = join_by(closest(...)))` works as expected", {
df1 <- tibble(x = 1:5)
df2 <- tibble(y = c(1, 2, 4))
out <- duckplyr_right_join(df1, df2, by = join_by(closest(x <= y)))
expect_identical(out$x, 1:4)
expect_identical(out$y, c(1, 2, 4, 4))
out <- duckplyr_right_join(df1, df2, by = join_by(closest(x < y)))
expect_identical(out$x, c(1:3, NA))
expect_identical(out$y, c(2, 4, 4, 1))
out <- duckplyr_right_join(df1, df2, by = join_by(closest(x >= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 2, 4, 4))
out <- duckplyr_right_join(df1, df2, by = join_by(closest(x > y)))
expect_identical(out$x, 2:5)
expect_identical(out$y, c(1, 2, 2, 4))
})
test_that("`duckplyr_inner_join(by = join_by(closest(...)))` works as expected", {
df1 <- tibble(x = 1:5)
df2 <- tibble(y = c(1, 2, 4))
out <- duckplyr_inner_join(df1, df2, by = join_by(closest(x <= y)))
expect_identical(out$x, 1:4)
expect_identical(out$y, c(1, 2, 4, 4))
out <- duckplyr_inner_join(df1, df2, by = join_by(closest(x < y)))
expect_identical(out$x, 1:3)
expect_identical(out$y, c(2, 4, 4))
out <- duckplyr_inner_join(df1, df2, by = join_by(closest(x >= y)))
expect_identical(out$x, 1:5)
expect_identical(out$y, c(1, 2, 2, 4, 4))
out <- duckplyr_inner_join(df1, df2, by = join_by(closest(x > y)))
expect_identical(out$x, 2:5)
expect_identical(out$y, c(1, 2, 2, 4))
})
test_that("joins using `between(bounds =)` work as expected (#6488)", {
df1 <- tibble(x = 1:5)
df2 <- tibble(lower = 2, upper = 4)
out <- duckplyr_full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "[]")))
expect_identical(out$lower, c(NA, 2, 2, 2, NA))
expect_identical(out$upper, c(NA, 4, 4, 4, NA))
out <- duckplyr_full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "[)")))
expect_identical(out$lower, c(NA, 2, 2, NA, NA))
expect_identical(out$upper, c(NA, 4, 4, NA, NA))
out <- duckplyr_full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "(]")))
expect_identical(out$lower, c(NA, NA, 2, 2, NA))
expect_identical(out$upper, c(NA, NA, 4, 4, NA))
out <- duckplyr_full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "()")))
expect_identical(out$lower, c(NA, NA, 2, NA, NA))
expect_identical(out$upper, c(NA, NA, 4, NA, NA))
})
test_that("joins using `overlaps(bounds =)` work as expected (#6488)", {
df1 <- tibble(x_lower = c(1, 1, 3, 4), x_upper = c(2, 3, 4, 5))
df2 <- tibble(y_lower = 2, y_upper = 4)
expect_closed <- vec_cbind(df1, vec_c(df2, df2, df2, df2))
out <- duckplyr_full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "[]")))
expect_identical(out, expect_closed)
# `[)`, `(]`, and `()` all generate the same binary conditions but are useful
# for consistency with `between(bounds =)`
expect_open <- vec_cbind(df1, vec_c(NA, df2, df2, NA))
out <- duckplyr_full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "[)")))
expect_identical(out, expect_open)
out <- duckplyr_full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "(]")))
expect_identical(out, expect_open)
out <- duckplyr_full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "()")))
expect_identical(out, expect_open)
})
test_that("join_mutate() validates arguments", {
df <- tibble(x = 1)
# Mutating joins
expect_snapshot(error = TRUE, {
join_mutate(df, df, by = 1, type = "left")
join_mutate(df, df, by = "x", type = "left", suffix = 1)
join_mutate(df, df, by = "x", type = "left", na_matches = "foo")
join_mutate(df, df, by = "x", type = "left", keep = 1)
})
})
test_that("join_filter() validates arguments", {
df <- tibble(x = 1)
# Filtering joins
expect_snapshot(error = TRUE, {
join_filter(df, df, by = 1, type = "semi")
join_filter(df, df, by = "x", type = "semi", na_matches = "foo")
})
})
test_that("mutating joins trigger many-to-many warning", {
skip("TODO duckdb")
df <- tibble(x = c(1, 1))
expect_snapshot(out <- duckplyr_left_join(df, df, join_by(x)))
})
test_that("mutating joins don't trigger many-to-many warning when called indirectly", {
skip("TODO duckdb")
df <- tibble(x = c(1, 1))
fn <- function(df1, df2, relationship = NULL) {
duckplyr_left_join(df1, df2, join_by(x), relationship = relationship)
}
# Directly calling `duckplyr_left_join()` from a function you control results in a warning
expect_warning(fn(df, df), class = "dplyr_warning_join_relationship_many_to_many")
# Now mimic calling an "rlang function" which you don't control that calls `duckplyr_left_join()`
fn_env(fn) <- ns_env("rlang")
# Indirectly calling `duckplyr_left_join()` through a function you don't control
# doesn't warn
expect_no_warning(fn(df, df), class = "dplyr_warning_join_relationship_many_to_many")
})
test_that("mutating joins compute common columns", {
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
expect_snapshot(out <- duckplyr_left_join(df1, df2))
})
test_that("filtering joins compute common columns", {
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
expect_snapshot(out <- duckplyr_semi_join(df1, df2))
})
test_that("mutating joins finalize unspecified columns (#6804)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = NA)
df2 <- tibble(x = NA)
expect_identical(
duckplyr_inner_join(df1, df2, by = join_by(x)),
tibble(x = NA)
)
expect_identical(
duckplyr_inner_join(df1, df2, by = join_by(x), na_matches = "never"),
tibble(x = logical())
)
# Pre-existing `unspecified()` vectors get finalized, because they are
# considered internal types and we took a "common type" between the keys
df1 <- tibble(x = unspecified())
df2 <- tibble(x = unspecified())
expect_identical(
duckplyr_inner_join(df1, df2, by = join_by(x)),
tibble(x = logical())
)
})
test_that("filtering joins finalize unspecified columns (#6804)", {
df1 <- tibble(x = NA)
df2 <- tibble(x = NA)
expect_identical(
duckplyr_semi_join(df1, df2, by = join_by(x)),
tibble(x = NA)
)
expect_identical(
duckplyr_semi_join(df1, df2, by = join_by(x), na_matches = "never"),
tibble(x = logical())
)
# Pre-existing `unspecified()` vectors aren't finalized,
# because we don't take the common type of the keys.
# We retain the exact type of `x`.
df1 <- tibble(x = unspecified())
df2 <- tibble(x = NA)
expect_identical(
duckplyr_semi_join(df1, df2, by = join_by(x)),
tibble(x = unspecified())
)
})
test_that("mutating joins reference original column in `y` when there are type errors (#6465)", {
x <- tibble(a = 1)
y <- tibble(b = "1")
expect_snapshot({
(expect_error(duckplyr_left_join(x, y, by = join_by(a == b))))
})
})
test_that("filtering joins reference original column in `y` when there are type errors (#6465)", {
x <- tibble(a = 1)
y <- tibble(b = "1")
expect_snapshot({
(expect_error(duckplyr_semi_join(x, y, by = join_by(a == b))))
})
})
test_that("error if passed additional arguments", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- data.frame(a = 1:3)
df2 <- data.frame(a = 1)
expect_snapshot(error = TRUE, {
duckplyr_inner_join(df1, df2, on = "a")
duckplyr_left_join(df1, df2, on = "a")
duckplyr_right_join(df1, df2, on = "a")
duckplyr_full_join(df1, df2, on = "a")
duckplyr_nest_join(df1, df2, on = "a")
duckplyr_anti_join(df1, df2, on = "a")
duckplyr_semi_join(df1, df2, on = "a")
})
})
# nest_join ---------------------------------------------------------------
test_that("nest_join returns list of tibbles (#3570)",{
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 1), z = c(2, 3))
out <- duckplyr_nest_join(df1, df2, by = "x")
expect_named(out, c("x", "y", "df2"))
expect_type(out$df2, "list")
expect_s3_class(out$df2[[1]], "tbl_df")
})
test_that("nest_join respects types of y (#6295)",{
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- duckplyr_rowwise(tibble(x = c(1, 1), z = c(2, 3)))
out <- duckplyr_nest_join(df1, df2, by = "x")
expect_s3_class(out$df2[[1]], "rowwise_df")
})
test_that("nest_join preserves data frame attributes on `x` and `y` (#6295)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- data.frame(x = c(1, 2), y = c(3, 4))
attr(df1, "foo") <- 1
df2 <- data.frame(x = c(1, 2), z = c(3, 4))
attr(df2, "foo") <- 2
out <- duckplyr_nest_join(df1, df2, by = "x")
expect_identical(attr(out, "foo"), 1)
expect_identical(attr(out$df2[[1]], "foo"), 2)
})
test_that("nest_join computes common columns", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
expect_snapshot(out <- duckplyr_nest_join(df1, df2))
})
test_that("nest_join finalizes unspecified columns (#6804)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = NA)
df2 <- tibble(x = NA)
expect_identical(
duckplyr_nest_join(df1, df2, by = join_by(x)),
tibble(x = NA, df2 = list(tibble(.rows = 1L)))
)
expect_identical(
duckplyr_nest_join(df1, df2, by = join_by(x), keep = TRUE),
tibble(x = NA, df2 = list(tibble(x = NA)))
)
expect_identical(
duckplyr_nest_join(df1, df2, by = join_by(x), na_matches = "never"),
tibble(x = NA, df2 = list(tibble()))
)
# Pre-existing `unspecified()` vectors get finalized, because they are
# considered internal types and we took a "common type" between the keys
df1 <- tibble(x = unspecified())
df2 <- tibble(x = unspecified())
expect_identical(
duckplyr_nest_join(df1, df2, by = join_by(x)),
tibble(x = logical(), df2 = list())
)
})
test_that("nest_join references original column in `y` when there are type errors (#6465)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
x <- tibble(a = 1)
y <- tibble(b = "1")
expect_snapshot({
(expect_error(duckplyr_nest_join(x, y, by = join_by(a == b))))
})
})
test_that("nest_join handles multiple matches in x (#3642)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 1))
df2 <- tibble(x = 1, y = 1:2)
out <- duckplyr_nest_join(df1, df2, by = "x")
expect_equal(out$df2[[1]], out$df2[[2]])
})
test_that("nest_join forces `multiple = 'all'` internally (#6392)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = 1)
df2 <- tibble(x = 1, y = 1:2)
expect_no_warning(out <- duckplyr_nest_join(df1, df2, by = "x"))
expect_identical(nrow(out$df2[[1]]), 2L)
})
test_that("y keys dropped by default for equi conditions", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
out <- duckplyr_nest_join(df1, df2, by = "x")
expect_named(out, c("x", "y", "df2"))
expect_named(out$df2[[1]], "z")
out <- duckplyr_nest_join(df1, df2, by = "x", keep = TRUE)
expect_named(out$df2[[1]], c("x", "z"))
})
test_that("y keys kept by default for non-equi conditions", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
out <- duckplyr_nest_join(df1, df2, by = join_by(x >= x))
expect_named(out, c("x", "y", "df2"))
expect_named(out$df2[[1]], c("x", "z"))
})
test_that("validates inputs", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
expect_snapshot(error = TRUE, {
duckplyr_nest_join(df1, df2, by = 1)
duckplyr_nest_join(df1, df2, keep = 1)
duckplyr_nest_join(df1, df2, name = 1)
duckplyr_nest_join(df1, df2, na_matches = 1)
})
})
# output type ---------------------------------------------------------------
test_that("joins x preserve type of x", {
df1 <- data.frame(x = 1)
df2 <- tibble(x = 2)
expect_s3_class(duckplyr_inner_join(df1, df2, by = "x"), "data.frame", exact = TRUE)
expect_s3_class(duckplyr_inner_join(df2, df1, by = "x"), "tbl_df")
})
test_that("joins preserve groups", {
gf1 <- tibble(a = 1:3) %>% duckplyr_group_by(a)
gf2 <- tibble(a = rep(1:4, 2), b = 1) %>% duckplyr_group_by(b)
i <- count_regroups(out <- duckplyr_inner_join(gf1, gf2, by = "a"))
expect_equal(i, 1L)
expect_equal(duckplyr_group_vars(out), "a")
i <- count_regroups(out <- duckplyr_semi_join(gf1, gf2, by = "a"))
expect_equal(i, 0L)
expect_equal(duckplyr_group_vars(out), "a")
# once for x + once for each row for y
i <- count_regroups(out <- duckplyr_nest_join(gf1, gf2, by = "a"))
expect_equal(i, 4L)
expect_equal(duckplyr_group_vars(out), "a")
expect_equal(duckplyr_group_vars(out$gf2[[1]]), "b")
})
test_that("joins respect zero length groups", {
df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>%
duckplyr_group_by(f)
df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>%
duckplyr_group_by(f)
expect_equal(duckplyr_group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4))
expect_equal(duckplyr_group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(4,2))
expect_equal(duckplyr_group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(duckplyr_group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2))
expect_equal(duckplyr_group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(4))
df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>%
duckplyr_group_by(f, .drop = FALSE)
df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>%
duckplyr_group_by(f, .drop = FALSE)
expect_equal(duckplyr_group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,0))
expect_equal(duckplyr_group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,2))
expect_equal(duckplyr_group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(duckplyr_group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2,0,0))
expect_equal(duckplyr_group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,0))
})
test_that("group column names reflect renamed duplicate columns (#2330)", {
df1 <- tibble(x = 1:5, y = 1:5) %>% duckplyr_group_by(x, y)
df2 <- tibble(x = 1:5, y = 1:5)
out <- duckplyr_inner_join(df1, df2, by = "x")
expect_equal(duckplyr_group_vars(out), "x")
# TODO: fix this issue: https://github.com/tidyverse/dplyr/issues/4917
# expect_equal(duckplyr_group_vars(out), c("x", "y.x"))
})
test_that("rowwise group structure is updated after a join (#5227)", {
df1 <- duckplyr_rowwise(tibble(x = 1:2))
df2 <- tibble(x = c(1:2, 2L))
x <- duckplyr_left_join(df1, df2, by = "x")
expect_identical(group_rows(x), list_of(1L, 2L, 3L))
})
# deprecated ----------------------------------------------------------------
test_that("by = character() generates cross (#4206)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
local_options(lifecycle_verbosity = "quiet")
df1 <- tibble(x = 1:2)
df2 <- tibble(y = 1:2)
out <- duckplyr_left_join(df1, df2, by = character())
expect_equal(out$x, rep(1:2, each = 2))
expect_equal(out$y, rep(1:2, 2))
})
test_that("`by = character()` technically respects `unmatched`", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
local_options(lifecycle_verbosity = "quiet")
df1 <- tibble()
df2 <- tibble(x = 1)
expect_snapshot(error = TRUE, {
duckplyr_left_join(df1, df2, by = character(), unmatched = "error")
})
})
test_that("`by = character()` technically respects `relationship`", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
local_options(lifecycle_verbosity = "quiet")
df <- tibble(x = 1:2)
expect_snapshot(error = TRUE, {
duckplyr_left_join(df, df, by = character(), relationship = "many-to-one")
})
})
test_that("`by = character()` for a cross join is deprecated (#6604)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = 1:2)
df2 <- tibble(y = 1:2)
# Mutating join
expect_snapshot({
out <- duckplyr_left_join(df1, df2, by = character())
})
# Filtering join
expect_snapshot({
out <- duckplyr_semi_join(df1, df2, by = character())
})
# Nest join
expect_snapshot({
out <- duckplyr_nest_join(df1, df2, by = character())
})
})
test_that("`by = named character()` for a cross join works", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
# Used by the sift package
df1 <- tibble(x = 1:2)
df2 <- tibble(y = 1:2)
by <- set_names(character(), nm = character())
expect_snapshot({
out <- duckplyr_left_join(df1, df2, by = by)
})
expect_identical(
out,
duckplyr_cross_join(df1, df2)
)
})
test_that("`by = list(x = character(), y = character())` for a cross join is deprecated (#6604)", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
df1 <- tibble(x = 1:2)
df2 <- tibble(y = 1:2)
expect_snapshot({
out <- duckplyr_left_join(df1, df2, by = list(x = character(), y = character()))
})
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.