test_that("dt_sources captures all tables", {
dt1 <- lazy_dt(data.frame(x = 1), "dt1")
dt2 <- lazy_dt(data.frame(x = 2), "dt2")
dt3 <- lazy_dt(data.frame(x = 3), "dt3")
out <- dt1 %>% left_join(dt2, by = "x") %>% left_join(dt3, by = "x")
expect_equal(
dt_sources(out)[c("dt1", "dt2", "dt3")],
list(dt1 = dt1$parent, dt2 = dt2$parent, dt3 = dt3$parent)
)
})
test_that("joins captures locals from both parents", {
dt1 <- lazy_dt(data.frame(x = 1)) %>% mutate(y = 1) %>% compute("D1")
dt2 <- lazy_dt(data.frame(x = 1)) %>% mutate(z = 1) %>% compute("D2")
expect_named(left_join(dt1, dt2, by = "x")$locals, c("D1", "D2"))
expect_named(inner_join(dt1, dt2, by = "x")$locals, c("D1", "D2"))
})
# dplyr verbs -------------------------------------------------------------
test_that("simple usage generates expected translation", {
dt1 <- lazy_dt(tibble(x = 1, y = 2, a = 3), "dt1")
dt2 <- lazy_dt(tibble(x = 1, y = 2, b = 4), "dt2")
expect_equal(
dt1 %>% left_join(dt2, by = "x") %>% show_query(),
expr(
setnames(
setcolorder(
dt2[dt1, on = .(x), allow.cartesian = TRUE],
!!c(1L, 4L, 5L, 2L, 3L)
),
!!c("i.y", "y"),
!!c("y.x", "y.y")
)
)
)
expect_equal(
dt1 %>% right_join(dt2, by = "x") %>% show_query(),
expr(
setnames(
dt1[dt2, on = .(x), allow.cartesian = TRUE],
!!c("y", "i.y"),
!!c("y.x", "y.y")
)
)
)
expect_equal(
dt1 %>% inner_join(dt2, by = "x") %>% show_query(),
expr(
setnames(
dt1[dt2, on = .(x), nomatch = NULL, allow.cartesian = TRUE],
!!c("y", "i.y"),
!!c("y.x", "y.y")
)
)
)
expect_equal(
dt1 %>% full_join(dt2, by = "x") %>% show_query(),
expr(merge(dt1, dt2, all = TRUE, by.x = "x", by.y = "x", allow.cartesian = TRUE))
)
expect_equal(
dt1 %>% anti_join(dt2, by = "x") %>% show_query(),
expr(dt1[!dt2, on = .(x)])
)
expect_equal(
dt1 %>% semi_join(dt2, by = "x") %>% show_query(),
expr(dt1[unique(dt1[dt2, which = TRUE, nomatch = NULL, on = .(x)])])
)
})
test_that("full_join produces correct names with default suffix", {
### names are set correctly for basic join
df1 <- tibble(a = "a", b = "b.x")
df2 <- tibble(a = "a", b = "b.y")
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
expect_equal(
full_join(dt1, dt2, by = "a") %>% collect,
full_join(df1, df2, by = "a")
)
### names are set correctly for join which requires `setnames()`
# data.table: use merge which simply appends the corresponding suffix
# producing duplicates
# dplyr: appends suffix until name is unique
df1 <- tibble(a = "a", b = "b.x", b.x = "b.x.x.x")
df2 <- tibble(a = "a", b = "b.y", b.x.x = "b.x.x")
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
joined_dt <- full_join(dt1, dt2, by = "a")
expected <- full_join(df1, df2, by = "a")
expect_equal(
joined_dt %>% .$vars,
colnames(expected)
)
# suppress warning created by `data.table::merge()`
expect_equal(
suppressWarnings(joined_dt %>% collect()),
expected
)
})
test_that("full_join produces correct names with user-supplied suffix", {
### names are set correctly for basic join
df1 <- tibble(a = "a", b = "b.x")
df2 <- tibble(a = "a", b = "b.y")
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
expect_equal(
full_join(dt1, dt2, by = "a", suffix = c(".one", ".two")) %>% collect,
full_join(df1, df2, by = "a", suffix = c(".one", ".two"))
)
### names are set correctly for join which requires `setnames()`
# data.table: use merge which simply appends the corresponding suffix
# producing duplicates
# dplyr: appends suffix until name is unique
df1 <- tibble(a = "a", b = "b.one", b.one = "b.one.one.one")
df2 <- tibble(a = "a", b = "b.two", b.one.one = "b.one.one")
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
joined_dt <- full_join(dt1, dt2, by = "a", suffix = c(".one", ".two"))
expected <- full_join(df1, df2, by = "a", suffix = c(".one", ".two"))
expect_equal(
joined_dt %>% .$vars,
colnames(expected)
)
# suppress warning created by `data.table::merge()`
expect_equal(
suppressWarnings(joined_dt %>% collect()),
expected
)
})
test_that("join can handle `by` where order doesn't match input", {
dt1 <- lazy_dt(tibble(a = "a", b = "b", c = "c"), name = "dt1")
dt2 <- lazy_dt(tibble(a = "a", b = "b", c = "c", d = "d"), name = "dt2")
dt3 <- left_join(dt1, dt2, by = c("c", "b", "a"))
expect_equal(dt3$vars, letters[1:4])
expect_equal(collect(dt3), collect(dt2))
dt4 <- full_join(dt1, dt2, by = c("c", "b", "a"))
expect_equal(dt4$vars, letters[1:4])
expect_equal(collect(dt4), collect(dt2))
dt5 <- left_join(dt1, dt2, by = c("c", "b"))
expect_equal(
collect(dt5),
tibble(a.x = "a", b = "b", c = "c", a.y = "a", d = "d")
)
})
test_that("left_join produces correct names", {
# data.table: uses y[x] which prefixes `x` vars with "i." and if name is not
# unique it appends ".<number>" with the smallest number without a collision
# dplyr: appends suffix until name is unique
df1 <- tibble(a = "a", b = "b.x", i.b = "i.b")
df2 <- tibble(a = "a", b = "b.y")
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
joined_dt <- left_join(dt1, dt2, by = "a")
expected <- left_join(df1, df2, by = "a") %>% colnames()
expect_equal(
joined_dt %>% .$vars,
expected
)
expect_equal(
joined_dt %>% collect() %>% colnames(),
expected
)
})
test_that("named by converted to by.x and by.y", {
dt1 <- lazy_dt(data.frame(a1 = 1:3, z = 1), "dt1")
dt2 <- lazy_dt(data.frame(a2 = 1:3, z = 2), "dt2")
out_inner <- inner_join(dt1, dt2, by = c('a1' = 'a2'))
expect_equal(
out_inner %>% show_query(),
expr(
setnames(
dt1[dt2, on = .(a1 = a2), nomatch = NULL, allow.cartesian = TRUE],
!!c("z", "i.z"),
!!c("z.x", "z.y")
)
)
)
expect_setequal(tbl_vars(out_inner), c("a1", "z.x", "z.y"))
out_left <- left_join(dt1, dt2, by = c('a1' = 'a2'))
expect_equal(
out_left %>% show_query(),
expr(
setnames(
setcolorder(
dt2[dt1, on = .(a2 = a1), allow.cartesian = TRUE],
!!c(1L, 3L, 2L)
),
!!c("a2", "i.z", "z"),
!!c("a1", "z.x", "z.y")
)
)
)
expect_setequal(tbl_vars(out_left), c("a1", "z.x", "z.y"))
})
test_that("named by can handle edge cases", {
test_equal <- function(f_join) {
joined_dt <- f_join(dt1, dt2, by = c("x", z = "y"))
expected <- f_join(df1, df2, by = c("x", z = "y"))
expect_equal(
joined_dt %>% collect(),
expected
)
expect_equal(
joined_dt$vars,
colnames(expected)
)
}
df1 <- tibble(x = 1, y = 1, z = 2)
df2 <- tibble(x = 1, y = 2)
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
test_equal(left_join)
test_equal(right_join)
test_equal(full_join)
test_equal(semi_join)
test_equal(anti_join)
})
test_that("setnames only used when necessary", {
dt1 <- lazy_dt(data.frame(x = 1:2, a = 3), "dt1")
dt2 <- lazy_dt(data.frame(x = 2:3, b = 4), "dt2")
expect_equal(
dt1 %>% left_join(dt2, by = "x") %>% show_query(),
expr(setcolorder(dt2[dt1, on = .(x), allow.cartesian = TRUE], !!c(1L, 3L, 2L)))
)
expect_equal(
dt1 %>% left_join(dt2, by = "x") %>% pull(x),
dt1 %>% pull(x)
)
})
test_that("correctly determines vars", {
dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1")
dt2 <- lazy_dt(data.frame(x = 1, y = 2, b = 4), "dt2")
expect_setequal(
dt1 %>% left_join(dt2, by = c("x", "y")) %>% .$vars,
c("x", "y", "a", "b")
)
expect_setequal(
dt1 %>% left_join(dt2, by = "x") %>% .$vars,
c("x", "y.x", "y.y", "a", "b")
)
expect_setequal(
dt1 %>% semi_join(dt2, by = "x") %>% .$vars,
c("x", "y", "a")
)
})
test_that("can override suffixes", {
dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1")
dt2 <- lazy_dt(data.frame(x = 1, y = 22, b = 4), "dt2")
expect_equal(
dt1 %>% left_join(dt2, by = "x", suffix = c("X", "Y")) %>% show_query(),
expr(
setnames(
setcolorder(
dt2[dt1, on = .(x), allow.cartesian = TRUE],
!!c(1L, 4L, 5L, 2L, 3L)
),
!!c("i.y", "y"),
!!c("yX", "yY")
)
)
)
})
test_that("automatically data.frame converts to lazy_dt", {
dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1")
df2 <- data.frame(x = 1, y = 2, a = 3)
out <- left_join(dt1, df2, by = "x")
expect_s3_class(out, "dtplyr_step")
})
test_that("converts other types if requested", {
dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1")
x <- structure(10, class = "foo")
expect_error(left_join(dt1, x, by = "x"), "copy")
expect_s3_class(left_join(dt1, x, by = "x", copy = TRUE), "dtplyr_step_join")
})
test_that("mutates inside joins are copied as needed", {
dt <- data.table(x = 1)
lhs <- lazy_dt(dt, "dt1") %>% mutate(y = x + 1)
rhs <- lazy_dt(dt, "dt2") %>% mutate(z = x + 1)
collect(inner_join(lhs, rhs, by = "x"))
expect_named(dt, "x")
})
test_that("performs cartesian joins as needed", {
x <- lazy_dt(data.frame(x = c(2, 2, 2), y = 1:3))
y <- lazy_dt(data.frame(x = c(2, 2, 2), z = 1:3))
out <- collect(left_join(x, y, by = "x"))
expect_equal(nrow(out), 9)
})
test_that("performs cross join", {
df1 <- data.frame(x = 1:2, y = "a", stringsAsFactors = FALSE)
df2 <- data.frame(x = 3:4)
expected <- dplyr::cross_join(df1, df2) %>% as_tibble()
dt1 <- lazy_dt(df1, "dt1")
dt2 <- lazy_dt(df2, "dt2")
expect_snapshot(left_join(dt1, dt2, by = character()))
expect_equal(left_join(dt1, dt2, by = character()) %>% collect(), expected)
expect_snapshot(right_join(dt1, dt2, by = character()))
expect_equal(right_join(dt1, dt2, by = character()) %>% collect(), expected)
expect_snapshot(full_join(dt1, dt2, by = character()))
expect_equal(full_join(dt1, dt2, by = character()) %>% collect(), expected)
expect_snapshot(inner_join(dt1, dt2, by = character()))
expect_equal(inner_join(dt1, dt2, by = character()) %>% collect(), expected)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.