Nothing
# Shared fixture for the "local unique" comparisons below: `x` is constant,
# `y` changes every two rows and `z` alternates row by row.
df <- tibble(
  x = rep(1, 4),                # 1, 1, 1, 1
  y = rep(c(1, 2), each = 2),   # 1, 1, 2, 2
  z = rep(c(1, 2), times = 2)   # 1, 2, 1, 2
)

# `test_load()` is defined outside this file; presumably it loads `df` into
# every available test backend -- TODO confirm against the helper.
dfs <- test_load(df)
test_that("distinct equivalent to local unique when keep_all is TRUE", {
  # Run a bare distinct() on every element of `dfs` and compare each result
  # with base `unique()` applied to the local data frame.
  deduped <- lapply(dfs, function(tbl) distinct(tbl))

  expect_equal_tbls(deduped, unique(df))
})
test_that("distinct for single column equivalent to local unique (#1937)", {
  # For each single column, distinct() on every element of `dfs` is compared
  # with `unique()` on the matching one-column slice of the local data frame.
  distinct_x <- lapply(
    dfs,
    function(tbl) distinct(tbl, x, .keep_all = FALSE)
  )
  expect_equal_tbls(distinct_x, unique(df["x"]))

  distinct_y <- lapply(
    dfs,
    function(tbl) distinct(tbl, y, .keep_all = FALSE)
  )
  expect_equal_tbls(distinct_y, unique(df["y"]))
})
test_that("distinct doesn't duplicate column names if grouped (#354)", {
  # Grouping by the only column must not make it appear twice in the output.
  grouped <- group_by(lazy_frame(a = 1), a)

  expect_equal(op_vars(distinct(grouped)), "a")
})
test_that("distinct respects groups", {
  grouped <- group_by(memdb_frame(a = 1:2, b = 1), a)

  # The table is grouped by `a` a second time before distinct(); the grouping
  # column is expected first, followed by the remaining column.
  vars <- op_vars(distinct(group_by(grouped, a)))

  expect_equal(vars, c("a", "b"))
})
test_that("distinct returns all columns when .keep_all is TRUE", {
  remote <- memdb_frame(x = c(1, 1, 2, 2), y = 1:4)

  # One row per distinct `x`, with `y` carried along and no grouping.
  collected <- collect(distinct(remote, x, .keep_all = TRUE))

  expect_named(collected, c("x", "y"))
  expect_equal(collected$x, c(1, 2))
  expect_equal(group_vars(collected), character())
})
test_that("distinct respects groups when .keep_all is TRUE", {
  remote <- memdb_frame(x = c(1, 1, 2, 2), y = 1:4)

  # No columns are passed to distinct(), so only the grouping on `x` applies;
  # the grouping must still be present on the collected result.
  grouped <- group_by(remote, x)
  collected <- collect(distinct(grouped, .keep_all = TRUE))

  expect_named(collected, c("x", "y"))
  expect_equal(collected$x, c(1, 2))
  expect_equal(group_vars(collected), "x")
})
test_that("distinct can select variables via pick() #1125", {
  base <- lazy_frame(x_1 = 1, x_2 = 1, y = 1)

  # pick() with a tidyselect helper should expand to the matching columns.
  picked <- distinct(base, pick(starts_with("x_")))

  expect_equal(
    remote_query(picked),
    sql("SELECT DISTINCT `x_1`, `x_2`\nFROM `df`")
  )
})
test_that("distinct() produces optimized SQL", {
  lf <- lazy_frame(x = 1, y = 1)

  # A variable renamed by select() can be used in distinct(); the rename is
  # rendered inside the same SELECT DISTINCT.
  out <- distinct(select(lf, a = x, y), a)
  query <- out$lazy_query
  expect_equal(
    remote_query(out),
    sql("SELECT DISTINCT `x` AS `a`\nFROM `df`")
  )
  expect_true(query$distinct)
  expect_equal(query$select$name, "a")
  expect_equal(query$select$expr, syms("x"))

  # Unnecessary extra variables (here `z`) do not matter: the query's source
  # is still the base table and only `x` and `y` are selected.
  with_extra <- mutate(lf, z = 1)
  out <- distinct(group_by(with_extra, x), y)
  query <- out$lazy_query
  expect_s3_class(query$x, "lazy_base_local_query")
  expect_equal(query$select$name, c("x", "y"))
  expect_equal(query$select$expr, syms(c("x", "y")))

  # distinct() is inlined after `summarise()`
  averaged <- summarise(group_by(lf, x), y = mean(y, na.rm = TRUE))
  out <- distinct(averaged, x, y)
  query <- out$lazy_query
  expect_equal(
    remote_query(out),
    sql("SELECT DISTINCT `x`, AVG(`y`) AS `y`\nFROM `df`\nGROUP BY `x`")
  )
  expect_true(query$distinct)
  expect_equal(query$select$name, c("x", "y"))
  expect_equal(
    query$select$expr,
    list(sym("x"), quo(mean(y, na.rm = TRUE))),
    ignore_formula_env = TRUE
  )
  expect_equal(query$group_by, syms("x"))

  # distinct() is inlined after `filter()`
  out <- distinct(filter(lf, x == 1L), y)
  query <- out$lazy_query
  expect_equal(
    remote_query(out),
    sql("SELECT DISTINCT `y`\nFROM `df`\nWHERE (`x` = 1)")
  )
  expect_true(query$distinct)
  expect_equal(query$select$name, "y")
  expect_equal(query$select$expr, syms("y"))
  expect_equal(
    query$where,
    list(quo(x == 1L)),
    ignore_formula_env = TRUE
  )

  # summarise() + filter() + distinct() collapse into one query with HAVING.
  # Note: currently this needs `distinct()` or `distinct(x, y)` because
  # `summarise()` + `select()` is not inlined.
  averaged <- summarise(group_by(lf, x), y = mean(y, na.rm = TRUE))
  out <- distinct(filter(averaged, x == 1), x, y)
  query <- out$lazy_query
  expect_equal(
    remote_query(out),
    sql("SELECT DISTINCT `x`, AVG(`y`) AS `y`\nFROM `df`\nGROUP BY `x`\nHAVING (`x` = 1.0)")
  )
  expect_true(query$distinct)
  expect_equal(query$select$name, c("x", "y"))
  expect_equal(
    query$select$expr,
    list(sym("x"), quo(mean(y, na.rm = TRUE))),
    ignore_formula_env = TRUE
  )

  # distinct() after arrange() keeps the ORDER BY in the same query
  out <- distinct(arrange(lf, y), x)
  expect_equal(
    remote_query(out),
    sql("SELECT DISTINCT `x`\nFROM `df`\nORDER BY `y`")
  )

  # After head(), the limited query is expected to stay as a separate inner
  # query. The expression inside expect_snapshot() is part of the recorded
  # snapshot, so it is kept exactly as written (including `%>%` and names).
  expect_snapshot(
    (out <- lf %>%
      head(2) %>%
      distinct(x, y))
  )
  inner <- out$lazy_query$x
  expect_s3_class(inner, "lazy_select_query")
  expect_equal(inner$limit, 2)
})
# sql-render --------------------------------------------------------------
test_that("distinct adds DISTINCT suffix", {
  deduped <- distinct(memdb_frame(x = c(1, 1)))

  # Check both the rendered SQL and the data that comes back.
  expect_match(sql_render(deduped), "SELECT DISTINCT")
  expect_equal(collect(deduped), tibble(x = 1))
})
test_that("distinct can compute variables", {
  remote <- memdb_frame(x = c(2, 1), y = c(1, 2))

  # Both rows give x + y == 3, so a single row is expected.
  sums <- distinct(remote, z = x + y)

  expect_equal(collect(sums), tibble(z = 3))
})
test_that("distinct can compute variables when .keep_all is TRUE", {
  remote <- memdb_frame(x = c(2, 1), y = c(1, 2))

  # The computed column is expected after the original columns.
  collected <- collect(distinct(remote, z = x + y, .keep_all = TRUE))

  expect_named(collected, c("x", "y", "z"))
  expect_equal(collected$z, 3)
})
test_that("distinct respects window_order when .keep_all is TRUE", {
  remote <- memdb_frame(x = c(1, 1, 2, 2), y = 1:4)

  # With `y` ordered descending, the row kept for each `x` is the one with
  # the largest `y` (2 for x == 1, 4 for x == 2).
  ordered <- window_order(remote, desc(y))
  kept <- collect(distinct(ordered, x, .keep_all = TRUE))
  expect_equal(kept, tibble(x = 1:2, y = c(2, 4)))

  # The snapshot records the code it is given, so the expression below is
  # left exactly as written.
  lf <- lazy_frame(x = c(1, 1, 2, 2), y = 1:4)
  expect_snapshot(
    lf %>%
      window_order(desc(y)) %>%
      distinct(x, .keep_all = TRUE)
  )
})
test_that("distinct uses dummy window order when .keep_all is TRUE and no order is used", {
  lf <- lazy_frame(x = 1, y = 2)

  # No window_order() is set here; the generated SQL is pinned by snapshot.
  # The snapshot records the expression, so its code is kept unchanged.
  expect_snapshot(
    lf %>% distinct(x, .keep_all = TRUE)
  )
})
# sql_build ---------------------------------------------------------------
test_that("distinct sets flagged", {
  # Built query without distinct(): the flag must be off.
  plain_build <- sql_build(select(lazy_frame(x = 1)))
  expect_false(plain_build$distinct)

  # Built query with distinct(): the flag must be on.
  distinct_build <- sql_build(distinct(lazy_frame(x = 1)))
  expect_true(distinct_build$distinct)
})
# ops ---------------------------------------------------------------------
test_that("distinct produces correct vars", {
  # No arguments: every column is kept.
  all_cols <- distinct(lazy_frame(x = 1, y = 2))
  expect_equal(op_vars(all_cols), c("x", "y"))

  wide <- lazy_frame(x = 1, y = 2, z = 3)

  # Named columns only.
  expect_equal(op_vars(distinct(wide, x, y)), c("x", "y"))

  # Renamed columns are reported under their new names.
  expect_equal(op_vars(distinct(wide, a = x, b = y)), c("a", "b"))

  # The grouping column is included ahead of the requested one.
  grouped <- group_by(wide, x)
  expect_equal(op_vars(distinct(grouped, y)), c("x", "y"))
})
test_that("distinct produces correct vars when .keep_all is TRUE", {
  narrow <- lazy_frame(x = 1, y = 2)

  # With .keep_all = TRUE all original columns are retained in every case.
  expect_equal(op_vars(distinct(narrow, .keep_all = TRUE)), c("x", "y"))
  expect_equal(op_vars(distinct(narrow, x, .keep_all = TRUE)), c("x", "y"))

  # A newly named column is appended after the original ones.
  expect_equal(
    op_vars(distinct(narrow, a = x, .keep_all = TRUE)),
    c("x", "y", "a")
  )

  # Grouped input: all three columns are still reported.
  grouped <- group_by(lazy_frame(x = 1, y = 2, z = 3), x)
  expect_equal(
    op_vars(distinct(grouped, y, .keep_all = TRUE)),
    c("x", "y", "z")
  )
})
test_that("distinct respects order of the specified variables (#3195, #6156)",{
  tbl <- lazy_frame(x = 1:2, y = 3:4)

  # Columns come back in the order given to distinct(), not table order.
  reordered <- tbl %>% distinct(y, x)

  expect_equal(colnames(reordered), c("y", "x"))
})
test_that("distinct adds grouping variables to front if missing",{
  grouped <- lazy_frame(x = 1:2, y = 3:4) %>% group_by(y)

  # Grouping column not listed: it is placed in front.
  implicit <- grouped %>% distinct(x)
  expect_equal(colnames(implicit), c("y", "x"))

  # Grouping column listed explicitly: the requested order is kept.
  explicit <- grouped %>% distinct(x, y)
  expect_equal(colnames(explicit), c("x", "y"))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.