test_that("distinct equivalent to local unique when keep_all is TRUE", {
  # No columns are named in the call, so the result is compared against
  # base unique() applied to the whole data frame.
  input <- data.frame(
    x = rep(1, 4),
    y = rep(c(1, 2), each = 2),
    z = rep(c(1, 2), times = 2)
  )
  deduplicated <- duckplyr_distinct(input)
  expect_equal(deduplicated, unique(input))
})
test_that("distinct for single column works as expected (#1937)", {
  input <- tibble(
    x = rep(1, 4),
    y = rep(c(1, 2), each = 2),
    z = rep(c(1, 2), times = 2)
  )

  # Each single-column distinct must agree with base unique() on the
  # matching one-column subset.
  only_x <- duckplyr_distinct(input, x, .keep_all = FALSE)
  expect_equal(only_x, unique(input["x"]))

  only_y <- duckplyr_distinct(input, y, .keep_all = FALSE)
  expect_equal(only_y, unique(input["y"]))
})
test_that("distinct works for 0-sized columns (#1437)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # Remove the only column so the input has no columns at all.
  no_cols <- duckplyr_select(tibble(x = 1:10), -x)
  result <- duckplyr_distinct(no_cols)

  expect_equal(df_n_col(result), 0L)
})
test_that("if no variables specified, uses all", {
  # Two identical rows should collapse into one when no columns are named.
  duplicated_rows <- tibble(x = c(1, 1), y = c(2, 2))
  single_row <- tibble(x = 1, y = 2)
  expect_equal(duckplyr_distinct(duplicated_rows), single_row)
})
test_that("distinct keeps only specified cols", {
  input <- tibble(x = c(1, 1, 1), y = c(1, 1, 1))
  # Only x is requested, so y must not appear in the output.
  x_only <- duckplyr_distinct(input, x)
  expect_equal(x_only, tibble(x = 1))
})
test_that("unless .keep_all = TRUE", {
  input <- tibble(x = c(1, 1, 1), y = 3:1)

  # Default: columns that were not requested are dropped.
  dropped <- duckplyr_distinct(input, x)
  expect_equal(dropped, tibble(x = 1))

  # With .keep_all = TRUE the expected y is 3L, the value from the first row.
  kept <- duckplyr_distinct(input, x, .keep_all = TRUE)
  expect_equal(kept, tibble(x = 1, y = 3L))
})
test_that("distinct doesn't duplicate columns", {
  input <- tibble(a = 1:3, b = 4:6)

  # Naming the same column twice yields it once.
  repeated_arg <- duckplyr_distinct(input, a, a)
  expect_named(repeated_arg, "a")

  # Naming a column that is also the grouping column yields it once.
  grouped_arg <- duckplyr_distinct(duckplyr_group_by(input, a), a)
  expect_named(grouped_arg, "a")
})
test_that("grouped distinct always includes group cols", {
  input <- tibble(g = c(1, 2), x = c(1, 2))
  grouped <- duckplyr_group_by(input, g)
  # Only x is requested, yet the grouping column g is expected in the output.
  result <- duckplyr_distinct(grouped, x)
  expect_named(result, c("g", "x"))
})
test_that("empty grouped distinct equivalent to empty ungrouped", {
  input <- tibble(g = c(1, 2), x = c(1, 2))
  # Applying distinct() before or after grouping must give the same result.
  distinct_then_group <- duckplyr_group_by(duckplyr_distinct(input), g)
  group_then_distinct <- duckplyr_distinct(duckplyr_group_by(input, g))
  expect_equal(distinct_then_group, group_then_distinct)
})
test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", {
  input <- tibble(g = c(1, 2), x = c(1, 2))
  # Computing the column inline must match mutate() followed by distinct().
  inline <- duckplyr_distinct(input, aa = g * 2)
  two_step <- duckplyr_distinct(duckplyr_mutate(input, aa = g * 2), aa)
  expect_equal(inline, two_step)
})
test_that("distinct on a new, copied variable is equivalent to mutate followed by distinct (#3234)", {
  input <- tibble(g = c(1, 2), x = c(1, 2))
  # Same comparison as for a computed column, but aa is a plain copy of g.
  inline <- duckplyr_distinct(input, aa = g)
  two_step <- duckplyr_distinct(duckplyr_mutate(input, aa = g), aa)
  expect_equal(inline, two_step)
})
test_that("distinct on a dataframe or tibble with columns of type list works", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # The description previously said "throws an error", but no expectation
  # here checks for an error: both require distinct() to return a result.
  tbl <- tibble(
    a = c("1", "1", "2", "2", "3", "3"),
    b = list("A")
  )
  base_df <- data.frame(x = 1:5, y = I(list(1:3, 2:4, 3:5, 4:6, 5:7)))

  # All rows of base_df differ, so the input is expected back unchanged.
  expect_identical(base_df %>% duckplyr_distinct(), base_df)

  # tbl contains three pairs of equal rows; rows 1, 3 and 5 are expected.
  expect_identical(
    tbl %>% duckplyr_distinct(),
    tbl %>% duckplyr_slice(c(1, 3, 5))
  )
})
test_that("distinct handles 0 columns edge case (#2954)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # Select no columns from a two-row frame; distinct() is expected to
  # leave exactly one row.
  two_rows <- data.frame(x = c(1, 1))
  no_cols <- duckplyr_select(two_rows, one_of(character(0)))
  collapsed <- duckplyr_distinct(no_cols)
  expect_equal(nrow(collapsed), 1L)

  # A completely empty tibble stays empty.
  expect_equal(nrow(duckplyr_distinct(tibble())), 0L)
})
test_that("distinct respects order of the specified variables (#3195, #6156)", {
  input <- data.frame(x = 1:2, y = 3:4)
  # Columns are requested as y, x and must come back in that order.
  reordered <- duckplyr_distinct(input, y, x)
  expect_named(reordered, c("y", "x"))
})
test_that("distinct adds grouping variables to front if missing", {
  input <- data.frame(x = 1:2, y = 3:4)

  # Grouping column y is not requested: it is expected first in the output.
  implicit_group <- duckplyr_distinct(duckplyr_group_by(input, y), x)
  expect_named(implicit_group, c("y", "x"))

  # Grouping column y is requested explicitly: the requested order is kept.
  explicit_group <- duckplyr_distinct(duckplyr_group_by(input, y), x, y)
  expect_named(explicit_group, c("x", "y"))
})
test_that("duckplyr_distinct() understands both NA variants (#4516)", {
  # The column ends up with one NA that went through arithmetic (`+ 0`) and
  # one NA assigned directly as NA_real_; both must count as the same value.
  mixed_na <- data.frame(col_a = c(1, NA, NA))
  mixed_na$col_a <- mixed_na$col_a + 0
  mixed_na$col_a[2] <- NA_real_
  expect_equal(nrow(duckplyr_distinct(mixed_na)), 2L)

  # Same idea for set operations: only the left-hand frame has its NA
  # re-assigned after the arithmetic step, yet the set difference is empty.
  lhs <- data.frame(col_a = c(1, NA))
  rhs <- data.frame(col_a = c(1, NA))
  lhs$col_a <- lhs$col_a + 0
  rhs$col_a <- rhs$col_a + 0
  lhs$col_a[2] <- NA
  expect_equal(nrow(duckplyr_setdiff(lhs, rhs)), 0L)
})
test_that("duckplyr_distinct() handles auto splicing", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # A data frame built inline is spliced into its columns.
  expect_equal(
    duckplyr_distinct(iris, Species),
    duckplyr_distinct(iris, data.frame(Species = Species))
  )

  # pick() results are spliced the same way.
  expect_equal(
    duckplyr_distinct(iris, Species),
    duckplyr_distinct(iris, pick(Species))
  )

  # across() inside distinct() must match mutate(across()) then distinct().
  rounded <- duckplyr_mutate(iris, across(starts_with("Sepal"), round))
  expect_equal(
    duckplyr_distinct(rounded, Sepal.Length, Sepal.Width),
    duckplyr_distinct(iris, across(starts_with("Sepal"), round))
  )
})
test_that("distinct preserves grouping", {
  grouped <- duckplyr_group_by(tibble(x = c(1, 1, 2, 2), y = x), x)

  # count_regroups() is a helper defined outside this file; presumably it
  # counts how often the data is regrouped while evaluating its argument.
  # The assignment inside the call keeps the result for later checks.
  n_regroups <- count_regroups(res <- duckplyr_distinct(grouped))
  expect_equal(n_regroups, 0)
  expect_equal(duckplyr_group_vars(res), "x")

  # Overwriting the grouping column x is expected to cost one regroup.
  n_regroups <- count_regroups(res <- duckplyr_distinct(grouped, x = x + 2))
  expect_equal(n_regroups, 1)
  expect_equal(duckplyr_group_vars(res), "x")
})
test_that("duckplyr_distinct() preserves attributes on bare data frames (#6318)", {
  # Attach a custom attribute to a bare data frame.
  tagged <- vctrs::data_frame(x = c(1, 1))
  attr(tagged, "foo") <- "bar"

  # The attribute must survive distinct() on an existing column ...
  by_existing <- duckplyr_distinct(tagged, x)
  expect_identical(attr(by_existing, "foo"), "bar")

  # ... and on a column computed inside the call.
  by_computed <- duckplyr_distinct(tagged, y = x + 1L)
  expect_identical(attr(by_computed, "foo"), "bar")
})
# Errors ------------------------------------------------------------------
# Each call below refers to at least one column that does not exist in `df`
# (aa, bb, a) and is expected to raise an error.
test_that("distinct errors when selecting an unknown column (#3140)", {
# Skipped when the DUCKPLYR_FORCE environment variable is set to "TRUE".
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
# NOTE(review): expect_snapshot() compares against a stored snapshot, so the
# code inside the braces should stay textually stable — confirm before editing.
expect_snapshot({
df <- tibble(g = c(1, 2), x = c(1, 2))
(expect_error(df %>% duckplyr_distinct(aa, x)))
(expect_error(df %>% duckplyr_distinct(aa, bb)))
(expect_error(df %>% duckplyr_distinct(.data$aa)))
(expect_error(df %>% duckplyr_distinct(y = a + 1)))
})
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.