# tests/testthat/test-select.R

test_that("select preserves grouping", {
  # Three-row input grouped on `g`.
  grouped <- tibble(g = 1:3, x = 3:1) %>% duckplyr_group_by(g)

  # The select runs inside count_regroups() (a test helper defined outside
  # this file); its return value is expected to be 0 for a pure rename.
  n_regroups <- count_regroups(renamed <- duckplyr_select(grouped, h = g))
  expect_equal(n_regroups, 0)

  # The grouping variable is reported under its new name.
  expect_equal(duckplyr_group_vars(renamed), "h")
})

test_that("grouping variables preserved with a message, unless already selected (#1511, #5841)", {
  # Case 1: only `x` is requested from a frame grouped by `g`.
  # The call is wrapped in expect_snapshot(), so the code and whatever it
  # emits are compared against the stored snapshot.
  df <- tibble(g = 1:3, x = 3:1) %>% duckplyr_group_by(g)

  expect_snapshot({
    res <- duckplyr_select(df, x)
  })
  # The grouping column `g` is still present, ahead of the requested `x`.
  expect_named(res, c("g", "x"))

  # Case 2: another column is renamed onto the grouping variable's name.
  # The expected value is a plain tibble holding just the renamed column.
  df <- tibble(a = 1, b = 2, c = 3) %>% duckplyr_group_by(a)
  expect_equal(df %>% duckplyr_select(a = b), tibble(a = 2))

  # Case 3: two grouping variables, one of which is overwritten by the rename.
  # The expected results keep only the untouched grouping variable as a group.
  df <- tibble(a = 1, b = 2, c = 3) %>% duckplyr_group_by(a, b)
  expect_snapshot({
    expect_equal(df %>% duckplyr_select(a = c), tibble(b = 2, a = 3) %>% duckplyr_group_by(b))
    expect_equal(df %>% duckplyr_select(b = c), tibble(a = 1, b = 3) %>% duckplyr_group_by(a))
  })
})

test_that("non-syntactic grouping variable is preserved (#1138)", {
  # Empty selection on a frame grouped by a column whose name contains a
  # space. The pipeline sits inside expect_snapshot(), so the code and
  # anything it emits are compared against the stored snapshot.
  expect_snapshot(
    df <- tibble(`a b` = 1L) %>% duckplyr_group_by(`a b`) %>% duckplyr_select()
  )
  # The grouping column survives with its original, non-syntactic name.
  expect_named(df, "a b")
})

test_that("select doesn't fail if some names missing", {
  fully_named <- data.frame(x = 1:10, y = 1:10, z = 1:10)
  # Same data, but the last column has an empty name.
  last_blank <- setNames(fully_named, c("x", "y", ""))
  # Disabled variant with two empty names:
  # two_blank <- setNames(fully_named, c("x", "", ""))

  # Selecting the named column `x` should give the same one-column frame.
  only_first <- data.frame(x = 1:10)
  expect_equal(duckplyr_select(fully_named, x), only_first)
  expect_equal(duckplyr_select(last_blank, x), only_first)
  # expect_equal(duckplyr_select(two_blank, x), only_first)
})


# Special cases -------------------------------------------------

test_that("select with no args returns nothing", {
  # No selection arguments at all.
  no_args <- duckplyr_select(mtcars)
  expect_equal(df_n_col(no_args), 0)
  expect_equal(nrow(no_args), 32)

  # An empty list spliced into the selection.
  spliced_empty <- duckplyr_select(mtcars, !!!list())
  expect_equal(df_n_col(spliced_empty), 0)
  expect_equal(nrow(spliced_empty), 32)
})

test_that("select excluding all vars returns nothing", {
  # Each selection must leave 32 rows and zero columns.
  rows_without_cols <- c(32, 0)

  # Negated range spanning every column.
  expect_equal(dim(duckplyr_select(mtcars, -(mpg:carb))), rows_without_cols)
  # Helper that matches no column name.
  expect_equal(dim(duckplyr_select(mtcars, starts_with("x"))), rows_without_cols)
  # Negated helper that matches every column name.
  expect_equal(dim(duckplyr_select(mtcars, -matches("."))), rows_without_cols)
})

test_that("negating empty match returns everything", {
  input <- data.frame(x = 1:3, y = 3:1)

  # No column starts with "xyz", so removing that empty set keeps the input.
  untouched <- duckplyr_select(input, -starts_with("xyz"))
  expect_equal(untouched, input)
})

test_that("can select with duplicate columns", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  # Two columns share the name `x`; "minimal" repair leaves them as they are.
  dup_names <- tibble(x = 1, x = 2, y = 1, .name_repair = "minimal")

  # a duplicated column is reachable through its position
  by_position <- dup_names %>% duckplyr_select(1, 3)
  expect_named(by_position, c("x", "y"))

  # a uniquely named column is reachable through its name
  by_name <- dup_names %>% duckplyr_select(y)
  expect_named(by_name, "y")
})

# Select variables -----------------------------------------------

test_that("select can be before group_by (#309)", {
  panel <- data.frame(
    id = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5),
    year = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013),
    var1 = rnorm(10)
  )

  # group -> select -> summarise, spelled out one step at a time
  by_id_year <- duckplyr_group_by(panel, id, year)
  picked <- duckplyr_select(by_id_year, id, year, var1)
  averaged <- duckplyr_summarise(picked, var1 = mean(var1))

  # Only the column names are checked; the values are random.
  expect_equal(names(averaged), c("id", "year", "var1"))
})


test_that("select succeeds in presence of raw columns (#1803)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
  with_raw <- tibble(a = 1:3, b = as.raw(1:3))

  # Picking the integer column next to a raw column.
  expect_identical(duckplyr_select(with_raw, a), with_raw["a"])
  # Picking the raw column itself.
  expect_identical(duckplyr_select(with_raw, b), with_raw["b"])
  # Dropping the raw column leaves the integer column.
  expect_identical(duckplyr_select(with_raw, -b), with_raw["a"])
})

test_that("arguments to duckplyr_select() don't match vars_select() arguments", {
  # `var`, `exclude` and `include` are used here as new column names; each
  # expectation checks that the column is simply renamed.
  plain <- tibble(a = 1)

  expect_identical(duckplyr_select(plain, var = a), tibble(var = 1))

  # Grouped counterpart of `plain`, reused by the grouped expectations.
  by_a <- duckplyr_group_by(plain, a)
  expect_identical(
    duckplyr_select(by_a, var = a),
    duckplyr_group_by(tibble(var = 1), var)
  )

  expect_identical(duckplyr_select(plain, exclude = a), tibble(exclude = 1))
  expect_identical(duckplyr_select(plain, include = a), tibble(include = 1))

  expect_identical(
    duckplyr_select(by_a, exclude = a),
    duckplyr_group_by(tibble(exclude = 1), exclude)
  )
  expect_identical(
    duckplyr_select(by_a, include = a),
    duckplyr_group_by(tibble(include = 1), include)
  )
})

test_that("can duckplyr_select() with deprecated `.data` pronoun (#2715)", {
  # Set lifecycle verbosity to "quiet" for the duration of this test.
  withr::local_options(lifecycle_verbosity = "quiet")

  via_pronoun <- duckplyr_select(mtcars, .data$cyl)
  via_symbol <- duckplyr_select(mtcars, cyl)
  expect_identical(via_pronoun, via_symbol)
})

test_that("can duckplyr_select() with character vectors", {
  # "cyl" appears twice in the selection but only once in the expected result.
  wanted <- mtcars[c("cyl", "disp", "am", "drat")]

  expect_identical(
    duckplyr_select(mtcars, "cyl", !!"disp", c("cyl", "am", "drat")),
    wanted
  )
})

test_that("duckplyr_select() treats NULL inputs as empty", {
  without_nulls <- duckplyr_select(mtcars, cyl)
  # NULL before and after the real selection should change nothing.
  with_nulls <- duckplyr_select(mtcars, NULL, cyl, NULL)
  expect_identical(without_nulls, with_nulls)
})

test_that("can duckplyr_select() with strings and character vectors", {
  # Named character vector: names are the new column names, values the old.
  renames <- c(foo = "cyl", bar = "am")
  spelled_out <- duckplyr_select(mtcars, foo = cyl, bar = am)

  # Spliced element by element.
  expect_identical(duckplyr_select(mtcars, !!!renames), spelled_out)
  # Injected as a single vector.
  expect_identical(duckplyr_select(mtcars, !!renames), spelled_out)
})

test_that("select works on empty names (#3601)", {
  frame <- data.frame(x = 1, y = 2, z = 3)

  # Empty name on the last column.
  colnames(frame) <- c("x", "y", "")
  expect_identical(duckplyr_select(frame, x)[["x"]], 1)

  # Empty name on the first column.
  colnames(frame) <- c("", "y", "z")
  expect_identical(duckplyr_select(frame, y)[["y"]], 2)
})

test_that("select works on NA names (#3601)", {
  frame <- data.frame(x = 1, y = 2, z = 3)

  # NA name on the last column.
  colnames(frame) <- c("x", "y", NA)
  expect_identical(duckplyr_select(frame, x)[["x"]], 1)

  # NA name on the first column.
  colnames(frame) <- c(NA, "y", "z")
  expect_identical(duckplyr_select(frame, y)[["y"]], 2)
})

test_that("duckplyr_select() keeps attributes of raw data frames (#5831)", {
  # Plain data frame carrying a custom attribute "a".
  tagged <- data.frame(x = 1)
  attr(tagged, "a") <- "b"

  # The attribute must still be on the result of the select.
  selected <- duckplyr_select(tagged, x)
  expect_equal(attr(selected, "a"), "b")
})

test_that("duckplyr_select() provides informative errors", {
  # The selection expression `1 + ""` must make duckplyr_select() fail.
  # The outer parentheses around expect_error() make the returned condition
  # visible so that it is printed into the snapshot.
  expect_snapshot({
    (expect_error(duckplyr_select(mtcars, 1 + "")))
  })
})


# dplyr_col_select() ------------------------------------------------------

test_that("dplyr_col_select() aborts when `[` implementation is broken", {
  # Two deliberately faulty `[` methods for throwaway classes.
  # NOTE(review): local_methods() is a helper defined outside this file;
  # presumably it registers these S3 methods only for this test - confirm.
  local_methods(
    "[.dplyr_test_broken_operator" = function(x, ...) {
      # Strips the class, so the result is no longer a data frame.
      unclass(x)
    },
    "[.dplyr_test_operator_wrong_size" = function(x, ...) {
      # Always returns an empty data frame, whatever columns were requested.
      data.frame()
    }
  )
  # One-column tibble whose `[` returns a non-data-frame.
  df1 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_broken_operator")
  expect_snapshot({
    (expect_error(
      duckplyr_select(df1, 1:2)
    ))
    (expect_error(
      duckplyr_select(df1, 0)
    ))
  })
  # One-column tibble whose `[` returns the wrong number of columns.
  df2 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_operator_wrong_size")
  # Only the presence of an error is checked here; no snapshot is taken.
  expect_error(duckplyr_select(df2, 1:2))

  expect_snapshot({
    # from vctrs
    (expect_error(
      duckplyr_select(df1, 2)
    ))

    # not returning a data frame
    (expect_error(
      duckplyr_select(df1, 1)
    ))

    # unexpected number of columns
    (expect_error(
      duckplyr_select(df2, 1)
    ))
  })

})
# duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.