tests/testthat/test-dictionary.R

# counting ----------------------------------------------------------------

test_that("vec_count counts number observations", {
  x <- vec_count(rep(1:3, 1:3), sort = "key")
  expect_equal(x, data.frame(key = 1:3, count = 1:3))
})

test_that("vec_count(sort = 'count') uses a stable sort when there are ties (#1588)", {
  x <- c("a", "b", "b", "a", "d")

  expect_identical(
    vec_count(x, sort = "count"),
    data_frame(key = c("a", "b", "d"), count = c(2L, 2L, 1L))
  )
})

test_that("vec_count works with matrices", {
  x <- matrix(c(1, 1, 1, 2, 2, 1), c(3, 2))

  out <- vec_count(x)
  exp <- data_frame(key = c(NA, NA), count = int(2L, 1L))
  exp$key <- vec_slice(x, c(1, 3))

  expect_identical(out, exp)
})

test_that("vec_count works with arrays", {
  x <- array(c(rep(1, 3), rep(2, 3)), dim = c(3, 2, 1))
  expect <- data.frame(key = NA, count = 3)
  expect$key <- vec_slice(x, 1L)
  expect_equal(vec_count(x), expect)
})

test_that("vec_count works for zero-length input", {
  x <- vec_count(integer(), sort = "none")
  expect_equal(x, data.frame(key = integer(), count = integer()))
})

test_that("vec_count works with different encodings", {
  x <- vec_count(encodings())
  expect_equal(x, new_data_frame(list(key = encodings()[1], count = 3L)))
})

test_that("vec_count recursively takes the equality proxy", {
  local_comparable_tuple()

  x <- tuple(c(1, 1, 2), 1:3)
  df <- data_frame(x = x)
  expect <- data_frame(key = vec_slice(df, c(1, 3)), count = c(2L, 1L))

  expect_equal(vec_count(df), expect)
})

# duplicates and uniques --------------------------------------------------

test_that("vec_duplicated reports on duplicates regardless of position", {
  x <- c(1, 1, 2, 3, 4, 4)
  expect_equal(vec_duplicate_detect(x), c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE))
})

test_that("vec_duplicate_any returns single TRUE/FALSE", {
  expect_false(vec_duplicate_any(c(1:10)))
  expect_true(vec_duplicate_any(c(1:10, 1)))
})

test_that("vec_duplicate_id gives position of first found", {
  x <- c(1, 2, 3, 1, 4)
  expect_equal(vec_duplicate_id(x), c(1, 2, 3, 1, 5))
})

test_that("vec_unique matches unique", {
  x <- sample(100, 1000, replace = TRUE)
  expect_equal(vec_unique(x), unique(x))
  expect_equal(vec_unique(c("x", "x")), "x")
})

test_that("vec_unique matches unique for matrices", {
  x <- matrix(c(1, 1, 2, 2), ncol = 2)
  expect_equal(vec_unique(x), unique(x))
})

test_that("vec_unique_count matches length + unique", {
  x <- sample(100, 1000, replace = TRUE)
  expect_equal(vec_unique_count(x), length(unique(x)))
})

test_that("also works for data frames", {
  df <- data.frame(x = 1:3, y = letters[3:1], stringsAsFactors = FALSE)
  idx <- c(1L, 1L, 1L, 2L, 2L, 3L)
  df2 <- df[idx, , drop = FALSE]
  rownames(df2) <- NULL

  expect_equal(vec_duplicate_detect(df2), vec_duplicate_detect(idx))
  expect_equal(vec_unique(df2), vec_slice(df, vec_unique(idx)))

  count <- vec_count(df2, sort = "key")
  expect_equal(count$key, df)
  expect_equal(count$count, vec_count(idx)$count)

  exp <- tibble(x = c(1, 1, 2), y = c(1, 2, 3))
  expect_identical(vec_unique(vec_slice(exp, c(1, 1, 2, 3))), exp)
})

test_that("vec_unique() handles matrices (#327)", {
  x <- matrix(c(1, 2, 3, 4), c(2, 2))
  y <- matrix(c(1, 2, 3, 5), c(2, 2))
  expect_identical(vec_unique(list(x, x)), list(x))
  expect_identical(vec_unique(list(x, y)), list(x, y))

  x <- matrix(c(1, 2, 1, 1, 2, 1), nrow = 3)
  expect_identical(vec_unique(x), vec_slice(x, 1:2))
})

test_that("vec_unique() works with 1D arrays", {
  # 1D arrays are dispatched to `as.data.frame.vector()` which
  # currently does not strip dimensions. This caused an infinite
  # recursion.
  expect_identical(vec_unique(array(1:2)), array(1:2))

  x <- new_vctr(c(1, 1, 1, 2, 1, 2), dim = c(3, 2))
  expect_identical(vec_unique(x), new_vctr(c(1, 1, 2, 1), dim = c(2, 2)))
})

test_that("unique functions take the equality proxy (#375)", {
  local_comparable_tuple()
  x <- tuple(c(1, 2, 1), 1:3)

  expect_true(vec_in(tuple(2, 100), x))
  expect_identical(vec_match(tuple(2, 100), x), 2L)
})

test_that("unique functions take the equality proxy recursively", {
  local_comparable_tuple()

  x <- tuple(c(1, 1, 2), 1:3)
  df <- data_frame(x = x)

  expect_equal(vec_unique(df), vec_slice(df, c(1, 3)))
  expect_equal(vec_unique_count(df), 2L)
  expect_equal(vec_unique_loc(df), c(1, 3))
})

test_that("duplicate functions take the equality proxy recursively", {
  local_comparable_tuple()

  x <- tuple(c(1, 1, 2), 1:3)
  df <- data_frame(x = x)

  expect_equal(vec_duplicate_any(df), TRUE)
  expect_equal(vec_duplicate_detect(df), c(TRUE, TRUE, FALSE))
  expect_equal(vec_duplicate_id(df), c(1, 1, 3))
})

test_that("unique functions treat positive and negative 0 as equivalent (#637)", {
  expect_equal(vec_unique(c(0, -0)), 0)
  expect_equal(vec_unique_count(c(0, -0)), 1)
  expect_equal(vec_unique_loc(c(0, -0)), 1)
})

test_that("unique functions work with different encodings", {
  encs <- encodings()

  expect_equal(vec_unique(encs), encs[1])
  expect_equal(vec_unique_count(encs), 1L)
  expect_equal(vec_unique_loc(encs), 1L)
})

test_that("unique functions can handle scalar types in lists", {
  x <- list(a ~ b, a ~ b, a ~ c)
  expect_equal(vec_unique(x), vec_slice(x, c(1, 3)))

  x <- list(call("x"), call("y"), call("x"))
  expect_equal(vec_unique(x), vec_slice(x, c(1, 2)))
})

test_that("duplicate functions works with different encodings", {
  encs <- encodings()

  expect_equal(vec_duplicate_id(encs), rep(1, 3))
  expect_equal(vec_duplicate_detect(encs), rep(TRUE, 3))
  expect_equal(vec_duplicate_any(encs), TRUE)
})

test_that("vec_unique() returns differently encoded strings in the order they appear", {
  encs <- encodings()
  x <- c(encs$unknown, encs$utf8)
  y <- c(encs$utf8, encs$unknown)

  expect_equal_encoding(vec_unique(x), encs$unknown)
  expect_equal_encoding(vec_unique(y), encs$utf8)
})

test_that("vec_unique() works on lists containing expressions", {
  x <- list(expression(x), expression(y), expression(x))
  expect_equal(vec_unique(x), x[1:2])
})

test_that("vec_unique() works with glm objects (#643)", {
  # class(model$family$initialize) == "expression"
  model <- glm(mpg ~ wt, data = mtcars)
  expect_equal(vec_unique(list(model, model)), list(model))
})

test_that("can take the unique locations of dfs with list-cols", {
  df <- tibble(x = list(1, 2, 1, 3), y = list(1, 2, 1, 3))
  expect_identical(vec_unique_loc(df), c(1L, 2L, 4L))
})

# matching ----------------------------------------------------------------

test_that("vec_match() matches match()", {
  n <- c(1:3, NA)
  h <- c(4, 2, 1, NA)
  expect_equal(vec_match(n, h), match(n, h))

  expect_equal(vec_match(1.5, c(2, 1.5, NA)), match(1.5, c(2, 1.5, NA)))
  expect_equal(vec_match("x", "x"), match("x", "x"))
})

test_that("vec_match() and vec_in() check types", {
  expect_snapshot({
    df1 <- data_frame(x = data_frame(foo = 1))
    df2 <- data_frame(x = data_frame(foo = ""))

    (expect_error(vec_match(df1, df2), class = "vctrs_error_incompatible_type"))
    (expect_error(vec_match(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type"))
    (expect_error(vec_in(df1, df2), class = "vctrs_error_incompatible_type"))
    (expect_error(vec_in(df1, df2, needles_arg = "n", haystack_arg = "h"), class = "vctrs_error_incompatible_type"))
  })
})

test_that("vec_in() matches %in%", {
  n <- c(1:3, NA)
  h <- c(4, 2, 1, NA)

  expect_equal(vec_in(n, h), n %in% h)
})

test_that("can opt out of NA matching", {
  n <- c(1, NA)
  h <- c(1:3, NA)

  expect_equal(vec_in(n, h, na_equal = FALSE), c(TRUE, NA))
})

test_that("vec_match works with empty data frame", {
  out <- vec_match(
    new_data_frame(n = 3L),
    new_data_frame(n = 0L)
  )
  expect_equal(out, vec_init(integer(), 3))
})

test_that("matching functions take the equality proxy (#375)", {
  local_comparable_tuple()
  x <- tuple(c(1, 2, 1), 1:3)

  expect_identical(vec_unique_loc(x), 1:2)
  expect_identical(unique(x), tuple(c(1, 2), 1:2))

  expect_true(vec_duplicate_any(x))
  expect_identical(vec_duplicate_id(x), c(1L, 2L, 1L))
  expect_identical(vec_unique_count(x), 2L)

  expect_identical(vec_duplicate_detect(x), c(TRUE, FALSE, TRUE))
})

test_that("can take the unique loc of 1d arrays (#461)", {
  x <- array(c(1, 1, 2, 2, 3))
  y <- array(c(1, 1, 2, 2, 3), dimnames = list(NULL))
  expect_identical(vctrs::vec_unique_loc(x), int(1, 3, 5))
  expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5))

  z <- array(c(1, 1, 2, 2, 3, 4), c(3, 2))
  expect_silent(expect_identical(vctrs::vec_unique_loc(y), int(1, 3, 5)))
})

test_that("matching functions work with different encodings", {
  encs <- encodings()

  expect_equal(vec_match(encs, encs[1]), rep(1, 3))
  expect_equal(vec_in(encs, encs[1]), rep(TRUE, 3))
})

test_that("matching functions take the equality proxy recursively", {
  local_comparable_tuple()

  x <- tuple(c(1, 2), 1:2)
  df <- data_frame(x = x)

  y <- tuple(c(2, 3), c(3, 3))
  df2 <- data_frame(x = y)

  expect_equal(vec_match(df, df2), c(NA, 1))
  expect_equal(vec_in(df, df2), c(FALSE, TRUE))
})

test_that("can propagate missing values while matching", {
  exp <- c(NA, 3L, NA, 1L)
  expect_identical(vec_match(lgl(NA, TRUE, NA, FALSE), lgl(FALSE, NA, TRUE), na_equal = FALSE), exp)
  expect_identical(vec_match(int(NA, 1L, NA, 2L), int(2L, NA, 1L), na_equal = FALSE), exp)
  expect_identical(vec_match(dbl(NA, 1, NA, 2), dbl(2, NA, 1), na_equal = FALSE), exp)
  expect_identical(vec_match(cpl(NA, 1, NA, 2), cpl(2, NA, 1), na_equal = FALSE), exp)
  expect_identical(vec_match(chr(NA, "1", NA, "2"), chr("2", NA, "1"), na_equal = FALSE), exp)
  expect_identical(vec_match(list(NULL, 1, NULL, 2), list(2, NULL, 1), na_equal = FALSE), exp)

  # No missing values for raw vectors
  expect_identical(vec_match(raw2(0, 1, 0, 2), raw2(2, 0, 1), na_equal = FALSE), c(2L, 3L, 2L, 1L))
})

test_that("can propagate missingness of incomplete rcrd observations (#1386)", {
  x <- new_rcrd(list(x = c(1, 1, NA, NA), y = c(1, NA, 1, NA)))
  expect_identical(vec_match(x, x, na_equal = FALSE), c(1L, NA, NA, NA))

  # Matches `vec_detect_complete()` results
  expect_identical(vec_detect_complete(x), c(TRUE, FALSE, FALSE, FALSE))
})

test_that("can propagate NaN as a missing value (#1252)", {
  expect_identical(vec_match(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), int(NA, NA))
  expect_identical(vec_in(dbl(NaN, NA), c(NaN, NA), na_equal = FALSE), lgl(NA, NA))
})

test_that("missing values are propagated across columns", {
  for (na_value in list(NA, na_int, na_dbl, na_cpl, na_chr, list(NULL))) {
    df <- data_frame(x = 1, y = data_frame(foo = 2, bar = na_value), z = 3)
    expect_identical(vec_match(df, df), 1L)
    expect_identical(vec_match(df, df, na_equal = FALSE), na_int)
  }
})

test_that("can't supply NA as `na_equal`", {
  expect_error(vec_match(NA, NA, na_equal = NA), "single `TRUE` or `FALSE`")
})

test_that("vec_match() and vec_in() silently fall back to base data frame", {
  expect_silent(expect_identical(
    vec_match(foobar(mtcars), foobar(tibble::as_tibble(mtcars))),
    1:32
  ))
  expect_silent(expect_identical(
    vec_in(foobar(mtcars), foobar(tibble::as_tibble(mtcars))),
    rep(TRUE, 32)
  ))
})

test_that("vec_in() evaluates arg lazily", {
  expect_silent(vec_in(1L, 1L, needles_arg = print("oof")))
  expect_silent(vec_in(1L, 1L, haystack_arg = print("oof")))
})

test_that("vec_match() evaluates arg lazily", {
  expect_silent(vec_match(1L, 1L, needles_arg = print("oof")))
  expect_silent(vec_match(1L, 1L, haystack_arg = print("oof")))
})

Try the vctrs package in your browser

Any scripts or data that you put into this service are public.

vctrs documentation built on Oct. 13, 2023, 1:05 a.m.