tests/testthat/test-translate.R

# ------------------------------------------------------------------------------
# vec_normalize_encoding()

test_that("can translate a character vector of various encodings (#553)", {
  x <- unlist(encodings(), use.names = FALSE)

  results <- vec_normalize_encoding(x)

  expect_equal_encoding(results, encodings()$utf8)
})

test_that("translates all encodings to UTF-8", {
  encs <- encodings()

  for (enc in encs) {
    expect_equal_encoding(vec_normalize_encoding(enc), encodings()$utf8)
  }
})

test_that("can translate a list containing character vectors with different encodings", {
  results <- vec_normalize_encoding(encodings())
  results <- unlist(results)

  expect_equal_encoding(results, encodings()$utf8)
})

test_that("translation fails purposefully with any bytes", {
  expect_error(
    vec_normalize_encoding(encoding_bytes()),
    "translating strings with \"bytes\" encoding"
  )
})

test_that("translation fails purposefully when mixing with bytes with other encodings", {
  for (enc in encodings()) {
    x <- c(encoding_bytes(), enc)
    expect_error(vec_normalize_encoding(x), "translating strings with \"bytes\" encoding")
  }
})

test_that("attributes are kept on translation (#599)", {
  encs <- encodings()

  x <- c(encs$utf8, encs$latin1)
  x <- structure(x, names = c("a", "b"), extra = 1)

  expect_equal(attributes(vec_normalize_encoding(x)), attributes(x))
})

test_that("translation is robust against scalar types contained in lists (#633)", {
  x <- list(a = z ~ y, b = z ~ z)
  expect_equal(vec_normalize_encoding(x), x)
})

test_that("translation can still occur even if a scalar type is in a list", {
  encs <- encodings()
  x <- list(a = z ~ y, b = encs$latin1)

  result <- vec_normalize_encoding(x)

  expect_equal_encoding(result$b, encs$utf8)
})

test_that("translation occurs inside scalars contained in a list", {
  encs <- encodings()

  scalar <- structure(list(x = encs$latin1), class = "scalar_list")
  lst <- list(scalar)

  result <- vec_normalize_encoding(lst)

  expect_equal_encoding(result[[1]]$x, encs$utf8)
})

test_that("translation treats data frames elements of lists as lists (#1233)", {
  encs <- encodings()

  field <- c(encs$utf8, encs$latin1)

  a <- new_rcrd(list(field = field))
  df <- data.frame(a = a, b = 1:2)
  x <- list(df)

  # Recursive proxy won't proxy list elements,
  # so the rcrd column in the data frame won't get proxied
  proxy <- vec_proxy_equal(x)

  result <- vec_normalize_encoding(proxy)

  expect_identical(result, x)

  result_field <- field(result[[1]]$a, "field")
  expect_field <- c(encs$utf8, encs$utf8)

  expect_equal_encoding(result_field, expect_field)
})

test_that("attributes are translated", {
  utf8 <- encodings()$utf8
  latin1 <- encodings()$latin1

  a <- structure(1, enc = utf8)
  b <- structure(1, enc = latin1)
  c <- structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1)
  x <- list(a, b, c)

  result <- vec_normalize_encoding(x)

  a_enc <- attr(result[[1]], "enc")
  b_enc <- attr(result[[2]], "enc")
  c_enc1 <- attr(result[[3]], "enc1")
  c_enc2 <- attr(result[[3]], "enc2")[[1]]
  c_enc3 <- attr(result[[3]], "enc3")

  expect_equal_encoding(a_enc, utf8)
  expect_equal_encoding(b_enc, utf8)
  expect_equal_encoding(c_enc1, utf8)
  expect_equal_encoding(c_enc2, utf8)
  expect_equal_encoding(c_enc3, utf8)

  expect <- list(
    structure(1, enc = utf8),
    structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8)
  )

  expect_identical(vec_unique(x), expect)
})

test_that("attributes are translated recursively", {
  utf8 <- encodings()$utf8
  latin1 <- encodings()$latin1

  nested <- structure(1, latin1 = latin1)
  x <- structure(2, nested = nested, foo = 1, latin1 = latin1)

  result <- vec_normalize_encoding(x)
  attrib <- attributes(result)
  attrib_nested <- attributes(attrib$nested)

  expect_equal_encoding(attrib$latin1, utf8)
  expect_equal_encoding(attrib_nested$latin1, utf8)
})

test_that("NAs aren't converted to 'NA' (#1291)", {
  utf8 <- c(NA, encodings()$utf8)
  latin1 <- c(NA, encodings()$latin1)

  result1 <- vec_normalize_encoding(utf8)
  result2 <- vec_normalize_encoding(latin1)

  expect_equal_encoding(result1, utf8)
  expect_equal_encoding(result2, utf8)

  expect_identical(result1[[1]], NA_character_)
  expect_identical(result2[[1]], NA_character_)
})

Try the vctrs package in your browser

Any scripts or data that you put into this service are public.

vctrs documentation built on Oct. 13, 2023, 1:05 a.m.