# tests/testthat/test-utf8_valid.R

test_that("'as_utf8' errors on latin1 declared to be UTF-8", {
  # The third element contains a raw 0xA3 byte (the latin1 pound sign)
  # directly after 35 ASCII characters; the whole vector is then marked
  # as "UTF-8" even though that byte cannot start a UTF-8 sequence.
  input <- c("a", "b", "the command of her beauty, and her \xa320,000", "d")
  Encoding(input) <- "UTF-8"

  # Only entry 3 should be reported as invalid.
  validity <- utf8_valid(input)
  expect_equal(validity, c(TRUE, TRUE, FALSE, TRUE))

  # The conversion must fail with the exact message (matched literally).
  expected_msg <- "entry 3 has wrong Encoding; marked as \"UTF-8\" but invalid leading byte (0xA3) at position 36"
  expect_error(as_utf8(input), expected_msg, fixed = TRUE)
})


test_that("utf8_valid errors on invalid UTF-8", {
  # The last element is a 5-byte sequence starting with 0xF8, i.e. the
  # obsolete long form that would encode 0x00200000
  # (original note: intToUtf8(0x00200000)).
  input <- c("a", "b", "c", "d", "\xf8\x88\x80\x80\x80")
  Encoding(input) <- "UTF-8"

  # The four ASCII entries are valid; only the fifth is rejected.
  expected_validity <- c(rep(TRUE, 4), FALSE)
  expect_equal(utf8_valid(input), expected_validity)

  # The conversion must fail with the exact message (matched literally).
  expected_msg <- "entry 5 has wrong Encoding; marked as \"UTF-8\" but invalid leading byte (0xF8) at position 1"
  expect_error(as_utf8(input), expected_msg, fixed = TRUE)
})


test_that("utf8_valid passes on valid UTF-8 in bytes encoding", {
  # "hello" followed by U+2002, then re-declared as raw "bytes".
  as_bytes <- "hello\u2002"
  Encoding(as_bytes) <- "bytes"

  # The same string with its encoding declared as "UTF-8"; used as the
  # reference for what as_utf8() should produce.
  as_declared <- as_bytes
  Encoding(as_declared) <- "UTF-8"

  expect_equal(utf8_valid(as_bytes), TRUE)
  expect_equal(as_utf8(as_bytes), enc2utf8(as_declared))
})


test_that("utf8_valid passes on valid ASCII in unknown encoding", {
  # Plain ASCII literal; no Encoding() is assigned in this test.
  ascii <- "world"
  reference <- enc2utf8(ascii)

  expect_equal(utf8_valid(ascii), TRUE)
  expect_equal(as_utf8(ascii), reference)
})


test_that("utf8_valid errors on invalid UTF8 in bytes encoding", {
  # Five ASCII characters followed by a 6-byte sequence starting with
  # 0xFC (original note: intToUtf8(0xfffffff)); the offending byte
  # therefore sits at position 6.
  ascii_prefix <- "hello"
  bad_sequence <- "\xfc\x8f\xbf\xbf\xbf\xbf"
  input <- paste0(ascii_prefix, bad_sequence)
  Encoding(input) <- "bytes"

  expect_equal(utf8_valid(input), FALSE)

  # The conversion must fail with the exact message (matched literally).
  expected_msg <- "entry 1 cannot be converted from \"bytes\" Encoding to \"UTF-8\"; invalid leading byte (0xFC) at position 6"
  expect_error(as_utf8(input), expected_msg, fixed = TRUE)
})

# Try the utf8 package in your browser
#
# Any scripts or data that you put into this service are public.
#
# utf8 documentation built on Oct. 23, 2023, 1:06 a.m.