# tests/testthat/test-encoding.R

# Note: for manual testing on Mac, the following can be used to set a multi-byte
# but non-UTF-8 locale:
# Sys.setlocale("LC_ALL", "ja_JP.SJIS")


test_that("Non-ASCII keys are represented as UTF-8", {
  # Fixture keys: plain ASCII, one non-ASCII string in two encodings, and a
  # CJK string.
  ascii_key <- "abc"
  # "åbc", marked as UTF-8
  utf8_key <- "\u00e5bc"
  # The same "åbc" text, re-encoded as latin1
  latin1_key <- iconv(utf8_key, from = "UTF-8", to = "latin1")
  # "中 A", marked as UTF-8
  cjk_key <- "\u4e2d A"

  # Make sure the fixtures really carry the encoding marks we rely on.
  expect_identical(Encoding(utf8_key), "UTF-8")
  expect_identical(Encoding(latin1_key), "latin1")
  expect_identical(Encoding(cjk_key), "UTF-8")

  # Checks the encoding mark of the element of `found` that matches `key`.
  # The match is done with `==`, so the latin1 key is expected to select the
  # stored UTF-8 key as well.
  expect_key_encoding <- function(found, key, expected) {
    expect_identical(Encoding(found[found == key]), expected)
  }
  check_all_encodings <- function(found) {
    expect_key_encoding(found, ascii_key, "unknown")
    expect_key_encoding(found, utf8_key, "UTF-8")
    expect_key_encoding(found, latin1_key, "UTF-8")
    expect_key_encoding(found, cjk_key, "UTF-8")
  }

  map <- fastmap()
  map$set(ascii_key, 1)
  map$set(utf8_key, 2)
  # The latin1 key is the same string as the UTF-8 key, just encoded
  # differently. fastmap converts keys to UTF-8, so this write is expected to
  # replace the value stored under utf8_key.
  map$set(latin1_key, 3)
  map$set(cjk_key, 4)

  expect_identical(map$get(ascii_key), 1)
  expect_identical(map$get(utf8_key), 3)
  expect_identical(map$get(latin1_key), 3)
  expect_identical(map$get(cjk_key), 4)

  distinct_keys <- c(ascii_key, utf8_key, cjk_key)

  # keys() should come back in UTF-8. expect_setequal (like expect_identical)
  # converts strings to a common encoding before comparing, so the encoding
  # marks have to be verified separately.
  stored_keys <- map$keys()
  expect_setequal(stored_keys, distinct_keys)
  check_all_encodings(stored_keys)

  # The names of as_list() should be in UTF-8 as well.
  as_named_list <- map$as_list()
  expect_mapequal(
    as_named_list,
    setNames(list(1, 3, 4), distinct_keys)
  )
  list_names <- names(as_named_list)
  expect_setequal(list_names, distinct_keys)
  check_all_encodings(list_names)
})


test_that("Non-ASCII keys with mset and mget", {
  map <- fastmap()

  # Fixture keys: plain ASCII, one non-ASCII string in two encodings, and a
  # CJK string.
  ascii_key <- "abc"
  # "åbc", marked as UTF-8
  utf8_key <- "\u00e5bc"
  # The same "åbc" text, re-encoded as latin1
  latin1_key <- iconv(utf8_key, from = "UTF-8", to = "latin1")
  # "中 A", marked as UTF-8
  cjk_key <- "\u4e2d A"

  all_keys <- c(ascii_key, utf8_key, latin1_key, cjk_key)
  distinct_keys <- c(ascii_key, utf8_key, cjk_key)

  # Named list of values to insert; the names keep their original encodings.
  named_values <- setNames(list(1, 2, 3, 4), all_keys)
  expect_identical(
    Encoding(names(named_values)),
    c("unknown", "UTF-8", "latin1", "UTF-8")
  )

  # Kept for comparison only. When the values are passed as named arguments, R
  # converts the argument names to the native encoding before fastmap gets a
  # chance to convert the names (keys) to UTF-8. In a UTF-8 locale these checks
  # would pass; in some non-UTF-8 locales they would fail. They stay commented
  # out because they cannot be expected to pass on every platform.
  # do.call(m$mset, args)
  # expect_identical(m$get(k1), 1)
  # expect_identical(m$get(k2), 3)
  # expect_identical(m$get(k3), 3)
  # expect_identical(m$get(k4), 4)

  # The same insertion through .list, which should succeed in all locales.
  map <- fastmap()
  map$mset(.list = named_values)
  expect_identical(map$get(ascii_key), 1)
  expect_identical(map$get(utf8_key), 3)
  expect_identical(map$get(latin1_key), 3)
  expect_identical(map$get(cjk_key), 4)

  # Checks the encoding mark of the element of `found` that matches `key`.
  # The match is done with `==`, so the latin1 key is expected to select the
  # stored UTF-8 key as well.
  expect_key_encoding <- function(found, key, expected) {
    expect_identical(Encoding(found[found == key]), expected)
  }

  # The names of as_list() should be in UTF-8.
  as_named_list <- map$as_list()
  expect_mapequal(
    as_named_list,
    setNames(list(1, 3, 4), distinct_keys)
  )
  list_names <- names(as_named_list)
  expect_setequal(list_names, distinct_keys)
  expect_key_encoding(list_names, ascii_key, "unknown")
  expect_key_encoding(list_names, utf8_key, "UTF-8")
  expect_key_encoding(list_names, latin1_key, "UTF-8")
  expect_key_encoding(list_names, cjk_key, "UTF-8")

  # mget converts the latin1 key to UTF-8, so the result has one entry per
  # requested key, with the latin1 request reported under the UTF-8 name.
  fetched <- map$mget(all_keys)
  fetched_names <- names(fetched)
  expect_identical(
    Encoding(fetched_names),
    c("unknown", "UTF-8", "UTF-8", "UTF-8")
  )
  expect_identical(fetched_names, c(ascii_key, utf8_key, utf8_key, cjk_key))
  expect_identical(unname(fetched), list(1, 3, 3, 4))
})
# wch/fastmap documentation built on Nov. 9, 2023, 5:01 a.m.