tests/testthat/test-text-io.R

mock_file_utf8_path  <- get_mock_path(file.path("text-io", "utf8"),  "txt")
mock_file_eucjp_path <- get_mock_path(file.path("text-io", "eucjp"), "txt")

# Below, 8 non-ASCII characters are written. These are purposely
# non-standard and require a multi-byte encoding. They represent
# a mix of Hiragana and Kanji characters (stemming from Japanese
# syllabaries) and non-standard punctuation symbols. When using
# UTF-8, they all require 3 bytes (each) and therefore, 24 bytes
# should be written to match expectations (nchar(,"bytes") should
# return 24). The expected byte sequences were extracted from
# https://symbl.cc/.
chars     <- enc2utf8("さらば、世界よ!")
chars_len <- nchar(chars, "bytes")
chars_raw <- as.raw(c(
    0xe3, 0x81, 0x95,  # さ  (HIRAGANA LETTER SA)
    0xe3, 0x82, 0x89,  # ら  (HIRAGANA LETTER RA)
    0xe3, 0x81, 0xb0,  # ば  (HIRAGANA LETTER BA)
    0xe3, 0x80, 0x81,  # 、  (IDEOGRAPHIC COMMA)
    0xe4, 0xb8, 0x96,  # 世  (KANJI -> IDEOGRAPH GENERATION; WORLD; ERA CJK)
    0xe7, 0x95, 0x8c,  # 界  (KANJI -> IDEOGRAPH BOUNDARY, LIMIT; DOMAIN; SOCIETY; THE WORLD CJK)
    0xe3, 0x82, 0x88,  # よ  (HIRAGANA LETTER YO)
    0xef, 0xbc, 0x81)) # !  (FULLWIDTH EXCLAMATION MARK)

# text_read() ------------------------------------------------------------------

test_that("text_read() returns a character vector", {
    lines <- text_read(mock_file_utf8_path)

    expect_type(lines, "character")
    expect_length(lines, 51L)
})

test_that("text_read() validates path", {
    # Sys.chmod() is incompatible with Windows,
    # but is required for testing purposes. See
    # doc for more information.
    skip_on_os("windows")

    temp_dir  <- withr::local_tempdir(pattern  = "a-test-directory")
    temp_file <- withr::local_tempfile(pattern = "a-non-readable-file")

    # Create file and change permissions.
    # 222 = write-only for owner / group / other.
    # It can still be deleted afterwards.
    file.create(temp_file)
    Sys.chmod(temp_file, "222")

    expect_error(text_read(1L))
    expect_error(text_read("a-non-existent-file.txt"))
    expect_error(text_read(temp_dir))
    expect_error(text_read(temp_file))
    expect_snapshot(text_read(1L), error = TRUE)
    expect_snapshot(text_read("a-non-existent-file.txt"), error = TRUE)
})

test_that("text_read() validates encoding", {
    expect_error(text_read(mock_file_utf8_path, ""))
    expect_snapshot(text_read(mock_file_utf8_path, ""), error = TRUE)
})

test_that("text_read() closes connection to path", {
    # Register number of opened connections just
    # before execution of text_read(). It should
    # be the same after execution.
    n_cons <- nrow(showConnections())
    text_read(mock_file_utf8_path)

    expect_identical(nrow(showConnections()), n_cons)
})

test_that("text_read() re-encodes input to utf-8", {
    lines <- text_read(mock_file_eucjp_path, "EUC-JP")
    chars <- utils::tail(lines, 1L)

    expect_identical(chars, "あいうえお")
    expect_identical(Encoding(chars), "UTF-8")
})

# text_write() -----------------------------------------------------------------

test_that("text_write() returns null", {
    temp_file <- withr::local_tempfile()

    expect_null(text_write("Hello, world!", temp_file))
})

test_that("text_write() validates x", {
    expect_error(text_write(1L))
    expect_snapshot(text_write(1L), error = TRUE)
})

test_that("text_write() validates path", {
    # Sys.chmod() is incompatible with Windows,
    # but is required for testing purposes. See
    # doc for more information.
    skip_on_os("windows")

    temp_dir  <- withr::local_tempdir(pattern  = "a-test-directory")
    temp_file <- withr::local_tempfile(pattern = "a-non-writable-file")

    # Create file and change permissions.
    # 444 = read-only for owner / group / other.
    # It can still be deleted afterwards.
    file.create(temp_file)
    Sys.chmod(temp_file, "444")

    expect_error(text_write("Hello, world!", 1L))
    expect_error(text_write("Hello, world!", temp_dir))
    expect_error(text_write("Hello, world!", temp_file))
    expect_snapshot(text_write("Hello, world!", 1L),       error = TRUE)
    expect_snapshot({
            "Input temporary directory is random. It is replaced"
            "by a constant for in this snapshot for reproducibility."
            text_write("Hello, world!", temp_dir)
        },
        error     = TRUE,
        transform = \(x) gsub("'path' '(.*?)'", "'path' 'a-test-directory'", x))
})

test_that("text_write() validates encoding", {
    temp_file <- withr::local_tempfile()

    expect_error(text_write("Hello, world!", temp_file, 1L))
    expect_snapshot(text_write("Hello, world!", temp_file, 1L), error = TRUE)
})

test_that("text_write() closes connection to path", {
    # Register number of opened connections just
    # before execution of text_read(). It should
    # be the same after execution.
    n_cons    <- nrow(showConnections())
    temp_file <- withr::local_tempfile()
    text_write("Hello, world!", temp_file)

    expect_identical(nrow(showConnections()), n_cons)
})

test_that("text_write() writes utf-8 character to path", {
    temp_file <- withr::local_tempfile()
    text_write(chars, temp_file)

    expect_identical(readBin(temp_file, "raw", n = chars_len), chars_raw)
    expect_identical(text_read(temp_file), chars)
})

test_that("text_write() re-encodes input to utf-8", {
    temp_file   <- withr::local_tempfile()
    chars_eucjp <- iconv(chars, from = "UTF-8", to = "EUC-JP")
    text_write(chars_eucjp, temp_file, "EUC-JP")

    expect_identical(readBin(temp_file, "raw", n = chars_len), chars_raw)
    expect_identical(text_read(temp_file), chars)
})

Try the transltr package in your browser

Any scripts or data that you put into this service are public.

transltr documentation built on April 3, 2025, 9:33 p.m.