tests/testthat/test-io.R

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# A file opened via LocalFileSystem$OpenInputFile() is expected to report an
# empty list from ReadMetadata().
test_that("RandomAccessFile$ReadMetadata() works for LocalFileSystem", {
  path <- tempfile()
  on.exit(unlink(path))
  write("abcdefg", path)

  local_fs <- LocalFileSystem$create()
  input_file <- local_fs$OpenInputFile(path)
  metadata <- input_file$ReadMetadata()

  expect_identical(metadata, list())
})

# Random-access wrapper around a seekable R connection: it must report the
# connection's size and current position, serve positional reads, and
# invalidate the underlying connection when the stream is closed.
test_that("RConnectionRandomAccessFile can read from R connections", {
  con <- rawConnection(as.raw(1:100))
  # Move the connection before wrapping it so tell() has a non-zero position
  # to report.
  seek(con, 12)
  stream <- MakeRConnectionRandomAccessFile(con)
  expect_identical(stream$GetSize(), 100L)
  expect_identical(stream$tell(), 12L)

  # Positional reads, deliberately issued out of order.
  expect_identical(as.raw(stream$ReadAt(50, 50)), as.raw(51:100))
  expect_identical(as.raw(stream$ReadAt(0, 50)), as.raw(1:50))
  stream$close()
  # Closing the stream closes the wrapped connection as well.
  expect_error(isOpen(con), "invalid connection")
})

# Sequential wrapper around an R connection: consecutive Read() calls must
# continue where the previous one stopped.
test_that("RConnectionInputStream can read from R connections", {
  con <- rawConnection(as.raw(1:100))
  stream <- MakeRConnectionInputStream(con)

  expect_identical(as.raw(stream$Read(50)), as.raw(1:50))
  expect_identical(as.raw(stream$Read(50)), as.raw(51:100))
  stream$close()
  # Closing the stream closes the wrapped connection as well.
  expect_error(isOpen(con), "invalid connection")
})

# Bytes written through the stream wrapper must land in the file behind the
# connection, and closing the stream must close that connection too.
test_that("RConnectionOutputStream can write to R connections", {
  path <- tempfile()
  on.exit(unlink(path))

  # Write the payload in two halves through the wrapper.
  sink_con <- file(path, open = "wb")
  out <- MakeRConnectionOutputStream(sink_con)
  for (chunk in list(as.raw(1:50), as.raw(51:100))) {
    out$write(chunk)
  }
  out$close()
  expect_error(isOpen(sink_con), "invalid connection")

  # Read the file back with base R: the first read returns everything, the
  # second finds nothing left.
  source_con <- file(path, open = "rb")
  first_read <- readBin(source_con, raw(), 100)
  second_read <- readBin(source_con, raw(), 100)
  close(source_con)

  expect_identical(first_read, as.raw(1:100))
  expect_identical(second_read, raw())
})

# An https URL should be turned into an InputStream; the first three bytes of
# the remote parquet file are checked against "PAR".
test_that("make_readable_file() works for non-filesystem URLs", {
  skip_if_offline()

  url <- "https://github.com/apache/arrow/raw/main/r/inst/v0.7.1.parquet"
  remote <- make_readable_file(url)
  expect_r6_class(remote, "InputStream")

  magic <- rawToChar(as.raw(remote$Read(3)))
  expect_identical(magic, "PAR")
  remote$close()
})

# A raw connection is wrapped as a RandomAccessFile and yields its full
# contents on a single Read().
test_that("make_readable_file() works for seekable connection objects", {
  payload <- as.raw(1:100)
  seekable_con <- rawConnection(payload)

  input <- make_readable_file(seekable_con)
  expect_r6_class(input, "RandomAccessFile")

  contents <- as.raw(input$Read(100))
  expect_identical(contents, payload)
  input$close()
})

# The connections handed to the helpers below are created without an `open`
# mode, so each helper has to open the connection itself before using it.
test_that("make_readable_file() and make_output_stream() open connections", {
  tf <- tempfile()
  on.exit(unlink(tf))

  # check a seekable connection
  write("abcdefg", tf)
  readable_file <- make_readable_file(file(tf))
  expect_r6_class(readable_file, "RandomAccessFile")
  expect_identical(
    rawToChar(as.raw(readable_file$Read(7))),
    "abcdefg"
  )
  readable_file$close()

  # check output stream/non-seekable connection
  con <- gzfile(tf)
  stream <- make_output_stream(con)
  stream$write(as.raw(1:100))
  stream$close()

  # read the bytes back through a fresh, equally unopened gzfile() connection
  readable_file <- make_readable_file(gzfile(tf))
  expect_identical(
    as.raw(readable_file$Read(100)),
    as.raw(1:100)
  )
  readable_file$close()
})

# make_output_stream() should wrap an already-open binary file connection in
# an OutputStream.
test_that("make_output_stream() works for connection objects", {
  tf <- tempfile()
  on.exit(unlink(tf))

  # Exercise the function named in the description; the temp file gives the
  # connection something writable to point at.
  con <- file(tf, open = "wb")
  stream <- make_output_stream(con)
  expect_r6_class(stream, "OutputStream")
  # Closing the stream also closes the wrapped connection.
  stream$close()
})

# A file stored as windows-1252 should come out of the reencoding stream as
# the UTF-8 byte sequence of the same text.
test_that("reencoding input stream works for windows-1252", {
  text <- "province_name\nQu\u00e9bec"

  # Raw bytes of `text` in the requested target encoding.
  encode_as <- function(target) {
    iconv(text, from = Encoding(text), to = target, toRaw = TRUE)[[1]]
  }
  expected_utf8 <- encode_as("UTF-8")

  path <- tempfile()
  writeBin(encode_as("windows-1252"), path)

  local_fs <- LocalFileSystem$create()
  raw_stream <- local_fs$OpenInputStream(path)
  reencoded <- MakeReencodeInputStream(raw_stream, "windows-1252")
  expect_identical(as.raw(reencoded$Read(100)), expected_utf8)

  raw_stream$close()
  reencoded$close()
  unlink(path)
})

# A UTF-16LE file mixing 1-, 2- and 4-byte (in UTF-8) characters should be
# delivered by the reencoding stream as the matching UTF-8 bytes.
test_that("reencoding input stream works for UTF-16", {
  text <- paste0(strrep("a\u00e9\U0001f4a9", 30))

  # Raw bytes of `text` in the requested target encoding.
  encode_as <- function(target) {
    iconv(text, from = Encoding(text), to = target, toRaw = TRUE)[[1]]
  }
  expected_utf8 <- encode_as("UTF-8")

  path <- tempfile()
  writeBin(encode_as("UTF-16LE"), path)

  local_fs <- LocalFileSystem$create()
  raw_stream <- local_fs$OpenInputStream(path)
  reencoded <- MakeReencodeInputStream(raw_stream, "UTF-16LE")

  decoded <- as.raw(reencoded$Read(length(expected_utf8)))
  expect_identical(decoded, expected_utf8)

  reencoded$close()
  raw_stream$close()
  unlink(path)
})

# Reads a UTF-8 file through a UTF-8 reencoding stream in small, uneven
# chunks and checks that the concatenated output matches the input bytes.
test_that("reencoding input stream works with pending characters", {
  text <- paste0(strrep("a\u00e9\U0001f4a9", 30))
  payload <- iconv(
    text,
    from = Encoding(text),
    to = "UTF-8",
    toRaw = TRUE
  )[[1]]

  path <- tempfile()
  writeBin(payload, path)

  local_fs <- LocalFileSystem$create()
  raw_stream <- local_fs$OpenInputStream(path)
  reencoded <- MakeReencodeInputStream(raw_stream, "UTF-8")

  # Each of these short reads leaves some pending characters behind; the
  # expected slice is tracked with a running byte offset.
  consumed <- 0
  for (n_bytes in c(4, 5, 6, 7)) {
    expected <- payload[consumed + seq_len(n_bytes)]
    expect_identical(as.raw(reencoded$Read(n_bytes)), expected)
    consumed <- consumed + n_bytes
  }

  # Drain whatever is left in the stream.
  remainder <- payload[(consumed + 1):length(payload)]
  expect_identical(as.raw(reencoded$Read(length(payload))), remainder)

  raw_stream$close()
  reencoded$close()

  unlink(path)
})

# Reading bytes that are not valid in the declared source encoding must
# raise an error instead of returning data.
test_that("reencoding input stream errors for invalid characters", {
  # 0xff never occurs in well-formed UTF-8, so the whole file is invalid.
  bytes_utf8 <- rep(as.raw(0xff), 10)

  temp_utf8 <- tempfile()
  con <- file(temp_utf8, open = "wb")
  writeBin(bytes_utf8, con)
  close(con)

  fs <- LocalFileSystem$create()

  stream <- fs$OpenInputStream(temp_utf8)
  stream_utf8 <- MakeReencodeInputStream(stream, "UTF-8")
  expect_error(stream_utf8$Read(100), "Encountered invalid input bytes")

  # Release both streams before deleting the file, as the sibling reencoding
  # tests do; an open handle can keep the temp file from being removed.
  stream_utf8$close()
  stream$close()

  unlink(temp_utf8)
})

Try the arrow package in your browser

Any scripts or data that you put into this service are public.

arrow documentation built on Nov. 25, 2023, 1:09 a.m.