# File: tests/testthat/test-python.R

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

test_that("install_pyarrow", {
  # Preconditions: not on CRAN, online, dev mode, and a platform/Python
  # combination for which pyarrow can be installed.
  skip_on_cran()
  skip_if_offline()
  skip_if_not_dev_mode()
  # The Windows CI machine fails to locate a usable python
  skip_on_os("windows")
  skip_if_not_installed("reticulate")
  # PyArrow requires Python 3.8 or newer
  skip_on_python_older_than("3.8")
  # pyarrow wheels are not published for macOS 10.13
  skip_if(on_macos_10_13_or_lower())

  env_name <- "arrow-test"
  created <- try(reticulate::virtualenv_create(env_name))
  # Without a working virtualenv there is nothing further to check
  skip_if(inherits(created, "try-error"))
  expect_error(install_pyarrow(env_name, nightly = TRUE), NA)
  # Activate the environment so the tests that follow can use it
  reticulate::use_virtualenv(env_name)
})

# NOTE(review): this call sits outside any test_that() block, so it
# presumably skips every test below when pyarrow is unavailable. The helper
# is defined elsewhere -- confirm against its definition.
skip_if_no_pyarrow()

test_that("Array from Python", {
  # The module is imported without `convert = FALSE`, and the value returned
  # by pyarrow is compared directly against an R Array.
  pyarrow <- reticulate::import("pyarrow")
  values <- c(1, 2, 3)
  from_python <- pyarrow$array(values)
  expected <- Array$create(values)

  expect_equal(from_python, expected)

  # The explicit coercion generic must give the same result
  expect_equal(as_arrow_array(from_python), expected)
})

test_that("Array to Python", {
  # convert = FALSE keeps results as Python objects so their class can be
  # inspected before converting back explicitly.
  pyarrow <- reticulate::import("pyarrow", convert = FALSE)
  r_array <- Array$create(c(1, 2, 3))

  # Passing the R Array to a pyarrow function exercises the R -> Python path
  concatenated <- pyarrow$concat_arrays(list(r_array))

  expect_s3_class(concatenated, "pyarrow.lib.Array")
  expect_equal(reticulate::py_to_r(concatenated), r_array)
})

test_that("RecordBatch to/from Python", {
  # The module handle is not used below; the import call itself is retained.
  pa <- reticulate::import("pyarrow", convert = FALSE)
  r_batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])

  # R -> Python -> R
  py_batch <- reticulate::r_to_py(r_batch)
  expect_s3_class(py_batch, "pyarrow.lib.RecordBatch")
  expect_equal(reticulate::py_to_r(py_batch), r_batch)

  # The as_*() generics also accept the Python object
  expect_equal(as_record_batch(py_batch), r_batch)
  expect_equal(as_arrow_table(py_batch), as_arrow_table(r_batch))
  batch_reader <- as_record_batch_reader(py_batch)
  expect_equal(batch_reader$read_next_batch(), r_batch)
})

test_that("Table and ChunkedArray from Python", {
  pyarrow <- reticulate::import("pyarrow", convert = FALSE)
  r_batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
  # Two copies of the same batch, so each column has two chunks
  r_table <- Table$create(r_batch, r_batch)

  # Build the equivalent table on the Python side from the same batch
  py_batch <- reticulate::r_to_py(r_batch)
  py_table <- pyarrow$Table$from_batches(list(py_batch, py_batch))
  expect_s3_class(py_table, "pyarrow.lib.Table")

  # Index 0 on the Python table selects its first column
  py_first_col <- py_table[0]
  expect_s3_class(py_first_col, "pyarrow.lib.ChunkedArray")
  expect_equal(reticulate::py_to_r(py_first_col), r_table$col1)
  expect_equal(reticulate::py_to_r(py_table), r_table)

  # The as_*() generics also accept the Python table
  expect_equal(as_arrow_table(py_table), r_table)
  expect_equal(as_record_batch(py_table), as_record_batch(r_table))
  table_reader <- as_record_batch_reader(py_table)
  expect_equal(table_reader$read_table(), r_table)
})

test_that("Table and ChunkedArray to Python", {
  r_batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
  r_table <- Table$create(r_batch, r_batch)

  # A single column of the table goes over as a ChunkedArray ...
  r_column <- r_table$col1
  py_column <- reticulate::r_to_py(r_column)
  expect_s3_class(py_column, "pyarrow.lib.ChunkedArray")
  expect_equal(reticulate::py_to_r(py_column), r_column)

  # ... and the whole table goes over as a Table
  py_table <- reticulate::r_to_py(r_table)
  expect_s3_class(py_table, "pyarrow.lib.Table")
  expect_equal(reticulate::py_to_r(py_table), r_table)
})

test_that("RecordBatch with metadata roundtrip", {
  original <- RecordBatch$create(example_with_metadata)
  py_batch <- reticulate::r_to_py(original)
  expect_s3_class(py_batch, "pyarrow.lib.RecordBatch")

  # Column `a` is a VctrsExtensionType, which pyarrow does not know about,
  # so the batches are not compared for equality as a whole. The roundtrip
  # still preserves the extension type, so the pieces are checked one by one.
  restored <- reticulate::py_to_r(py_batch)
  plain_cols <- c("b", "c", "d")
  expect_identical(restored$metadata, original$metadata)
  expect_equal(restored$a, original$a)
  expect_equal(restored[plain_cols], original[plain_cols])
  expect_equal_data_frame(restored, example_with_metadata)
})

test_that("Table with metadata roundtrip", {
  original <- Table$create(example_with_metadata)
  py_table <- reticulate::r_to_py(original)
  expect_s3_class(py_table, "pyarrow.lib.Table")

  # Column `a` is a VctrsExtensionType, which pyarrow does not know about,
  # so the tables are not compared for equality as a whole. The roundtrip
  # still preserves the extension type, so the pieces are checked one by one.
  restored <- reticulate::py_to_r(py_table)
  plain_cols <- c("b", "c", "d")
  expect_identical(restored$metadata, original$metadata)
  expect_equal(restored$a, original$a)
  expect_equal(restored[plain_cols], original[plain_cols])
  expect_equal_data_frame(restored, example_with_metadata)
})

test_that("DataType roundtrip", {
  # A parameterised type (unit and timezone) is sent to Python and back
  r_type <- timestamp("ms", timezone = "Pacific/Marquesas")
  py_type <- reticulate::r_to_py(r_type)

  expect_s3_class(py_type, "pyarrow.lib.DataType")
  expect_equal(reticulate::py_to_r(py_type), r_type)

  # The coercion generic accepts the Python object as well
  expect_equal(as_data_type(py_type), r_type)
})

test_that("Field roundtrip", {
  r_field <- field("x", time32("s"))
  py_field <- reticulate::r_to_py(r_field)

  expect_s3_class(py_field, "pyarrow.lib.Field")
  expect_equal(reticulate::py_to_r(py_field), r_field)

  # as_data_type() must give the same result for the Python and R field
  expect_equal(as_data_type(py_field), as_data_type(r_field))
})

test_that("RecordBatchReader to python", {
  skip_if_not_available("dataset")
  library(dplyr, warn.conflicts = FALSE)

  # Build a reader over a dplyr query so the Python side consumes the
  # selected and filtered result.
  source_table <- Table$create(example_data)
  query <- source_table %>%
    select(int, lgl) %>%
    filter(int > 6)
  scanner <- Scanner$create(query)
  r_reader <- scanner$ToRecordBatchReader()

  py_reader <- reticulate::r_to_py(r_reader)
  expect_s3_class(py_reader, "pyarrow.lib.RecordBatchReader")

  # Read the whole stream in Python, then bring the table back to R
  py_table <- py_reader$read_all()
  expect_s3_class(py_table, "pyarrow.lib.Table")
  roundtripped <- reticulate::py_to_r(py_table)
  expect_r6_class(roundtripped, "Table")

  # The same query applied to the plain data frame gives the expected rows
  expected <- example_data %>%
    select(int, lgl) %>%
    filter(int > 6)
  expect_equal_data_frame(roundtripped, expected)
})

test_that("RecordBatchReader from python", {
  skip_if_not_available("dataset")

  source_table <- Table$create(example_data)

  # Each conversion path below gets its own scanner and reader, so a new
  # Python-side reader is built for every call.
  make_py_reader <- function() {
    scanner <- Scanner$create(source_table)
    reticulate::r_to_py(scanner$ToRecordBatchReader())
  }

  # Path 1: reticulate's generic py_to_r() conversion
  via_py_to_r <- reticulate::py_to_r(make_py_reader())
  rt_table <- via_py_to_r$read_table()
  expect_r6_class(rt_table, "Table")
  expect_equal_data_frame(rt_table, example_data)

  # Path 2: as_record_batch_reader() must produce the same table
  via_generic <- as_record_batch_reader(make_py_reader())
  expect_equal(via_generic$read_table(), rt_table)
})

# Try the arrow package in your browser
#
# Any scripts or data that you put into this service are public.
#
# arrow documentation built on Nov. 25, 2023, 1:09 a.m.