R/python.R

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
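
# The py_to_r()/r_to_py() methods below move Arrow objects between R and
# Python via the Arrow C Data Interface: the source object is exported into
# C-level structs allocated here (ArrowSchema/ArrowArray/ArrowArrayStream)
# and the destination imports them, handing off the underlying buffers
# rather than copying the data.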

py_to_r.pyarrow.lib.Array <- function(x, ...) {
  schema_ptr <- allocate_arrow_schema()
  array_ptr <- allocate_arrow_array()
  on.exit({
    delete_arrow_schema(schema_ptr)
    delete_arrow_array(array_ptr)
  })

  x$`_export_to_c`(
    pyarrow_compatible_pointer(array_ptr),
    pyarrow_compatible_pointer(schema_ptr)
  )

  Array$import_from_c(array_ptr, schema_ptr)
}

r_to_py.Array <- function(x, convert = FALSE) {
  schema_ptr <- allocate_arrow_schema()
  array_ptr <- allocate_arrow_array()
  on.exit({
    delete_arrow_schema(schema_ptr)
    delete_arrow_array(array_ptr)
  })

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(array_ptr, schema_ptr)

  out <- pa$Array$`_import_from_c`(
    pyarrow_compatible_pointer(array_ptr),
    pyarrow_compatible_pointer(schema_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}
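
# A minimal round-trip sketch (assumes pyarrow is installed and discoverable
# by reticulate; object names are illustrative):
#
#   a <- arrow::Array$create(1:3)
#   py_a <- reticulate::r_to_py(a)   # a pyarrow.lib.Array over the same buffers
#   reticulate::py_to_r(py_a)        # back to an arrow Array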

py_to_r.pyarrow.lib.RecordBatch <- function(x, ...) {
  schema_ptr <- allocate_arrow_schema()
  array_ptr <- allocate_arrow_array()
  on.exit({
    delete_arrow_schema(schema_ptr)
    delete_arrow_array(array_ptr)
  })

  x$`_export_to_c`(
    pyarrow_compatible_pointer(array_ptr),
    pyarrow_compatible_pointer(schema_ptr)
  )

  RecordBatch$import_from_c(array_ptr, schema_ptr)
}

r_to_py.RecordBatch <- function(x, convert = FALSE) {
  schema_ptr <- allocate_arrow_schema()
  array_ptr <- allocate_arrow_array()
  on.exit({
    delete_arrow_schema(schema_ptr)
    delete_arrow_array(array_ptr)
  })

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(array_ptr, schema_ptr)

  out <- pa$RecordBatch$`_import_from_c`(
    pyarrow_compatible_pointer(array_ptr),
    pyarrow_compatible_pointer(schema_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}

r_to_py.ChunkedArray <- function(x, convert = FALSE) {
  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  out <- pa$chunked_array(x$chunks)
  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}

py_to_r.pyarrow.lib.ChunkedArray <- function(x, ...) {
  ChunkedArray$create(!!!maybe_py_to_r(x$chunks))
}

r_to_py.Table <- function(x, convert = FALSE) {
  # TODO(ARROW-16269): Going through RecordBatchReader maintains schema
  # metadata (e.g., extension types) more faithfully than column-wise
  # construction; however, it may re-chunk columns unnecessarily.
  py_rbr <- reticulate::r_to_py(as_record_batch_reader(x), convert = FALSE)
  out <- py_rbr$read_all()
  assign("convert", convert, out)
  out
}

py_to_r.pyarrow.lib.Table <- function(x, ...) {
  # TODO(ARROW-16269): Going through RecordBatchReader maintains schema
  # metadata (e.g., extension types) more faithfully than column-wise
  # construction; however, it may re-chunk columns unnecessarily.
  pa <- reticulate::import("pyarrow", convert = FALSE)
  py_rbr <- pa$lib$RecordBatchReader$from_batches(
    x$schema,
    x$to_batches()
  )

  r_rbr <- maybe_py_to_r(py_rbr)
  r_rbr$read_table()
}

py_to_r.pyarrow.lib.Schema <- function(x, ...) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  x$`_export_to_c`(pyarrow_compatible_pointer(schema_ptr))

  Schema$import_from_c(schema_ptr)
}

r_to_py.Schema <- function(x, convert = FALSE) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(schema_ptr)

  out <- pa$Schema$`_import_from_c`(
    pyarrow_compatible_pointer(schema_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}

py_to_r.pyarrow.lib.Field <- function(x, ...) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  x$`_export_to_c`(pyarrow_compatible_pointer(schema_ptr))

  Field$import_from_c(schema_ptr)
}

r_to_py.Field <- function(x, convert = FALSE) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(schema_ptr)

  out <- pa$Field$`_import_from_c`(
    pyarrow_compatible_pointer(schema_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}

py_to_r.pyarrow.lib.DataType <- function(x, ...) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  x$`_export_to_c`(pyarrow_compatible_pointer(schema_ptr))

  DataType$import_from_c(schema_ptr)
}

r_to_py.DataType <- function(x, convert = FALSE) {
  schema_ptr <- allocate_arrow_schema()
  on.exit(delete_arrow_schema(schema_ptr))

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(schema_ptr)

  out <- pa$DataType$`_import_from_c`(
    pyarrow_compatible_pointer(schema_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}

py_to_r.pyarrow.lib.RecordBatchReader <- function(x, ...) {
  stream_ptr <- allocate_arrow_array_stream()
  on.exit(delete_arrow_array_stream(stream_ptr))

  x$`_export_to_c`(pyarrow_compatible_pointer(stream_ptr))

  RecordBatchReader$import_from_c(stream_ptr)
}

r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
  stream_ptr <- allocate_arrow_array_stream()
  on.exit(delete_arrow_array_stream(stream_ptr))

  # Import with convert = FALSE so that `_import_from_c` returns a Python object
  pa <- reticulate::import("pyarrow", convert = FALSE)
  x$export_to_c(stream_ptr)
  # TODO: handle subclasses of RecordBatchReader?

  out <- pa$lib$RecordBatchReader$`_import_from_c`(
    pyarrow_compatible_pointer(stream_ptr)
  )

  # But set the convert attribute on the return object to the requested value
  assign("convert", convert, out)
  out
}


maybe_py_to_r <- function(x) {
  if (inherits(x, "python.builtin.object")) {
    # Depending on reticulate's auto-convert behavior, x may already have been
    # converted to an R object, or it may still be a Python object
    x <- reticulate::py_to_r(x)
  }
  x
}
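
# The as_*() S3 methods below let pyarrow objects coming through reticulate be
# used directly with arrow's as_*() generics; each method converts its input
# via the corresponding py_to_r() method defined above.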


#' @export
as_arrow_array.pyarrow.lib.Array <- function(x, ..., type = NULL) {
  as_arrow_array(py_to_r.pyarrow.lib.Array(x), type = type)
}

# nolint start
#' @export
as_chunked_array.pyarrow.lib.ChunkedArray <- function(x, ..., type = NULL) {
  as_chunked_array(py_to_r.pyarrow.lib.ChunkedArray(x), type = type)
}
# nolint end

#' @export
as_record_batch.pyarrow.lib.RecordBatch <- function(x, ..., schema = NULL) {
  as_record_batch(py_to_r.pyarrow.lib.RecordBatch(x), schema = schema)
}

#' @export
as_arrow_table.pyarrow.lib.RecordBatch <- function(x, ..., schema = NULL) {
  as_arrow_table(py_to_r.pyarrow.lib.RecordBatch(x), schema = schema)
}

# Some of these function names are longer than 40 characters
# (but must be named this way because of S3 method dispatch)
# nolint start
#' @export
as_record_batch_reader.pyarrow.lib.RecordBatch <- function(x, ...) {
  as_record_batch_reader(py_to_r.pyarrow.lib.RecordBatch(x))
}
# nolint end

#' @export
as_record_batch.pyarrow.lib.Table <- function(x, ..., schema = NULL) {
  as_record_batch(py_to_r.pyarrow.lib.Table(x), schema = schema)
}

#' @export
as_arrow_table.pyarrow.lib.Table <- function(x, ..., schema = NULL) {
  as_arrow_table(py_to_r.pyarrow.lib.Table(x), schema = schema)
}

#' @export
as_record_batch_reader.pyarrow.lib.Table <- function(x, ...) {
  as_record_batch_reader(py_to_r.pyarrow.lib.Table(x))
}

#' @export
as_schema.pyarrow.lib.Schema <- function(x, ...) {
  py_to_r.pyarrow.lib.Schema(x)
}

#' @export
as_data_type.pyarrow.lib.Field <- function(x, ...) {
  as_data_type(py_to_r.pyarrow.lib.Field(x))
}

#' @export
as_data_type.pyarrow.lib.DataType <- function(x, ...) {
  as_data_type(py_to_r.pyarrow.lib.DataType(x))
}

# nolint start
#' @export
as_record_batch_reader.pyarrow.lib.RecordBatchReader <- function(x, ...) {
  py_to_r.pyarrow.lib.RecordBatchReader(x)
}
# nolint end

#' Install pyarrow for use with reticulate
#'
#' `pyarrow` is the Python package for Apache Arrow. This function helps with
#' installing it for use with `reticulate`.
#'
#' @param envname The name or full path of the Python environment to install
#' into. This can be a virtualenv or conda environment created by `reticulate`.
#' See `reticulate::py_install()`.
#' @param nightly logical: Should we install a development version of the
#' package? Default is to use the official release version.
#' @param ... additional arguments passed to `reticulate::py_install()`.
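#' @examples
#' \dontrun{
#' # A minimal sketch; the environment name is illustrative
#' install_pyarrow("r-pyarrow")
#'
#' # Install a nightly (development) build instead of the release
#' install_pyarrow("r-pyarrow", nightly = TRUE)
#' }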
#' @export
install_pyarrow <- function(envname = NULL, nightly = FALSE, ...) {
  if (nightly) {
    reticulate::py_install("pyarrow",
      envname = envname, ...,
      # Nightly for pip
      pip_options = "--extra-index-url https://repo.fury.io/arrow-nightlies/ --pre --upgrade",
      # Nightly for conda
      channel = "arrow-nightlies"
    )
  } else {
    reticulate::py_install("pyarrow", envname = envname, ...)
  }
}

pyarrow_compatible_pointer <- function(ptr) {
  # GH-39933: Workaround because there is no built-in way to send a
  # 64-bit integer to Python from an R object
  py <- reticulate::import_builtins(convert = FALSE)
  addr <- external_pointer_addr_character(ptr)
  py$int(addr)
}
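
# A minimal end-to-end sketch (assumes pyarrow is installed in the Python
# environment used by reticulate; object names are illustrative):
#
#   library(arrow)
#   tbl <- arrow_table(x = 1:3, y = c("a", "b", "c"))
#   py_tbl <- reticulate::r_to_py(tbl)   # a pyarrow.Table
#   py_tbl$num_rows                      # call into the pyarrow API from R
#   reticulate::py_to_r(py_tbl)          # back to an arrow Table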
