R/type.R

Defines functions time_unit_id na_extension na_dictionary na_map na_fixed_size_list na_large_list na_list na_dense_union na_sparse_union na_struct na_decimal256 na_decimal128 na_timestamp na_interval_month_day_nano na_interval_day_time na_interval_months na_duration na_time64 na_time32 na_date64 na_date32 na_fixed_size_binary na_large_binary na_binary na_large_string na_string na_double na_float na_half_float na_uint64 na_int64 na_uint32 na_int32 na_uint16 na_int16 na_uint8 na_int8 na_bool na_na na_type

Documented in na_binary na_bool na_date32 na_date64 na_decimal128 na_decimal256 na_dense_union na_dictionary na_double na_duration na_extension na_fixed_size_binary na_fixed_size_list na_float na_half_float na_int16 na_int32 na_int64 na_int8 na_interval_day_time na_interval_month_day_nano na_interval_months na_large_binary na_large_list na_large_string na_list na_map na_na na_sparse_union na_string na_struct na_time32 na_time64 na_timestamp na_type na_uint16 na_uint32 na_uint64 na_uint8

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

#' Create type objects
#'
#' In nanoarrow, types, fields, and schemas are all represented by a
#' [nanoarrow_schema][as_nanoarrow_schema]. These functions are convenience
#' constructors to create these objects in a readable way. Use [na_type()] to
#' construct types based on the constructor name, which is also the name that
#' prints/is returned by [nanoarrow_schema_parse()].
#'
#' @param type_name The name of the type (e.g., "int32"). This form of the
#'   constructor is useful for writing tests that loop over many types.
#' @param byte_width For [na_fixed_size_binary()], the number of bytes
#'   occupied by each item.
#' @param list_size The number of elements in each item in a
#'   [na_fixed_size_list()].
#' @param precision The total number of digits representable by the decimal type
#' @param scale The number of digits after the decimal point in a decimal type
#' @param unit One of 's' (seconds), 'ms' (milliseconds), 'us' (microseconds),
#'   or 'ns' (nanoseconds).
#' @param timezone A string representing a timezone name. The empty string ""
#'   represents a naive point in time (i.e., one that has no associated
#'   timezone).
#' @param column_types A `list()` of [nanoarrow_schema][as_nanoarrow_schema]s.
#' @param item_type For [na_list()], [na_large_list()], [na_fixed_size_list()],
#'   and [na_map()], the [nanoarrow_schema][as_nanoarrow_schema] representing
#'   the item type.
#' @param key_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#'   [na_map()] key type.
#' @param index_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#'   [na_dictionary()] index type.
#' @param value_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#'   [na_dictionary()] value type.
#' @param keys_sorted Use `TRUE` to assert that keys are sorted.
#' @param storage_type For [na_extension()], the underlying value type.
#' @param extension_name For [na_extension()], the extension name. This is
#'   typically namespaced separated by dots (e.g., arrow.r.vctrs).
#' @param extension_metadata A string or raw vector defining extension metadata.
#'   Most Arrow extension types define extension metadata as a JSON object.
#' @param nullable Use `FALSE` to assert that this field cannot contain
#'   null values.
#' @param ordered Use `TRUE` to assert that the order of values in the
#'   dictionary is meaningful.
#'
#' @return A [nanoarrow_schema][as_nanoarrow_schema]
#' @export
#'
#' @examples
#' na_int32()
#' na_struct(list(col1 = na_int32()))
#'
na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
                    column_types = NULL, item_type = NULL, key_type = NULL,
                    value_type = NULL, index_type = NULL, ordered = NULL,
                    list_size = NULL, keys_sorted = NULL, storage_type = NULL,
                    extension_name = NULL, extension_metadata = NULL,
                    nullable = NULL) {
  # Collect every optional parameter under its own name so that it can be
  # forwarded as a named argument to the type-specific constructor.
  candidates <- list(
    byte_width = byte_width, unit = unit, timezone = timezone,
    column_types = column_types, item_type = item_type, key_type = key_type,
    value_type = value_type, index_type = index_type, ordered = ordered,
    list_size = list_size, keys_sorted = keys_sorted,
    storage_type = storage_type, extension_name = extension_name,
    extension_metadata = extension_metadata, nullable = nullable
  )

  # A NULL value means "not supplied": those entries are dropped so that the
  # constructor falls back to its own defaults.
  supplied <- Filter(Negate(is.null), candidates)

  # Build the call na_<type_name>(<supplied arguments>) and evaluate it.
  # Going through a real call (rather than a lookup table) means R itself
  # reports unknown type names and unused or missing arguments.
  ctor_call <- as.call(c(list(as.symbol(paste0("na_", type_name))), supplied))
  eval(ctor_call)
}

#' @rdname na_type
#' @export
na_na <- function(nullable = TRUE) {
  # The null type. isTRUE() reduces `nullable` to a strict TRUE/FALSE before
  # it is handed to the C initializer.
  type_id <- NANOARROW_TYPE$`NA`
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_bool <- function(nullable = TRUE) {
  # Boolean type. isTRUE() reduces `nullable` to a strict TRUE/FALSE before
  # it is handed to the C initializer.
  type_id <- NANOARROW_TYPE[["BOOL"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_int8 <- function(nullable = TRUE) {
  # Signed 8-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INT8"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_uint8 <- function(nullable = TRUE) {
  # Unsigned 8-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["UINT8"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_int16 <- function(nullable = TRUE) {
  # Signed 16-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INT16"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_uint16 <- function(nullable = TRUE) {
  # Unsigned 16-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["UINT16"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_int32 <- function(nullable = TRUE) {
  # Signed 32-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INT32"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_uint32 <- function(nullable = TRUE) {
  # Unsigned 32-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["UINT32"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_int64 <- function(nullable = TRUE) {
  # Signed 64-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INT64"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_uint64 <- function(nullable = TRUE) {
  # Unsigned 64-bit integer type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["UINT64"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_half_float <- function(nullable = TRUE) {
  # Half-precision float type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["HALF_FLOAT"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_float <- function(nullable = TRUE) {
  # Single-precision float type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["FLOAT"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_double <- function(nullable = TRUE) {
  # Double-precision float type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["DOUBLE"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_string <- function(nullable = TRUE) {
  # String type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["STRING"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_large_string <- function(nullable = TRUE) {
  # Large string type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["LARGE_STRING"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_binary <- function(nullable = TRUE) {
  # Binary type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["BINARY"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_large_binary <- function(nullable = TRUE) {
  # Large binary type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["LARGE_BINARY"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_fixed_size_binary <- function(byte_width, nullable = TRUE) {
  # Only the first element of `byte_width` is used, after integer coercion.
  width <- as.integer(byte_width)[1]
  type_id <- NANOARROW_TYPE[["FIXED_SIZE_BINARY"]]
  .Call(nanoarrow_c_schema_init_fixed_size, type_id, width, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_date32 <- function(nullable = TRUE) {
  # 32-bit date type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["DATE32"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_date64 <- function(nullable = TRUE) {
  # 64-bit date type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["DATE64"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_time32 <- function(unit = c("ms", "s"), nullable = TRUE) {
  # match.arg() picks "ms" when `unit` is not supplied and rejects anything
  # outside the choices listed in the signature.
  chosen_unit <- match.arg(unit)
  unit_id <- time_unit_id(chosen_unit)

  # The third argument is the timezone slot, which is unused here.
  .Call(
    nanoarrow_c_schema_init_date_time,
    NANOARROW_TYPE[["TIME32"]],
    unit_id,
    NULL,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_time64 <- function(unit = c("us", "ns"), nullable = TRUE) {
  # match.arg() picks "us" when `unit` is not supplied and rejects anything
  # outside the choices listed in the signature.
  chosen_unit <- match.arg(unit)
  unit_id <- time_unit_id(chosen_unit)

  # The third argument is the timezone slot, which is unused here.
  .Call(
    nanoarrow_c_schema_init_date_time,
    NANOARROW_TYPE[["TIME64"]],
    unit_id,
    NULL,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_duration <- function(unit = c("ms", "s", "us", "ns"), nullable = TRUE) {
  # match.arg() picks "ms" when `unit` is not supplied and rejects anything
  # outside the choices listed in the signature.
  chosen_unit <- match.arg(unit)
  unit_id <- time_unit_id(chosen_unit)

  # The third argument is the timezone slot, which is unused here.
  .Call(
    nanoarrow_c_schema_init_date_time,
    NANOARROW_TYPE[["DURATION"]],
    unit_id,
    NULL,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_interval_months <- function(nullable = TRUE) {
  # Month interval type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INTERVAL_MONTHS"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_interval_day_time <- function(nullable = TRUE) {
  # Day/time interval type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INTERVAL_DAY_TIME"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_interval_month_day_nano <- function(nullable = TRUE) {
  # Month/day/nanosecond interval type; `nullable` is normalized with isTRUE().
  type_id <- NANOARROW_TYPE[["INTERVAL_MONTH_DAY_NANO"]]
  .Call(nanoarrow_c_schema_init, type_id, isTRUE(nullable))
}

#' @rdname na_type
#' @export
na_timestamp <- function(unit = c("us", "ns", "s", "ms"),
                         timezone = "",
                         nullable = TRUE) {
  # match.arg() picks "us" when `unit` is not supplied and rejects anything
  # outside the choices listed in the signature.
  chosen_unit <- match.arg(unit)

  # `timezone` must be exactly one non-missing string ("" is allowed).
  timezone_ok <- is.character(timezone) &&
    length(timezone) == 1 &&
    !is.na(timezone)
  if (!timezone_ok) {
    stop("`timezone` must be character(1)")
  }

  unit_id <- time_unit_id(chosen_unit)
  .Call(
    nanoarrow_c_schema_init_date_time,
    NANOARROW_TYPE[["TIMESTAMP"]],
    unit_id,
    timezone,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_decimal128 <- function(precision, scale, nullable = TRUE) {
  # Only the first element of each argument is used, after integer coercion.
  total_digits <- as.integer(precision)[1]
  fraction_digits <- as.integer(scale)[1]

  .Call(
    nanoarrow_c_schema_init_decimal,
    NANOARROW_TYPE[["DECIMAL128"]],
    total_digits,
    fraction_digits,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_decimal256 <- function(precision, scale, nullable = TRUE) {
  # Only the first element of each argument is used, after integer coercion.
  total_digits <- as.integer(precision)[1]
  fraction_digits <- as.integer(scale)[1]

  .Call(
    nanoarrow_c_schema_init_decimal,
    NANOARROW_TYPE[["DECIMAL256"]],
    total_digits,
    fraction_digits,
    isTRUE(nullable)
  )
}

#' @rdname na_type
#' @export
na_struct <- function(column_types = list(), nullable = FALSE) {
  # Note that, unlike the other constructors in this file, `nullable`
  # defaults to FALSE here.
  struct_schema <- .Call(
    nanoarrow_c_schema_init,
    NANOARROW_TYPE[["STRUCT"]],
    isTRUE(nullable)
  )

  # The supplied column types become the children of the struct.
  struct_schema$children <- column_types
  struct_schema
}

#' @rdname na_type
#' @export
na_sparse_union <- function(column_types = list()) {
  # Start from a struct holding the children, then overwrite its format
  # string with "+us:" followed by the zero-based child indices separated
  # by commas.
  union_schema <- na_struct(column_types)
  child_ids <- seq_along(union_schema$children) - 1L
  union_schema$format <- paste0("+us:", paste(child_ids, collapse = ","))
  union_schema
}

#' @rdname na_type
#' @export
na_dense_union <- function(column_types = list()) {
  # Start from a struct holding the children, then overwrite its format
  # string with "+ud:" followed by the zero-based child indices separated
  # by commas.
  union_schema <- na_struct(column_types)
  child_ids <- seq_along(union_schema$children) - 1L
  union_schema$format <- paste0("+ud:", paste(child_ids, collapse = ","))
  union_schema
}

#' @rdname na_type
#' @export
na_list <- function(item_type, nullable = TRUE) {
  list_schema <- .Call(
    nanoarrow_c_schema_init,
    NANOARROW_TYPE[["LIST"]],
    isTRUE(nullable)
  )

  # The first child slot of the list schema holds the item type.
  list_schema$children[[1]] <- item_type
  list_schema
}

#' @rdname na_type
#' @export
na_large_list <- function(item_type, nullable = TRUE) {
  list_schema <- .Call(
    nanoarrow_c_schema_init,
    NANOARROW_TYPE[["LARGE_LIST"]],
    isTRUE(nullable)
  )

  # The first child slot of the list schema holds the item type.
  list_schema$children[[1]] <- item_type
  list_schema
}

#' @rdname na_type
#' @export
na_fixed_size_list <- function(item_type, list_size, nullable = TRUE) {
  # Only the first element of `list_size` is used, after integer coercion.
  n_items <- as.integer(list_size)[1]
  type_id <- NANOARROW_TYPE[["FIXED_SIZE_LIST"]]
  list_schema <- .Call(
    nanoarrow_c_schema_init_fixed_size,
    type_id,
    n_items,
    isTRUE(nullable)
  )

  # The first child slot of the list schema holds the item type.
  list_schema$children[[1]] <- item_type
  list_schema
}

#' @rdname na_type
#' @export
na_map <- function(key_type, item_type, keys_sorted = FALSE, nullable = TRUE) {
  # Build a map schema whose single child (the entries struct) has the key
  # type as its first child and the item type as its second child.
  schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$MAP, isTRUE(nullable))
  schema$children[[1]]$children[[1]] <- key_type
  schema$children[[1]]$children[[2]] <- item_type

  # Honour `keys_sorted`: previously the argument was accepted but silently
  # ignored, so the MAP_KEYS_SORTED flag could never be set from R. As with
  # `nullable`, only a literal TRUE turns the flag on.
  if (isTRUE(keys_sorted)) {
    schema$flags <- bitwOr(schema$flags, ARROW_FLAG$MAP_KEYS_SORTED)
  }

  schema
}

#' @rdname na_type
#' @export
na_dictionary <- function(value_type,
                          index_type = na_int32(),
                          ordered = FALSE) {
  # A dictionary type is expressed as the index schema with the value
  # schema attached in its `dictionary` slot.
  dict_schema <- as_nanoarrow_schema(index_type)
  dict_schema$dictionary <- value_type

  # Set or clear the DICTIONARY_ORDERED bit while leaving every other flag
  # on the index schema untouched.
  ordered_bit <- ARROW_FLAG[["DICTIONARY_ORDERED"]]
  dict_schema$flags <- if (ordered) {
    bitwOr(dict_schema$flags, ordered_bit)
  } else {
    bitwAnd(dict_schema$flags, bitwNot(ordered_bit))
  }

  dict_schema
}

#' @rdname na_type
#' @export
na_extension <- function(storage_type,
                         extension_name,
                         extension_metadata = "") {
  ext_schema <- as_nanoarrow_schema(storage_type)

  # The extension keys are listed first so that, when the storage schema
  # already carries one of them, the value supplied here is the one kept.
  extension_fields <- list(
    "ARROW:extension:name" = extension_name,
    "ARROW:extension:metadata" = extension_metadata
  )
  combined <- c(extension_fields, ext_schema$metadata)

  # Indexing by the unique names keeps the first entry for each key.
  kept_keys <- unique(names(combined))
  ext_schema$metadata <- combined[kept_keys]

  ext_schema
}

time_unit_id <- function(time_unit) {
  # Zero-based position of `time_unit` within the known units, coarsest
  # first; an unrecognized unit yields NA.
  # NOTE(review): the ordering presumably mirrors the time unit enum on the
  # C side -- confirm before reordering.
  known_units <- c("s", "ms", "us", "ns")
  position <- match(time_unit, known_units)
  position - 1L
}

# These values aren't guaranteed to stay stable between nanoarrow versions,
# so we keep them internal but use them in these functions to simplify the
# number of C functions we need to build all the types.
#
# Every identifier is stored as an integer (note the `L` suffix, including
# on UNINITIALIZED) so that the list is homogeneous and each value can be
# passed to C as an integer type identifier.
NANOARROW_TYPE <- list(
  UNINITIALIZED = 0L,
  "NA" = 1L,
  BOOL = 2L,
  UINT8 = 3L,
  INT8 = 4L,
  UINT16 = 5L,
  INT16 = 6L,
  UINT32 = 7L,
  INT32 = 8L,
  UINT64 = 9L,
  INT64 = 10L,
  HALF_FLOAT = 11L,
  FLOAT = 12L,
  DOUBLE = 13L,
  STRING = 14L,
  BINARY = 15L,
  FIXED_SIZE_BINARY = 16L,
  DATE32 = 17L,
  DATE64 = 18L,
  TIMESTAMP = 19L,
  TIME32 = 20L,
  TIME64 = 21L,
  INTERVAL_MONTHS = 22L,
  INTERVAL_DAY_TIME = 23L,
  DECIMAL128 = 24L,
  DECIMAL256 = 25L,
  LIST = 26L,
  STRUCT = 27L,
  SPARSE_UNION = 28L,
  DENSE_UNION = 29L,
  DICTIONARY = 30L,
  MAP = 31L,
  EXTENSION = 32L,
  FIXED_SIZE_LIST = 33L,
  DURATION = 34L,
  LARGE_STRING = 35L,
  LARGE_BINARY = 36L,
  LARGE_LIST = 37L,
  INTERVAL_MONTH_DAY_NANO = 38L
)

# Schema flag bits. Each value occupies a distinct bit, so flags can be
# combined and tested with bitwOr() / bitwAnd().
ARROW_FLAG <- list(
  DICTIONARY_ORDERED = 0x1L,
  NULLABLE = 0x2L,
  MAP_KEYS_SORTED = 0x4L
)

Try the nanoarrow package in your browser

Any scripts or data that you put into this service are public.

nanoarrow documentation built on June 22, 2024, 9:37 a.m.