R/to-array.R

Defines functions as_narrow_array.data.frame as_narrow_array.raw as_narrow_array.factor as_narrow_array.character as_narrow_array.double as_narrow_array.integer as_narrow_array.logical as_narrow_array.NULL

Documented in as_narrow_array.character as_narrow_array.data.frame as_narrow_array.double as_narrow_array.factor as_narrow_array.integer as_narrow_array.logical as_narrow_array.NULL as_narrow_array.raw

#' Convert R objects to Arrow vectors
#'
#' These methods return an [narrow_array()] for R objects that don't involve
#' copying or unnecessary allocating. Two excpetions are (1) ALTREP objects,
#' which will be expanded, and (2) character vectors, which will be converted
#' to UTF-8 and serialized as a single [raw()] vector.
#'
#' @inheritParams narrow_array
#' @inheritParams narrow_schema
#'
#' @return An [narrow_array()]
#' @export
#'
#' @examples
#' as_narrow_array(NULL)
#' as_narrow_array(c(TRUE, FALSE, NA))
#' as_narrow_array(1:10)
#' as_narrow_array(c(1.1, 2.2))
#' as_narrow_array(as.raw(0x00))
#' as_narrow_array("fish")
#' as_narrow_array(data.frame(x = 1:10, y = as.raw(1:10)))
#'
as_narrow_array.NULL <- function(x, ..., name = "") {
  narrow_array(narrow_schema("n", name), narrow_array_data(null_count = 0))
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.logical <- function(x, ..., name = "") {
  x_is_na <- is.na(x)
  narrow_array(
    narrow_schema("i", name),
    narrow_array_data(
      buffers = if (any(x_is_na)) list(as_narrow_bitmask(!x_is_na), x) else list(NULL, x),
      length = length(x),
      null_count = sum(x_is_na),
      offset = 0
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.integer <- function(x, ..., name = "") {
  x_is_na <- is.na(x)
  narrow_array(
    narrow_schema("i", name),
    narrow_array_data(
      buffers = if (any(x_is_na)) list(as_narrow_bitmask(!x_is_na), x) else list(NULL, x),
      length = length(x),
      null_count = sum(x_is_na),
      offset = 0
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.double <- function(x, ..., name = "") {
  x_is_na <- is.na(x)
  narrow_array(
    narrow_schema("g", name),
    narrow_array_data(
      buffers = if (any(x_is_na)) list(as_narrow_bitmask(!x_is_na), x) else list(NULL, x),
      length = length(x),
      null_count = sum(x_is_na),
      offset = 0
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.character <- function(x, ..., name = "") {
  x_is_na <- is.na(x)

  # flatten and check for long data vector
  buffers <- .Call(narrow_c_buffers_from_character, x)
  if (length(buffers[[2]]) <= (2 ^ 31 - 1)) {
    buffers[[1]] <- as.integer(as.numeric(buffers[[1]]))
    format <- "u"
  } else {
    format <- "U"
  }

  if (any(x_is_na)) {
    buffers <- c(list(as_narrow_bitmask(!x_is_na)), buffers)
  } else {
    buffers <- c(list(NULL), buffers)
  }

  narrow_array(
    narrow_schema(format, name),
    narrow_array_data(
      buffers = buffers,
      length = length(x),
      null_count = sum(x_is_na),
      offset = 0
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.factor <- function(x, ..., name = "") {
  x_is_na <- is.na(x)

  # indices are 1-based and Arrow needs 0-based
  # could also add a factor level here to avoid copying the
  # indices vector but this makes it harder
  # to round-trip a factor() and a little disingenuous
  dictionary_array <- as_narrow_array(levels(x))
  indices <- unclass(x) - 1L

  narrow_array(
    narrow_schema(
      "i", name,
      dictionary = dictionary_array$schema
    ),
    narrow_array_data(
      buffers = if (any(x_is_na)) list(as_narrow_bitmask(!x_is_na), indices) else list(NULL, indices),
      length = length(x),
      null_count = sum(x_is_na),
      offset = 0,
      dictionary = dictionary_array$array_data
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.raw <- function(x, ..., name = "") {
  narrow_array(
    narrow_schema("C", name),
    narrow_array_data(
      buffers = list(NULL, x),
      length = length(x),
      null_count = 0,
      offset = 0
    )
  )
}

#' @export
#' @rdname as_narrow_array.NULL
as_narrow_array.data.frame <- function(x, ..., name = "", nullable = FALSE) {
  arrays <- Map(as_narrow_array, x, name = names(x), nullable = TRUE)
  array_data <- lapply(arrays, "[[", "array_data")

  narrow_array(
    narrow_schema(
      "+s", name,
      flags = narrow_schema_flags(nullable = nullable),
      children = lapply(arrays, "[[", "schema")),
    narrow_array_data(
      buffers = list(NULL),
      length = nrow(x),
      null_count = 0,
      children = array_data
    )
  )
}
paleolimbot/arrowvctrs documentation built on Oct. 12, 2023, 7 a.m.