inst/doc/arrow-parquet.R

## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the whole document: `collapse` and the "#>" output prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----naive, eval = requireNamespace("arrow", quietly = TRUE), message = FALSE----
library(arrow)
library(tibble)
library(a5R)

# Edinburgh encoded as a genuine A5 cell at resolution 20, shown as hex
cell <- a5_lonlat_to_cell(-3.19, 55.95, resolution = 20)
a5_u64_to_hex(cell)

# Store the cell in a Parquet file as a uint64 column
# (uint64 being the standard interchange format)
tf <- tempfile(fileext = ".parquet")
cell_table <- arrow::arrow_table(cell_id = a5_cell_to_arrow(cell))
arrow::write_parquet(cell_table, tf)

# A plain read-back: arrow silently turns the uint64 column into a double
naive <- tibble(arrow::read_parquet(tf))
naive

cell_as_dbl <- naive$cell_id

# As a double, the ID is indistinguishable from nearby IDs
cell_as_dbl == cell_as_dbl + 1   # TRUE — silent corruption
cell_as_dbl == cell_as_dbl + 100 # still TRUE

## ----bridge, eval = requireNamespace("arrow", quietly = TRUE)-----------------
library(a5R)
library(tibble)

# Six cities spread around the globe, one row each.
# Some of them will have bit 63 set (origin >= 6).
cities <- tribble(
  ~name,        ~lon,    ~lat,
  "Edinburgh",   -3.19,  55.95,
  "Tokyo",      139.69,  35.69,
  "São Paulo",  -46.63, -23.55,
  "Nairobi",     36.82,  -1.29,
  "Anchorage", -149.90,  61.22,
  "Sydney",     151.21, -33.87
)

# Attach the resolution-10 A5 cell for every city
cities$cell <- a5_lonlat_to_cell(cities$lon, cities$lat, resolution = 10)
cities

## ----enrich, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# The first row (Edinburgh) is the reference cell for the distance column
edinburgh <- cities$cell[1]

# Resolution of each cell
cities$resolution <- a5_get_resolution(cities$cell)

# Distance from every city's cell to the reference cell, requested in km
# and stored as a plain numeric vector
reference_cells <- rep(edinburgh, nrow(cities))
distance_km <- a5_cell_distance(cities$cell, reference_cells, units = "km")
cities$dist_from_edinburgh_km <- as.numeric(distance_km)

cities

## ----parquet_write, eval = requireNamespace("arrow", quietly = TRUE)----------
# Destination file for the Parquet round trip
tf <- tempfile(fileext = ".parquet")

# Assemble the Arrow table column by column; the cells are converted with
# a5_cell_to_arrow() before they go in
arrow_tbl <- arrow::arrow_table(
  name = cities[["name"]],
  cell_id = a5_cell_to_arrow(cities[["cell"]]),
  cell_res = cities[["resolution"]],
  dist_from_edinburgh_km = cities[["dist_from_edinburgh_km"]]
)

# Show the column types before writing
arrow_tbl$schema
arrow::write_parquet(x = arrow_tbl, sink = tf)

## ----parquet_read, eval = requireNamespace("arrow", quietly = TRUE)-----------
# Read the file back as an Arrow Table rather than a data frame, so the
# cell column can be handed to a5_cell_from_arrow() before any conversion
pq <- arrow::read_parquet(tf, as_data_frame = FALSE)

# Recover cells from the uint64 column, bind with the rest of the data.
# The column is fetched by name rather than by position, so the lookup does
# not depend on column order or on the indexing base of `$column()`.
recovered_cells <- a5_cell_from_arrow(pq[["cell_id"]])
result <- as.data.frame(pq)
result$cell <- recovered_cells

# Keep the recovered `cell` column in place of the raw `cell_id` column
result <- tibble::as_tibble(
  result[c("name", "cell", "cell_res", "dist_from_edinburgh_km")]
)
result

## ----verify, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# Round-trip check: the formatted cells must match before and after Parquet
original_cells <- format(cities$cell)
round_trip_cells <- format(result$cell)
identical(original_cells, round_trip_cells)

Try the a5R package in your browser

Any scripts or data that you put into this service are public.

a5R documentation built on March 26, 2026, 5:10 p.m.