# Nothing
## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults for the rest of the document: `collapse` and
# `comment` are set once here so every later chunk renders the same way, with
# printed output lines prefixed by "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----naive, eval = requireNamespace("arrow", quietly = TRUE), message = FALSE----
library(arrow)
library(tibble)
library(a5R)

# Take one real A5 cell (Edinburgh, resolution 20) and show it as hex
cell <- a5_lonlat_to_cell(-3.19, 55.95, resolution = 20)
a5_u64_to_hex(cell)

# Store it in a Parquet file as uint64 (the standard interchange format)
tf <- tempfile(fileext = ".parquet")
cell_tbl <- arrow::arrow_table(cell_id = a5_cell_to_arrow(cell))
arrow::write_parquet(cell_tbl, tf)

# A plain read hands the column back as double: arrow converts uint64 silently
naive <- tibble(arrow::read_parquet(tf))
naive
cell_as_dbl <- naive[["cell_id"]]

# As a double, this cell can no longer be told apart from nearby IDs
cell_as_dbl == cell_as_dbl + 1 # TRUE — silent corruption
cell_as_dbl == cell_as_dbl + 100 # still TRUE
## ----bridge, eval = requireNamespace("arrow", quietly = TRUE)-----------------
library(a5R)
library(tibble)

# Six cities across the globe — some will have bit 63 set (origin >= 6).
# Written row-wise so each city's name and coordinates sit on one line.
cities <- tribble(
  ~name,        ~lon,     ~lat,
  "Edinburgh",    -3.19,   55.95,
  "Tokyo",       139.69,   35.69,
  "São Paulo",   -46.63,  -23.55,
  "Nairobi",      36.82,   -1.29,
  "Anchorage",  -149.90,   61.22,
  "Sydney",      151.21,  -33.87
)

# Index every city as an A5 cell at resolution 10 and store it alongside
cities[["cell"]] <- a5_lonlat_to_cell(
  cities[["lon"]],
  cities[["lat"]],
  resolution = 10
)
cities
## ----enrich, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# The first row (Edinburgh) serves as the reference cell
edinburgh <- cities[["cell"]][1]

# Resolution of each city's cell
cities[["resolution"]] <- a5_get_resolution(cities[["cell"]])

# Distance in km from every city to Edinburgh: the reference cell is repeated
# once per row, and the result is coerced to a plain numeric column
reference_cells <- rep(edinburgh, times = nrow(cities))
cities[["dist_from_edinburgh_km"]] <- as.numeric(
  a5_cell_distance(cities[["cell"]], reference_cells, units = "km")
)
cities
## ----parquet_write, eval = requireNamespace("arrow", quietly = TRUE)----------
# Assemble the Arrow table column by column; the cell column is passed through
# a5_cell_to_arrow() and stored under the name cell_id
arrow_tbl <- arrow::arrow_table(
  name = cities[["name"]],
  cell_id = a5_cell_to_arrow(cities[["cell"]]),
  cell_res = cities[["resolution"]],
  dist_from_edinburgh_km = cities[["dist_from_edinburgh_km"]]
)

# Show the column types that will be written
arrow_tbl$schema

# Write the table to a fresh temporary Parquet file
tf <- tempfile(fileext = ".parquet")
arrow::write_parquet(arrow_tbl, tf)
## ----parquet_read, eval = requireNamespace("arrow", quietly = TRUE)-----------
# Read the file back as an Arrow Table rather than a data frame, so the cell
# column can be handed to a5_cell_from_arrow() in its Arrow form.
pq <- arrow::read_parquet(tf, as_data_frame = FALSE)

# Recover cells from the uint64 column. The column is looked up by name:
# Table$column() takes a zero-based position, which is easy to misread in R
# and would silently pick another column if the write order ever changed.
recovered_cells <- a5_cell_from_arrow(pq[["cell_id"]])

# Bind the recovered cells with the rest of the data and keep only the
# columns of interest (the raw cell_id column is dropped here).
result <- as.data.frame(pq)
result$cell <- recovered_cells
result <- tibble::as_tibble(
  result[c("name", "cell", "cell_res", "dist_from_edinburgh_km")]
)
result
## ----verify, eval = requireNamespace("arrow", quietly = TRUE)-----------------
# Round-trip check: the formatted cell IDs read back from Parquet should match
# the formatted IDs of the original cells exactly
identical(format(cities[["cell"]]), format(result[["cell"]]))
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.