inst/doc/workflows.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
library(hexify)
library(sf)
library(ggplot2)

## ----one-grid-many-datasets---------------------------------------------------
# Step 1: Define the grid once
# This is your shared spatial reference system
grid <- hex_grid(area_km2 = 5000)
print(grid)

# Step 2: Create multiple datasets with different structures
set.seed(123)

# Dataset 1: Bird observations (note different column names)
bird_obs <- data.frame(
  species = sample(c("Passer domesticus", "Turdus merula", "Parus major"), 50, replace = TRUE),
  longitude = runif(50, -10, 30),
  latitude = runif(50, 35, 60)
)

# Dataset 2: Mammal records
mammal_obs <- data.frame(
  species = sample(c("Vulpes vulpes", "Meles meles", "Sciurus vulgaris"), 40, replace = TRUE),
  lon = runif(40, -10, 30),
  lat = runif(40, 35, 60)
)

# Dataset 3: Climate stations
climate_data <- data.frame(
  station_id = paste0("WS", 1:20),
  x = runif(20, -10, 30),
  y = runif(20, 35, 60),
  temp_c = rnorm(20, 12, 5)
)

# Step 3: Attach all datasets to the SAME grid
birds <- hexify(bird_obs, lon = "longitude", lat = "latitude", grid = grid)
mammals <- hexify(mammal_obs, lon = "lon", lat = "lat", grid = grid)
climate <- hexify(climate_data, lon = "x", lat = "y", grid = grid)

cat("Birds:  ", nrow(as.data.frame(birds)), "observations in", n_cells(birds), "cells\n")
cat("Mammals:", nrow(as.data.frame(mammals)), "observations in", n_cells(mammals), "cells\n")
cat("Climate:", nrow(as.data.frame(climate)), "stations in", n_cells(climate), "cells\n")

## ----cell-level-analysis------------------------------------------------------
# Extract data frames with cell IDs
birds_df <- as.data.frame(birds)
birds_df$cell_id <- birds@cell_id

mammals_df <- as.data.frame(mammals)
mammals_df$cell_id <- mammals@cell_id

climate_df <- as.data.frame(climate)
climate_df$cell_id <- climate@cell_id

# Aggregate each dataset by cell
bird_richness <- aggregate(
  species ~ cell_id,
  data = birds_df,
  FUN = function(x) length(unique(x))
)
names(bird_richness)[2] <- "bird_species"

mammal_richness <- aggregate(
  species ~ cell_id,
  data = mammals_df,
  FUN = function(x) length(unique(x))
)
names(mammal_richness)[2] <- "mammal_species"

mean_temp <- aggregate(
  temp_c ~ cell_id,
  data = climate_df,
  FUN = mean
)
names(mean_temp)[2] <- "mean_temp"

# Join datasets by cell_id - guaranteed to align because same grid
combined <- merge(bird_richness, mammal_richness, by = "cell_id", all = TRUE)
combined <- merge(combined, mean_temp, by = "cell_id", all = TRUE)

head(combined)

## ----grid-rect, fig.width=7, fig.height=5-------------------------------------
# Create grid specification
grid <- hex_grid(area_km2 = 5000)

# Generate hexagons over Western Europe
europe_hexes <- grid_rect(c(-10, 35, 25, 60), grid)

# Get European countries for context
europe <- hexify_world[hexify_world$continent == "Europe", ]

ggplot() +
  geom_sf(data = europe, fill = "gray95", color = "gray60") +
  geom_sf(data = europe_hexes, fill = NA, color = "steelblue", linewidth = 0.4) +
  coord_sf(xlim = c(-10, 25), ylim = c(35, 60)) +
  labs(title = sprintf("Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) +
  theme_minimal()

## ----grid-polygon, fig.width=6, fig.height=6----------------------------------
# Get France boundary
france <- hexify_world[hexify_world$name == "France", ]

# Generate grid covering mainland France
grid <- hex_grid(area_km2 = 2000)
france_grid <- grid_rect(c(-5, 41, 10, 52), grid)

# Clip grid to France boundary
france_grid_clipped <- st_intersection(france_grid, st_geometry(france))

ggplot() +
  geom_sf(data = france, fill = "gray95", color = "gray40", linewidth = 0.5) +
  geom_sf(data = france_grid_clipped, fill = alpha("steelblue", 0.3),
          color = "steelblue", linewidth = 0.3) +
  coord_sf(xlim = c(-5, 10), ylim = c(41, 52)) +
  labs(title = sprintf("Hexagonal Grid Clipped to France (~%.0f km² cells)", grid@area_km2)) +
  theme_minimal()

## ----grid-global, fig.width=8, fig.height=4, warning=FALSE--------------------
# Coarse global grid (be careful with fine grids - many cells!)
grid <- hex_grid(area_km2 = 500000)
global_hexes <- grid_global(grid)

ggplot() +
  geom_sf(data = hexify_world, fill = "gray90", color = "gray70", linewidth = 0.2) +
  geom_sf(data = global_hexes, fill = NA, color = "darkgreen", linewidth = 0.3) +
  labs(title = sprintf("Global Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) +
  theme_minimal() +
  theme(axis.text = element_blank(), axis.ticks = element_blank())

## ----multi-resolution---------------------------------------------------------
# Sample data
set.seed(42)
observations <- data.frame(
  species = sample(c("Species A", "Species B", "Species C"), 100, replace = TRUE),
  lon = runif(100, -10, 30),
  lat = runif(100, 35, 60)
)

# Fine resolution (~1000 km² cells)
grid_fine <- hex_grid(area_km2 = 1000)
obs_fine <- hexify(observations, lon = "lon", lat = "lat", grid = grid_fine)

# Coarse resolution (~10000 km² cells)
grid_coarse <- hex_grid(area_km2 = 10000)
obs_coarse <- hexify(observations, lon = "lon", lat = "lat", grid = grid_coarse)

cat(sprintf("Fine resolution: %d unique cells (area: %.1f km²)\n",
            n_cells(obs_fine), grid_fine@area_km2))
cat(sprintf("Coarse resolution: %d unique cells (area: %.1f km²)\n",
            n_cells(obs_coarse), grid_coarse@area_km2))

## ----multi-scale-aggregate----------------------------------------------------
# Extract data with cell IDs
fine_df <- as.data.frame(obs_fine)
fine_df$cell_id <- obs_fine@cell_id

coarse_df <- as.data.frame(obs_coarse)
coarse_df$cell_id <- obs_coarse@cell_id

# Species richness at each scale
richness_fine <- aggregate(species ~ cell_id, data = fine_df,
                           FUN = function(x) length(unique(x)))
richness_coarse <- aggregate(species ~ cell_id, data = coarse_df,
                             FUN = function(x) length(unique(x)))

cat(sprintf("Fine scale: mean %.2f species per cell\n", mean(richness_fine$species)))
cat(sprintf("Coarse scale: mean %.2f species per cell\n", mean(richness_coarse$species)))

## ----spatial-joins------------------------------------------------------------
# Dataset 1: Weather stations
stations <- data.frame(
  station_id = paste0("ST", 1:50),
  lon = runif(50, -10, 30),
  lat = runif(50, 35, 60),
  temperature = rnorm(50, 15, 5)
)

# Dataset 2: Cities
cities <- data.frame(
  city = c("Vienna", "Paris", "London", "Berlin", "Rome",
           "Madrid", "Prague", "Warsaw", "Budapest", "Amsterdam"),
  lon = c(16.37, 2.35, -0.12, 13.40, 12.50,
          -3.70, 14.42, 21.01, 19.04, 4.90),
  lat = c(48.21, 48.86, 51.51, 52.52, 41.90,
          40.42, 50.08, 52.23, 47.50, 52.37)
)

# Use a coarse grid for joining disparate points
grid <- hex_grid(area_km2 = 50000)

# Hexify both datasets with the same grid
stations_hex <- hexify(stations, lon = "lon", lat = "lat", grid = grid)
cities_hex <- hexify(cities, lon = "lon", lat = "lat", grid = grid)

# Extract with cell IDs
stations_df <- as.data.frame(stations_hex)
stations_df$cell_id <- stations_hex@cell_id

cities_df <- as.data.frame(cities_hex)
cities_df$cell_id <- cities_hex@cell_id

# Join by cell_id
city_weather <- merge(
  cities_df[, c("city", "cell_id")],
  aggregate(temperature ~ cell_id, data = stations_df, FUN = mean),
  by = "cell_id",
  all.x = TRUE
)

city_weather

## ----choosing-resolution------------------------------------------------------
# Target: 100 km² cells
grid_100 <- hex_grid(area_km2 = 100, aperture = 3)
cat(sprintf("Target ~100 km²: resolution %d (actual: %.1f km²)\n",
            grid_100@resolution, grid_100@area_km2))

# Target: 1000 km² cells
grid_1000 <- hex_grid(area_km2 = 1000, aperture = 3)
cat(sprintf("Target ~1000 km²: resolution %d (actual: %.1f km²)\n",
            grid_1000@resolution, grid_1000@area_km2))

# Target: 10000 km² cells
grid_10000 <- hex_grid(area_km2 = 10000, aperture = 3)
cat(sprintf("Target ~10000 km²: resolution %d (actual: %.1f km²)\n",
            grid_10000@resolution, grid_10000@area_km2))

## ----resolution-table, echo=FALSE---------------------------------------------
comparison <- hexify_compare_resolutions(aperture = 3, res_range = 0:15)
comparison$n_cells_fmt <- ifelse(
  comparison$n_cells > 1e6,
  sprintf("%.1fM", comparison$n_cells / 1e6),
  ifelse(comparison$n_cells > 1e3,
         sprintf("%.1fK", comparison$n_cells / 1e3),
         as.character(comparison$n_cells))
)
knitr::kable(
  comparison[, c("resolution", "n_cells_fmt", "cell_area_km2", "cell_spacing_km")],
  col.names = c("Resolution", "# Cells", "Cell Area (km²)", "Spacing (km)"),
  digits = 1
)

## ----compare-apertures--------------------------------------------------------
target_area <- 1000  # km²

cat(sprintf("Target: ~%d km² cells\n\n", target_area))
for (ap in c(3, 4, 7)) {
  grid <- hex_grid(area_km2 = target_area, aperture = ap)
  n_cells <- 10 * (ap^grid@resolution) + 2
  cat(sprintf("Aperture %d: resolution %d -> %.1f km² (%s cells globally)\n",
              ap, grid@resolution, grid@area_km2,
              format(n_cells, big.mark = ",")))
}

## ----sf-workflow--------------------------------------------------------------
# Hexify some data
grid <- hex_grid(area_km2 = 20000)
result <- hexify(cities, lon = "lon", lat = "lat", grid = grid)

# Convert to sf points (uses cell centers)
sf_points <- as_sf(result, geometry = "point")
class(sf_points)

# Convert to sf polygons (for choropleth maps)
sf_polys <- as_sf(result, geometry = "polygon")
class(sf_polys)

# Or generate polygons directly from cell IDs
unique_cells <- cells(result)
cell_polys <- cell_to_sf(unique_cells, grid)

## ----sf-plot, fig.width=7, fig.height=5---------------------------------------
europe <- hexify_world[hexify_world$continent == "Europe", ]

ggplot() +
  geom_sf(data = europe, fill = "ivory", color = "gray70") +
  geom_sf(data = cell_polys, fill = "steelblue", alpha = 0.5, color = "darkblue") +
  coord_sf(xlim = c(-10, 25), ylim = c(35, 58)) +
  labs(title = "European Cities - Hexagonal Grid") +
  theme_minimal()

## ----export-formats, eval=FALSE-----------------------------------------------
# # Generate a grid
# grid <- hex_grid(area_km2 = 10000)
# europe <- grid_rect(c(-10, 35, 25, 60), grid)
# 
# # Export to various formats
# st_write(europe, "europe_grid.gpkg", layer = "hexgrid")     # GeoPackage
# st_write(europe, "europe_grid.shp")                         # Shapefile
# st_write(europe, "europe_grid.geojson")                     # GeoJSON
# st_write(europe, "europe_grid.kml", layer = "hexgrid")      # KML (Google Earth)

## ----edge-cases---------------------------------------------------------------
data_with_na <- data.frame(
  lon = c(16.37, NA, 2.35, 13.40),
  lat = c(48.21, 48.86, NA, 52.52)
)

grid <- hex_grid(area_km2 = 1000)
result <- hexify(data_with_na, lon = "lon", lat = "lat", grid = grid)

# Check which rows have valid cell assignments
cat("Cell IDs:", result@cell_id, "\n")
cat("NA indicates invalid coordinates\n")

Try the hexify package in your browser

Any scripts or data that you put into this service are public.

hexify documentation built on March 1, 2026, 1:07 a.m.