knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) library(hexify) library(sf) library(ggplot2)
This vignette covers practical workflows for common spatial analysis tasks: sharing grids across datasets, generating grid polygons, choosing resolution, and exporting to GIS formats.
The most powerful pattern in hexify is defining a grid once and reusing it across multiple datasets. This ensures spatial consistency and eliminates parameter repetition.
You often have:
Several independent datasets (observations, sensors, surveys)
All in longitude/latitude coordinates
Collected at different times or from different sources
You want to:
Put everything on one common global grid
Be sure the grids actually match
Combine results later without subtle errors
# Step 1: Define the grid once # This is your shared spatial reference system grid <- hex_grid(area_km2 = 5000) print(grid) # Step 2: Create multiple datasets with different structures set.seed(123) # Dataset 1: Bird observations (note different column names) bird_obs <- data.frame( species = sample(c("Passer domesticus", "Turdus merula", "Parus major"), 50, replace = TRUE), longitude = runif(50, -10, 30), latitude = runif(50, 35, 60) ) # Dataset 2: Mammal records mammal_obs <- data.frame( species = sample(c("Vulpes vulpes", "Meles meles", "Sciurus vulgaris"), 40, replace = TRUE), lon = runif(40, -10, 30), lat = runif(40, 35, 60) ) # Dataset 3: Climate stations climate_data <- data.frame( station_id = paste0("WS", 1:20), x = runif(20, -10, 30), y = runif(20, 35, 60), temp_c = rnorm(20, 12, 5) ) # Step 3: Attach all datasets to the SAME grid birds <- hexify(bird_obs, lon = "longitude", lat = "latitude", grid = grid) mammals <- hexify(mammal_obs, lon = "lon", lat = "lat", grid = grid) climate <- hexify(climate_data, lon = "x", lat = "y", grid = grid) cat("Birds: ", nrow(as.data.frame(birds)), "observations in", n_cells(birds), "cells\n") cat("Mammals:", nrow(as.data.frame(mammals)), "observations in", n_cells(mammals), "cells\n") cat("Climate:", nrow(as.data.frame(climate)), "stations in", n_cells(climate), "cells\n")
Once data are hexified, longitude/latitude no longer matter for analysis. The cell_id becomes the shared spatial key:
# Extract data frames with cell IDs birds_df <- as.data.frame(birds) birds_df$cell_id <- birds@cell_id mammals_df <- as.data.frame(mammals) mammals_df$cell_id <- mammals@cell_id climate_df <- as.data.frame(climate) climate_df$cell_id <- climate@cell_id # Aggregate each dataset by cell bird_richness <- aggregate( species ~ cell_id, data = birds_df, FUN = function(x) length(unique(x)) ) names(bird_richness)[2] <- "bird_species" mammal_richness <- aggregate( species ~ cell_id, data = mammals_df, FUN = function(x) length(unique(x)) ) names(mammal_richness)[2] <- "mammal_species" mean_temp <- aggregate( temp_c ~ cell_id, data = climate_df, FUN = mean ) names(mean_temp)[2] <- "mean_temp" # Join datasets by cell_id - guaranteed to align because same grid combined <- merge(bird_richness, mammal_richness, by = "cell_id", all = TRUE) combined <- merge(combined, mean_temp, by = "cell_id", all = TRUE) head(combined)
hexify provides functions to generate grid polygons over regions for visualization and analysis.
# Create grid specification grid <- hex_grid(area_km2 = 5000) # Generate hexagons over Western Europe europe_hexes <- grid_rect(c(-10, 35, 25, 60), grid) # Get European countries for context europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "gray95", color = "gray60") + geom_sf(data = europe_hexes, fill = NA, color = "steelblue", linewidth = 0.4) + coord_sf(xlim = c(-10, 25), ylim = c(35, 60)) + labs(title = sprintf("Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal()
Clip a grid to any sf polygon boundary:
# Get France boundary france <- hexify_world[hexify_world$name == "France", ] # Generate grid covering mainland France grid <- hex_grid(area_km2 = 2000) france_grid <- grid_rect(c(-5, 41, 10, 52), grid) # Clip grid to France boundary france_grid_clipped <- st_intersection(france_grid, st_geometry(france)) ggplot() + geom_sf(data = france, fill = "gray95", color = "gray40", linewidth = 0.5) + geom_sf(data = france_grid_clipped, fill = alpha("steelblue", 0.3), color = "steelblue", linewidth = 0.3) + coord_sf(xlim = c(-5, 10), ylim = c(41, 52)) + labs(title = sprintf("Hexagonal Grid Clipped to France (~%.0f km² cells)", grid@area_km2)) + theme_minimal()
# Coarse global grid (be careful with fine grids - many cells!) grid <- hex_grid(area_km2 = 500000) global_hexes <- grid_global(grid) ggplot() + geom_sf(data = hexify_world, fill = "gray90", color = "gray70", linewidth = 0.2) + geom_sf(data = global_hexes, fill = NA, color = "darkgreen", linewidth = 0.3) + labs(title = sprintf("Global Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal() + theme(axis.text = element_blank(), axis.ticks = element_blank())
Analyze data at multiple spatial scales using different target areas.
# Sample data set.seed(42) observations <- data.frame( species = sample(c("Species A", "Species B", "Species C"), 100, replace = TRUE), lon = runif(100, -10, 30), lat = runif(100, 35, 60) ) # Fine resolution (~1000 km² cells) grid_fine <- hex_grid(area_km2 = 1000) obs_fine <- hexify(observations, lon = "lon", lat = "lat", grid = grid_fine) # Coarse resolution (~10000 km² cells) grid_coarse <- hex_grid(area_km2 = 10000) obs_coarse <- hexify(observations, lon = "lon", lat = "lat", grid = grid_coarse) cat(sprintf("Fine resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_fine), grid_fine@area_km2)) cat(sprintf("Coarse resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_coarse), grid_coarse@area_km2))
# Extract data with cell IDs fine_df <- as.data.frame(obs_fine) fine_df$cell_id <- obs_fine@cell_id coarse_df <- as.data.frame(obs_coarse) coarse_df$cell_id <- obs_coarse@cell_id # Species richness at each scale richness_fine <- aggregate(species ~ cell_id, data = fine_df, FUN = function(x) length(unique(x))) richness_coarse <- aggregate(species ~ cell_id, data = coarse_df, FUN = function(x) length(unique(x))) cat(sprintf("Fine scale: mean %.2f species per cell\n", mean(richness_fine$species))) cat(sprintf("Coarse scale: mean %.2f species per cell\n", mean(richness_coarse$species)))
Join datasets based on shared grid cells rather than proximity.
# Dataset 1: Weather stations stations <- data.frame( station_id = paste0("ST", 1:50), lon = runif(50, -10, 30), lat = runif(50, 35, 60), temperature = rnorm(50, 15, 5) ) # Dataset 2: Cities cities <- data.frame( city = c("Vienna", "Paris", "London", "Berlin", "Rome", "Madrid", "Prague", "Warsaw", "Budapest", "Amsterdam"), lon = c(16.37, 2.35, -0.12, 13.40, 12.50, -3.70, 14.42, 21.01, 19.04, 4.90), lat = c(48.21, 48.86, 51.51, 52.52, 41.90, 40.42, 50.08, 52.23, 47.50, 52.37) ) # Use a coarse grid for joining disparate points grid <- hex_grid(area_km2 = 50000) # Hexify both datasets with the same grid stations_hex <- hexify(stations, lon = "lon", lat = "lat", grid = grid) cities_hex <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Extract with cell IDs stations_df <- as.data.frame(stations_hex) stations_df$cell_id <- stations_hex@cell_id cities_df <- as.data.frame(cities_hex) cities_df$cell_id <- cities_hex@cell_id # Join by cell_id city_weather <- merge( cities_df[, c("city", "cell_id")], aggregate(temperature ~ cell_id, data = stations_df, FUN = mean), by = "cell_id", all.x = TRUE ) city_weather
Use hex_grid() with area_km2 to get the closest available resolution:
# Target: 100 km² cells grid_100 <- hex_grid(area_km2 = 100, aperture = 3) cat(sprintf("Target ~100 km²: resolution %d (actual: %.1f km²)\n", grid_100@resolution, grid_100@area_km2)) # Target: 1000 km² cells grid_1000 <- hex_grid(area_km2 = 1000, aperture = 3) cat(sprintf("Target ~1000 km²: resolution %d (actual: %.1f km²)\n", grid_1000@resolution, grid_1000@area_km2)) # Target: 10000 km² cells grid_10000 <- hex_grid(area_km2 = 10000, aperture = 3) cat(sprintf("Target ~10000 km²: resolution %d (actual: %.1f km²)\n", grid_10000@resolution, grid_10000@area_km2))
comparison <- hexify_compare_resolutions(aperture = 3, res_range = 0:15) comparison$n_cells_fmt <- ifelse( comparison$n_cells > 1e6, sprintf("%.1fM", comparison$n_cells / 1e6), ifelse(comparison$n_cells > 1e3, sprintf("%.1fK", comparison$n_cells / 1e3), as.character(comparison$n_cells)) ) knitr::kable( comparison[, c("resolution", "n_cells_fmt", "cell_area_km2", "cell_spacing_km")], col.names = c("Resolution", "# Cells", "Cell Area (km²)", "Spacing (km)"), digits = 1 )
Different apertures offer different resolution steps:
target_area <- 1000 # km² cat(sprintf("Target: ~%d km² cells\n\n", target_area)) for (ap in c(3, 4, 7)) { grid <- hex_grid(area_km2 = target_area, aperture = ap) n_cells <- 10 * (ap^grid@resolution) + 2 cat(sprintf("Aperture %d: resolution %d -> %.1f km² (%s cells globally)\n", ap, grid@resolution, grid@area_km2, format(n_cells, big.mark = ","))) }
| Aperture | Best For | Trade-offs | |----------|----------|------------| | 3 | Fine resolution control, dggridR compatibility | Slowest cell growth | | 4 | Power-of-2 scaling, GIS workflows | Moderate resolution steps | | 7 | Rapid cell count growth, coarse analysis | Largest resolution jumps | | 4/3 | Balance of 4's fast start + 3's fine control | More complex indexing |
# Hexify some data grid <- hex_grid(area_km2 = 20000) result <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Convert to sf points (uses cell centers) sf_points <- as_sf(result, geometry = "point") class(sf_points) # Convert to sf polygons (for choropleth maps) sf_polys <- as_sf(result, geometry = "polygon") class(sf_polys) # Or generate polygons directly from cell IDs unique_cells <- cells(result) cell_polys <- cell_to_sf(unique_cells, grid)
europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "ivory", color = "gray70") + geom_sf(data = cell_polys, fill = "steelblue", alpha = 0.5, color = "darkblue") + coord_sf(xlim = c(-10, 25), ylim = c(35, 58)) + labs(title = "European Cities - Hexagonal Grid") + theme_minimal()
Use sf's st_write() to export grids for use in external GIS software:
# Generate a grid grid <- hex_grid(area_km2 = 10000) europe <- grid_rect(c(-10, 35, 25, 60), grid) # Export to various formats st_write(europe, "europe_grid.gpkg", layer = "hexgrid") # GeoPackage st_write(europe, "europe_grid.shp") # Shapefile st_write(europe, "europe_grid.geojson") # GeoJSON st_write(europe, "europe_grid.kml", layer = "hexgrid") # KML (Google Earth)
data_with_na <- data.frame( lon = c(16.37, NA, 2.35, 13.40), lat = c(48.21, 48.86, NA, 52.52) ) grid <- hex_grid(area_km2 = 1000) result <- hexify(data_with_na, lon = "lon", lat = "lat", grid = grid) # Check which rows have valid cell assignments cat("Cell IDs:", result@cell_id, "\n") cat("NA indicates invalid coordinates\n")
Coordinates with latitude > 89° may have projection artifacts. The grid remains valid, but polygon visualization can be distorted near poles.
Polygons crossing the date line (lon = ±180°) are handled automatically.
cell_to_sf() applies sf::st_wrap_dateline() internally, so flat map
projections render correctly without manual intervention.
| Task | Function |
|------|----------|
| Create grid specification | hex_grid(area_km2 = ...) |
| Assign points to cells | hexify(df, lon, lat, grid) |
| Get grid from HexData | grid_info(result) |
| Get unique cell IDs | cells(result) |
| Count cells | n_cells(result) |
| Extract data frame | as.data.frame(result) |
| Convert to sf | as_sf(result, geometry = "polygon") |
| Generate polygons | cell_to_sf(cell_ids, grid) |
| Grid over region | grid_rect(bbox, grid) |
| Global grid | grid_global(grid) |
| Coordinate conversion | lonlat_to_cell(), cell_to_lonlat() |
| Compare resolutions | hexify_compare_resolutions() |
vignette("quickstart") - Getting started with hexify
vignette("visualization") - Plotting with plot(), hexify_heatmap()
vignette("theory") - Mathematical foundations (ISEA projection, apertures)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.