Getting started with chopin

knitr::opts_chunk$set(warning = FALSE, message = FALSE)

Introduction

chopin workflow

library(chopin)
library(terra)
library(sf)
library(collapse)
library(dplyr)
library(future)
library(future.mirai)
library(future.apply)

Example data

temp_dir <- tempdir(check = TRUE)

url_nccnty <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_hierarchy.gpkg"
  )
url_ncelev <-
  paste0(
    "https://raw.githubusercontent.com/",
    "ropensci/chopin/refs/heads/main/",
    "tests/testdata/nc_srtm15_otm.tif"
  )

nccnty_path <- file.path(temp_dir, "nc_hierarchy.gpkg")
ncelev_path <- file.path(temp_dir, "nc_srtm15_otm.tif")

# download data
download.file(url_nccnty, nccnty_path, mode = "wb", quiet = TRUE)
download.file(url_ncelev, ncelev_path, mode = "wb", quiet = TRUE)

nccnty <- terra::vect(nccnty_path, layer = "county")
ncelev <- terra::rast(ncelev_path)

Generating random points in North Carolina

ncsamp <-
  terra::spatSample(
    nccnty,
    1e4L
  )
ncsamp$pid <- seq_len(nrow(ncsamp))

Creating grids

ncgrid <- par_pad_grid(ncsamp, mode = "grid", nx = 4L, ny = 2L, padding = 10000)

plot(ncgrid$original)

Extracting values from raster

future::plan(future.mirai::mirai_multisession, workers = 2L)
pg <-
  par_grid(
    grids = ncgrid,
    pad_y = FALSE,
    .debug = TRUE,
    fun_dist = extract_at,
    x = ncelev_path,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )
# also possible in mirai with par_*_mirai functions
# mirai daemons should be created first
mirai::daemons(n = 2L)
pg <-
  par_grid_mirai(
    grids = ncgrid,
    pad_y = FALSE,
    .debug = TRUE,
    fun_dist = extract_at,
    x = ncelev_path,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )
mirai::daemons(n = 0L)

Hierarchical processing

nccnty <- sf::st_read(nccnty_path, layer = "county")
nctrct <- sf::st_read(nccnty_path, layer = "tracts")
px <-
  par_hierarchy(
    # from here the par_hierarchy-specific arguments
    regions = nctrct,
    regions_id = "GEOID",
    length_left = 5,
    pad = 10000,
    pad_y = FALSE,
    .debug = TRUE,
    # from here are the dispatched function definition
    # for parallel workers
    fun_dist = extract_at,
    # below should follow the arguments of the dispatched function
    x = ncelev,
    y = sf::st_as_sf(ncsamp),
    id = "pid",
    radius = 1e4,
    func = "mean"
  )

dim(px)
head(px)
tail(px)

Multiraster processing

ncelev <- terra::rast(ncelev_path)
tdir <- tempdir(check = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test1.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test2.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test3.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test4.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test5.tif"), overwrite = TRUE)

rasts <- list.files(tdir, pattern = "tif$", full.names = TRUE)

pm <-
  par_multirasters(
    filenames = rasts,
    fun_dist = extract_at,
    x = NA,
    y = sf::st_as_sf(ncsamp)[1:500, ],
    id = "pid",
    radius = 1e4,
    func = "mean",
    .debug = TRUE
  )

dim(pm)
head(pm)
tail(pm)

User-defined functions

From version 0.9.9, a custom function that takes sf objects in x and y arguments can be used with par_grid and par_hierarchy functions. The custom function should return a data.frame or tibble object.

An example below demonstrates to compute the floor area ratio (or area-enclosed ratio, AER) in 100 meters circular buffers of points from random points in central Seoul, South Korea.

gpkg_path <- system.file("extdata/osm_seoul.gpkg", package = "chopin")
bldg <- sf::st_read(gpkg_path, layer = "buildings")
grdpnt <- sf::st_read(gpkg_path, layer = "points")

# plot
plot(sf::st_geometry(bldg), col = "lightgrey")
plot(sf::st_geometry(grdpnt), add = TRUE, pch = 20, col = "blue")
# Radius for AER (meters)
radius_m <- 100

# This function will be dispatched in parallel over computational grids.
aer_at_points <-
  function(
    x, y,
    radius,
    id_col = "pid",
    floors_col = "floors",
    area_col = "foot_m2"
  ) {
    if (!inherits(x, "sf") && !inherits(y, "sf")) {
      x <- sf::st_as_sf(x)
      y <- sf::st_as_sf(y)
    }
    yorig <- y
    y <- sf::st_geometry(y)

    # Buffers around each point
    buf <- sf::st_buffer(y, radius)
    # spatial join (buildings intersecting buffers)
    j <- sf::st_intersects(buf, sf::st_geometry(x))
    # Compute
    out <- vapply(seq_along(j), function(i) {
      if (length(j[[i]]) == 0) return(NA_real_)
      sub <- x[j[[i]], ]
      sub <- sub[!is.na(sub[[floors_col]]) & !is.na(sub[[area_col]]), ]
      if (nrow(sub) == 0) return(NA_real_)
      aer_num <- sum(sub[[area_col]] * sub[[floors_col]], na.rm = TRUE)
      aer_den <- pi * radius^2
      aer_num / aer_den
    }, numeric(1))

    res <-
      data.frame(
        pid = yorig[[id_col]],
        aer = out
      )
    res
  }

## Parallel processing
# Initiate mirai damons
plan(mirai_multisession, workers = 4L)
grd <- chopin::par_pad_grid(
  bldg,
  mode = "grid",
  nx = 1L,
  ny = 4L,
  padding = 300
)

bldg_cast <- sf::st_cast(bldg, "POLYGON")
radius_m <- 300

res <- par_grid_mirai(
  grids    = grd,
  fun_dist = aer_at_points,
  x        = bldg_cast,
  y        = grdpnt,
  radius   = radius_m,
  id_col = "pid",
  .debug = TRUE
)
future::plan(future::sequential)
mirai::daemons(n = 0L)

head(res)

Caveats

Why parallelization is slower than the ordinary function run?

Notes on data restrictions

chopin works best with two-dimensional (planar) geometries. Users should disable s2 spherical geometry mode in sf by setting sf::sf_use_s2(FALSE). Running any chopin functions at spherical or three-dimensional (e.g., including M/Z dimensions) geometries may produce incorrect or unexpected results.



Try the chopin package in your browser

Any scripts or data that you put into this service are public.

chopin documentation built on Sept. 10, 2025, 5:08 p.m.