knitr::opts_chunk$set(warning = FALSE, message = FALSE)
chopin
automatically distributes geospatial data computation over multiple threads.chopin
workflowpar_pad_*
functions to par_grid
, or running par_hierarchy
, or par_multirasters
functions at once.library(chopin) library(terra) library(sf) library(collapse) library(dplyr) library(future) library(future.mirai) library(future.apply)
temp_dir <- tempdir(check = TRUE) url_nccnty <- paste0( "https://raw.githubusercontent.com/", "ropensci/chopin/refs/heads/main/", "tests/testdata/nc_hierarchy.gpkg" ) url_ncelev <- paste0( "https://raw.githubusercontent.com/", "ropensci/chopin/refs/heads/main/", "tests/testdata/nc_srtm15_otm.tif" ) nccnty_path <- file.path(temp_dir, "nc_hierarchy.gpkg") ncelev_path <- file.path(temp_dir, "nc_srtm15_otm.tif") # download data download.file(url_nccnty, nccnty_path, mode = "wb", quiet = TRUE) download.file(url_ncelev, ncelev_path, mode = "wb", quiet = TRUE) nccnty <- terra::vect(nccnty_path, layer = "county") ncelev <- terra::rast(ncelev_path)
ncsamp <- terra::spatSample( nccnty, 1e4L ) ncsamp$pid <- seq_len(nrow(ncsamp))
ncgrid <- par_pad_grid(ncsamp, mode = "grid", nx = 4L, ny = 2L, padding = 10000) plot(ncgrid$original)
par_*
functions operate on future
backends, users should define the future plan before running the functions. multicore
plan supports terra
objects which may lead to faster computation, but it is not supported in Windows. An alternative is future.mirai
's mirai_multisession
plan, which is supported in many platforms and generally faster than plain future multisession plan.workers
argument should be defined with an integer value to specify the number of threads to be used.future::plan(future.mirai::mirai_multisession, workers = 2L)
extract_at
runs on the grid polygons.pg <- par_grid( grids = ncgrid, pad_y = FALSE, .debug = TRUE, fun_dist = extract_at, x = ncelev_path, y = sf::st_as_sf(ncsamp), id = "pid", radius = 1e4, func = "mean" )
# also possible in mirai with par_*_mirai functions # mirai daemons should be created first mirai::daemons(n = 2L) pg <- par_grid_mirai( grids = ncgrid, pad_y = FALSE, .debug = TRUE, fun_dist = extract_at, x = ncelev_path, y = sf::st_as_sf(ncsamp), id = "pid", radius = 1e4, func = "mean" ) mirai::daemons(n = 0L)
nccnty <- sf::st_read(nccnty_path, layer = "county") nctrct <- sf::st_read(nccnty_path, layer = "tracts")
px <- par_hierarchy( # from here the par_hierarchy-specific arguments regions = nctrct, regions_id = "GEOID", length_left = 5, pad = 10000, pad_y = FALSE, .debug = TRUE, # from here are the dispatched function definition # for parallel workers fun_dist = extract_at, # below should follow the arguments of the dispatched function x = ncelev, y = sf::st_as_sf(ncsamp), id = "pid", radius = 1e4, func = "mean" ) dim(px) head(px) tail(px)
ncelev <- terra::rast(ncelev_path) tdir <- tempdir(check = TRUE) terra::writeRaster(ncelev, file.path(tdir, "test1.tif"), overwrite = TRUE) terra::writeRaster(ncelev, file.path(tdir, "test2.tif"), overwrite = TRUE) terra::writeRaster(ncelev, file.path(tdir, "test3.tif"), overwrite = TRUE) terra::writeRaster(ncelev, file.path(tdir, "test4.tif"), overwrite = TRUE) terra::writeRaster(ncelev, file.path(tdir, "test5.tif"), overwrite = TRUE) rasts <- list.files(tdir, pattern = "tif$", full.names = TRUE) pm <- par_multirasters( filenames = rasts, fun_dist = extract_at, x = NA, y = sf::st_as_sf(ncsamp)[1:500, ], id = "pid", radius = 1e4, func = "mean", .debug = TRUE ) dim(pm) head(pm) tail(pm)
From version 0.9.9, a custom function that takes sf
objects in x
and y
arguments can be used with par_grid
and par_hierarchy
functions. The custom function should return a data.frame or tibble object.
An example below demonstrates to compute the floor area ratio (or area-enclosed ratio, AER) in 100 meters circular buffers of points from random points in central Seoul, South Korea.
gpkg_path <- system.file("extdata/osm_seoul.gpkg", package = "chopin") bldg <- sf::st_read(gpkg_path, layer = "buildings") grdpnt <- sf::st_read(gpkg_path, layer = "points") # plot plot(sf::st_geometry(bldg), col = "lightgrey") plot(sf::st_geometry(grdpnt), add = TRUE, pch = 20, col = "blue")
# Radius for AER (meters) radius_m <- 100 # This function will be dispatched in parallel over computational grids. aer_at_points <- function( x, y, radius, id_col = "pid", floors_col = "floors", area_col = "foot_m2" ) { if (!inherits(x, "sf") && !inherits(y, "sf")) { x <- sf::st_as_sf(x) y <- sf::st_as_sf(y) } yorig <- y y <- sf::st_geometry(y) # Buffers around each point buf <- sf::st_buffer(y, radius) # spatial join (buildings intersecting buffers) j <- sf::st_intersects(buf, sf::st_geometry(x)) # Compute out <- vapply(seq_along(j), function(i) { if (length(j[[i]]) == 0) return(NA_real_) sub <- x[j[[i]], ] sub <- sub[!is.na(sub[[floors_col]]) & !is.na(sub[[area_col]]), ] if (nrow(sub) == 0) return(NA_real_) aer_num <- sum(sub[[area_col]] * sub[[floors_col]], na.rm = TRUE) aer_den <- pi * radius^2 aer_num / aer_den }, numeric(1)) res <- data.frame( pid = yorig[[id_col]], aer = out ) res } ## Parallel processing # Initiate mirai damons plan(mirai_multisession, workers = 4L) grd <- chopin::par_pad_grid( bldg, mode = "grid", nx = 1L, ny = 4L, padding = 300 ) bldg_cast <- sf::st_cast(bldg, "POLYGON") radius_m <- 300 res <- par_grid_mirai( grids = grd, fun_dist = aer_at_points, x = bldg_cast, y = grdpnt, radius = radius_m, id_col = "pid", .debug = TRUE ) future::plan(future::sequential) mirai::daemons(n = 0L) head(res)
chopin
works best with two-dimensional (planar) geometries. Users should disable s2
spherical geometry mode in sf
by setting sf::sf_use_s2(FALSE)
. Running any chopin
functions at spherical or three-dimensional (e.g., including M/Z dimensions) geometries may produce incorrect or unexpected results.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.