benchmark.R
In duckspatial: R Interface to 'DuckDB' Database with Spatial Extension

## -----------------------------------------------------------------------------
#| include: false

# Limit threads to avoid a CRAN NOTE
Sys.setenv(OMP_THREAD_LIMIT = 2)


## -----------------------------------------------------------------------------
#| echo: false

## Internal packages
library(patchwork)

## Functions used in the vignette
ggplot_benchmark <- function(
  data, 
  log = FALSE,
  show.legend = TRUE,
  ...
  ) {

    ## Generate base plot
    ggplot(
      data = data, 
      aes(
        x     = factor(format(n, big.mark = ".")),
        y     = if (log) median else as.numeric(median),
        color = pkg,
        group = pkg
    )) +
      geom_line(
        linewidth = 0.7,
        linetype  = "dashed",
        alpha     = 0.6,
        show.legend = show.legend
      ) +
      geom_point(
        aes(size = as.numeric(mem_alloc)),
        alpha = 0.9,
        show.legend = show.legend
      ) +
      scale_size_binned(
        name   = "Memory allocated",
        labels = scales::label_bytes(),
        n.breaks = 4,
        range = c(1, 8)
      ) +
      scale_color_brewer(palette = "Set1") +
      labs(
        color    = "Package",
        x        = "Data size (rows)",
        y        = "Computation time",
        ...
      ) +
      theme_minimal(base_size = 13) +
      theme(
        # legend.position  = "top",
        panel.grid.minor = element_blank(),
        plot.title       = element_text(face = "bold")
      )
}

ggplot_assemble <- function(plot1, plot2, fun_name) {

  plot1 +
    plot2 +
    plot_annotation(
      title    = paste("Benchmark Comparison for", fun_name),
      subtitle = "Execution time vs. data size",
      theme    = theme(
        plot.title = element_text(face = "bold", hjust = .5),
        plot.subtitle = element_text(hjust = .5),
        text = element_text(size = rel(3.5))
      )
    )

}


## -----------------------------------------------------------------------------
#| message: false
#| warning: false
#| code-fold: true
#| code-summary: "Set-up"

# Load necessary packages
library(duckspatial)
library(bench)
library(dplyr)
library(sf)
library(ggplot2)
options(scipen = 999)

# Function to generate random points
make_points <- function(n_points) {
    points_df <- data.frame(
      id = 1:n_points,
      x = runif(n_points, min = -180, max = 180),
      y = runif(n_points, min = -90, max = 90),
      value = rnorm(n_points, mean = 100, sd = 15),
      category = sample(c("A", "B", "C", "D"), n_points, replace = TRUE)
  ) |>
    sf::st_as_sf(coords = c("x", "y"), crs = 4326)
}

# Generate datasets of different sizes
withr::with_seed(27, {
  points_sf_100k <- make_points(1e5)
  points_sf_1mi  <- make_points(1e6)
  points_sf_3mi  <- make_points(3e6)
})

# Generate polygons
# Create large polygon dataset (e.g., administrative regions, zones, etc.)
n_polygons <- 10000
polygons_list <- vector("list", n_polygons)

for(i in 1:n_polygons) {
  # Random center point with buffer from edges
  center_x <- runif(1, min = -170, max = 170)
  center_y <- runif(1, min = -80, max = 80)
  
  # Create simple rectangular polygons to avoid geometry issues
  width <- runif(1, min = 0.5, max = 3)
  height <- runif(1, min = 0.5, max = 3)
  
  # Create rectangle coordinates (must be closed: first point = last point)
  x_coords <- c(
    center_x - width/2,
    center_x + width/2,
    center_x + width/2,
    center_x - width/2,
    center_x - width/2  # Close the polygon
  )
  
  y_coords <- c(
    center_y - height/2,
    center_y - height/2,
    center_y + height/2,
    center_y + height/2,
    center_y - height/2  # Close the polygon
  )
  
  # Create polygon matrix
  coords <- cbind(x_coords, y_coords)
  
  # Create polygon (wrapped in list as required by st_polygon)
  polygons_list[[i]] <- st_polygon(list(coords))
}

polygons_sf <- st_sf(
  poly_id    = 1:n_polygons,
  region     = sample(c("North", "South", "East", "West"), n_polygons, replace = TRUE),
  population = sample(1000:1000000, n_polygons, replace = TRUE),
  geometry   = st_sfc(polygons_list, crs = 4326)
)


## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_join"

# Helper to run the benchmark
run_join_benchmark <- function(points_sf) {
  temp <- bench::mark(
    iterations  = 3,
    check       = FALSE,
    duckspatial = ddbs_join(points_sf, polygons_sf, join = "within"),
    sf          = st_join(points_sf, polygons_sf, join = st_within)
  )
  temp$n   <- nrow(points_sf)
  temp$pkg <- c("duckspatial", "sf")
  temp
}

# Run the benchmark
df_bench_join <- lapply(
  X   = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
  FUN = run_join_benchmark
) |>
  dplyr::bind_rows()


## -----------------------------------------------------------------------------
#| echo: false
#| warning: false

# Id to store the figures, saving the older ones
id_output <- "v1.5.1"

# Generate the plots
gg_join <- ggplot_benchmark(
  data     = df_bench_join,
  log      = FALSE,
  show.legend = T,
  subtitle = "A) Normal scale"
)

gg_join_log <- ggplot_benchmark(
  data     = df_bench_join,
  log      = TRUE,
  show.legend = F,
  subtitle = "B) Log scale"
)

# Assemble them in a single plot
ggplot_assemble(
  plot1 = gg_join,
  plot2 = gg_join_log,
  fun_name = "ddbs_join()"
)

# Export it
ggsave(
  filename = paste0("man/figures/bench/bench-st-join-", id_output, ".png"),
  height   = 15,
  width    = 30,
  units    = "cm"
)


## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_filter"

# Helper to run the benchmark
run_filter_benchmark <- function(points_sf) {
  temp <- bench::mark(
    iterations  = 3,
    check       = FALSE,
    duckspatial = ddbs_filter(points_sf, polygons_sf),
    sf          = st_filter(points_sf, polygons_sf)
  )
  temp$n   <- nrow(points_sf)
  temp$pkg <- c("duckspatial", "sf")
  temp
}

# Run the benchmark
df_bench_filter <- lapply(
  X   = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
  FUN = run_filter_benchmark
) |>
  dplyr::bind_rows()


## -----------------------------------------------------------------------------
#| echo: false
#| warning: false

# Generate the plots
gg_filter <- ggplot_benchmark(
  data     = df_bench_filter,
  log      = FALSE,
  show.legend = T,
  subtitle = "A) Normal scale"
)

gg_filter_log <- ggplot_benchmark(
  data     = df_bench_filter,
  log      = TRUE,
  show.legend = F,
  subtitle = "B) Log scale"
)

# Assemble them in a single plot
ggplot_assemble(
  plot1 = gg_filter,
  plot2 = gg_filter_log,
  fun_name = "ddbs_filter()"
)

# Export it
ggsave(
  filename = paste0("man/figures/bench/bench-st-filter-", id_output, ".png"),
  height   = 15,
  width    = 30,
  units    = "cm"
)


## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_distance"

# Helper to run the benchmark
run_distance_benchmark <- function(n) {

  points_sf <- withr::with_seed(27, make_points(n))

  temp <- bench::mark(
    iterations  = 1,
    check       = FALSE,
    duckspatial = ddbs_distance(points_sf, points_sf),
    sf          = st_distance(points_sf, points_sf)
  )
  temp$n   <- n
  temp$pkg <- c("duckspatial", "sf")
  temp
}

df_bench_distance <- lapply(
  X   = c(1000, 5000, 10000),
  FUN = run_distance_benchmark
) |>
  dplyr::bind_rows()


## -----------------------------------------------------------------------------
#| echo: false
#| warning: false

# Generate the plots
gg_distance <- ggplot_benchmark(
  data     = df_bench_distance,
  log      = FALSE,
  show.legend = T,
  subtitle = "A) Normal scale"
)

gg_distance_log <- ggplot_benchmark(
  data     = df_bench_distance,
  log      = TRUE,
  show.legend = F,
  subtitle = "B) Log scale"
)

# Assemble them in a single plot
ggplot_assemble(
  plot1 = gg_distance,
  plot2 = gg_distance_log,
  fun_name = "ddbs_distance()"
)

# Export it
ggsave(
  filename = paste0("man/figures/bench/bench-st-distance-", id_output, ".png"),
  height   = 15,
  width    = 30,
  units    = "cm"
)


## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_union_agg"

# Helper to run the benchmark
run_union_benchmark <- function(points_sf) {
  temp <- bench::mark(
    iterations  = 3,
    check       = FALSE,
    duckspatial = ddbs_union_agg(points_sf, by = "category"),
    sf          = points_sf |> 
      group_by(category) |> 
      summarise(geometry = st_union(geometry))
  )
  temp$n   <- nrow(points_sf)
  temp$pkg <- c("duckspatial", "sf")
  temp
}

# Run the benchmark
df_bench_union <- lapply(
  X   = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
  FUN = run_union_benchmark
) |>
  dplyr::bind_rows()


## -----------------------------------------------------------------------------
#| echo: false
#| warning: false

# Generate the plots
gg_union_agg <- ggplot_benchmark(
  data     = df_bench_union,
  log      = FALSE,
  show.legend = T,
  subtitle = "A) Normal scale"
)

gg_union_agg_log <- ggplot_benchmark(
  data     = df_bench_union,
  log      = TRUE,
  show.legend = F,
  subtitle = "B) Log scale"
)

# Assemble them in a single plot
ggplot_assemble(
  plot1 = gg_union_agg,
  plot2 = gg_union_agg_log,
  fun_name = "ddbs_union_agg()"
)

# Export it
ggsave(
  filename = paste0("man/figures/bench/bench-st-dissolve-", id_output, ".png"),
  height   = 15,
  width    = 30,
  units    = "cm"
)


## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_intersects"

# Helper to run the benchmark
run_predicate_benchmark <- function(points_sf) {
  temp <- bench::mark(
    iterations  = 1,
    check       = FALSE,
    duckspatial = ddbs_intersects(points_sf, polygons_sf),
    sf          = st_intersects(points_sf, polygons_sf)
  )
  temp$n   <- nrow(points_sf)
  temp$pkg <- c("duckspatial", "sf")
  temp
}

# Run the benchmark
df_bench_predicate <- lapply(
  X   = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
  FUN = run_predicate_benchmark
) |>
  dplyr::bind_rows()


## -----------------------------------------------------------------------------
#| echo: false
#| warning: false

# Generate the plots
gg_predicate <- ggplot_benchmark(
  data     = df_bench_predicate,
  log      = FALSE,
  show.legend = T,
  subtitle = "A) Normal scale"
)

gg_predicate_log <- ggplot_benchmark(
  data     = df_bench_predicate,
  log      = TRUE,
  show.legend = F,
  subtitle = "B) Log scale"
)

# Assemble them in a single plot
ggplot_assemble(
  plot1 = gg_predicate,
  plot2 = gg_predicate_log,
  fun_name = "ddbs_intersects()"
)

# Export it
ggsave(
  filename = paste0("man/figures/bench/bench-st-intersects-", id_output, ".png"),
  height   = 15,
  width    = 30,
  units    = "cm"
)
Any scripts or data that you put into this service are public.
duckspatial documentation built on June 22, 2026, 9:08 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
duckspatial
R Interface to 'DuckDB' Database with Spatial Extension

inst/doc/benchmark.R
In duckspatial: R Interface to 'DuckDB' Database with Spatial Extension

Try the duckspatial package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

duckspatial R Interface to 'DuckDB' Database with Spatial Extension

inst/doc/benchmark.R In duckspatial: R Interface to 'DuckDB' Database with Spatial Extension

Try the duckspatial package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

duckspatial
R Interface to 'DuckDB' Database with Spatial Extension

inst/doc/benchmark.R
In duckspatial: R Interface to 'DuckDB' Database with Spatial Extension