Nothing
## -----------------------------------------------------------------------------
#| include: false
# Limit threads to avoid a CRAN NOTE
Sys.setenv(OMP_THREAD_LIMIT = 2)
## -----------------------------------------------------------------------------
#| echo: false
## Internal packages
library(patchwork)
## Functions used in the vignette
ggplot_benchmark <- function(
data,
log = FALSE,
show.legend = TRUE,
...
) {
## Generate base plot
ggplot(
data = data,
aes(
x = factor(format(n, big.mark = ".")),
y = if (log) median else as.numeric(median),
color = pkg,
group = pkg
)) +
geom_line(
linewidth = 0.7,
linetype = "dashed",
alpha = 0.6,
show.legend = show.legend
) +
geom_point(
aes(size = as.numeric(mem_alloc)),
alpha = 0.9,
show.legend = show.legend
) +
scale_size_binned(
name = "Memory allocated",
labels = scales::label_bytes(),
n.breaks = 4,
range = c(1, 8)
) +
scale_color_brewer(palette = "Set1") +
labs(
color = "Package",
x = "Data size (rows)",
y = "Computation time",
...
) +
theme_minimal(base_size = 13) +
theme(
# legend.position = "top",
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold")
)
}
ggplot_assemble <- function(plot1, plot2, fun_name) {
plot1 +
plot2 +
plot_annotation(
title = paste("Benchmark Comparison for", fun_name),
subtitle = "Execution time vs. data size",
theme = theme(
plot.title = element_text(face = "bold", hjust = .5),
plot.subtitle = element_text(hjust = .5),
text = element_text(size = rel(3.5))
)
)
}
## -----------------------------------------------------------------------------
#| message: false
#| warning: false
#| code-fold: true
#| code-summary: "Set-up"
# Load necessary packages
library(duckspatial)
library(bench)
library(dplyr)
library(sf)
library(ggplot2)
options(scipen = 999)
# Function to generate random points
make_points <- function(n_points) {
points_df <- data.frame(
id = 1:n_points,
x = runif(n_points, min = -180, max = 180),
y = runif(n_points, min = -90, max = 90),
value = rnorm(n_points, mean = 100, sd = 15),
category = sample(c("A", "B", "C", "D"), n_points, replace = TRUE)
) |>
sf::st_as_sf(coords = c("x", "y"), crs = 4326)
}
# Generate datasets of different sizes
withr::with_seed(27, {
points_sf_100k <- make_points(1e5)
points_sf_1mi <- make_points(1e6)
points_sf_3mi <- make_points(3e6)
})
# Generate polygons
# Create large polygon dataset (e.g., administrative regions, zones, etc.)
n_polygons <- 10000
polygons_list <- vector("list", n_polygons)
for(i in 1:n_polygons) {
# Random center point with buffer from edges
center_x <- runif(1, min = -170, max = 170)
center_y <- runif(1, min = -80, max = 80)
# Create simple rectangular polygons to avoid geometry issues
width <- runif(1, min = 0.5, max = 3)
height <- runif(1, min = 0.5, max = 3)
# Create rectangle coordinates (must be closed: first point = last point)
x_coords <- c(
center_x - width/2,
center_x + width/2,
center_x + width/2,
center_x - width/2,
center_x - width/2 # Close the polygon
)
y_coords <- c(
center_y - height/2,
center_y - height/2,
center_y + height/2,
center_y + height/2,
center_y - height/2 # Close the polygon
)
# Create polygon matrix
coords <- cbind(x_coords, y_coords)
# Create polygon (wrapped in list as required by st_polygon)
polygons_list[[i]] <- st_polygon(list(coords))
}
polygons_sf <- st_sf(
poly_id = 1:n_polygons,
region = sample(c("North", "South", "East", "West"), n_polygons, replace = TRUE),
population = sample(1000:1000000, n_polygons, replace = TRUE),
geometry = st_sfc(polygons_list, crs = 4326)
)
## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_join"
# Helper to run the benchmark
run_join_benchmark <- function(points_sf) {
temp <- bench::mark(
iterations = 3,
check = FALSE,
duckspatial = ddbs_join(points_sf, polygons_sf, join = "within"),
sf = st_join(points_sf, polygons_sf, join = st_within)
)
temp$n <- nrow(points_sf)
temp$pkg <- c("duckspatial", "sf")
temp
}
# Run the benchmark
df_bench_join <- lapply(
X = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
FUN = run_join_benchmark
) |>
dplyr::bind_rows()
## -----------------------------------------------------------------------------
#| echo: false
#| warning: false
# Id to store the figures, saving the older ones
id_output <- "v1.5.1"
# Generate the plots
gg_join <- ggplot_benchmark(
data = df_bench_join,
log = FALSE,
show.legend = T,
subtitle = "A) Normal scale"
)
gg_join_log <- ggplot_benchmark(
data = df_bench_join,
log = TRUE,
show.legend = F,
subtitle = "B) Log scale"
)
# Assemble them in a single plot
ggplot_assemble(
plot1 = gg_join,
plot2 = gg_join_log,
fun_name = "ddbs_join()"
)
# Export it
ggsave(
filename = paste0("man/figures/bench/bench-st-join-", id_output, ".png"),
height = 15,
width = 30,
units = "cm"
)
## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_filter"
# Helper to run the benchmark
run_filter_benchmark <- function(points_sf) {
temp <- bench::mark(
iterations = 3,
check = FALSE,
duckspatial = ddbs_filter(points_sf, polygons_sf),
sf = st_filter(points_sf, polygons_sf)
)
temp$n <- nrow(points_sf)
temp$pkg <- c("duckspatial", "sf")
temp
}
# Run the benchmark
df_bench_filter <- lapply(
X = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
FUN = run_filter_benchmark
) |>
dplyr::bind_rows()
## -----------------------------------------------------------------------------
#| echo: false
#| warning: false
# Generate the plots
gg_filter <- ggplot_benchmark(
data = df_bench_filter,
log = FALSE,
show.legend = T,
subtitle = "A) Normal scale"
)
gg_filter_log <- ggplot_benchmark(
data = df_bench_filter,
log = TRUE,
show.legend = F,
subtitle = "B) Log scale"
)
# Assemble them in a single plot
ggplot_assemble(
plot1 = gg_filter,
plot2 = gg_filter_log,
fun_name = "ddbs_filter()"
)
# Export it
ggsave(
filename = paste0("man/figures/bench/bench-st-filter-", id_output, ".png"),
height = 15,
width = 30,
units = "cm"
)
## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_distance"
# Helper to run the benchmark
run_distance_benchmark <- function(n) {
points_sf <- withr::with_seed(27, make_points(n))
temp <- bench::mark(
iterations = 1,
check = FALSE,
duckspatial = ddbs_distance(points_sf, points_sf),
sf = st_distance(points_sf, points_sf)
)
temp$n <- n
temp$pkg <- c("duckspatial", "sf")
temp
}
df_bench_distance <- lapply(
X = c(1000, 5000, 10000),
FUN = run_distance_benchmark
) |>
dplyr::bind_rows()
## -----------------------------------------------------------------------------
#| echo: false
#| warning: false
# Generate the plots
gg_distance <- ggplot_benchmark(
data = df_bench_distance,
log = FALSE,
show.legend = T,
subtitle = "A) Normal scale"
)
gg_distance_log <- ggplot_benchmark(
data = df_bench_distance,
log = TRUE,
show.legend = F,
subtitle = "B) Log scale"
)
# Assemble them in a single plot
ggplot_assemble(
plot1 = gg_distance,
plot2 = gg_distance_log,
fun_name = "ddbs_distance()"
)
# Export it
ggsave(
filename = paste0("man/figures/bench/bench-st-distance-", id_output, ".png"),
height = 15,
width = 30,
units = "cm"
)
## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_union_agg"
# Helper to run the benchmark
run_union_benchmark <- function(points_sf) {
temp <- bench::mark(
iterations = 3,
check = FALSE,
duckspatial = ddbs_union_agg(points_sf, by = "category"),
sf = points_sf |>
group_by(category) |>
summarise(geometry = st_union(geometry))
)
temp$n <- nrow(points_sf)
temp$pkg <- c("duckspatial", "sf")
temp
}
# Run the benchmark
df_bench_union <- lapply(
X = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
FUN = run_union_benchmark
) |>
dplyr::bind_rows()
## -----------------------------------------------------------------------------
#| echo: false
#| warning: false
# Generate the plots
gg_union_agg <- ggplot_benchmark(
data = df_bench_union,
log = FALSE,
show.legend = T,
subtitle = "A) Normal scale"
)
gg_union_agg_log <- ggplot_benchmark(
data = df_bench_union,
log = TRUE,
show.legend = F,
subtitle = "B) Log scale"
)
# Assemble them in a single plot
ggplot_assemble(
plot1 = gg_union_agg,
plot2 = gg_union_agg_log,
fun_name = "ddbs_union_agg()"
)
# Export it
ggsave(
filename = paste0("man/figures/bench/bench-st-dissolve-", id_output, ".png"),
height = 15,
width = 30,
units = "cm"
)
## -----------------------------------------------------------------------------
#| message: false
#| code-fold: true
#| code-summary: "Benchmark code - ddbs_intersects"
# Helper to run the benchmark
run_predicate_benchmark <- function(points_sf) {
temp <- bench::mark(
iterations = 1,
check = FALSE,
duckspatial = ddbs_intersects(points_sf, polygons_sf),
sf = st_intersects(points_sf, polygons_sf)
)
temp$n <- nrow(points_sf)
temp$pkg <- c("duckspatial", "sf")
temp
}
# Run the benchmark
df_bench_predicate <- lapply(
X = list(points_sf_100k, points_sf_1mi, points_sf_3mi),
FUN = run_predicate_benchmark
) |>
dplyr::bind_rows()
## -----------------------------------------------------------------------------
#| echo: false
#| warning: false
# Generate the plots
gg_predicate <- ggplot_benchmark(
data = df_bench_predicate,
log = FALSE,
show.legend = T,
subtitle = "A) Normal scale"
)
gg_predicate_log <- ggplot_benchmark(
data = df_bench_predicate,
log = TRUE,
show.legend = F,
subtitle = "B) Log scale"
)
# Assemble them in a single plot
ggplot_assemble(
plot1 = gg_predicate,
plot2 = gg_predicate_log,
fun_name = "ddbs_intersects()"
)
# Export it
ggsave(
filename = paste0("man/figures/bench/bench-st-intersects-", id_output, ".png"),
height = 15,
width = 30,
units = "cm"
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.