rrq benchmarks

These benchmarks aim to measure the per-call overhead of the systems while they do as little as possible. On its own this is fairly meaningless, because you would typically only use parallel execution for something that takes a significant amount of time, but knowing what the overhead is may help determine what "significant" means here.

```{r echo = FALSE, results = "hide"}
knitr::opts_chunk$set(error = FALSE)
```

A controller to throw jobs at:

```r
# Load rrq, which provides every rrq_* function used below.
library(rrq)
# Queue identifier: "rrq:" followed by 4 random bytes from the ids package
# (presumably so that separate runs do not share a queue).
id <- paste0("rrq:", ids::random_id(bytes = 4))
# Create a controller for that identifier and register it as the default;
# the rrq_* calls below are all made without an explicit controller argument.
obj <- rrq_controller(id)
rrq_default_controller_set(obj)
# Start with a single worker; bench_rrq() resizes the pool as needed.
w <- rrq_worker_spawn(1)

A helper that runs `n` trivial jobs and collects their results, tried here with five jobs (in the timings that follow, look in particular at elapsed, as we're interested in wall time)

# Submit `n` trivial tasks (each one calls `identity()` on one element of
# `seq_len(n)`), wait for all of them without a progress bar, and return
# whatever `rrq_task_results()` gives back for the submitted tasks.
run_example <- function(n) {
  inputs <- seq_len(n)
  task_ids <- rrq_task_create_bulk_call(identity, inputs)
  rrq_task_wait(task_ids, progress = FALSE)
  rrq_task_results(task_ids)
}
# Try the helper once with five tasks before benchmarking.
run_example(5)

Then over a matrix of numbers of tasks and workers:

# Time repeated runs of `run_example(n)` against a pool of `nworkers` workers.
#
# Arguments:
#   n        Number of tasks submitted per run.
#   nworkers Number of workers the pool should contain while timing.
#   nrep     Number of timed repetitions.
#
# Returns a numeric vector of length `nrep` holding the elapsed (wall-clock)
# time of each repetition, in seconds as reported by `system.time()`.
bench_rrq <- function(n, nworkers, nrep) {
  if (interactive()) {
    # Label the values so the progress line is unambiguous.
    message(sprintf("n = %d / nworkers = %d / nrep = %d", n, nworkers, nrep))
  }

  # Resize the worker pool to exactly `nworkers`: spawn the shortfall, or stop
  # the first surplus workers listed by rrq_worker_list().
  n_missing <- nworkers - rrq_worker_len()
  if (n_missing > 0L) {
    suppressMessages(rrq_worker_spawn(n_missing))
  } else if (n_missing < 0L) {
    rrq_worker_stop(rrq_worker_list()[seq_len(-n_missing)])
  }

  # vapply() rather than replicate(): the result is always a numeric vector,
  # including numeric(0) when nrep is 0 (replicate() would return list()).
  vapply(
    seq_len(nrep),
    function(i) system.time(run_example(n))[["elapsed"]],
    numeric(1))
}

# Task counts to benchmark: 2, 8, 32, 128, 512 and 2048.
n <- 2^c(1, 3, 5, 7, 9, 11)
# Worker-pool sizes to benchmark.
nw <- 1:4
# Timed repetitions per (tasks, workers) combination.
nrep <- 10
# Every combination of task count and pool size; expand.grid() varies its
# first factor (`n`) fastest.
dat <- expand.grid(n = n, nworkers = nw)
# One vector of `nrep` elapsed times per row of `dat` (`nrep` is recycled).
times <- Map(bench_rrq, dat$n, dat$nworkers, nrep)
# Median per combination, reshaped to length(n) rows so that rows follow `n`
# and columns follow `nw`; multiplied by 1000 to convert seconds to ms.
time <- matrix(vapply(times, median, numeric(1)), length(n)) * 1000

Total time taken

# One line colour per worker count (indices into the current palette).
cols <- seq_along(nw) # RColorBrewer::brewer.pal(length(nw), "Blues")
# One line per column of `time`, on log-log axes.
matplot(n, time, type = "l", log = "xy", lty = 1, col = cols,
        xlab = "Number of tasks", ylab = "Total time (ms)")
legend("topleft", legend = nw, col = cols, lty = 1, bty = "n")
# Tag the right-hand end of each line with its worker count. The labels are
# passed explicitly: text() otherwise defaults to 1, 2, ..., which matches the
# worker counts only while `nw` happens to be 1:length(nw).
text(n[length(n)], time[nrow(time), ], labels = nw, adj = -0.5, cex = 0.5)

Time per call (the increase on the right-hand side is probably due to the cost of submitting the tasks, which is done in serial)

# Per-task time: `n` is recycled down the rows of `time`, so each row is
# divided by its own task count.
time_call <- time / n
# One line per column of `time_call`, on log-log axes.
matplot(n, time_call, type = "l", log = "xy", lty = 1, col = cols,
        xlab = "Number of tasks", ylab = "Per-call time (ms / call)")
legend("topright", legend = nw, col = cols, lty = 1, bty = "n")
# Tag the right-hand end of each line with its worker count. The labels are
# passed explicitly: text() otherwise defaults to 1, 2, ..., which matches the
# worker counts only while `nw` happens to be 1:length(nw).
text(n[length(n)], time_call[nrow(time_call), ], labels = nw, adj = -0.5,
     cex = 0.5)

Cleanup

# Called with no arguments, so presumably this acts on the default controller
# registered at the top of the file — confirm against the rrq documentation.
rrq_destroy()


richfitz/rrq documentation built on July 15, 2024, 8:12 p.m.