rrq benchmarks

These benchmarks aim to measure the per-call overhead of the systems while they do as little as possible. On its own this number is fairly meaningless, because you'd typically only use parallel execution when each task takes a significant amount of time, but knowing what the overhead is may help determine what "significant" means here.

```{r echo = FALSE, results = "hide"}
knitr::opts_chunk$set(error = FALSE)
```

A controller to throw jobs at:

```r
# Build a random identifier prefixed with "rrq:", create a controller for it
# and spawn one worker through that controller.
id <- sprintf("rrq:%s", ids::random_id(bytes = 4))
obj <- rrq::rrq_controller$new(id)
rrq::rrq_worker_spawn(obj, 1)

Running a thousand trivial jobs (look in particular at `elapsed`, as we're interested in wall time):

# Time 1000 no-op (`identity`) tasks sent through the controller, with the
# progress display switched off.
system.time(obj$lapply(seq_len(1000L), identity, progress = FALSE))

Then repeat over a grid of task counts and worker counts:

# Time `nrep` repetitions of running `n` trivial (`identity`) tasks.
#
# n        Number of tasks submitted in each repetition.
# nworkers Desired number of workers; workers are spawned or stopped via
#          `obj` to move from `obj$worker_len()` towards this count.
# nrep     Number of repetitions to time.
# obj      Controller exposing `worker_len()`, `worker_list()`,
#          `worker_stop()` and `lapply()`.
#
# Returns one "elapsed" value from `system.time()` per repetition.
bench_rrq <- function(n, nworkers, nrep, obj) {
  if (interactive()) {
    message(sprintf("%d / %d / %d", n, nrep, nworkers))
  }

  # Positive surplus: too many workers; negative surplus: too few.
  surplus <- obj$worker_len() - nworkers
  if (surplus < 0L) {
    suppressMessages(rrq::rrq_worker_spawn(obj, -surplus))
  } else if (surplus > 0L) {
    obj$worker_stop(obj$worker_list()[seq_len(surplus)])
  }

  tasks <- seq_len(n)
  time_once <- function() {
    timing <- system.time(obj$lapply(tasks, identity, progress = FALSE))
    timing[["elapsed"]]
  }
  replicate(nrep, time_once())
}

# Benchmark grid: task counts 2, 8, ..., 2048 against 1 to 4 workers, with
# 10 timed repetitions for every combination.
n <- 2^seq(1, 11, by = 2)
nw <- seq_len(4L)
nrep <- 10
# expand.grid varies `n` fastest, so filling a matrix with length(n) rows
# puts task counts in rows and worker counts in columns.
dat <- expand.grid(n = n, nworkers = nw)
times <- mapply(bench_rrq, dat$n, dat$nworkers,
                MoreArgs = list(nrep = nrep, obj = obj), SIMPLIFY = FALSE)
# Median over the repetitions, scaled by 1000 (seconds to milliseconds).
time <- 1000 * matrix(vapply(times, median, numeric(1)), nrow = length(n))

Total time taken

# Total time (ms) against number of tasks on log-log axes, one line per
# worker count (columns of `time`).
cols <- RColorBrewer::brewer.pal(length(nw), "Blues")
matplot(n, time, type = "l", log = "xy", lty = 1, col = cols,
        xlab = "Number of tasks", ylab = "Total time (ms)")
legend("topleft", legend = nw, col = cols, lty = 1, bty = "n")
# Tag the right-hand end of each line with its worker count. `labels` is
# passed explicitly because the default prints the positions 1, 2, ...,
# which only coincide with the worker counts while `nw` is exactly 1:k.
text(n[length(n)], time[nrow(time), ], labels = nw, adj = -0.5, cex = 0.5)

Time per call (the increase on the right-hand side is probably due to the cost of submitting the tasks, which is done in serial)

# Per-call time: each row of `time` is divided by its task count (`n` has
# one entry per row, so it recycles down every column).
time_call <- time / n
matplot(n, time_call, type = "l", log = "xy", lty = 1, col = cols,
        xlab = "Number of tasks", ylab = "Per-call time (ms / call)")
legend("topright", legend = nw, col = cols, lty = 1, bty = "n")
# Tag the right-hand end of each line with its worker count. `labels` is
# passed explicitly because the default prints the positions 1, 2, ...,
# which only coincide with the worker counts while `nw` is exactly 1:k.
text(n[length(n)], time_call[nrow(time_call), ], labels = nw,
     adj = -0.5, cex = 0.5)

Time per call per worker

# Per-call-per-worker time: divide each row by its task count and each
# column by its worker count (`rep(nw, each = nrow(time))` lines up with the
# column-major layout of `time`).
time_call_worker <- time / n / rep(nw, each = nrow(time))
matplot(n, time_call_worker, type = "l", log = "xy", lty = 1, col = cols,
        xlab = "Number of tasks",
        ylab = "Per-call-per-worker time (ms / call / worker)")
legend("topright", legend = nw, col = cols, lty = 1, bty = "n")
# Tag the right-hand end of each line with its worker count. `labels` is
# passed explicitly because the default prints the positions 1, 2, ...,
# which only coincide with the worker counts while `nw` is exactly 1:k.
text(n[length(n)], time_call_worker[nrow(time_call_worker), ],
     labels = nw, adj = -0.5, cex = 0.5)
# Benchmarking is finished: tear down the controller.
# NOTE(review): presumably this also stops the spawned workers and removes
# the stored data for this id; confirm against the rrq documentation.
obj$destroy()


mrc-ide/rrq documentation built on March 5, 2024, 7:31 p.m.