This document serves as an overview for measuring the performance of RcppAlgos
against other tools for generating combinations, permutations, and partitions. This stackoverflow post: How to generate permutations or combinations of object in R? has some benchmarks. You will note that the examples in that post are relatively small. The benchmarks below will focus on larger examples where performance really matters and for this reason we only consider the packages arrangements, partitions, and RcppAlgos.
For the benchmarks below, we used a 2022 Macbook Air Apple M2 24 GB
machine.
library(RcppAlgos) library(partitions) library(arrangements) #> #> Attaching package: 'arrangements' #> The following object is masked from 'package:partitions': #> #> compositions library(microbenchmark) options(digits = 4) options(width = 90) pertinent_output <- capture.output(sessionInfo()) cat(paste(pertinent_output[1:3], collapse = "\n")) #> R version 4.3.1 (2023-06-16) #> Platform: aarch64-apple-darwin20 (64-bit) #> Running under: macOS Ventura 13.4.1 pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark") sapply(pkgs, packageVersion, simplify = FALSE) #> $RcppAlgos #> [1] '2.8.2' #> #> $arrangements #> [1] '1.1.9' #> #> $partitions #> [1] '1.10.7' #> #> $microbenchmark #> [1] '1.4.10' numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6) numThreads #> [1] 4
set.seed(13) v1 <- sort(sample(100, 30)) m <- 21 t1 <- comboGeneral(v1, m, Parallel = T) t2 <- combinations(v1, m) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14307150 21 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v1, m), cbArrangements = combinations(v1, m), times = 15, unit = "relative") #> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), : #> less accurate nanosecond times to avoid potential integer overflows #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 3.466 3.197 3.134 3.131 3.057 2.995 15 b #> cbArrangements 3.388 3.124 3.058 3.059 2.988 2.867 15 c
v2 <- v1[1:10] m <- 20 t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads) t2 <- combinations(v2, m, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 10015005 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v2, m, TRUE), cbArrangements = combinations(v2, m, replace = TRUE), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 2.902 3.015 3.052 2.992 2.983 3.915 15 b #> cbArrangements 3.067 3.049 3.012 3.016 2.980 2.934 15 b
myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5) v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610)) t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads) t2 <- combinations(freq = myFreqs, k = 20, x = v3) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14594082 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs), cbArrangements = combinations(freq = myFreqs, k = 20, x = v3), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.117 3.128 3.086 3.087 3.089 2.954 10 b #> cbArrangements 5.863 5.976 5.857 5.872 5.839 5.578 10 c
v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59)) t1 <- permuteGeneral(v4, 6, nThreads = numThreads) t2 <- permutations(v4, 6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8910720 6 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v4, 6), cbArrangements = permutations(v4, 6), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 1.523 1.522 1.442 1.517 1.233 1.371 15 b #> cbArrangements 2.540 2.535 2.353 2.499 2.123 2.084 15 c ## Indexing permutation example with the partitions package t1 <- permuteGeneral(11, nThreads = 4) t2 <- permutations(11) t3 <- perms(11) dim(t1) #> [1] 39916800 11 stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3)))) rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4), cbRcppAlgosSer = permuteGeneral(11), cbArrangements = permutations(11), cbPartitions = perms(11), times = 5, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5 a #> cbRcppAlgosSer 2.499 2.534 2.185 2.497 1.765 1.937 5 b #> cbArrangements 4.110 4.090 3.678 4.219 3.144 3.260 5 c #> cbPartitions 8.056 8.104 7.204 8.316 6.046 6.397 5 d
v5 <- v3[1:5] t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads) t2 <- permutations(v5, 10, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9765625 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE), cbArrangements = permutations(x = v5, k = 10, replace = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 2.814 2.816 2.983 2.801 2.774 4.459 10 b #> cbArrangements 3.486 3.443 3.604 3.431 3.410 5.059 10 c
v6 <- sort(runif(12)) t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads) t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 19520760 7 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)), cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.101 3.223 2.994 3.213 3.195 1.907 10 b #> cbArrangements 3.547 3.547 3.319 3.549 3.541 2.122 10 c
t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)), constraintFun = "sum", comparisonFun = "==", limitConstraints = 140) t2 <- partitions(140, distinct = TRUE) t3 <- diffparts(140) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 140 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 140), all(rowSums(t2) == 140), all(colSums(t3) == 140)) dim(t1) #> [1] 9617150 16 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))), cbArrangements = partitions(140, distinct = TRUE), cbPartitions = diffparts(140), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.291 3.284 3.027 3.530 2.663 2.485 10 b #> cbArrangements 2.617 2.591 2.351 2.565 2.133 1.989 10 c #> cbPartitions 17.788 18.491 16.415 19.044 14.459 13.510 10 d
t1 <- comboGeneral(160, 10, constraintFun = "sum", comparisonFun = "==", limitConstraints = 160) t2 <- partitions(160, 10, distinct = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8942920 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(160, 10), cbArrangements = partitions(160, 10, distinct = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.368 3.365 2.944 3.286 2.236 2.378 10 b #> cbArrangements 4.418 4.389 3.737 4.285 2.910 2.736 10 c
t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 65) t2 <- partitions(65) t3 <- parts(65) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 65 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 65), all(rowSums(t2) == 65), all(colSums(t3) == 65)) dim(t1) #> [1] 2012558 65 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE), cbArrangements = partitions(65), cbPartitions = parts(65), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.0000 20 a #> cbRcppAlgosSer 2.869 2.544 2.285 2.464 2.402 0.8894 20 b #> cbArrangements 2.337 1.986 1.872 1.929 1.877 1.2701 20 b #> cbPartitions 9.119 8.959 9.232 10.740 10.446 3.8642 20 c
t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 100) t2 <- partitions(100, 15) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9921212 15 rm(t1, t2) # This takes a really long time... not because of restrictedparts, # but because apply is not that fast. This transformation is # needed for proper comparisons. As a result, we will compare # a smaller example # t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort)) t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort)) stopifnot(identical(partitions(50, 15), t3)) rm(t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE), cbArrangements = partitions(100, 15), cbPartitions = restrictedparts(100, 15, include.zero = FALSE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.352 3.366 2.525 2.645 2.764 1.246 10 b #> cbArrangements 4.207 4.216 3.126 3.326 3.305 1.513 10 c #> cbPartitions 15.888 16.140 11.928 12.606 12.364 6.303 10 d
Currenlty, RcppAlgos
is the only package capable of efficiently generating partitions of multisets. Therefore, we will only time RcppAlgos
and use this as a reference for future improvements.
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15), constraintFun = "sum", comparisonFun = "==", limitConstraints = 120) dim(t1) #> [1] 7340225 10 stopifnot(all(rowSums(t1) == 120)) microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)), times = 10) #> Unit: milliseconds #> expr min lq mean median uq max neval #> cbRcppAlgos 284.7 287.6 291 289.6 295.2 298.2 10
t1 <- compositionsGeneral(0:15, repetition = TRUE) t2 <- arrangements::compositions(15) t3 <- partitions::compositions(15) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 15 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 15), all(rowSums(t2) == 15), all(colSums(t3) == 15)) dim(t1) #> [1] 16384 15 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE), cbArrangements = arrangements::compositions(15), cbPartitions = partitions::compositions(15), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosSer 1.000 1.000 1.000 1.000 1.000 1.000 20 a #> cbArrangements 1.237 1.237 1.234 1.229 1.233 1.234 20 a #> cbPartitions 130.273 148.479 179.197 178.491 203.364 262.383 20 b
For the next two examples, we will exclude the partitions
package for efficiency reasons.
t1 <- compositionsGeneral(0:23, repetition = TRUE) t2 <- arrangements::compositions(23) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 23 stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23), all(rowSums(t2) == 23)) dim(t1) #> [1] 4194304 23 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE), cbArrangements = arrangements::compositions(23), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a #> cbRcppAlgosSer 3.361 3.383 3.293 3.381 3.378 2.250 20 b #> cbArrangements 3.707 3.740 3.657 3.763 3.761 2.506 20 c
t1 <- compositionsGeneral(30, 10, repetition = TRUE) t2 <- arrangements::compositions(30, 10) stopifnot(identical(t1, t2), all(rowSums(t1) == 30)) dim(t1) #> [1] 10015005 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE), cbArrangements = arrangements::compositions(30, 10), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a #> cbRcppAlgosSer 3.042 3.075 3.069 3.071 3.065 3.059 20 b #> cbArrangements 3.132 3.129 3.109 3.115 3.100 3.070 20 c
We will show one example from each category to demonstrate the efficiency of the iterators in RcppAlgos
. The results are similar for the rest of the cases not shown.
pkg_arrangements <- function(n, total) { a <- icombinations(n, as.integer(n / 2)) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- comboIter(n, as.integer(n / 2)) for (i in 1:total) a@nextIter() } total <- comboCount(18, 9) total #> [1] 48620 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total), cbArrangements = pkg_arrangements(18, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.0 15 a #> cbArrangements 19.39 18.95 18.49 18.68 18.63 15.6 15 b
pkg_arrangements <- function(n, total) { a <- ipermutations(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- permuteIter(n) for (i in 1:total) a@nextIter() } total <- permuteCount(8) total #> [1] 40320 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total), cbArrangements = pkg_arrangements(8, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a #> cbArrangements 19.11 18.85 18.62 18.67 17.92 20.89 15 b
pkg_partitions <- function(n, total) { a <- firstpart(n) for (i in 1:(total - 1)) a <- nextpart(a) } pkg_arrangements <- function(n, total) { a <- ipartitions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- partitionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- partitionsCount(0:40, repetition = TRUE) total #> [1] 37338 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total), cbArrangements = pkg_arrangements(40, total), cbPartitions = pkg_partitions(40, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.0 1.00 15 a #> cbArrangements 15.30 15.36 14.95 15.27 14.9 13.63 15 b #> cbPartitions 24.85 24.97 24.02 24.93 24.3 18.44 15 c
pkg_partitions <- function(n, total) { a <- firstcomposition(n) for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE) } pkg_arrangements <- function(n, total) { a <- icompositions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- compositionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- compositionsCount(0:15, repetition = TRUE) total #> [1] 16384 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total), cbArrangements = pkg_arrangements(15, total), cbPartitions = pkg_partitions(15, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a #> cbArrangements 14.06 13.84 13.56 13.83 13.72 12.00 15 b #> cbPartitions 46.04 45.00 44.88 44.40 44.14 45.76 15 c
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.