The goal of this document is to compare the performances of some of the existing packages.
r packageDescription("dequer")$Version
implements double
ended queues and it supports arbitrary R objects. However, it uses
R_PreserveObject
and R_ReleaseObject
heavily which could be an issue
for long queues. r packageDescription("datastructures")$Version
uses
‘Boost’ and ‘STL’ data types to implement queues and hashmaps. For some
reasons, it is often slow as shown in the benchmark.r packageDescription("liqueueR")$Version
implements
queues in pure R code which explains its slowness.r packageDescription("hash")$Version
uses R environments to
create hash tables. It is known that regular R enviroments leak memory.r packageDescription("fastmap")$Version
. The current implementation of collection::dict
is actually inspired by it. However, a more
efficient hash table library tommy
is used.requireNamespace("bench") library(tidyverse) options(digits = 3) knitr::opts_chunk$set(warning = FALSE, message = FALSE)
queue <- function() { self <- environment() q <- NULL n <- NULL push <- function(item) { if (is.null(item)) { q[n + 1] <<- list(item) } else { q[[n + 1]] <<- item } n <<- n + 1 invisible(self) } pop <- function() { if (n == 0) stop("queue is empty") v <- q[[1]] q <<- q[-1] n <<- n - 1 v } peek <- function() { if (n == 0) stop("queue is empty") q[[1]] } clear <- function() { q <<- list() n <<- 0 self } clear() self }
bench_queue <- bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `dequer::queue` = { q <- dequer::queue() pushback <- dequer::pushback pop <- dequer::pop x <- rnorm(n) for (i in 1:n) pushback(q, x[i]) for (i in 1:n) pop(q) }, `datastructures::queue` = { q <- datastructures::queue() insert <- datastructures::insert pop <- datastructures::pop x <- rnorm(n) for (i in 1:n) insert(q, x[i]) for (i in 1:n) pop(q) }, `liqueueR::Queue` = { q <- liqueueR::Queue$new() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, `base::list` = { q <- queue() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, `collections::queue` = { q <- collections::queue() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE))
bench_queue %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("push and pop n times") + ylab("time") bench_queue %>% filter(expression %in% c("base::list", "dequer::queue", "collections::queue")) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("(zoom in)") + ylab("time")
stack <- function() { self <- environment() q <- NULL n <- NULL push <- function(item) { if (is.null(item)) { q[n + 1] <<- list(item) } else { q[[n + 1]] <<- item } n <<- n + 1 invisible(self) } pop <- function() { if (n == 0) stop("stack is empty") v <- q[[n]] q <<- q[-n] n <<- n - 1 v } clear <- function() { q <<- list() n <<- 0 invisible(self) } clear() self }
bench_stack <- bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `dequer::stack` = { q <- dequer::stack() push <- dequer::push pop <- dequer::pop x <- rnorm(n) for (i in 1:n) push(q, x[i]) for (i in 1:n) pop(q) }, `datastructures::stack` = { q <- datastructures::stack() insert <- datastructures::insert pop <- datastructures::pop x <- rnorm(n) for (i in 1:n) insert(q, x[i]) for (i in 1:n) pop(q) }, `liqueueR::Stack` = { q <- liqueueR::Stack$new() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, `base::list` = { q <- stack() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, `collections::stack` = { q <- collections::stack() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:n) q$pop() }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE))
bench_stack %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("push and pop n times") + ylab("time") bench_stack %>% filter(expression %in% c("base::list", "dequer::stack", "collections::stack")) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("(zoom in)") + ylab("time")
deque <- function() { self <- environment() q <- NULL n <- NULL push <- function(item) { if (is.null(item)) { q[n + 1] <<- list(item) } else { q[[n + 1]] <<- item } n <<- self$n + 1 invisible(self) } pushleft <- function(item) { q <<- c(list(item), q) n <<- n + 1 invisible(self) } pop <- function() { if (n == 0) stop("deque is empty") v <- q[[n]] q <<- q[-n] n <<- n - 1 v } popleft <- function() { if (n == 0) stop("deque is empty") v <- q[[1]] q <<- q[-1] n <<- n - 1 v } clear <- function() { q <<- list() n <<- 0 invisible(self) } clear() self }
bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `dequer::deque` = { q <- dequer::deque() push <- dequer::push pop <- dequer::pop popback <- dequer::popback x <- rnorm(n) for (i in 1:n) push(q, x[i]) for (i in 1:floor(n / 2)) { pop(q) popback(q) } }, `base::list` = { q <- deque() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:floor(n / 2)) { q$pop() q$popleft() } }, `collections::deque` = { q <- collections::deque() x <- rnorm(n) for (i in 1:n) q$push(x[i]) for (i in 1:floor(n / 2)) { q$pop() q$popleft() } }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE)) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("push and pop n times") + ylab("time")
bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `liqueueR::PriorityQueue` = { q <- liqueueR::PriorityQueue$new() x <- rnorm(n) p <- sample.int(n, n, replace = TRUE) for (i in 1:n) q$push(x[i], p[i]) for (i in 1:n) q$pop() }, `datastructures::binomial_heap` = { q <- datastructures::binomial_heap(key.class = "integer") insert <- datastructures::insert pop <- datastructures::pop x <- rnorm(n) p <- sample.int(n, n, replace = TRUE) for (i in 1:n) insert(q, p[i], x[i]) for (i in 1:n) pop(q) }, `collections::priority_queue` = { q <- collections::priority_queue() x <- rnorm(n) p <- sample.int(n, n, replace = TRUE) for (i in 1:n) q$push(x[i], p[i]) for (i in 1:n) q$pop() }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE)) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("push and pop n times") + ylab("time")
bench_dict <- bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `datastructures::hashmap` = { h <- datastructures::hashmap() insert <- datastructures::insert x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) insert(h, k[i], x[i]) for (i in sample(1:n)) h[k[i]] }, `hash::hash` = { h <- hash::hash() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h[[k[i]]] <- x[i] for (i in sample(1:n)) h[[k[i]]] }, `fastmap::fastmap` = { h <- fastmap::fastmap() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h$set(k[i], x[i]) for (i in sample(1:n)) h$get(k[i]) }, `base::new.env` = { h <- new.env(parent = emptyenv()) x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h[[k[i]]] <- x[i] for (i in sample(1:n)) get0(k[i], envir = x, inherits = FALSE) }, `utils::hashtab` = { h <- hashtab() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h[[k[i]]] <- x[i] for (i in sample(1:n)) h[[k[i]]] }, `collections::dict` = { h <- collections::dict() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h$set(k[i], x[i]) for (i in sample(1:n)) h$get(k[i]) }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE))
bench_dict %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("push and pop n times") + ylab("time") bench_dict %>% filter(expression %in% c("hash::hash", "fastmap::fastmap", "collections::dict", "utils::hashtab", "base::new.env")) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("(zoom in)") + ylab("time")
Note that base::new.env
suffers from memory leak issue. The performance gap between R environments and collections::dict
is mainly attributed to the overhead in accessing the $get
and $set
methods
from the dict
object.
bench::press( n = c(10, 50, 100, 200, 500), bench::mark( `base::list` = { h <- list() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h[[k[i]]] <- x[i] for (i in sample(1:n)) h[[k[i]]] }, `collections::ordered_dict` = { h <- collections::ordered_dict() x <- rnorm(n) k <- as.character(seq_len(n)) for (i in 1:n) h$set(k[i], x[i]) for (i in sample(1:n)) h$get(k[i]) }, check = FALSE )) %>% mutate(expression = fct_reorder( as.character(expression), median, .fun = mean, .desc = TRUE)) %>% ggplot(aes(x = n, y = median)) + geom_line(aes(color = expression)) + scale_colour_brewer(palette = "Set2", direction = -1) + ggtitle("set and get n times") + ylab("time")
Note that ordered_dict
grows linearly in n
but but list
does not.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.