The goal of this document is to compare the performances of some of the existing packages.

requireNamespace("bench")
library(tidyverse)
options(digits = 3)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

Queue

queue <- function() {
    self <- environment()
    q <- NULL
    n <- NULL
    push <- function(item) {
        if (is.null(item)) {
            q[n + 1] <<- list(item)
        } else {
            q[[n + 1]] <<- item
        }
        n <<- n + 1
        invisible(self)
    }
    pop <- function() {
        if (n == 0) stop("queue is empty")
        v <- q[[1]]
        q <<- q[-1]
        n <<- n - 1
        v
    }
    peek <- function() {
        if (n == 0) stop("queue is empty")
        q[[1]]
    }
    clear <- function() {
        q <<- list()
        n <<- 0
        self
    }
    clear()
    self
}
bench_queue <- bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `dequer::queue` = {
            q <- dequer::queue()
            pushback <- dequer::pushback
            pop <- dequer::pop
            x <- rnorm(n)
            for (i in 1:n) pushback(q, x[i])
            for (i in 1:n) pop(q)
        },
        `datastructures::queue` = {
            q <- datastructures::queue()
            insert <- datastructures::insert
            pop <- datastructures::pop
            x <- rnorm(n)
            for (i in 1:n) insert(q, x[i])
            for (i in 1:n) pop(q)
        },
        `liqueueR::Queue` = {
            q <- liqueueR::Queue$new()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        `base::list` = {
            q <- queue()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        `collections::queue` = {
            q <- collections::queue()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE))
bench_queue %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("push and pop n times") + ylab("time")
bench_queue %>%
    filter(expression %in% c("base::list", "dequer::queue", "collections::queue")) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("(zoom in)") + ylab("time")

Stack

stack <- function() {
    self <- environment()
    q <- NULL
    n <- NULL

    push <- function(item) {
        if (is.null(item)) {
            q[n + 1] <<- list(item)
        } else {
            q[[n + 1]] <<- item
        }
        n <<- n + 1
        invisible(self)
    }
    pop <- function() {
        if (n == 0) stop("stack is empty")
        v <- q[[n]]
        q <<- q[-n]
        n <<- n - 1
        v
    }
    clear <- function() {
        q <<- list()
        n <<- 0
        invisible(self)
    }
    clear()
    self
}
bench_stack <- bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `dequer::stack` = {
            q <- dequer::stack()
            push <- dequer::push
            pop <- dequer::pop
            x <- rnorm(n)
            for (i in 1:n) push(q, x[i])
            for (i in 1:n) pop(q)
        },
        `datastructures::stack` = {
            q <- datastructures::stack()
            insert <- datastructures::insert
            pop <- datastructures::pop
            x <- rnorm(n)
            for (i in 1:n) insert(q, x[i])
            for (i in 1:n) pop(q)
        },
        `liqueueR::Stack` = {
            q <- liqueueR::Stack$new()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        `base::list` = {
            q <- stack()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        `collections::stack` = {
            q <- collections::stack()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:n) q$pop()
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE))
bench_stack %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("push and pop n times") + ylab("time")
bench_stack %>%
    filter(expression %in% c("base::list", "dequer::stack", "collections::stack")) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("(zoom in)") + ylab("time")

Deque

deque <- function() {
    self <- environment()
    q <- NULL
    n <- NULL

    push <- function(item) {
        if (is.null(item)) {
            q[n + 1] <<- list(item)
        } else {
            q[[n + 1]] <<- item
        }
        n <<- self$n + 1
        invisible(self)
    }
    pushleft <- function(item) {
        q <<- c(list(item), q)
        n <<- n + 1
        invisible(self)
    }
    pop <- function() {
        if (n == 0) stop("deque is empty")
        v <- q[[n]]
        q <<- q[-n]
        n <<- n - 1
        v
    }
    popleft <- function() {
        if (n == 0) stop("deque is empty")
        v <- q[[1]]
        q <<- q[-1]
        n <<- n - 1
        v
    }
    clear <- function() {
        q <<- list()
        n <<- 0
        invisible(self)
    }
    clear()
    self
}
bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `dequer::deque` = {
            q <- dequer::deque()
            push <- dequer::push
            pop <- dequer::pop
            popback <- dequer::popback
            x <- rnorm(n)
            for (i in 1:n) push(q, x[i])
            for (i in 1:floor(n / 2)) {
                pop(q)
                popback(q)
            }
        },
        `base::list` = {
            q <- deque()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:floor(n / 2)) {
                q$pop()
                q$popleft()
            }
        },
        `collections::deque` = {
            q <- collections::deque()
            x <- rnorm(n)
            for (i in 1:n) q$push(x[i])
            for (i in 1:floor(n / 2)) {
                q$pop()
                q$popleft()
            }
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE)) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("push and pop n times") + ylab("time")

Priority Queue

bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `liqueueR::PriorityQueue` = {
            q <- liqueueR::PriorityQueue$new()
            x <- rnorm(n)
            p <- sample.int(n, n, replace = TRUE)
            for (i in 1:n) q$push(x[i], p[i])
            for (i in 1:n) q$pop()
        },
        `datastructures::binomial_heap` = {
            q <- datastructures::binomial_heap(key.class = "integer")
            insert <- datastructures::insert
            pop <- datastructures::pop
            x <- rnorm(n)
            p <- sample.int(n, n, replace = TRUE)
            for (i in 1:n) insert(q, p[i], x[i])
            for (i in 1:n) pop(q)
        },
        `collections::priority_queue` = {
            q <- collections::priority_queue()
            x <- rnorm(n)
            p <- sample.int(n, n, replace = TRUE)
            for (i in 1:n) q$push(x[i], p[i])
            for (i in 1:n) q$pop()
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE)) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("push and pop n times") + ylab("time")

Dictionary

bench_dict <- bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `datastructures::hashmap` = {
            h <- datastructures::hashmap()
            insert <- datastructures::insert
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) insert(h, k[i], x[i])
            for (i in sample(1:n)) h[k[i]]
        },
        `hash::hash` = {
            h <- hash::hash()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h[[k[i]]] <- x[i]
            for (i in sample(1:n)) h[[k[i]]]
        },
        `fastmap::fastmap` = {
            h <- fastmap::fastmap()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h$set(k[i], x[i])
            for (i in sample(1:n)) h$get(k[i])
        },
        `base::new.env` = {
            h <- new.env(parent = emptyenv())
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h[[k[i]]] <- x[i]
            for (i in sample(1:n)) get0(k[i], envir = x, inherits = FALSE)
        },
        `utils::hashtab` = {
            h <- hashtab()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h[[k[i]]] <- x[i]
            for (i in sample(1:n)) h[[k[i]]]
        },    
        `collections::dict` = {
            h <- collections::dict()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h$set(k[i], x[i])
            for (i in sample(1:n)) h$get(k[i])
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE))
bench_dict %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("push and pop n times") + ylab("time")
bench_dict %>%
    filter(expression %in% 
           c("hash::hash", "fastmap::fastmap", "collections::dict", 
             "utils::hashtab", "base::new.env")) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("(zoom in)") + ylab("time")

Note that base::new.env suffers from memory leak issue. The performance gap between R environments and collections::dict is mainly attributed to the overhead in accessing the $get and $set methods from the dict object.

Ordered dict

bench::press(
    n = c(10, 50, 100, 200, 500),
    bench::mark(
        `base::list` = {
            h <- list()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h[[k[i]]] <- x[i]
            for (i in sample(1:n)) h[[k[i]]]
        },
        `collections::ordered_dict` = {
            h <- collections::ordered_dict()
            x <- rnorm(n)
            k <- as.character(seq_len(n))
            for (i in 1:n) h$set(k[i], x[i])
            for (i in sample(1:n)) h$get(k[i])
        },
        check = FALSE
    )) %>%
    mutate(expression = fct_reorder(
            as.character(expression), median, .fun = mean, .desc = TRUE)) %>%
    ggplot(aes(x = n, y = median)) +
    geom_line(aes(color = expression)) +
    scale_colour_brewer(palette = "Set2", direction = -1) +
    ggtitle("set and get n times") + ylab("time")

Note that ordered_dict grows linearly in n but but list does not.



randy3k/collections documentation built on May 2, 2023, 9:35 p.m.