In hughjonesd/pastapi: Explore the Historical API of R Packages

# knitr chunk defaults for this document: collapse = TRUE, and "#>" as the
# prefix for output lines.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

Speed tests

These benchmarks were run on my MacBook Air over a standard broadband connection.

Setup

library(apicheck)
library(microbenchmark)
library(ggplot2)
library(dplyr)
library(huxtable)

# Set the `repos` option for this session to the RStudio CRAN mirror.
options(repos = "https://cran.rstudio.com")

# Build (without evaluating) a call to when_fun_exists() for benchmarking.
#
# fun:         function name as a string, e.g. "pkg::fun".
# search:      search strategy name, passed through as `search`.
# min_version: minimum version string; NA or NULL are both passed on as
#              `min_version = NULL`.
#
# Returns an unevaluated call with `progress = FALSE`.
make_test <- function(fun, search, min_version = NA) {
  # Check is.null() first: is.na(NULL) is logical(0), which makes `if` fail
  # with "argument is of length zero".
  if (is.null(min_version) || is.na(min_version)) {
    min_version <- NULL
  }
  bquote(
    when_fun_exists(
      .(fun),
      search = .(search),
      min_version = .(min_version),
      progress = FALSE
    )
  )
}

# Turn a results data frame into a huxtable: the `date` column is converted to
# character, column names are added as a row, row 1 is bold, the table gets an
# outer border of width 0.4, and the number format is set to NA.
print_results <- function(results) {
  with_text_dates <- mutate(results, date = as.character(date))
  ht <- add_colnames(as_hux(with_text_dates))
  ht <- set_bold(ht, 1, everywhere, TRUE)
  ht <- set_outer_borders(ht, 0.4)
  set_number_format(ht, NA)
}

# Summarise a benchmark object and format the summary as a huxtable: rows are
# sorted by expression label, row 1 is bold, even rows get a light grey
# background, the table gets an outer border, and every column except those
# matching "neval" uses number format 1.
pretty_print <- function(benchmark) {
  timings <- arrange(summary(benchmark), as.character(expr))
  ht <- add_colnames(as_hux(timings))
  ht <- set_bold(ht, 1, everywhere, TRUE)
  ht <- set_background_color(ht, evens, everywhere, grey(0.9))
  ht <- set_outer_borders(ht, 0.4)
  set_number_format(ht, everywhere, -matches("neval"), 1)
}

# Plot a benchmark result with autoplot(), overlaying the individual timings
# as small red points and ordering the expression axis by reverse-sorted label.
#
# result: a benchmark object accepted by autoplot(), with an `expr` column
#         holding the expression labels.
#
# Returns the ggplot object.
pretty_plot <- function(result) {
  # Take the labels from the result itself rather than from the global `tests`,
  # so the function also works for benchmarks built from other expression lists.
  # as.character() covers `expr` being either a factor or a character column.
  labels <- unique(as.character(result$expr))
  autoplot(result) +
    geom_point(color = "red", alpha = 0.5, size = 0.5) +
    scale_x_discrete(limits = rev(sort(labels)))
}

The tests

`clipr` is a small package with few versions. `mice` is a larger package with more dependencies, and it requires compilation.

# Functions to look up, and the minimum version for each, paired by position
# (NA = no minimum given).
funs <- c("clipr::dr_clipr", "mice::ampute")
min_versions <- c(NA, "2.20")

# One row per (function, search strategy) combination.
args <- expand.grid(
  fun = funs,
  search = c("binary", "forward", "backward", "all", "parallel"),
  stringsAsFactors = FALSE
)

# Look up each row's minimum version by function name.
args[["min_version"]] <- unname(setNames(min_versions, funs)[args[["fun"]]])

# Build one unevaluated test call per row of `args`, keep the arguments
# alongside it, and label each test as "<fun> / <search>".
test_df <- args %>%
  rowwise() %>%
  do(expr = make_test(.$fun, .$search, .$min_version)) %>%
  bind_cols(args) %>%
  mutate(label = paste(fun, search, sep = " / "))

# Named list of test expressions, keyed by label.
tests <- setNames(test_df$expr, test_df$label)

# Show the test expressions as unquoted text.
noquote(as.character(tests))

Results

Backward search should work well for both these functions.

# For each function, run a search with `search = "all"` and show the results
# table, using ".apicheck" as the library directory.
set_lib_dir(".apicheck")
for (f in funs) {
  print(f)
  # Values are not auto-printed inside a `for` loop, so the table must be
  # printed explicitly; otherwise it is built and silently discarded.
  # NOTE(review): this goes through huxtable's print method rather than
  # knitr's top-level rendering - confirm the output format is acceptable.
  print(print_results(when_fun_exists(f, search = "all", progress = FALSE)))
}

Pre-downloaded

# DO NOT EDIT: CACHE WILL RESET

# Cache packages: run a full ("all") search for every function with
# ".apicheck" as the library directory. Functions without a minimum version
# use "0.0" here.
set_lib_dir(".apicheck")
invisible(mapply(
  when_fun_exists,
  fun = funs,
  min_version = ifelse(is.na(min_versions), "0.0", min_versions),
  MoreArgs = list(search = "all", progress = FALSE)
))

# Time every test expression 20 times.
cached <- microbenchmark(list = tests, times = 20)

pretty_print(cached)
pretty_plot(cached)

With download

# DO NOT EDIT: CACHE WILL RESET

# NOTE(review): presumably resets the library directory to its default -
# confirm against the apicheck documentation.
set_lib_dir(NULL)

# Wrap each test so that clear_lib_dir() runs right after it, as part of the
# timed expression.
tests2 <- setNames(
  lapply(tests, function(test) {
    bquote({
      .(test)
      clear_lib_dir()
    })
  }),
  names(tests)
)

# Time every wrapped test expression 5 times.
uncached <- microbenchmark::microbenchmark(list = tests2, times = 5)

pretty_print(uncached)
pretty_plot(uncached)

These results suggest that choosing the right search algorithm matters as much as parallelism, and that parallelism is costly once packages have already been downloaded.

hughjonesd/pastapi documentation built on Sept. 9, 2019, 12:56 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com