In WinVector/wrapr: Wrap R Tools for Debugging and Parametric Programming

Some timings for wrapr::let().

Keep in mind for any serious application the calculation time on data will far dominate any expression re-write time from either rlang/tidyeval or wrapr. But it has been asked what the timings are, and it is fun to look.

So we will compare:

fWrapr* wrapr::let() substitution ("langsubs" mode).
fTidyN* rlang::eval_tidy() substitution to quoted names (the closest I found to wrapr::let()).
fTidyQ* rlang::eval_tidy() substitution to quo() free names (what seems to be the preferred case/notation by rlang/tidyeval developers as it moves from NSE (non-standard evaluation interface) to NSE).

# build source code for fns of size k
buildFnsK <- function(k) {
  varlist <- paste0("var_", seq(0, k-1))
  nmlist <- paste0("NM_", seq(0, k-1))

  aliasStr <- paste("c(",
                    paste(paste(nmlist, "=", paste0("'", varlist, "'"),
                                collapse= ",\n    "),
                          ")"))
  exprStr <- paste(nmlist, collapse= " +\n    ")

  wFn <- paste0("
fWrapr_", k, " <- function() {
  let(
   ", aliasStr, ",
   ", exprStr, "
 )}
")
  t1Str <- paste(nmlist, "=", paste0("as.name('", varlist, "')"),
                 collapse= "\n   ")
  t1Expr <- exprStr <- paste(paste0("(!!", nmlist, ")"), collapse= " +\n    ")

  tFn1 <- paste0("
fTidyN_", k, " <- function() {
   ", t1Str, "
   eval_tidy(quo( ", t1Expr, " ))
}
")

  t2Str <- paste(nmlist, "=", paste0("quo(", varlist, ")"),
                 collapse= "\n   ")

  tFn2 <- paste0("
fTidyQ_", k, " <- function() {
   ", t2Str, "
   eval_tidy(quo( ", t1Expr, " ))
}
")

  paste(wFn, tFn1, tFn2,
        collapse = "\n\n\n")
}

ftext <- paste(
  buildFnsK(10),
  buildFnsK(15),
  buildFnsK(20),
  buildFnsK(25),
  buildFnsK(50),
  collapse = "\n\n")

fileConn<-file("genFns.R")
writeLines(ftext, fileConn)
close(fileConn)

library("microbenchmark")
library("wrapr")
library("rlang")
suppressPackageStartupMessages(library("ggplot2"))
suppressPackageStartupMessages(library("dplyr"))

# load generated examples
source("genFns.R")

# load up vars
nvars <- 200
for(i in seq(0, nvars-1)) {
  assign(paste('var', i, sep='_'), i)
}

fWrapr_1 <- function() {
  let(
   c( NM_0 = 'var_0' ),
   NM_0
 )}

fTidyN_1 <- function() {
   NM_0 = as.name('var_0')
   eval_tidy(quo( (!!NM_0) ))
}

fTidyQ_1 <- function() {
   NM_0 = quo(var_0)
   eval_tidy(quo( (!!NM_0) ))
}

fWrapr_1()
fTidyN_1()
fTidyQ_1()


fWrapr_5 <- function() {
  let(
   c( NM_0 = 'var_0', NM_1 = 'var_1', NM_2 = 'var_2', NM_3 = 'var_3', NM_4 = 'var_4' ),
   NM_0 + NM_1 + NM_2 + NM_3 + NM_4
 )}

fTidyN_5 <- function() {
   NM_0 = as.name('var_0')
   NM_1 = as.name('var_1')
   NM_2 = as.name('var_2')
   NM_3 = as.name('var_3')
   NM_4 = as.name('var_4')
   eval_tidy(quo( (!!NM_0) + (!!NM_1) + (!!NM_2) + (!!NM_3) + (!!NM_4) ))
}

fTidyQ_5 <- function() {
   NM_0 = quo(var_0)
   NM_1 = quo(var_1)
   NM_2 = quo(var_2)
   NM_3 = quo(var_3)
   NM_4 = quo(var_4)
   eval_tidy(quo( (!!NM_0) + (!!NM_1) + (!!NM_2) + (!!NM_3) + (!!NM_4) ))
}


fWrapr_5()
fTidyN_5()
fTidyQ_5()

fWrapr_25()
fTidyN_25()
fTidyQ_25()

bm <- microbenchmark(
  fWrapr_1(),
  fTidyN_1(),
  fTidyQ_1(),
  fWrapr_5(),
  fTidyN_5(),
  fTidyQ_5(),
  fWrapr_10(),
  fTidyN_10(),
  fTidyQ_10(),
  fWrapr_15(),
  fTidyN_15(),
  fTidyQ_15(),
  fWrapr_20(),
  fTidyN_20(),
  fTidyQ_20(),
  fWrapr_25(),
  fTidyN_25(),
  fTidyQ_25(), 
  times=1000L
)
print(bm)
autoplot(bm)

d <- as.data.frame(bm)
d$size <- as.numeric(gsub("[^0-9]+", "", d$expr))
d$fn <- gsub("[_0-9].*$", "", d$expr)

mkPlot <- function(d, title) {
  d$size <- as.factor(d$size)
  highCut <- as.numeric(quantile(d$time, probs = 0.99))
  dcut <- d[d$time<=highCut, , drop=FALSE]

  ggplot(data=dcut, aes(x=time, group=expr, color=size)) +
    geom_density(adjust=0.3) +
    facet_wrap(~fn, ncol=1, scales = 'free_y') +
    xlab('time (NS)') + ggtitle(title)
}

mkPlot(d, 'all timings')
mkPlot(d[d$fn %in% c('fWrapr', 'fTidyN'), , drop=FALSE], 
       'fWrapr v.s. fTidyN')
mkPlot(d[d$fn %in% c('fTidyQ', 'fTidyN'), , drop=FALSE], 
       'fTidyQ v.s. fTidyN')

# fit a linear function for runtime as a function of size
# per group.
fits <- d %>%
  split(., .$fn) %>%
  lapply(., 
         function(di) { 
           lm(time ~ size, data=di) 
         }) %>%
  lapply(., coefficients) %>%
  lapply(., 
         function(ri) {
           data.frame(Intercept= ri[["(Intercept)"]],
                      size= ri[['size']])
         }) 
dfits <- bind_rows(fits)
dfits$fn <- names(fits)

# "Intercept" is roughly start-up cost 
# "size" is slope or growth rate
print(dfits)

# solve for size where two lines interesect.
# Note: this is a naive estimate, and not stable
# in the face of estimated slopes and intercepts.
solve <- function(dfits, f1, f2) {
  idx1 <- which(dfits$fn==f1)
  idx2 <- which(dfits$fn==f2)
  size <- (dfits$Intercept[[idx1]] - dfits$Intercept[[idx2]]) /
    (dfits$size[[idx2]] - dfits$size[[idx1]])
  size
}

crossingPoint <- solve(dfits, 'fTidyN', 'fWrapr')
print(crossingPoint)

Overall:

Remember: these timings are not important, for any interesting calculation data manipulation time will quickly dominate expression manipulation time (meaning tuning here is not important).
fWrapr* is fastest, but seems to have worse size dependent growth rate (or slope) than fTidyN*. This means that we would expect at some large substitution size fTidyN* could become quicker (about r format(crossingPoint, digits=0) or more variables). Likely wrapr::let() is paying too much for a map-lookup somewhere and this could be fixed at some point.
fTidyQ* is very much slower with a much worse slope. Likely the slope is also some expensive mapping that can also be fixed.