Some timings for wrapr::let()
.
Keep in mind for any serious application the calculation time on data will far dominate any expression re-write time from either rlang
/tidyeval
or wrapr
. But it has been asked what the timings are, and it is fun to look.
So we will compare:
fWrapr*
wrapr::let()
substitution ("langsubs"
mode).fTidyN*
rlang::eval_tidy()
substitution to quoted names (the closest I found to wrapr::let()
).fTidyQ*
rlang::eval_tidy()
substitution to quo()
free names (what seems to be the preferred case/notation by rlang
/tidyeval
developers as it moves from NSE (non-standard evaluation interface) to NSE).# build source code for fns of size k buildFnsK <- function(k) { varlist <- paste0("var_", seq(0, k-1)) nmlist <- paste0("NM_", seq(0, k-1)) aliasStr <- paste("c(", paste(paste(nmlist, "=", paste0("'", varlist, "'"), collapse= ",\n "), ")")) exprStr <- paste(nmlist, collapse= " +\n ") wFn <- paste0(" fWrapr_", k, " <- function() { let( ", aliasStr, ", ", exprStr, " )} ") t1Str <- paste(nmlist, "=", paste0("as.name('", varlist, "')"), collapse= "\n ") t1Expr <- exprStr <- paste(paste0("(!!", nmlist, ")"), collapse= " +\n ") tFn1 <- paste0(" fTidyN_", k, " <- function() { ", t1Str, " eval_tidy(quo( ", t1Expr, " )) } ") t2Str <- paste(nmlist, "=", paste0("quo(", varlist, ")"), collapse= "\n ") tFn2 <- paste0(" fTidyQ_", k, " <- function() { ", t2Str, " eval_tidy(quo( ", t1Expr, " )) } ") paste(wFn, tFn1, tFn2, collapse = "\n\n\n") } ftext <- paste( buildFnsK(10), buildFnsK(15), buildFnsK(20), buildFnsK(25), buildFnsK(50), collapse = "\n\n") fileConn<-file("genFns.R") writeLines(ftext, fileConn) close(fileConn)
library("microbenchmark") library("wrapr") library("rlang") suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("dplyr")) # load generated examples source("genFns.R") # load up vars nvars <- 200 for(i in seq(0, nvars-1)) { assign(paste('var', i, sep='_'), i) } fWrapr_1 <- function() { let( c( NM_0 = 'var_0' ), NM_0 )} fTidyN_1 <- function() { NM_0 = as.name('var_0') eval_tidy(quo( (!!NM_0) )) } fTidyQ_1 <- function() { NM_0 = quo(var_0) eval_tidy(quo( (!!NM_0) )) } fWrapr_1() fTidyN_1() fTidyQ_1() fWrapr_5 <- function() { let( c( NM_0 = 'var_0', NM_1 = 'var_1', NM_2 = 'var_2', NM_3 = 'var_3', NM_4 = 'var_4' ), NM_0 + NM_1 + NM_2 + NM_3 + NM_4 )} fTidyN_5 <- function() { NM_0 = as.name('var_0') NM_1 = as.name('var_1') NM_2 = as.name('var_2') NM_3 = as.name('var_3') NM_4 = as.name('var_4') eval_tidy(quo( (!!NM_0) + (!!NM_1) + (!!NM_2) + (!!NM_3) + (!!NM_4) )) } fTidyQ_5 <- function() { NM_0 = quo(var_0) NM_1 = quo(var_1) NM_2 = quo(var_2) NM_3 = quo(var_3) NM_4 = quo(var_4) eval_tidy(quo( (!!NM_0) + (!!NM_1) + (!!NM_2) + (!!NM_3) + (!!NM_4) )) } fWrapr_5() fTidyN_5() fTidyQ_5() fWrapr_25() fTidyN_25() fTidyQ_25()
bm <- microbenchmark( fWrapr_1(), fTidyN_1(), fTidyQ_1(), fWrapr_5(), fTidyN_5(), fTidyQ_5(), fWrapr_10(), fTidyN_10(), fTidyQ_10(), fWrapr_15(), fTidyN_15(), fTidyQ_15(), fWrapr_20(), fTidyN_20(), fTidyQ_20(), fWrapr_25(), fTidyN_25(), fTidyQ_25(), times=1000L ) print(bm) autoplot(bm)
d <- as.data.frame(bm) d$size <- as.numeric(gsub("[^0-9]+", "", d$expr)) d$fn <- gsub("[_0-9].*$", "", d$expr) mkPlot <- function(d, title) { d$size <- as.factor(d$size) highCut <- as.numeric(quantile(d$time, probs = 0.99)) dcut <- d[d$time<=highCut, , drop=FALSE] ggplot(data=dcut, aes(x=time, group=expr, color=size)) + geom_density(adjust=0.3) + facet_wrap(~fn, ncol=1, scales = 'free_y') + xlab('time (NS)') + ggtitle(title) } mkPlot(d, 'all timings') mkPlot(d[d$fn %in% c('fWrapr', 'fTidyN'), , drop=FALSE], 'fWrapr v.s. fTidyN') mkPlot(d[d$fn %in% c('fTidyQ', 'fTidyN'), , drop=FALSE], 'fTidyQ v.s. fTidyN')
# fit a linear function for runtime as a function of size # per group. fits <- d %>% split(., .$fn) %>% lapply(., function(di) { lm(time ~ size, data=di) }) %>% lapply(., coefficients) %>% lapply(., function(ri) { data.frame(Intercept= ri[["(Intercept)"]], size= ri[['size']]) }) dfits <- bind_rows(fits) dfits$fn <- names(fits) # "Intercept" is roughly start-up cost # "size" is slope or growth rate print(dfits) # solve for size where two lines interesect. # Note: this is a naive estimate, and not stable # in the face of estimated slopes and intercepts. solve <- function(dfits, f1, f2) { idx1 <- which(dfits$fn==f1) idx2 <- which(dfits$fn==f2) size <- (dfits$Intercept[[idx1]] - dfits$Intercept[[idx2]]) / (dfits$size[[idx2]] - dfits$size[[idx1]]) size } crossingPoint <- solve(dfits, 'fTidyN', 'fWrapr') print(crossingPoint)
Overall:
fWrapr*
is fastest, but seems to have worse size dependent growth rate (or slope) than fTidyN*
. This means that we would expect at some large substitution size fTidyN*
could become quicker (about r format(crossingPoint, digits=0)
or more variables). Likely wrapr::let()
is paying too much for a map-lookup somewhere and this could be fixed at some point.fTidyQ*
is very much slower with a much worse slope. Likely the slope is also some expensive mapping that can also be fixed.Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.