knitr::opts_chunk$set( collapse = TRUE, comment = " # " ) options(width =100)
# small timings library("data.table") library("dplyr") library("microbenchmark") library("replyr") library("ggplot2") set.seed(32535) data("iris",package = "datasets") iris.dt <- data.table(iris) iris.dt[, mean(Sepal.Length), by=Species] let(list(GROUPCOL='Species', DATACOL='Sepal.Length'), iris.dt[, mean(DATACOL), by=GROUPCOL]) iris %>% group_by(Species) %>% summarize(mean(Sepal.Length)) let(list(GROUPCOL='Species', DATACOL='Sepal.Length'), iris %>% group_by(GROUPCOL) %>% summarize(mean(DATACOL))) # gapply is very bad at this task both notationally, and in terms of speed gapply(iris, 'Species', function(di) { mean(di[['Sepal.Length']]) }, partitionMethod = 'split', bindrows=FALSE) ks <- sort(unique(floor(10^(0.25*(0:16))))) meass <- lapply(ks, function(k) { irisk <- iris[rep(seq_len(nrow(iris)),k),,drop=FALSE] irisk.dt <- data.table(irisk) meas <- microbenchmark(data.table= irisk.dt[, mean(Sepal.Length), by=Species], dplyr= irisk %>% group_by(Species) %>% summarize(mean(Sepal.Length)), let.data.table= let(list(GROUPCOL='Species', DATACOL='Sepal.Length'), irisk.dt[, mean(DATACOL), by=GROUPCOL]), let.dplyr= let(list(GROUPCOL='Species', DATACOL='Sepal.Length'), irisk %>% group_by(GROUPCOL) %>% summarize(mean(DATACOL)))) meas %>% as.data.frame() %>% mutate(k=k) }) res <- lapply(meass, function(meas) { meas %>% as.data.frame() %>% group_by(expr) %>% summarize(mean= mean(time), median= median(time), k= mean(k), # pseudo-aggregator q1= as.numeric(quantile(time,0.25)), q3= as.numeric(quantile(time,0.75))) }) res <- dplyr::bind_rows(res) res$expr <- reorder(res$expr, -res$median) ggplot(data=res, mapping=aes(x=k,y=median,color=expr)) + geom_ribbon(mapping=aes(ymin=q1,ymax=q3,fill=expr),alpha=0.2,color=NA) + geom_point() + geom_line() + scale_x_log10() + scale_y_log10() + ylab('time') + ggtitle("median run times (nanosecond)", subtitle = "as a function of method and replications")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.