# set the knitr options ... for everyone! # if you unset this, then vignette build bonks. oh, joy. #opts_knit$set(progress=TRUE) opts_knit$set(eval.after='fig.cap') # for a package vignette, you do want to echo. # opts_chunk$set(echo=FALSE,warning=FALSE,message=FALSE) opts_chunk$set(warning=FALSE,message=FALSE) #opts_chunk$set(results="asis") opts_chunk$set(cache=TRUE,cache.path="cache/") #opts_chunk$set(fig.path="figure/",dev=c("pdf","cairo_ps")) opts_chunk$set(fig.path="figure/",dev=c("png")) opts_chunk$set(fig.width=7,fig.height=6,dpi=100,out.width='700px',out.height='600px') # doing this means that png files are made of figures; # the savings is small, and it looks like shit: #opts_chunk$set(fig.path="figure/",dev=c("png","pdf","cairo_ps")) #opts_chunk$set(fig.width=4,fig.height=4) # for figures? this is sweave-specific? #opts_knit$set(eps=TRUE) # this would be for figures: #opts_chunk$set(out.width='.8\\textwidth') # for text wrapping: options(width=160,digits=2) opts_chunk$set(size="small") opts_chunk$set(tidy=TRUE,tidy.opts=list(width.cutoff=50,keep.blank.line=TRUE)) library(ggplot2) library(fromo) library(dplyr) library(moments) library(microbenchmark)
To prevent performance regressions, compare them here, including benchmarks against other packages:
library(fromo) library(RollingWindow) library(roll) library(RcppRoll) library(microbenchmark) library(moments) library(RcppParallel) # keep this constant for comparison setThreadOptions(numThreads = 2) print(Sys.info()) print(sessionInfo()) set.seed(12345) x <- rnorm(1e5) xpos <- runif(length(x)) +1 xm <- matrix(x,ncol=1) xmps <- matrix(xpos,ncol=1) w <- runif(length(x)) dumbk <- function(x) { c(kurtosis(x) - 3.0,skewness(x),sd(x),mean(x),length(x)) } checkit <- microbenchmark(sum(x), mean(x), sd(x), skewness(x), kurtosis(x), sd3(x), skew4(x), kurt5(x), dumbk(x)) print(checkit) resdf <- checkit gc()
# weights slow_sd <- function(x,w) { n0 <- length(x) mu <- weighted.mean(x,w=w) sg <- sqrt(sum(w * (x-mu)^2)/(n0 - 1)) c(sg,mu,n0) } checkit <- microbenchmark(cent_moments(x,max_order=4,wts=w,na_rm=TRUE,normalize_wts=FALSE), sd3(x,wts=w), slow_sd(x,w)) print(checkit) resdf <- rbind(resdf,checkit) gc()
set.seed(12345) x1 <- runif(1e4) x2 <- runif(length(x1)) checkit <- microbenchmark(as.centsums(x1,1), as.centsums(x1,2), as.centsums(x1,3), as.centsums(x1,4)) print(checkit) resdf <- rbind(resdf,checkit) gc() # join them together max_ord <- 6L obj1 <- as.centsums(x1,max_ord) obj2 <- as.centsums(x2,max_ord) obj3 <- as.centsums(c(x1,x2),max_ord) checkit <- microbenchmark(c(obj1,obj2), obj3 %-% obj1) print(checkit) resdf <- rbind(resdf,checkit) gc() max_ord <- 6L rs1 <- cent_sums(x1,max_ord) rs2 <- cent_sums(x2,max_ord) rs3 <- cent_sums(c(x1,x2),max_ord) checkit <- microbenchmark(join_cent_sums(rs1,rs2), unjoin_cent_sums(rs3,rs2), unjoin_cent_sums(rs3,rs1)) print(checkit) resdf <- rbind(resdf,checkit) gc()
set.seed(54321) x1 <- matrix(rnorm(100*4),ncol=4) x2 <- matrix(rnorm(100*4),ncol=4) max_ord <- 2L # join them together mobj1 <- as.centcosums(x1,max_ord) mobj2 <- as.centcosums(x2,max_ord) mobj3 <- as.centcosums(rbind(x1,x2),max_ord) alt3 <- c(mobj1,mobj2) # unjoin them, with this one weird operator: alt2 <- mobj3 %-% mobj1 alt1 <- mobj3 %-% mobj2 checkit <- microbenchmark(as.centcosums(x1,max_ord), mobj3 %-% mobj1) print(checkit) resdf <- rbind(resdf,checkit) gc()
set.seed(4422) x <- rnorm(1e4) xpos <- runif(length(x)) +1 xm <- matrix(x,ncol=1) xmps <- matrix(xpos,ncol=1) w <- runif(length(x)) dumb_zscore <- function(x,window) { altz <- sapply(seq_along(x),function(iii) { rowi <- max(1,iii - window + 1) xrang <- x[rowi:iii] (x[iii] - mean(xrang)) / sd(xrang) },simplify=TRUE) } # run fun on each wins sized window... silly_fun <- function(x,wins,fun,...) { xout <- rep(NA,length(x)) for (iii in seq_along(x)) { xout[iii] <- fun(x[max(1,iii-wins+1):iii],...) } xout } wins <- 250 checkit <- microbenchmark(silly_fun(x,wins,sum,na.rm=FALSE), silly_fun(x,wins,mean,na.rm=FALSE), running_sum(x,wins), running_mean(x,wins), roll::roll_sum(xm,wins), roll::roll_mean(xm,wins), roll::roll_sd(xm,wins), RollingWindow::RollingSum(x,wins,na_method='ignore'), RollingWindow::RollingSum(x,wins), RollingWindow::RollingMean(x,wins), RollingWindow::RollingStd(x,wins), RcppRoll::roll_sum(xm,n=wins,align='right',fill=NA), RcppRoll::roll_mean(xm,n=wins,align='right',fill=NA), RcppRoll::roll_sd(xm,n=wins,align='right',fill=NA), running_sd(x,wins,na_rm=FALSE,restart_period=50000L), running_sd(x,wins,na_rm=TRUE,restart_period=1000L), running_sd3(x,wins), running_skew(x,wins), running_skew4(x,wins), running_kurt(x,wins), running_kurt5(x,wins), running_tstat(x,wins), running_zscored(x,wins), running_sharpe(x,wins), running_apx_median(x,wins), running_centered(x,wins), running_scaled(x,wins) ) print(checkit) resdf <- rbind(resdf,checkit) gc()
library(readr) FAKE_IT <- FALSE if (FAKE_IT) { resdf <- readr::read_csv('timings.csv') #print(resdf) } else { readr::write_csv(resdf,'timings.csv') }
Can we see them here? load all the timing data to check.
library(magrittr) library(dplyr) library(readr) library(tidyr) library(knitr) mysernum <- as.character(packageVersion('fromo') ) allt <- data.frame(fname=dir('.','*.csv'),stringsAsFactors=FALSE) %>% dplyr::filter(grepl('^timings_\\d.+\\d+.csv$',fname)) %>% group_by(fname) %>% mutate(tims=list(readr::read_csv(fname))) %>% ungroup() %>% tidyr::unnest() %>% mutate(sernum=gsub('^timings_(.+).csv$','\\1',fname)) %>% dplyr::select(-fname) %>% rbind(resdf %>% dplyr::mutate(sernum=mysernum)) %>% group_by(sernum,expr) %>% summarize(meantime=mean(time,na.rm=TRUE)) %>% ungroup() %>% group_by(sernum) %>% mutate(sumx_time=median(ifelse(grepl('^sum\\(x\\)$',expr),meantime,NA),na.rm=TRUE)) %>% ungroup() %>% mutate(normalized=meantime / sumx_time) %>% arrange(sernum) %>% group_by(expr) %>% mutate(first_norm=first(normalized), last_norm=last(normalized)) %>% ungroup() %>% mutate(relchange=normalized / first_norm, last_status=last_norm / first_norm) %>% mutate(exgrp=ifelse(grepl('^(roll|RollingWindow|RcppRoll)::',expr),"brand_x", ifelse(grepl('^running_',expr),"running","summarizing"))) library(ggplot2) ph <- allt %>% dplyr::filter(!grepl('brand_x',exgrp)) %>% ggplot(aes(sernum,normalized,group=expr,color=expr)) + geom_line() + geom_point() + scale_y_log10() + guides(colour=FALSE) + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + facet_grid(exgrp ~ .,scales='free') + labs(x='release', y='mean time taken, relative to sum(x)', title='fromo microbenchmark timings, lasagna') print(ph) ph <- allt %>% dplyr::filter(!grepl('brand_x',exgrp)) %>% ggplot(aes(sernum,relchange,group=expr,color=expr)) + geom_line() + geom_point() + scale_y_log10() + guides(colour=FALSE) + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + facet_grid(exgrp ~ .,scales='free') + labs(x='release', y='normalized time taken, relative to first iteration', title='fromo microbenchmark timings, spaghetti') print(ph) ph <- allt %>% dplyr::filter(!grepl('brand_x',exgrp)) %>% ggplot(aes(sernum,relchange)) + geom_boxplot(aes(group=sernum),alpha=0.7) + stat_summary(aes(group="1",color='mean'),fun.y=mean,geom="line") + scale_y_log10() + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + facet_grid(exgrp ~ .,scales='free') + labs(x='release', y='normalized time taken, relative to first iteration', color='stat', title='fromo microbenchmark timings, boxplots') print(ph) allt %>% dplyr::filter(!grepl('brand_x',exgrp)) %>% select(expr,sernum,relchange,last_status) %>% tidyr::spread(key='sernum',value='relchange') %>% arrange(desc(last_status)) %>% select(-last_status) %>% head(n=50) %>% kable() allt %>% dplyr::filter(!grepl('brand_x',exgrp)) %>% select(-sernum,-relchange,-meantime,-sumx_time,-normalized) %>% distinct(expr,.keep_all=TRUE) %>% arrange(desc(last_status)) %>% head(n=20) %>% kable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.