# Survey responses exported to CSV; the slides below plot columns from this table.
completed <- read.csv(file = "extdata/typeform.csv")
# Set the graphical parameters used for the slide figures.
#
# Every argument is handed straight to par(); the defaults give tight margins,
# axis labels close to the axis, short tick marks, slightly smaller horizontal
# axis text and a single-panel layout. Anything supplied through `...` is
# passed to par() as well. The value returned is whatever par() returns.
setnicepar <- function(mar = c(3, 3, 2, 1),
                       mgp = c(2, 0.4, 0),
                       tck = -.01,
                       cex.axis = 0.9,
                       las = 1,
                       mfrow = c(1, 1),
                       ...) {
  par(
    mar = mar,
    mgp = mgp,
    tck = tck,
    cex.axis = cex.axis,
    las = las,
    mfrow = mfrow,
    ...
  )
}

Tutorial R Package

Contains copies of slides and exercises

# Register the "jr-packages" drat repository first, then install the tutorial
# package by name.
install.packages(pkgs = "drat")
drat::addRepo("jr-packages")
install.packages(pkgs = "efficientTutorial")

Who am I

Jumping Rivers

Who are you? Time to make a friend

Who are you?

# Bar chart of how often each value occurs in the `list_iVD4_choice` column.
setnicepar()
tab <- table(completed$list_iVD4_choice)
barplot(height = tab, col = "steelblue")

We also have a Transitioning Physicist in the room

Functions & loops

# Two panels side by side: one for each opinion-scale column.
setnicepar(mfrow = c(1, 2))

# Fixing the levels to 1..10 keeps scores that nobody picked on the axis
# (with a zero-height bar) instead of dropping them.
r_fun <- factor(as.numeric(completed$opinionscale_nW40), levels = 1:10)
r_for <- factor(as.numeric(completed$opinionscale_LMTT), levels = 1:10)

# Same y-range on both panels so the bar heights are directly comparable.
barplot(height = table(r_fun), main = "Functions",
        col = "steelblue", ylim = c(0, 40))
barplot(height = table(r_for), main = "Loops",
        col = "steelblue", ylim = c(0, 40))

Other bits and pieces

Today's tutorial

## Slides: open the vignette index for the tutorial package
browseVignettes(package = "efficientTutorial")

What we won't cover

What we do cover

The goal is to give a flavour of the topics

Optimisation

Optimisation

The real problem is that programmers have spent far too much time worrying about efficiency in the wrong places and at the wrong times; premature optimization is the root of all evil (or at least most of it) in programming.

Donald Knuth

Timing code

system.time()

microbenchmark()

library("microbenchmark")

# Build the same 100 x 100 block of integers twice: once as a matrix and once
# as a data frame. These must exist before the benchmark below refers to them.
d_m <- matrix(1:10000, ncol = 100)
d_df <- as.data.frame(d_m)
colnames(d_df) <- paste0("c", seq_len(ncol(d_df)))

# Time first-row and first-column extraction on each structure. The benchmark
# is run once; the surrounding parentheses print the result as well as
# assigning it to `res`.
(res <- microbenchmark(times = 1000,
               unit = "ms", # milliseconds
           d_m[1,], d_df[1,], d_m[,1], d_df[,1]))
# Example output from one run:
#Unit: milliseconds
#      expr   min    lq   mean  median      uq      max neval cld
#  d_m[1, ] 0.004 0.008  0.014   0.014  0.0204    0.049  1000  a 
# d_df[1, ] 4.722 5.067  5.681   5.333  5.6767  109.383  1000   b
#  d_m[, 1] 0.006 0.006  0.007   0.007  0.0081    0.024  1000  a 
# d_df[, 1] 0.006 0.008  0.012   0.012  0.0153   0.0558  1000  a 

# Store the timings on disk so they can be reloaded without re-running the
# benchmark.
saveRDS(res, "extdata/data_matrix.rds")

Plotting method

# Plot the benchmark timings with a log-scaled y axis.
plot(res, log = "y")

# Reload the timings saved in extdata/data_matrix.rds and redraw them with the
# slide styling plus a background grid.
res <- readRDS(file = "extdata/data_matrix.rds")
setnicepar()
# NOTE(review): base graphics names this parameter `col`, not `colour`;
# confirm the plot method used for `res` actually honours `colour`.
plot(res, log = "y", colour = "steelblue")
grid()

data frame vs matrix

eRum Resolution

Never ask on Stack Overflow which method is faster!

On to byte compiling



jr-packages/efficientTutorial documentation built on Feb. 16, 2020, 7:05 p.m.