# Load the survey export that the plots below draw from.
completed <- read.csv(file = "extdata/typeform.csv")
#' Apply a compact set of base-graphics parameters
#'
#' Thin wrapper around `par()` that supplies the defaults used for the
#' figures in this document. The settings stay active on the current
#' graphics device until they are changed again.
#'
#' @param mar Plot margins, passed to `par(mar = )`.
#' @param mgp Axis title / label / line placement, passed to `par(mgp = )`.
#' @param tck Tick-mark length, passed to `par(tck = )`.
#' @param cex.axis Axis annotation magnification, passed to
#'   `par(cex.axis = )`.
#' @param las Axis label orientation, passed to `par(las = )`.
#' @param mfrow Panel layout, passed to `par(mfrow = )`.
#' @param ... Further graphical parameters forwarded unchanged to `par()`.
#' @return Invisibly, whatever the `par()` call returns (per the `par()`
#'   documentation, the previous values of the parameters that were set),
#'   so a caller can restore them later with `par(old)`.
setnicepar <- function(mar = c(3, 3, 2, 1),
                       mgp = c(2, 0.4, 0), tck = -.01,
                       cex.axis = 0.9, las = 1, mfrow = c(1, 1), ...) {
  previous <- par(
    mar = mar,
    mgp = mgp,
    tck = tck,
    cex.axis = cex.axis,
    las = las,
    mfrow = mfrow,
    ...
  )
  invisible(previous)
}

Tutorial R Package

Contains copies of slides and exercises

# Install drat, register the "jr-packages" repository with it, and then
# install the tutorial package itself.
install.packages(pkgs = "drat")
drat::addRepo("jr-packages")
install.packages(pkgs = "efficientTutorial")

Who am I

Jumping Rivers

Who are you? Time to make a friend

Who are you?

# Bar chart of the counts of each distinct value in the
# `list_iVD4_choice` column of the survey data.
setnicepar()
tab <- table(completed[["list_iVD4_choice"]])
barplot(height = tab, col = "steelblue")

We also have a Transitioning Physicist in the room

Functions & loops

# Two side-by-side bar charts (one row, two columns), one per
# opinion-scale column of the survey data.
setnicepar(mfrow = c(1, 2))

# Fixing the factor levels to 1:10 keeps a bar for every score, including
# scores that do not occur in the data.
r_fun <- factor(as.numeric(completed[["opinionscale_nW40"]]), levels = 1:10)
barplot(table(r_fun), main = "Functions", col = "steelblue", ylim = c(0, 40))

r_for <- factor(as.numeric(completed[["opinionscale_LMTT"]]), levels = 1:10)
barplot(table(r_for), main = "Loops", col = "steelblue", ylim = c(0, 40))

Other bits and pieces

Today's tutorial

## Slides
# List the vignettes shipped with the tutorial package.
browseVignettes(package = "efficientTutorial")

What we won't cover

What we do cover

The goal is to give a flavour of the topics

Optimisation

Optimisation

The real problem is that programmers have spent far too much time worrying about efficiency in the wrong places and at the wrong times; premature optimization is the root of all evil (or at least most of it) in programming.

Donald Knuth

Timing code

system.time()

microbenchmark()

# Benchmark extracting the first row and the first column from a matrix
# and from a data frame holding the same values.
library("microbenchmark")

# Build the inputs BEFORE benchmarking: the expressions passed to
# microbenchmark() refer to d_m and d_df, so both must already exist.
d_m <- matrix(1:10000, ncol = 100)
d_df <- as.data.frame(d_m)
colnames(d_df) <- paste0("c", seq_along(d_df))

# Run the benchmark once; the surrounding parentheses print the result
# as well as assigning it to `res`.
(res <- microbenchmark(times = 1000,
                       unit = "ms", # milliseconds
                       d_m[1, ], d_df[1, ], d_m[, 1], d_df[, 1]))
# Example output from one run (timings differ between runs and machines):
#Unit: milliseconds
#      expr   min    lq   mean  median      uq      max neval cld
#  d_m[1, ] 0.004 0.008  0.014   0.014  0.0204    0.049  1000  a 
# d_df[1, ] 4.722 5.067  5.681   5.333  5.6767  109.383  1000   b
#  d_m[, 1] 0.006 0.006  0.007   0.007  0.0081    0.024  1000  a 
# d_df[, 1] 0.006 0.008  0.012   0.012  0.0153   0.0558  1000  a 

# Save the timings so they can be reloaded later without re-running
# the benchmark.
saveRDS(res, "extdata/data_matrix.rds")

Plotting method

# Plot the benchmark timings with a logarithmic y-axis.
plot(res, log = "y")

# Reload the saved benchmark result and draw it again with the document's
# graphics settings and a background grid.
res <- readRDS(file = "extdata/data_matrix.rds")
setnicepar()
# NOTE(review): base graphics names this parameter `col`, not `colour` --
# confirm that the plot method dispatched for `res` accepts `colour`.
plot(res, log = "y", colour = "steelblue")
grid()

data frame vs matrix

eRum Resolution

Never ask on Stack Overflow which method is faster!

On to byte compiling



jr-packages/efficientTutorial documentation built on Feb. 16, 2020, 7:05 p.m.