knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = FALSE
)
library(Rcpp)
library(yasss)

# toggle the speed_up variable during development.
speed_up <- 1
speed_up <- 100
speeding_it_up <- speed_up > 1
for (i in 1:10){
print('Speeding up the process for development')
print(speed_up)
}
yasss:::restart_r()
library(testthat)
devtools::load_all()

Premature optimization i sthe root of all evil

Turns out that I forgot to specify method = 'hamming' in a stringdist call and that caused 99% of the delays. Right now further optimization is not needed. Something will be needed for get_fit_offspring which I will look at later.

To improve performance, various parts of the simulations were implemented in C++ and accessed via Rcpp. This vignette explains some of the weirder things that had to be done to get this working and presents a detailed performance comparison.

Passing Rcpp functions into Rcpp functions.

Due to the way the mutators and fitness_evaluators are specified in the sim_pop call, we need to check that you can actually pass functions to Rcpp and that the performance hits are not too large.

To test this, write a silly function in Rcpp and pass it to another silly function in Rcpp.

R performs poorly when you grow data structures or when you update a data.frame repeatedly. So write a C++ function that produces a fibonacci-like sequence and then write another function that repeatedly call s this fibonacci-like sequence generator and populates a data.frame with it. Instead of adding the two previous numbers to each other add the imediately preceding number to the 4th power root of the number preceding it. Making this modification keeps the number sizes under control. This will be a good indicator of the performance characteristics for the kinds of calculations in yass.

Pure R

fibo <- function(s1, s2, n){
  result <- c(s1, s2)
  for (i in 3:n){
    result <- c(result, result[i-2]^0.25 + result[i-1])
  }
  result
}

fibo_df <- function(n, n_columns = 20){
  x <- data.frame()
  for (i in 1:n_columns){
    x[,paste("V", i, sep = '')] <- numeric(0)
  }
  s1 <- 1
  s2 <- 1
  for (i in 1:n){
    y <- fibo(s1, s2, n_columns+2)
    y <- matrix(y[3:(n_columns+2)], ncol = n_columns)
    y <- as.data.frame(y)
    x <- rbind(x, y)
  }
  x
}

print('Time required to compute 50000 elements of the sequence')
start_time <- proc.time()
x <- fibo(1, 1, 50000/speed_up)
print(proc.time() - start_time)

print('Time required to compute 400 x 400 matrix')
start_time <- proc.time()
y <- fibo_df(400/speed_up, 400/speed_up)
print(proc.time() - start_time)

Pure C++

print('Time required to compute 50000 elements of the sequence')
start_time <- proc.time()
x <- cpp_fibo_exported(1, 1, 50000/speed_up)
print(proc.time() - start_time)

print('Time required to compute 10000 x 10000 matrix')
start_time <- proc.time()
y <- yasss:::cpp_fibo_df(10000/speed_up, 10000/speed_up)
print(proc.time() - start_time)

Passing Rcpp into Rcpp

print('Time required to compute 10000 x 10000 matrix')
start_time <- proc.time()
x <- yasss:::cpp_fibo_df_pass(10000/speed_up, 10000/speed_up, cpp_fibo_exported)
print(proc.time() - start_time)

print('Time required to compute 10000 x 10000 matrix')
start_time <- proc.time()
x <- yasss:::cpp_fibo_df_pass(10000/speed_up, 10000/speed_up, yasss:::cpp_fibo)
print(proc.time() - start_time)

IT WORKS !!!!!

So this means that it is a valid strategy to write a C++ function for the mutator / fitness_evaluator and then pass that function, via R, into another C++ function (sim_next_gen / sim_pop).

Mutators

mutator_uniform

Performance of mutator_uniform_fun

print('Time to mutate 5e6 sequence in R')
start_time <- proc.time()
x <- mutator_uniform_fun(paste(rep('A', 5000000/speed_up), sep = '', collapse = ''), 0.1)
print(proc.time() - start_time)

Performance of cpp_mutator_uniform_fun

print('Time to mutate 5e7 sequence in cpp')
library(Rcpp)
sourceCpp('../src/mutator_uniform.cpp')
start_time <- proc.time()
x <- yasss:::cpp_mutator_uniform_fun(paste(rep('A', 50000000/speed_up), 
                                           sep = '', collapse = ''), 0.1)
print(proc.time() - start_time)

Very minor speedup.

Initially there was a decent speed-up, but it was only due to me forgetting to prevent a mutation from mutation to the same letter. I think I can regain the speed-up if I use a map instead of a while loop.

Performance of mutator_uniform_fun for many repeated calls

print('Time to mutate 10000 length 500 sequences in R')
start_time <- proc.time()
for (i in 1:(10000/speed_up)){
  x <- mutator_uniform_fun(paste(rep('A', 500), sep = '', collapse = ''), 0.1)
}
print(proc.time() - start_time)

Performance of cpp_mutator_uniform_fun for many repeated calls

print('Time to mutate 10000 length 500 sequences in cpp')
start_time <- proc.time()
for (i in 1:(10000/speed_up)){
  x <- mutator_uniform_fun(paste(rep('A', 500), sep = '', collapse = ''), 0.1)
}
print(proc.time() - start_time)

Much smaller speedup.

sim_next_gen

ancestors <- paste(rep('A', 500), sep = '', collapse = '')
ancestors <- rep(ancestors, 2000/speed_up)
genea <- data.frame(
  gen_num = 0,
  id = 1:length(ancestors),
  parent_id = -1,
  the_seq = ancestors,
  n_mut = NA_real_,
  recomb_pos = NA_real_,
  recomb_replaced = NA_character_,
  recomb_partner = NA_real_,
  recomb_muts = NA_real_,
  fitness_score = NA_real_,
  stringsAsFactors = FALSE
                    )

Passing mutator_uniform_fun

print('Time to compute next gen for 2k ancestors with 20 offspring each when passing mutator_uniform_fun to sim_next_gen')
start_time <- proc.time()
x <- sim_next_gen(genea, 20, 
                  list(fun = "mutator_uniform_fun",
                       args = list(mu = 0.01))
                  )
print(proc.time() - start_time)

Passing cpp_mutator_uniform_fun

print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen')
start_time <- proc.time()
x <- sim_next_gen(genea, 20, 
                  list(fun = "cpp_mutator_uniform_fun",
                       args = list(mu = 0.01))
                  )
print(proc.time() - start_time)

4x speedup

Profiling sim_pop

library(profvis)
devtools::load_all()

profvis({
x <- sim_pop(ancestors = paste(rep("A", 500), collapse = ''),
             r0 = 2,
             n_gen = 18,
             n_pop = Inf,
             mutator = list(fun = "mutator_uniform_fun",
                            args = list(mu = 1/250)),
             fitness_evaluator = list(fun = "fitness_evaluator_homology_fun",
                           args = list(comparators = paste(rep('XXXXA', 100), collapse = ''),
                                       h2fs = "h2fs_univariate_linear_fun")))
})

get_fit_offpspring

After fixing the stringdist issue, get_fit_offspring was the biggest bottleneck. As a first step to optimizing this function, I just implemented it in R without using data.frames.

x <- sim_pop(ancestors = paste(rep("A", 500), collapse = ''),
             r0 = 2,
             n_gen = 10,
             n_pop = Inf,
             mutator = list(fun = "mutator_uniform_fun",
                            args = list(mu = 1/250)),
             fitness_evaluator = list(fun = "fitness_evaluator_homology_fun",
                           args = list(comparators = paste(rep('XXXXA', 100), collapse = ''),
                                       h2fs = "h2fs_univariate_linear_fun")))
x$fitness_score[1:31] <- 1

get_fit_offpsring with data.frames

print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen')
start_time <- proc.time()
fit_df <- get_fit_offspring(x, 0.02, 'df')
print(proc.time() - start_time)

get_fit_offspring with R vectors

print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen')
start_time <- proc.time()
fit_vec <- get_fit_offspring(x, 0.02, 'Rvec')
print(proc.time() - start_time)

data.frames are evil. On 14 generations, it is 53sec vs 0.4sec.



philliplab/yasss documentation built on Sept. 7, 2020, 3:28 p.m.