knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = FALSE ) library(Rcpp) library(yasss) # toggle the speed_up variable during development. speed_up <- 1 speed_up <- 100 speeding_it_up <- speed_up > 1
for (i in 1:10){ print('Speeding up the process for development') print(speed_up) }
yasss:::restart_r() library(testthat) devtools::load_all()
Turns out that I forgot to specify method = 'hamming' in a stringdist call and that caused 99% of the delays. Right now further optimization is not needed. Something will be needed for get_fit_offspring which I will look at later.
To improve performance, various parts of the simulations were implemented in C++ and accessed via Rcpp. This vignette explains some of the weirder things that had to be done to get this working and presents a detailed performance comparison.
Due to the way the mutators and fitness_evaluators are specified in the sim_pop call, we need to check that you can actually pass functions to Rcpp and that the performance hits are not too large.
To test this, write a silly function in Rcpp and pass it to another silly function in Rcpp.
R performs poorly when you grow data structures or when you update a data.frame repeatedly. So write a C++ function that produces a fibonacci-like sequence and then write another function that repeatedly call s this fibonacci-like sequence generator and populates a data.frame with it. Instead of adding the two previous numbers to each other add the imediately preceding number to the 4th power root of the number preceding it. Making this modification keeps the number sizes under control. This will be a good indicator of the performance characteristics for the kinds of calculations in yass.
fibo <- function(s1, s2, n){ result <- c(s1, s2) for (i in 3:n){ result <- c(result, result[i-2]^0.25 + result[i-1]) } result } fibo_df <- function(n, n_columns = 20){ x <- data.frame() for (i in 1:n_columns){ x[,paste("V", i, sep = '')] <- numeric(0) } s1 <- 1 s2 <- 1 for (i in 1:n){ y <- fibo(s1, s2, n_columns+2) y <- matrix(y[3:(n_columns+2)], ncol = n_columns) y <- as.data.frame(y) x <- rbind(x, y) } x } print('Time required to compute 50000 elements of the sequence') start_time <- proc.time() x <- fibo(1, 1, 50000/speed_up) print(proc.time() - start_time) print('Time required to compute 400 x 400 matrix') start_time <- proc.time() y <- fibo_df(400/speed_up, 400/speed_up) print(proc.time() - start_time)
print('Time required to compute 50000 elements of the sequence') start_time <- proc.time() x <- cpp_fibo_exported(1, 1, 50000/speed_up) print(proc.time() - start_time) print('Time required to compute 10000 x 10000 matrix') start_time <- proc.time() y <- yasss:::cpp_fibo_df(10000/speed_up, 10000/speed_up) print(proc.time() - start_time)
print('Time required to compute 10000 x 10000 matrix') start_time <- proc.time() x <- yasss:::cpp_fibo_df_pass(10000/speed_up, 10000/speed_up, cpp_fibo_exported) print(proc.time() - start_time) print('Time required to compute 10000 x 10000 matrix') start_time <- proc.time() x <- yasss:::cpp_fibo_df_pass(10000/speed_up, 10000/speed_up, yasss:::cpp_fibo) print(proc.time() - start_time)
IT WORKS !!!!!
So this means that it is a valid strategy to write a C++ function for the mutator / fitness_evaluator and then pass that function, via R, into another C++ function (sim_next_gen / sim_pop).
print('Time to mutate 5e6 sequence in R') start_time <- proc.time() x <- mutator_uniform_fun(paste(rep('A', 5000000/speed_up), sep = '', collapse = ''), 0.1) print(proc.time() - start_time)
print('Time to mutate 5e7 sequence in cpp') library(Rcpp) sourceCpp('../src/mutator_uniform.cpp') start_time <- proc.time() x <- yasss:::cpp_mutator_uniform_fun(paste(rep('A', 50000000/speed_up), sep = '', collapse = ''), 0.1) print(proc.time() - start_time)
Very minor speedup.
Initially there was a decent speed-up, but it was only due to me forgetting to prevent a mutation from mutation to the same letter. I think I can regain the speed-up if I use a map instead of a while loop.
print('Time to mutate 10000 length 500 sequences in R') start_time <- proc.time() for (i in 1:(10000/speed_up)){ x <- mutator_uniform_fun(paste(rep('A', 500), sep = '', collapse = ''), 0.1) } print(proc.time() - start_time)
print('Time to mutate 10000 length 500 sequences in cpp') start_time <- proc.time() for (i in 1:(10000/speed_up)){ x <- mutator_uniform_fun(paste(rep('A', 500), sep = '', collapse = ''), 0.1) } print(proc.time() - start_time)
Much smaller speedup.
ancestors <- paste(rep('A', 500), sep = '', collapse = '') ancestors <- rep(ancestors, 2000/speed_up) genea <- data.frame( gen_num = 0, id = 1:length(ancestors), parent_id = -1, the_seq = ancestors, n_mut = NA_real_, recomb_pos = NA_real_, recomb_replaced = NA_character_, recomb_partner = NA_real_, recomb_muts = NA_real_, fitness_score = NA_real_, stringsAsFactors = FALSE )
print('Time to compute next gen for 2k ancestors with 20 offspring each when passing mutator_uniform_fun to sim_next_gen') start_time <- proc.time() x <- sim_next_gen(genea, 20, list(fun = "mutator_uniform_fun", args = list(mu = 0.01)) ) print(proc.time() - start_time)
print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen') start_time <- proc.time() x <- sim_next_gen(genea, 20, list(fun = "cpp_mutator_uniform_fun", args = list(mu = 0.01)) ) print(proc.time() - start_time)
4x speedup
library(profvis) devtools::load_all() profvis({ x <- sim_pop(ancestors = paste(rep("A", 500), collapse = ''), r0 = 2, n_gen = 18, n_pop = Inf, mutator = list(fun = "mutator_uniform_fun", args = list(mu = 1/250)), fitness_evaluator = list(fun = "fitness_evaluator_homology_fun", args = list(comparators = paste(rep('XXXXA', 100), collapse = ''), h2fs = "h2fs_univariate_linear_fun"))) })
After fixing the stringdist issue, get_fit_offspring was the biggest bottleneck. As a first step to optimizing this function, I just implemented it in R without using data.frames.
x <- sim_pop(ancestors = paste(rep("A", 500), collapse = ''), r0 = 2, n_gen = 10, n_pop = Inf, mutator = list(fun = "mutator_uniform_fun", args = list(mu = 1/250)), fitness_evaluator = list(fun = "fitness_evaluator_homology_fun", args = list(comparators = paste(rep('XXXXA', 100), collapse = ''), h2fs = "h2fs_univariate_linear_fun"))) x$fitness_score[1:31] <- 1
print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen') start_time <- proc.time() fit_df <- get_fit_offspring(x, 0.02, 'df') print(proc.time() - start_time)
print('Time to compute next gen for 2k ancestors with 20 offspring each when passing cpp_mutator_uniform_fun to sim_next_gen') start_time <- proc.time() fit_vec <- get_fit_offspring(x, 0.02, 'Rvec') print(proc.time() - start_time)
data.frames are evil. On 14 generations, it is 53sec vs 0.4sec.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.