#' Simulates the evolution of a family of sequences by taking into
#' consideration both point mutation and burst events
#' @param sequences The original sequences to begin with
#' @param timestep The size of the timesteps of jumps
#' @param final_time The time to simulate until
#' @param get_transition_matrix The transition matrix for the point mutations
#' @param burst_threshhold The probability of a burst even occurring
#' @param burst_mean The mean (for a Poisson distribution) increase in copy
#' number during a burst event
#' @return A list of sequences that have evolved over time with both point
#' mutation and recombination
#' @export
# TODO: things to make modular
# - the way we mutate sequences using the transition matrix
# - the way we recombine two sequences
# - the way we kill sequences
# - make probability of bursting depend on last burst
# - do it like guilesspie and make mutate_sequences take a time-jump?
simulate_burst_evolution <- function(sequences,
timestep,
final_time,
transition_matrix,
burst_probability,
burst_mean,
max_copy_number,
recombination_rate,
dist_method,
num_matrix_dumps = 0,
num_seq_dumps = 0)
{
num_jumps = ceiling(final_time/timestep) + 1
current_time <- 0
initial_sequence <- sequences[[1]]
# the sequeces for the current time step
curr_data_seq <- data.frame(time = rep(NA_integer_, 1), sequences = rep(NA, 1))
curr_data_seq[1, ] <- c(current_time, list(list(sequences)))
# the summary statistics for pairwise distances between sequences
summ_pair_data <- get_summary_stats_timestep(
pairwise_distances_timestamp(curr_data_seq, current_time, dist_method),
current_time)
# the summary statistics for distance to initial sequence
init_data_all <- (get_distance_to_initial_timestep(
curr_data_seq, initial_sequence, current_time, dist_method))
summ_init_data <- init_data_all$summary_table
init_data_dist <- convert_init_dist_to_df(init_data_all, current_time)
# setting up when to save all the raw sequences
if (num_seq_dumps > 0) {
time_to_seq_dump <- ceiling(num_jumps/num_seq_dumps)
all_data_seq <- curr_data_seq
}
else {
time_to_seq_dump <- num_jumps + 1
all_data_seq <- NULL
}
# setting up when to store the pairwise distance matrix
if (num_matrix_dumps > 0) {
time_to_matrix_dump <- ceiling(num_jumps/num_matrix_dumps)
pair_data <- pairwise_distances_timestamp(curr_data_seq, current_time, dist_method)
}
else {
time_to_matrix_dump <- num_jumps + 1
pair_data <- NULL
}
for (iter in 2:num_jumps) {
current_time = (iter-1)*timestep
cat(paste(iter, "/", num_jumps, "\n", sep=""))
# point mutations
sequences <- mutate_sequences(sequences, transition_matrix)
num_sequences <- length(sequences)
# check and see which members have a burst event
burst_numbers <- burst_sequences(burst_mean, num_sequences,
to_burst(burst_probability, num_sequences))
new_num_sequences <- sum(burst_numbers)
if (new_num_sequences > num_sequences) {
create_new_sequence <- function(index, sequences, burst_numbers, recombination_rate) {
seq1 <- sequences[[select_sequence(burst_numbers)]]
seq2 <- sequences[[select_sequence(burst_numbers)]]
new_sequence <- recombine(list(seq1, seq2), recombination_rate)
return(new_sequence)
}
# generate a set of new sequences for each burst event
new_sequences <- purrr::map((num_sequences+1):new_num_sequences,
create_new_sequence,
sequences,
burst_numbers, recombination_rate)
burst_mean <- update_burst_mean(burst_mean, num_sequences, new_num_sequences)
sequences <- c(sequences, new_sequences)
if (max_copy_number > 0) {
sequences <- kill_sequences(sequences, max_copy_number)
}
}
new_row <- c(current_time, list(list(sequences)))
curr_data_seq[1, ] <- new_row
pair_data_new <- pairwise_distances_bursts(curr_data_seq, current_time, dist_method)
if (iter %% time_to_seq_dump == 0) {
all_data_seq[iter, ] <- new_row
}
if (iter %% time_to_matrix_dump == 0) {
pair_data <- rbind(pair_data, pair_data_new)
}
summ_pair_data_new <- get_summary_stats_timestep(pair_data_new, current_time)
summ_pair_data <- rbind(summ_pair_data, summ_pair_data_new)
init_data_all <- (get_distance_to_initial_timestep(
curr_data_seq, initial_sequence, current_time, dist_method))
summ_init_data_new <- init_data_all$summary_table
summ_init_data <- rbind(summ_init_data, summ_init_data_new)
init_data_dist_new <- convert_init_dist_to_df(init_data_all, current_time)
init_data_dist <- rbind(init_data_dist, init_data_dist_new)
}
return(list(initial_sequence=initial_sequence,
summ_pair_data=summ_pair_data,
summ_init_data=summ_init_data,
all_data_seq=all_data_seq,
pair_data=pair_data,
init_data_dist=init_data_dist,
curr_data_seq=curr_data_seq))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.