R/simulate_burst_evolution.R

#' Simulates the evolution of a family of sequences by taking into
#' consideration both point mutation and burst events
#' @param sequences The original sequences to begin with
#' @param timestep The size of the timesteps of jumps
#' @param final_time The time to simulate until
#' @param get_transition_matrix The transition matrix for the point mutations
#' @param burst_threshhold The probability of a burst even occurring
#' @param burst_mean The mean (for a Poisson distribution) increase in copy
#' number during a burst event
#' @return A list of sequences that have evolved over time with both point
#' mutation and recombination
#' @export

# TODO: things to make modular
# - the way we mutate sequences using the transition matrix
# - the way we recombine two sequences
# - the way we kill sequences
# - make probability of bursting depend on last burst
# - do it like guilesspie and make mutate_sequences take a time-jump?
simulate_burst_evolution <- function(sequences,
                                     timestep,
                                     final_time,
                                     transition_matrix,
                                     burst_probability,
                                     burst_mean,
                                     max_copy_number,
                                     recombination_rate,
                                     dist_method,
                                     num_matrix_dumps = 0,
                                     num_seq_dumps = 0)
{
  num_jumps = ceiling(final_time/timestep) + 1
  current_time <- 0

  initial_sequence <- sequences[[1]]
  # the sequeces for the current time step
  curr_data_seq <- data.frame(time = rep(NA_integer_, 1), sequences = rep(NA, 1))
  curr_data_seq[1, ] <- c(current_time, list(list(sequences)))

  # the summary statistics for pairwise distances between sequences
  summ_pair_data <- get_summary_stats_timestep(
                      pairwise_distances_timestamp(curr_data_seq, current_time, dist_method),
                      current_time)

  # the summary statistics for distance to initial sequence
  init_data_all <- (get_distance_to_initial_timestep(
                      curr_data_seq, initial_sequence, current_time, dist_method))
  summ_init_data <- init_data_all$summary_table
  init_data_dist <- convert_init_dist_to_df(init_data_all, current_time)

  # setting up when to save all the raw sequences
  if (num_seq_dumps > 0) {
    time_to_seq_dump <- ceiling(num_jumps/num_seq_dumps)
    all_data_seq <- curr_data_seq
  }
  else {
    time_to_seq_dump <- num_jumps + 1
    all_data_seq <- NULL
  }

  # setting up when to store the pairwise distance matrix
  if (num_matrix_dumps > 0) {
    time_to_matrix_dump <- ceiling(num_jumps/num_matrix_dumps)
    pair_data <- pairwise_distances_timestamp(curr_data_seq, current_time, dist_method)
  }
  else {
    time_to_matrix_dump <- num_jumps + 1
    pair_data <- NULL
  }

  for (iter in 2:num_jumps) {
    current_time = (iter-1)*timestep
    cat(paste(iter, "/", num_jumps, "\n", sep=""))

    # point mutations
    sequences <- mutate_sequences(sequences, transition_matrix)
    num_sequences <- length(sequences)

    # check and see which members have a burst event
    burst_numbers <- burst_sequences(burst_mean, num_sequences,
                                     to_burst(burst_probability, num_sequences))

    new_num_sequences <- sum(burst_numbers)
    if (new_num_sequences > num_sequences) {

      create_new_sequence <- function(index, sequences, burst_numbers, recombination_rate) {
        seq1 <- sequences[[select_sequence(burst_numbers)]]
        seq2 <- sequences[[select_sequence(burst_numbers)]]
        new_sequence <- recombine(list(seq1, seq2), recombination_rate)
        return(new_sequence)
      }
      # generate a set of new sequences for each burst event
      new_sequences <-  purrr::map((num_sequences+1):new_num_sequences,
                                   create_new_sequence,
                                   sequences,
                                   burst_numbers, recombination_rate)
      burst_mean <- update_burst_mean(burst_mean, num_sequences, new_num_sequences)
      sequences <- c(sequences, new_sequences)
      if (max_copy_number > 0) {
          sequences <- kill_sequences(sequences, max_copy_number)
      }
    }
    new_row <- c(current_time, list(list(sequences)))
    curr_data_seq[1, ] <- new_row
    pair_data_new <- pairwise_distances_bursts(curr_data_seq, current_time, dist_method)

    if (iter %% time_to_seq_dump == 0) {
      all_data_seq[iter, ] <- new_row
    }
    if (iter %% time_to_matrix_dump == 0) {
      pair_data <- rbind(pair_data, pair_data_new)
    }
    summ_pair_data_new <- get_summary_stats_timestep(pair_data_new, current_time)
    summ_pair_data <- rbind(summ_pair_data, summ_pair_data_new)

    init_data_all <- (get_distance_to_initial_timestep(
                        curr_data_seq, initial_sequence, current_time, dist_method))
    summ_init_data_new <- init_data_all$summary_table
    summ_init_data <- rbind(summ_init_data, summ_init_data_new)

    init_data_dist_new <- convert_init_dist_to_df(init_data_all, current_time)
    init_data_dist <- rbind(init_data_dist, init_data_dist_new)
  }

  return(list(initial_sequence=initial_sequence,
              summ_pair_data=summ_pair_data,
              summ_init_data=summ_init_data,
              all_data_seq=all_data_seq,
              pair_data=pair_data,
              init_data_dist=init_data_dist,
              curr_data_seq=curr_data_seq))

}
sams25/rcombinator_old documentation built on May 28, 2019, 8:40 a.m.