# R/get_distance_to_initial_timestep.R

#' Compute summary statistics for distances to the initial sequence
#'
#' Keeps the rows of \code{seq_data} whose \code{time} equals \code{timestep},
#' takes the sequences stored in the first matching row, and measures the
#' distance from each of them to \code{init_seq}.
#'
#' @param seq_data The data returned by a simulation. Must have a \code{time}
#'   column and a \code{sequences} column (presumably a list-column holding the
#'   sequences present at each timestep).
#' @param init_seq The initial sequence that every current sequence is
#'   compared against.
#' @param timestep The timestep to summarise.
#' @param dist_method Which distance function to use: \code{"ape"} selects
#'   \code{stringdist_ape}, \code{"levenshtein"} selects
#'   \code{stringdist_levenshtein}.
#' @return A list with two elements: \code{summary_table}, a one-row data frame
#'   with columns \code{time}, \code{num_seq}, \code{mean_dist},
#'   \code{median_dist}, \code{num_na} and the list-column
#'   \code{quantiles_dist}; and \code{distances}, the vector of per-sequence
#'   distances to \code{init_seq}.
#' @importFrom magrittr "%>%"
#' @export
get_distance_to_initial_timestep <- function(seq_data, init_seq, timestep,
                                             dist_method)
{
  # which() drops rows whose time is NA instead of keeping them as all-NA rows.
  seq_data <- seq_data[which(seq_data$time == timestep), ]

  # nrow(), not length(): length() of a data frame counts its columns, so the
  # old check could never detect a timestep without data.
  if(nrow(seq_data) == 0)
  {
    stop("Sequence data provided has no data for timestep ", timestep)
  }

  # Resolve the distance function up front so that an unsupported method fails
  # with a clear message rather than "object 'dist_fun' not found" later on.
  if(!is.character(dist_method) || length(dist_method) != 1)
  {
    stop("dist_method must be a single string: \"ape\" or \"levenshtein\"")
  }
  dist_fun <- switch(
    dist_method,
    ape = stringdist_ape,
    levenshtein = stringdist_levenshtein,
    stop("Unknown dist_method '", dist_method,
         "'; expected \"ape\" or \"levenshtein\"")
  )

  # Only the first matching row is used.
  current_sequences <- seq_data$sequences[[1]]

  # dist_fun is handed the pair (current, initial); the first element of its
  # result is taken as the distance. vapply keeps the result a numeric vector
  # even when there are no sequences.
  distances <- vapply(current_sequences,
                      function(x) dist_fun(list(x, init_seq))[1],
                      numeric(1))

  summary_table <- data.frame(
    time = timestep,
    num_seq = length(current_sequences),
    mean_dist = mean(distances, na.rm = TRUE),
    median_dist = stats::median(distances, na.rm = TRUE),
    num_na = sum(is.na(distances))
  )
  # The quantile vector is stored whole in a one-element list-column.
  summary_table$quantiles_dist <- list(stats::quantile(distances, na.rm = TRUE))

  return(list(summary_table = summary_table, distances = distances))
}
# sams25/rcombinator_old documentation built on May 28, 2019, 8:40 a.m.