# R/get_summary_stats.R

#' Compute summary statistics for distances between sequences
#'
#' For every integer time step from 1 up to the largest finite value in
#' \code{seq_data$time}, summarises the pairwise sequence distances recorded
#' at that time step.
#'
#' @param seq_data The data returned by a simulation. It must contain the
#'   columns \code{time}, \code{seq1}, \code{seq2} and \code{seq_dist}.
#' @return A data frame of summary statistics with one row per time step and
#'   the columns \code{time}, \code{num_seq} (largest non-missing value found
#'   in \code{seq1}/\code{seq2}, or \code{NA} when the time step has none),
#'   \code{mean_dist} (mean of the non-missing distances), \code{num_na}
#'   (number of missing distances) and \code{quantiles_dist} (a list column
#'   holding the result of \code{stats::quantile()} on the non-missing
#'   distances). An input without any finite, positive time yields a data
#'   frame with zero rows and the same columns.
#' @importFrom magrittr "%>%"
#' @export
get_summary_stats <- function(seq_data) {
  required_cols <- c("time", "seq1", "seq2", "seq_dist")
  missing_cols <- setdiff(required_cols, names(seq_data))
  if (length(missing_cols) > 0L) {
    stop(
      "seq_data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  times <- seq_data$time

  # max() of an empty or all-NA vector is -Inf/NA, which cannot be used as a
  # row count, so only finite times decide how many rows the summary gets.
  finite_times <- times[is.finite(times)]
  num_T <- 0L
  if (length(finite_times) > 0L) {
    num_T <- max(0L, as.integer(floor(max(finite_times))))
  }

  num_seq <- rep(NA_integer_, num_T)
  mean_dist <- rep(NA_real_, num_T)
  num_na <- rep(NA_integer_, num_T)
  quantiles_dist <- vector("list", num_T)

  # seq_len() yields an empty sequence when num_T is 0 (1:0 would not).
  for (t in seq_len(num_T)) {
    # which() drops comparisons that evaluate to NA. Plain indexing is used so
    # that a column named `t` in seq_data can never mask the loop variable.
    rows <- which(times == t)
    dists <- seq_data$seq_dist[rows]

    ids <- c(seq_data$seq1[rows], seq_data$seq2[rows])
    ids <- ids[!is.na(ids)]
    # Leave num_seq as NA for a time step without any sequence ids instead of
    # letting max() warn and return -Inf.
    if (length(ids) > 0L) {
      num_seq[t] <- max(ids)
    }

    mean_dist[t] <- mean(dists, na.rm = TRUE)
    num_na[t] <- sum(is.na(dists))
    quantiles_dist[[t]] <- stats::quantile(dists, na.rm = TRUE)
  }

  summary_table <- data.frame(
    time = seq_len(num_T),
    num_seq = num_seq,
    mean_dist = mean_dist,
    num_na = num_na
  )
  # Attached after construction so data.frame() does not try to expand the
  # list into separate columns.
  summary_table$quantiles_dist <- quantiles_dist
  summary_table
}
# sams25/rcombinator_old documentation built on May 28, 2019, 8:40 a.m.