R/events_counter.R

Defines functions rate_gl count_gl effective_rate count_events

Documented in count_events count_gl effective_rate rate_gl

#' Calculate the rate of host-repertoire evolution
#'
#' Calculate the number of host gains, host losses, and the
#'   effective rate of host repertoire evolution.
#'
#' @param history A data frame containing the character history produced by
#'   RevBayes and read by `read_history()`.
#' @param tree A phylogenetic tree of the symbiont clade
#'
#' @name events_counter
NULL

#' @describeIn events_counter Get the average number of events along the symbiont tree and the highest posterior density interval with 95% probability (HPD95), based on all MCMC iterations in `history`.
#' @export
#' @importFrom rlang .data
#'
#' @examples
#' # read data that comes with the package
#' data_path <- system.file("extdata", package = "evolnets")
#' tree <- read_tree_from_revbayes(paste0(data_path,"/tree_pieridae.tre"))
#' history <- read_history(paste0(data_path,"/history_thin_pieridae.txt"), burnin = 0)
#'
#' # all events
#' n_events <- count_events(history)
#' rate <- effective_rate(history,tree)
#'
#' # gains and losses separately
#' gl_events <- count_gl(history)
#' gl_rates <- rate_gl(history, tree)
count_events <- function(history){
  if (!is.data.frame(history)) {
    stop('`history` should be a data.frame, usually generated by `read_history()`')
  }
  if (!all(c('node_index', 'iteration', 'transition_type') %in% names(history))) {
    stop('`history` needs to have columns `node_index`, `iteration` and `transition_type`.')
  }

  root <- max(history$node_index)
  dist_events <- history %>%
  dplyr::filter(.data$node_index < root,
                .data$transition_type == 'anagenetic') %>%
  dplyr::group_by(.data$iteration) %>%
    dplyr::summarise(n_events = n()) %>%
    dplyr::pull(.data$n_events)

  mean <- mean(dist_events)
  events_mcmc <- coda::mcmc(dist_events)
  hpd <- coda::HPDinterval(events_mcmc)

  out <- data.frame(mean = mean, HPD95 = hpd)
  rownames(out) <- NULL

  return(out)
}


#' @describeIn events_counter Get the effective rate of evolution, i.e. number
#'   of events per branch unit, along each tree branch. Mean and HP95 are outputted.
#' @export
effective_rate <- function(history, tree) {
  # history will be checked in count_events
  if (!inherits(tree, 'phylo')) stop('`tree` should be a phylogeny of class `phylo`.')

  nev <- count_events(history)
  tree_length <- sum(tree$edge.length)
  rate <- nev/tree_length

  return(rate)
}


#' @describeIn events_counter Get the average number of host gains and host losses
#' @export
count_gl <- function(history) {
  if (!is.data.frame(history)) {
    stop('`history` should be a data.frame, usually generated by `read_history()`')
  }
  if (!all(c('node_index', 'iteration', 'transition_type', 'start_state', 'end_state')
           %in% names(history))) {
    stop('`history` needs to have columns `node_index`, `iteration`, `transition_type`, `start_date` and `end_date`.')
  }

  root <- max(history$node_index)

  filtered_history <- history %>%
    dplyr::filter(.data$transition_type == "anagenetic", .data$node_index < root) %>%
    dplyr::select(.data$iteration, .data$start_state, .data$end_state)

  # str_split_fixed creates a matrix of size [n_chars, n_obs]
  str_size <- stringr::str_length(filtered_history$start_state[1])
  starts <- stringr::str_split_fixed(filtered_history$start_state, "", str_size)
  ends   <- stringr::str_split_fixed(filtered_history$end_state,   "", str_size)

  # Change to numeric:
  storage.mode(starts) <- storage.mode(ends) <- 'numeric'

  # Compare the sums
  start_sums <- rowSums(starts)
  end_sums   <- rowSums(ends)
  gains <- sum(end_sums > start_sums)
  loss  <- sum(end_sums < start_sums)

  iters <- dplyr::n_distinct(filtered_history$iteration)

  ngains <- gains / iters
  nloss  <- loss  / iters

  gl <- c(ngains, nloss)
  names(gl) <- c("gains","losses")

  gl
}


#' @describeIn events_counter Get the average effective rate of host gain and host loss
#' @export
rate_gl <- function(history, tree) {
  # history will be checked in count_gl
  if (!inherits(tree, 'phylo')) stop('`tree` should be a phylogeny of class `phylo`.')

  gl <- count_gl(history)
  tree_length <- sum(tree$edge.length)
  rg <- gl[1]/tree_length
  rl <- gl[2]/tree_length

  rgl <- c(rg,rl)
  names(rgl) <- c("gain_rate","loss_rate")

  rgl

}
maribraga/evolnets documentation built on Feb. 3, 2025, 6:46 p.m.