Nothing
#' Draw allele frequencies for subpopulations related by a tree
#'
#' The allele frequency matrix `P` for `m_loci` loci (rows) and `k_subpops` subpopulations (columns) are drawn from the Balding-Nichols distribution.
#' If the allele frequency in the parent node is `p` and the FST parameter of the child node from the parent node is `F`, then the allele frequency in the child node is drawn from
#' `rbeta( 1, nu * p, nu * ( 1 - p ) )`,
#' where `nu <- 1 / F - 1`.
#' This function iterates drawing allele frequencies through the tree structure, therefore allowing covariance between subpopulations that share branches.
#'
#' @param p_anc The scalar or length-`m_loci` vector of ancestral allele frequencies per locus.
#' @param tree_subpops The coancestry tree relating the `k_subpops` subpopulations.
#' Must be a `phylo` object from the `ape` package (see [ape::read.tree()]).
#' The edge lengths of this tree must be the FST parameters relating parent and child subpopulations.
#' @param m_loci If `p_anc` is scalar, optionally provide the desired number of loci (lest only one locus be simulated).
#' Stops if both `length(p_anc) > 1` and `m_loci` is not `NA` and they disagree.
#' @param nodes If `FALSE` (default), returns allele frequencies of "tip" subpopulations only; otherwise returns all allele frequencies, including internal nodes.
#'
#' @return The `m_loci`-by-`k_subpops` matrix of independent subpopulation allele frequencies.
#' If `nodes = FALSE`, columns include only tip subpopulations.
#' If `nodes = TRUE`, internal node subpopulations are also included (in this case the input `p_anc` is returned in the column corresponding to the root node).
#' In all cases subpopulations are ordered as indexes in the tree, which normally implies the tip nodes are listed first, followed by the internal nodes (as in `tree_subpops$edge` matrix, see [ape::read.tree()] for details).
#' The `tree_subpops` tip and node names are copied to the column names of this output matrix (individual names may be blank if missing in tree (such as for internal nodes); column names are `NULL` if all `tree_subpops` tip labels were blank, regardless of internal node labels).
#' If `p_anc` is length-`m_loci` with names, these names are copied to the row names of this output matrix.
#'
#' @examples
#' # for simulating a tree with `rtree`
#' library(ape)
#'
#' # a typical, non-trivial example
#' # number of tip subpopulations
#' k_subpops <- 3
#' # number of loci
#' m_loci <- 10
#' # random vector of ancestral allele frequencies
#' p_anc <- draw_p_anc(m_loci)
#' # simulate tree
#' tree_subpops <- rtree( k_subpops )
#' # matrix of intermediate subpop allele freqs
#' p_subpops <- draw_p_subpops_tree(p_anc, tree_subpops)
#'
#' @seealso
#' [draw_p_subpops()] for version for independent subpopulations.
#'
#' For "phylo" tree class, see [ape::read.tree()]
#'
#' @export
draw_p_subpops_tree <- function(
p_anc,
tree_subpops,
m_loci = NA,
nodes = FALSE
) {
# basic param checking
if ( missing( p_anc ) )
stop('ancestral allele frequencies `p_anc` are required!')
if ( missing( tree_subpops ) )
stop('`tree_subpops` (FST) scalar or vector are required!')
# run through special tree validator
validate_coanc_tree( tree_subpops, name = 'tree_subpops' )
if ( !is.null( tree_subpops$root.edge ) )
warning( 'Input `tree_subpops` has a root edge (`tree_subpops$root.edge = ', tree_subpops$root.edge, '`) that will be ignored by function `draw_p_subpops_tree`!' )
# look at data ranges
# all of these are probabilities so problems happen when they're out of range
if ( any( p_anc < 0 ) )
stop( '`p_anc` cannot be negative!' )
if ( any( p_anc > 1 ) )
stop( '`p_anc` cannot exceed 1!' )
# number of loci to simulate
# allow p_anc to be a scalar, m_loci can be passed separately
# actual length
m_loci_p <- length(p_anc)
# make sure both things were not set and contradict each other
if (
m_loci_p > 1 && # length(p_anc) > 1
!is.na(m_loci) && # and m_loci was also set
m_loci != m_loci_p # and they disagree
)
stop('length of `p_anc` (', m_loci_p, ') disagrees with passed parameter `m_loci` (', m_loci, ')')
# if this is missing, always set to actual value (even if it is 1)
if ( is.na( m_loci ) )
m_loci <- m_loci_p
# number of subpopulations to simulate
k_subpops <- ape::Ntip( tree_subpops )
# and total number of nodes (initially we keep them all in the output matrix
k_subpops_all <- max( tree_subpops$edge )
# matrix of intermediate allele frequencies we want
p_subpops <- matrix( nrow = m_loci, ncol = k_subpops_all )
# it'd be nice to keep names if they're available
# they're separate, unfortunately
# need to join them before assigning
# 1) tip labels
# unfortunately this is always defined, even when there are no labels
names_subpops <- tree_subpops$tip.label
if ( any( names_subpops != '' ) ) {
# we have non-trivial labels!
# NOTE: we won't incorporate node labels if there aren't any tip labels (that'd be weird)
# 2) node labels, these are NULL if missing
names_node <- tree_subpops$node.label
# fill with empty strings, since the final names must have length `k_subpops_all`
if ( is.null( names_node ) )
names_node <- rep.int( '', tree_subpops$Nnode )
# concatenate what we have
names_subpops <- c( names_subpops, names_node )
# check final length
if ( length( names_subpops ) != k_subpops_all )
stop( 'Names of tips and nodes have length ', length( names_subpops ), ', expected ', k_subpops_all, '! Is the `tree_subpops` object malformed?' )
# add names to matrix
colnames( p_subpops ) <- names_subpops
}
# now transfer `p_anc` names if possible
# p_anc can be scalar, so this only makes sense if the length matches (and names are defined)
if ( !is.null( names( p_anc ) ) && length( p_anc ) == m_loci )
rownames( p_subpops ) <- names( p_anc )
# algorithm is sensitive to edge ordering
# assumption is that we move from the root up, which is the reverse of postorder
order_edges <- rev( ape::postorder( tree_subpops ) )
# initialize the root AFs
# determine root node
# it is very first parent node (in reverse postorder)
j_root <- tree_subpops$edge[ order_edges[ 1 ], 1 ]
# make sure we got it right, should be 1 + the number of leaf nodes
# the code doesn't really depend on this assumption exactly, although it'd be very bad if j_root <= k_subpops
if ( j_root != k_subpops + 1 )
stop( 'The root node index in `tree_subpops$edge` (', j_root, ') does not match `k_subpops + 1` (', k_subpops + 1, ') where `k_subpops` is the number of tips! Is the `tree_subpops` object malformed?' )
# now copy input AFs into that "root" column
# this works if p_anc is a scalar as well as a length-m vector
p_subpops[ , j_root ] <- p_anc
# for extra safety, keep track of nodes that have been initialized with AFs
nodes_done <- j_root # just this one for now
# now we navigate the edges, which in the current structure move away from the root, so it's the perfect order for our operations
n_edges <- nrow( tree_subpops$edge )
# make sure "edge lengths" vector has right length
if ( length( tree_subpops$edge.length ) != n_edges )
stop( 'Length of `tree_subpops$edge.length` (', length( tree_subpops$edge.length ), ') does not match number of rows of `tree_subpops$edge` (', n_edges, ')! Is `tree_subpops` object malformed?' )
# start navigating (in reverse postorder!)
for ( e in order_edges ) {
# get parent and child nodes for this edge
j_parent <- tree_subpops$edge[ e, 1 ]
j_child <- tree_subpops$edge[ e, 2 ]
# edge length is FST we need
fst <- tree_subpops$edge.length[ e ]
# make sure parent has been processed already
if ( !( j_parent %in% nodes_done ) )
stop( 'Parent node index ', j_parent, ' has not yet been processed (nodes processed: ', toString( nodes_done ), ')! Is `tree_subpops` object malformed?' )
# make sure child hasn't been processed already
if ( j_child %in% nodes_done )
stop( 'Child node index ', j_child, ' has already been processed (nodes processed: ', toString( nodes_done ), ')! Is `tree_subpops` object malformed?' )
# get allele frequencies from parent column
p_parent <- p_subpops[ , j_parent ]
# transform FST into this other factor
nu <- 1 / fst - 1
# handle infinite "nu" special case
if ( is.infinite( nu ) ) {
# there is no drift from p_anc in this special case (inbr_subpops == 0)
# (coded separately because rbeta incorrectly returns 0.5 instead)
p_subpops[ , j_child ] <- p_parent
} else {
# draw all SNPs for this population, store immediately
p_subpops[ , j_child ] <- stats::rbeta( m_loci, nu * p_parent, nu * ( 1 - p_parent ) )
}
# add child node to processed nodes now
nodes_done <- c( nodes_done, j_child )
}
# check that all nodes were processed
nodes_exp <- 1 : k_subpops_all
nodes_mis <- setdiff( nodes_done, nodes_exp )
if ( length( nodes_mis ) > 0 )
stop( 'Some nodes were not processed: ', toString( nodes_mis ), '! Is `tree_subpops` object malformed?' )
# subset to return "tips" (leaf nodes) only if required
if ( !nodes )
p_subpops <- p_subpops[ , 1 : k_subpops, drop = FALSE ]
# done!
return( p_subpops )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.