```{setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )

This vignette demonstrates

 * When the PBD package samples as expected
 * When the PBD package samples in unexpected ways
    * `youngest` gives longer branches than `oldest`
    * `random` gives shortest branches than `youngest`
    * `random` gives longer branches than `oldest`
 * A solution to that problem

```{load_library}
library(raket)

Setup

Define a plotting function:

# Plot the results of PBD::pbd_sim
# '@param out the results of PBD::pbd_sim
plot <- function(out) {
  # Check input
  testit::assert("igtree.extant" %in% names(out))
  testit::assert("stree_youngest" %in% names(out))
  testit::assert("stree_oldest" %in% names(out))
  testit::assert("stree_random" %in% names(out))
  testit::assert(class(out$igtree.extant) == c("simmap", "phylo"))
  testit::assert(class(out$stree_youngest) == "phylo")
  testit::assert(class(out$stree_oldest) == "phylo")
  testit::assert(class(out$stree_random) == "phylo")

  graphics::par(mfrow = c(1, 4), mar = c(5, 4, 6, 2) + 0.3)
  cols <- stats::setNames(c("gray", "black"), c("i", "g"))
  phytools::plotSimmap(out$igtree.extant, colors = cols, fsize = 2)

  sum_youngest <- sum(out$stree_youngest$edge.length)
  sum_oldest <- sum(out$stree_oldest$edge.length)
  sum_random <- sum(out$stree_random$edge.length)

  ape::plot.phylo(out$stree_youngest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "youngest", format(round(sum_youngest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_random, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "random", format(round(sum_random, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_oldest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "oldest", format(round(sum_oldest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  graphics::par(mfrow = c(1, 1))
}

When the PBD package samples as expected

One expects that:

Here are some examples in which this is the case.

First, the simplest phylogeny possible:

```{find_expected_simplest, fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.1, sirg = 2, siri = 2, scenario = "expected", rng_seed = 42, max_n_subspecies = 3 ) plot(out)

For a phylogeny that is one bit more complex, sampling also works as expected:

```{find_expected_simpler, fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.1, sirg = 2, siri = 2, scenario = "expected",
  rng_seed = 44,
  min_n_subspecies = 4,
  max_n_subspecies = 4
)
plot(out)

For an even bigger phylogeny, sampling also works as expected:

```{find_expected_simple, fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.5, sirg = 1, siri = 2, scenario = "expected", rng_seed = 51, min_n_subspecies = 5, max_n_subspecies = 10, min_n_species = 3 ) plot(out)

## When the PBD package samples in unexpected ways

### 1. `youngest` gives longer branches than `oldest`

```{fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.5, sirg = 1, siri = 2, scenario = "ylto",
  rng_seed = 54,
  min_n_subspecies = 4,
  max_n_subspecies = 4,
  min_n_species = 3
)
plot(out)

2. random gives shortest branches than youngest

```{fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.5, sirg = 1, siri = 2, scenario = "rsty", rng_seed = 55, min_n_subspecies = 4, max_n_subspecies = 7, min_n_species = 3 ) plot(out)

### 3. `random` gives longer branches than `oldest`

```{fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.5, sirg = 1, siri = 2, scenario = "rlto",
  rng_seed = 57,
  min_n_subspecies = 4,
  max_n_subspecies = 7,
  min_n_species = 3
)
plot(out)

A solution to this problem

To deal with this problem of inconsistent sampling, two new sample methods have been added: shortest and longest. These sample methods always give the shortest and longest branches respectively.

Setup

The plotting function has to be altered to be able to show the new sample methods:

# Plot the results of PBD::pbd_sim
# '@param out the results of PBD::pbd_sim
plot <- function(out) {
  # Check input
  testit::assert("igtree.extant" %in% names(out))
  testit::assert("stree_youngest" %in% names(out))
  testit::assert("stree_oldest" %in% names(out))
  testit::assert("stree_random" %in% names(out))
  testit::assert("stree_shortest" %in% names(out))
  testit::assert("stree_longest" %in% names(out))
  testit::assert(class(out$igtree.extant) == c("simmap", "phylo"))
  testit::assert(class(out$stree_youngest) == "phylo")
  testit::assert(class(out$stree_oldest) == "phylo")
  testit::assert(class(out$stree_random) == "phylo")
  testit::assert(class(out$stree_shortest) == "phylo")
  testit::assert(class(out$stree_longest) == "phylo")

  graphics::par(mfrow = c(2, 3), mar = c(5, 4, 6, 2) + 0.3)
  cols <- stats::setNames(c("gray", "black"), c("i", "g"))
  phytools::plotSimmap(out$igtree.extant, colors = cols, fsize = 2)

  sum_youngest <- sum(out$stree_youngest$edge.length)
  sum_oldest <- sum(out$stree_oldest$edge.length)
  sum_random <- sum(out$stree_random$edge.length)
  sum_shortest <- sum(out$stree_shortest$edge.length)
  sum_longest <- sum(out$stree_longest$edge.length)

  ape::plot.phylo(out$stree_youngest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "youngest", format(round(sum_youngest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_random, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "random", format(round(sum_random, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_oldest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "oldest", format(round(sum_oldest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_shortest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "shortest", format(round(sum_shortest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  ape::plot.phylo(out$stree_longest, edge.width = 2, font = 1,
    label.offset = 0.1, cex = 2, cex.main = 0.75,
    main = paste(
      "\n\n", "longest", format(round(sum_longest, 2), nsmall = 2), "\n\n"))
  ape::add.scale.bar()
  graphics::par(mfrow = c(1, 1))
}

Examples

First, the phylogenies that were sampled fine when using 'youngest' and 'oldest'.

The simplest one:

```{find_expected_simplest, fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.1, sirg = 2, siri = 2, scenario = "expsl", rng_seed = 42, max_n_subspecies = 3 ) plot(out)

The phylogeny that is one bit more complex:

```{find_expected_simpler, fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.1, sirg = 2, siri = 2, scenario = "expsl",
  rng_seed = 44,
  min_n_subspecies = 4,
  max_n_subspecies = 4
)
plot(out)

And the even bigger phylogeny:

```{find_expected_simple, fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.5, sirg = 1, siri = 2, scenario = "expsl", rng_seed = 51, min_n_subspecies = 5, max_n_subspecies = 10, min_n_species = 3 ) plot(out)

Now, the phylogenies that didn't get sampled as expected:

`youngest` gives longer branches than `oldest`

```{fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.5, sirg = 1, siri = 2, scenario = "yltosl",
  rng_seed = 54,
  min_n_subspecies = 4,
  max_n_subspecies = 4,
  min_n_species = 3
)
plot(out)

random gives shortest branches than youngest

```{fig.width=7, fig.height=5} out <- becosys::pbd_find_scenario( scr = 0.5, sirg = 1, siri = 2, scenario = "rstysl", rng_seed = 55, min_n_subspecies = 4, max_n_subspecies = 7, min_n_species = 3 ) plot(out)

`random` gives longer branches than `oldest`

```{fig.width=7, fig.height=5}
out <- becosys::pbd_find_scenario(
  scr = 0.5, sirg = 1, siri = 2, scenario = "rltosl",
  rng_seed = 57,
  min_n_subspecies = 4,
  max_n_subspecies = 7,
  min_n_species = 3
)
plot(out)


richelbilderbeek/becosys documentation built on Oct. 19, 2020, 9:20 a.m.