Estimating initial branch lengths

library(datelife)
library(phangorn)

Ideally, to perform a phylogenetic dating analysis, we require some estimate of initial branch lengths.

datelife does this by extracting DNA sequence data from BOLD.

The function make_bold_otol_tree() does all the work:

make_bold_otol_tree(input = "Canis")

To exemplify how the function works under the hood, we will use a DNA sequence alignment data set provided in the phangorn package:

data(Laurasiatherian)
Laurasiatherian
#> 47 sequences with 3179 character and 1605 different site patterns.
#> The states are a c g t
common_names <- names(Laurasiatherian)
common_names
#>  [1] "Platypus"   "Wallaroo"   "Possum"     "Bandicoot"  "Opposum"   
#>  [6] "Armadillo"  "Elephant"   "Aardvark"   "Tenrec"     "Hedghog"   
#> [11] "Gymnure"    "Mole"       "Shrew"      "Rbat"       "FlyingFox" 
#> [16] "RyFlyFox"   "FruitBat"   "LongTBat"   "Horse"      "Donkey"    
#> [21] "WhiteRhino" "IndianRhin" "Pig"        "Alpaca"     "Cow"       
#> [26] "Sheep"      "Hippo"      "FinWhale"   "BlueWhale"  "SpermWhale"
#> [31] "Rabbit"     "Pika"       "Squirrel"   "Dormouse"   "GuineaPig" 
#> [36] "Mouse"      "Vole"       "CaneRat"    "Baboon"     "Human"     
#> [41] "Loris"      "Cebus"      "Cat"        "Dog"        "HarbSeal"  
#> [46] "FurSeal"    "GraySeal"

Get scientific names from common names:

taxize_names <- taxize::comm2sci(common_names)
length(taxize_names)
#> [1] 47

is_empty <- lapply(taxize_names, length) == 0

taxize_names[is_empty] <- "NA"

taxon_names <- unlist(taxize_names)
names(taxon_names[is_empty])
#>  [1] "Wallaroo"   "Possum"     "Bandicoot"  "Opposum"    "Elephant"  
#>  [6] "Aardvark"   "Tenrec"     "Hedghog"    "Gymnure"    "Mole"      
#> [11] "Shrew"      "Rbat"       "FlyingFox"  "RyFlyFox"   "FruitBat"  
#> [16] "LongTBat"   "WhiteRhino" "IndianRhin" "Hippo"      "FinWhale"  
#> [21] "BlueWhale"  "SpermWhale" "Pika"       "Squirrel"   "Dormouse"  
#> [26] "GuineaPig"  "Vole"       "CaneRat"    "Loris"      "Cebus"     
#> [31] "HarbSeal"   "FurSeal"    "GraySeal"

Manually add scientific names that were not found with taxize:

# rphylotastic::taxa_common_to_scientific(common_names)
taxon_names["Wallaroo"] <- "Macropus robustus"
taxon_names["Possum"] <- "Trichosurus"
taxon_names["Bandicoot"] <- "Perameles"
taxon_names["Opposum"] <- "Didelphis marsupialis"
taxon_names["Elephant"] <- "Elephas maximus"
taxon_names["Aardvark"] <- "Orycteropus afer"
taxon_names["Tenrec"] <- "Hemicentetes"
taxon_names["Hedghog"] <- "Echinops"
taxon_names["Gymnure"] <- "Echinosorex gymnura"
taxon_names["Mole"] <- "Talpa"
taxon_names["Shrew"] <- "Sorex"
taxon_names["Rbat"] <- "Lasiurus borealis"
taxon_names["FlyingFox"] <- "Pteropus alecto"
taxon_names["RyFlyFox"] <- "Pteropus aruensis"
taxon_names["FruitBat"] <- "Desmodus rotundus"
taxon_names["LongTBat"] <- "Chalinolobus tuberculatus"
taxon_names["WhiteRhino"] <- "Ceratotherium simum"
taxon_names["IndianRhin"] <- "Rhinoceros unicornis"
taxon_names["Hippo"] <- "Hippopotamus amphibius"
taxon_names["FinWhale"] <- "Balaenoptera physalus"
taxon_names["BlueWhale"] <- "Balaenoptera musculus"
taxon_names["SpermWhale"] <- "Physeter macrocephalus"
taxon_names["Pika"] <- "Ochotona"
taxon_names["Squirrel"] <- "Sciurini"
taxon_names["Dormouse"] <- "Gliridae"
taxon_names["GuineaPig"] <- "Cavia porcellus"
taxon_names["Vole"] <- "Arvicolinae"
taxon_names["CaneRat"] <- "Thryonomys"
taxon_names["Loris"] <- "Lorisinae"
taxon_names["Cebus"] <- "Cebus"
taxon_names["HarbSeal"] <- "Phoca vitulina"
taxon_names["FurSeal"] <- "Otariidae"
taxon_names["GraySeal"] <- "Halichoerus grypus"

Get datelife query data:

query <- datelife::make_datelife_query2(input = taxon_names)
#> ---> Phylo-processing 'input'.
#> * 'input' is not a phylogeny.
#> ---> Runnning TNRS to match to reference taxonomy ott.
#>                                                                    
  |================================================================| 100%
data.frame(query$cleaned_names, query$tnrs_names)
#>          query.cleaned_names                 query.tnrs_names
#> 1   Ornithorhynchus anatinus         Ornithorhynchus anatinus
#> 2          Macropus robustus              Osphranter robustus
#> 3                Trichosurus                      Trichosurus
#> 4                  Perameles                        Perameles
#> 5      Didelphis marsupialis            Didelphis marsupialis
#> 6                Dasypodidae                      Dasypodidae
#> 7            Elephas maximus                  Elephas maximus
#> 8           Orycteropus afer                 Orycteropus afer
#> 9               Hemicentetes                     Hemicentetes
#> 10                  Echinops Echinops (genus in Opisthokonta)
#> 11       Echinosorex gymnura              Echinosorex gymnura
#> 12                     Talpa                            Talpa
#> 13                     Sorex                            Sorex
#> 14         Lasiurus borealis                Lasiurus borealis
#> 15           Pteropus alecto                  Pteropus alecto
#> 16         Pteropus aruensis                Pteropus aruensis
#> 17         Desmodus rotundus                Desmodus rotundus
#> 18 Chalinolobus tuberculatus        Chalinolobus tuberculatus
#> 19            Equus caballus                   Equus caballus
#> 20              Equus asinus                     Equus asinus
#> 21       Ceratotherium simum              Ceratotherium simum
#> 22      Rhinoceros unicornis             Rhinoceros unicornis
#> 23                Sus scrofa                       Sus scrofa
#> 24             Vicugna pacos                    Vicugna pacos
#> 25                Bos taurus                       Bos taurus
#> 26                Ovis aries                       Ovis aries
#> 27    Hippopotamus amphibius           Hippopotamus amphibius
#> 28     Balaenoptera physalus            Balaenoptera physalus
#> 29     Balaenoptera musculus            Balaenoptera musculus
#> 30    Physeter macrocephalus                 Physeter catodon
#> 31     Oryctolagus cuniculus            Oryctolagus cuniculus
#> 32                  Ochotona                         Ochotona
#> 33                  Sciurini                         Sciurini
#> 34                  Gliridae                         Gliridae
#> 35           Cavia porcellus                  Cavia porcellus
#> 36              Mus musculus                     Mus musculus
#> 37               Arvicolinae                      Arvicolinae
#> 38                Thryonomys                       Thryonomys
#> 39               Papio papio                      Papio papio
#> 40              Homo sapiens                     Homo sapiens
#> 41                 Lorisinae                        Corixinae
#> 42                     Cebus                            Cebus
#> 43               Felis catus                      Felis catus
#> 44    Canis lupus familiaris           Canis lupus familiaris
#> 45            Phoca vitulina                   Phoca vitulina
#> 46                 Otariidae                        Otariidae
#> 47        Halichoerus grypus               Halichoerus grypus

Get a topology:

topology <- rotl::tol_induced_subtree(ott_ids = query$ott_ids, label_format = "id") 
#> 
Progress [---------------------------------] 0/350 (  0) ?s
Progress [==============================] 350/350 (100)  0s

topology_names <- rotl::tol_induced_subtree(ott_ids = query$ott_ids, label_format = "name")$tip.label
#> 
Progress [---------------------------------] 0/350 (  0) ?s
Progress [==============================] 350/350 (100)  0s

topology <- ape::collapse.singles(topology)
index <- match(topology$tip.label, paste0("ott", query$ott_ids))
data.frame(topology$tip.label, query$ott_ids[index])
#>    topology.tip.label query.ott_ids.index.
#> 1           ott542509               542509
#> 2           ott664070               664070
#> 3           ott744000               744000
#> 4           ott692681               692681
#> 5           ott649553               649553
#> 6           ott425409               425409
#> 7           ott864596               864596
#> 8           ott644237               644237
#> 9           ott513904               513904
#> 10          ott770315               770315
#> 11          ott217260               217260
#> 12           ott70819                70819
#> 13          ott490099               490099
#> 14          ott276851               276851
#> 15          ott226193               226193
#> 16          ott226190               226190
#> 17          ott510762               510762
#> 18          ott730013               730013
#> 19          ott906301               906301
#> 20         ott1068202              1068202
#> 21         ott1068218              1068218
#> 22         ott1034198              1034198
#> 23         ott1087496              1087496
#> 24          ott970126               970126
#> 25           ott61860                61860
#> 26          ott238431               238431
#> 27          ott813028               813028
#> 28         ott3613485              3613485
#> 29          ott698422               698422
#> 30         ott1040694              1040694
#> 31          ott749638               749638
#> 32          ott247333               247333
#> 33          ott563166               563166
#> 34          ott226394               226394
#> 35          ott222367               222367
#> 36         ott1027567              1027567
#> 37          ott541928               541928
#> 38          ott222356               222356
#> 39          ott542053               542053
#> 40          ott561087               561087
#> 41          ott273244               273244
#> 42          ott372367               372367
#> 43          ott323243               323243
#> 44          ott683256               683256
#> 45          ott919176               919176
#> 46          ott962377               962377
#> 47          ott571895               571895
a <- query$cleaned_names %in% query$cleaned_names[index]
query$cleaned_names[!a]
#> character(0)

data.frame(query$cleaned_names[index], taxon_names[index])
#>            query.cleaned_names.index.        taxon_names.index.
#> Mouse                    Mus musculus              Mus musculus
#> Vole                      Arvicolinae               Arvicolinae
#> GuineaPig             Cavia porcellus           Cavia porcellus
#> CaneRat                    Thryonomys                Thryonomys
#> Squirrel                     Sciurini                  Sciurini
#> Dormouse                     Gliridae                  Gliridae
#> Rabbit          Oryctolagus cuniculus     Oryctolagus cuniculus
#> Pika                         Ochotona                  Ochotona
#> Baboon                    Papio papio               Papio papio
#> Human                    Homo sapiens              Homo sapiens
#> Cebus                           Cebus                     Cebus
#> Sheep                      Ovis aries                Ovis aries
#> Cow                        Bos taurus                Bos taurus
#> SpermWhale     Physeter macrocephalus    Physeter macrocephalus
#> FinWhale        Balaenoptera physalus     Balaenoptera physalus
#> BlueWhale       Balaenoptera musculus     Balaenoptera musculus
#> Hippo          Hippopotamus amphibius    Hippopotamus amphibius
#> Pig                        Sus scrofa                Sus scrofa
#> Alpaca                  Vicugna pacos             Vicugna pacos
#> Donkey                   Equus asinus              Equus asinus
#> Horse                  Equus caballus            Equus caballus
#> WhiteRhino        Ceratotherium simum       Ceratotherium simum
#> IndianRhin       Rhinoceros unicornis      Rhinoceros unicornis
#> LongTBat    Chalinolobus tuberculatus Chalinolobus tuberculatus
#> Rbat                Lasiurus borealis         Lasiurus borealis
#> FruitBat            Desmodus rotundus         Desmodus rotundus
#> FlyingFox             Pteropus alecto           Pteropus alecto
#> RyFlyFox            Pteropus aruensis         Pteropus aruensis
#> HarbSeal               Phoca vitulina            Phoca vitulina
#> GraySeal           Halichoerus grypus        Halichoerus grypus
#> FurSeal                     Otariidae                 Otariidae
#> Dog            Canis lupus familiaris    Canis lupus familiaris
#> Cat                       Felis catus               Felis catus
#> Shrew                           Sorex                     Sorex
#> Mole                            Talpa                     Talpa
#> Gymnure           Echinosorex gymnura       Echinosorex gymnura
#> Elephant              Elephas maximus           Elephas maximus
#> Hedghog                      Echinops                  Echinops
#> Tenrec                   Hemicentetes              Hemicentetes
#> Aardvark             Orycteropus afer          Orycteropus afer
#> Armadillo                 Dasypodidae               Dasypodidae
#> Wallaroo            Macropus robustus         Macropus robustus
#> Possum                    Trichosurus               Trichosurus
#> Bandicoot                   Perameles                 Perameles
#> Opposum         Didelphis marsupialis     Didelphis marsupialis
#> Platypus     Ornithorhynchus anatinus  Ornithorhynchus anatinus
#> Loris                       Lorisinae                 Lorisinae
topology$tip.label <- names(taxon_names[index])

Get an NJ tree:

# get NJ tree
dm <- dist.hamming(Laurasiatherian)
tree_nj <- NJ(dm)
# parsimony(tree_nj, Laurasiatherian)
plot(tree_nj)

NJ tree


Get branch lengths with ACCTRAN algorithm (deltran is not available in R):

# names(Laurasiatherian)
tree_acctran <- phangorn::acctran(tree = tree_nj, 
                                  data = Laurasiatherian)
plot(tree_acctran, cex = 0.8)

ACCTRAN tree

Optimize branch lengths to get maximum likelihood:

pml <- phangorn::pml(tree_acctran, data = Laurasiatherian)
tree_pml <- phangorn::optim.pml(pml, data = Laurasiatherian)
#> optimize edge weights:  -207130.4 --> -92073.04 
#> optimize edge weights:  -92073.04 --> -60586.86 
#> optimize edge weights:  -60586.86 --> -54303.67 
#> optimize edge weights:  -54303.67 --> -54303.67
plot(tree_pml, cex = 0.8)

Optimized tree

data.frame(nj = ape::branching.times(tree_nj), 
           acctran = ape::branching.times(tree_acctran), 
           optim_pml = ape::branching.times(tree_pml$tree))
#>             nj acctran    optim_pml
#> 48 0.075159106   321.0  0.107064350
#> 49 0.074663666   284.0  0.102593327
#> 50 0.073681576   244.0  0.097146781
#> 51 0.072808761   230.0  0.092561163
#> 52 0.071237434   160.0  0.080246326
#> 53 0.070852888   184.0  0.085875992
#> 54 0.072402082   222.0  0.094843463
#> 55 0.070159654   175.0  0.086331259
#> 56 0.070158809   149.0  0.079683763
#> 57 0.068402201    71.5  0.061969166
#> 58 0.066330555   143.0  0.074316747
#> 59 0.066063222   116.5  0.072728795
#> 60 0.063112914    79.0  0.062921450
#> 61 0.062316865    89.5  0.058590132
#> 62 0.060984052    27.5  0.053168778
#> 63 0.059516822    -1.0  0.049363834
#> 64 0.060010969    70.0  0.060093467
#> 65 0.058745717    34.5  0.052868434
#> 66 0.068287715   121.0  0.079088325
#> 67 0.067707842   131.5  0.070542031
#> 68 0.057316264   -25.0  0.038478291
#> 69 0.055307172   -85.0  0.029719556
#> 70 0.055854838   -11.0  0.041133191
#> 71 0.053485916  -141.0  0.021404320
#> 72 0.061528553    91.0  0.054118927
#> 73 0.054099932    18.5  0.045794155
#> 74 0.052307954   -68.5  0.035347826
#> 75 0.052921072   -39.0  0.034727448
#> 76 0.062130184   205.0  0.072165304
#> 77 0.061680356    98.5  0.056499524
#> 78 0.053837166    24.0  0.045532404
#> 79 0.038484053  -124.0  0.015848828
#> 80 0.039374093  -119.5  0.013503246
#> 81 0.036273689  -103.0  0.008657362
#> 82 0.034213936  -249.5 -0.009260506
#> 83 0.047525047   -53.0  0.034485225
#> 84 0.042017855   -74.5  0.020045102
#> 85 0.024609363  -154.0  0.005832687
#> 86 0.025461698   -60.0  0.011005829
#> 87 0.037238301    70.0  0.030054865
#> 88 0.016952218  -311.0 -0.027997263
#> 89 0.016785633  -195.0 -0.010363564
#> 90 0.011732705  -253.5 -0.017033589
#> 91 0.009098229  -317.5 -0.028687772
#> 92 0.001053604  -385.5 -0.048094863

Using OpenTree topology:

# ape::comparePhylo(x = tree_nj, y = tree_acctran)

otol_acctran <- phangorn::acctran(tree = ape::unroot(topology), 
                                  data = Laurasiatherian)

# get likelihood of acctran branch lengths and alignment
otol_pml <- phangorn::pml(otol_acctran, data = Laurasiatherian)
# optimize branch lengths
otol_optim <-  phangorn::optim.pml(otol_pml)
#> optimize edge weights:  -207130.4 --> -106097.5 
#> optimize edge weights:  -106097.5 --> -106096.6 
#> optimize edge weights:  -106096.6 --> -106096.6

data.frame(otol_acctran = ape::branching.times(otol_acctran), 
           otol_optim_pml = ape::branching.times(otol_optim$tree))
#>                        otol_acctran otol_optim_pml
#> ott244265                     136.0   8.100226e-02
#> ott229558                     -48.0   6.705178e-02
#> ott683263                    -163.0   6.705177e-02
#> ott5334778                   -205.0   6.345339e-02
#> ott392222                    -264.5   4.802316e-02
#> mrcaott42ott29157            -317.5   4.802315e-02
#> mrcaott42ott10477            -363.0   4.802313e-02
#> mrcaott42ott38834            -426.5   3.757718e-02
#> mrcaott102ott739             -565.0  -1.873340e-02
#> mrcaott38834ott45520         -510.0  -5.291179e-02
#> ott44559                     -412.5   3.042209e-02
#> ott644242                    -407.0  -8.945198e+01
#> ott386195                    -396.5  -1.319520e+02
#> mrcaott786ott83926           -491.0  -2.264520e+02
#> ott392223                    -250.5   4.934326e-02
#> mrcaott1548ott4697           -290.0   3.508578e-02
#> mrcaott1548ott6790           -325.0   2.671321e-02
#> mrcaott1548ott3021           -353.0   1.838742e-02
#> ott622916                    -411.0   5.608031e-03
#> mrcaott1548ott21987          -457.0  -1.454455e-03
#> mrcaott1548ott5256           -511.0  -1.051769e-02
#> ott768677                    -589.0  -4.083177e-02
#> ott7655791                   -556.0  -2.006278e-02
#> mrcaott5256ott44568          -658.0  -5.017645e-02
#> mrcaott44568ott226190        -721.5  -6.958462e-02
#> ott541948                    -397.0   6.043458e-03
#> ott541951                    -476.5  -2.445224e-02
#> ott1034218                   -458.5  -1.304954e-02
#> ott574724                    -386.0   1.230833e-02
#> mrcaott6790ott6794           -462.5  -6.137077e-03
#> mrcaott10323ott61857         -508.0  -8.318380e-02
#> ott813030                    -488.5  -2.606782e-02
#> mrcaott4697ott6940           -349.0   2.214181e-02
#> ott827263                    -408.0   1.153504e-02
#> mrcaott22064ott95364         -486.0  -1.026178e-02
#> mrcaott22064ott6145546       -561.5  -3.342281e-02
#> mrcaott3285ott17250          -321.0  -5.802449e-02
#> mrcaott3285ott60434          -359.0  -1.005802e+01
#> mrcaott72667ott180375        -239.0   4.144542e-02
#> ott746703                    -318.0  -2.134463e-03
#> mrcaott82081ott292026        -418.5  -1.005021e+02
#> ott922729                    -508.5  -1.005021e+02
#> mrcaott6735ott29033          -259.0  -2.109329e+02
#> mrcaott6735ott70811          -321.0  -2.109329e+02
#> mrcaott6735ott44497          -385.0  -2.109329e+02
plot(otol_pml, main="ACCTRAN branches", cex = 0.8)   # top = default 

ACCTRAN tree

plot(otol_optim, main="optimized branches", cex = 0.8)   # bottom = optimized branch lengths

Optimized tree



Try the datelife package in your browser

Any scripts or data that you put into this service are public.

datelife documentation built on July 10, 2023, 2:02 a.m.