library(datelife) library(phangorn)
Ideally, to perform a phylogenetic dating analysis, we require some estimate of initial branch lengths.
datelife
does this by extracting DNA sequence data from BOLD.
The function make_bold_otol_tree()
does all the work:
make_bold_otol_tree(input = "Canis")
To exemplify how the function works under the hood, we will use a DNA sequence alignment data set provided in the phangorn
package:
data(Laurasiatherian) Laurasiatherian #> 47 sequences with 3179 character and 1605 different site patterns. #> The states are a c g t common_names <- names(Laurasiatherian) common_names #> [1] "Platypus" "Wallaroo" "Possum" "Bandicoot" "Opposum" #> [6] "Armadillo" "Elephant" "Aardvark" "Tenrec" "Hedghog" #> [11] "Gymnure" "Mole" "Shrew" "Rbat" "FlyingFox" #> [16] "RyFlyFox" "FruitBat" "LongTBat" "Horse" "Donkey" #> [21] "WhiteRhino" "IndianRhin" "Pig" "Alpaca" "Cow" #> [26] "Sheep" "Hippo" "FinWhale" "BlueWhale" "SpermWhale" #> [31] "Rabbit" "Pika" "Squirrel" "Dormouse" "GuineaPig" #> [36] "Mouse" "Vole" "CaneRat" "Baboon" "Human" #> [41] "Loris" "Cebus" "Cat" "Dog" "HarbSeal" #> [46] "FurSeal" "GraySeal"
Get scientific names from common names:
taxize_names <- taxize::comm2sci(common_names)
length(taxize_names) #> [1] 47 is_empty <- lapply(taxize_names, length) == 0 taxize_names[is_empty] <- "NA" taxon_names <- unlist(taxize_names) names(taxon_names[is_empty]) #> [1] "Wallaroo" "Possum" "Bandicoot" "Opposum" "Elephant" #> [6] "Aardvark" "Tenrec" "Hedghog" "Gymnure" "Mole" #> [11] "Shrew" "Rbat" "FlyingFox" "RyFlyFox" "FruitBat" #> [16] "LongTBat" "WhiteRhino" "IndianRhin" "Hippo" "FinWhale" #> [21] "BlueWhale" "SpermWhale" "Pika" "Squirrel" "Dormouse" #> [26] "GuineaPig" "Vole" "CaneRat" "Loris" "Cebus" #> [31] "HarbSeal" "FurSeal" "GraySeal"
Manually add scientific names that were not found with taxize
:
# rphylotastic::taxa_common_to_scientific(common_names) taxon_names["Wallaroo"] <- "Macropus robustus" taxon_names["Possum"] <- "Trichosurus" taxon_names["Bandicoot"] <- "Perameles" taxon_names["Opposum"] <- "Didelphis marsupialis" taxon_names["Elephant"] <- "Elephas maximus" taxon_names["Aardvark"] <- "Orycteropus afer" taxon_names["Tenrec"] <- "Hemicentetes" taxon_names["Hedghog"] <- "Echinops" taxon_names["Gymnure"] <- "Echinosorex gymnura" taxon_names["Mole"] <- "Talpa" taxon_names["Shrew"] <- "Sorex" taxon_names["Rbat"] <- "Lasiurus borealis" taxon_names["FlyingFox"] <- "Pteropus alecto" taxon_names["RyFlyFox"] <- "Pteropus aruensis" taxon_names["FruitBat"] <- "Desmodus rotundus" taxon_names["LongTBat"] <- "Chalinolobus tuberculatus" taxon_names["WhiteRhino"] <- "Ceratotherium simum" taxon_names["IndianRhin"] <- "Rhinoceros unicornis" taxon_names["Hippo"] <- "Hippopotamus amphibius" taxon_names["FinWhale"] <- "Balaenoptera physalus" taxon_names["BlueWhale"] <- "Balaenoptera musculus" taxon_names["SpermWhale"] <- "Physeter macrocephalus" taxon_names["Pika"] <- "Ochotona" taxon_names["Squirrel"] <- "Sciurini" taxon_names["Dormouse"] <- "Gliridae" taxon_names["GuineaPig"] <- "Cavia porcellus" taxon_names["Vole"] <- "Arvicolinae" taxon_names["CaneRat"] <- "Thryonomys" taxon_names["Loris"] <- "Lorisinae" taxon_names["Cebus"] <- "Cebus" taxon_names["HarbSeal"] <- "Phoca vitulina" taxon_names["FurSeal"] <- "Otariidae" taxon_names["GraySeal"] <- "Halichoerus grypus"
Get datelife query data:
query <- datelife::make_datelife_query2(input = taxon_names) #> ---> Phylo-processing 'input'. #> * 'input' is not a phylogeny. #> ---> Runnning TNRS to match to reference taxonomy ott. #> |================================================================| 100%
data.frame(query$cleaned_names, query$tnrs_names) #> query.cleaned_names query.tnrs_names #> 1 Ornithorhynchus anatinus Ornithorhynchus anatinus #> 2 Macropus robustus Osphranter robustus #> 3 Trichosurus Trichosurus #> 4 Perameles Perameles #> 5 Didelphis marsupialis Didelphis marsupialis #> 6 Dasypodidae Dasypodidae #> 7 Elephas maximus Elephas maximus #> 8 Orycteropus afer Orycteropus afer #> 9 Hemicentetes Hemicentetes #> 10 Echinops Echinops (genus in Opisthokonta) #> 11 Echinosorex gymnura Echinosorex gymnura #> 12 Talpa Talpa #> 13 Sorex Sorex #> 14 Lasiurus borealis Lasiurus borealis #> 15 Pteropus alecto Pteropus alecto #> 16 Pteropus aruensis Pteropus aruensis #> 17 Desmodus rotundus Desmodus rotundus #> 18 Chalinolobus tuberculatus Chalinolobus tuberculatus #> 19 Equus caballus Equus caballus #> 20 Equus asinus Equus asinus #> 21 Ceratotherium simum Ceratotherium simum #> 22 Rhinoceros unicornis Rhinoceros unicornis #> 23 Sus scrofa Sus scrofa #> 24 Vicugna pacos Vicugna pacos #> 25 Bos taurus Bos taurus #> 26 Ovis aries Ovis aries #> 27 Hippopotamus amphibius Hippopotamus amphibius #> 28 Balaenoptera physalus Balaenoptera physalus #> 29 Balaenoptera musculus Balaenoptera musculus #> 30 Physeter macrocephalus Physeter catodon #> 31 Oryctolagus cuniculus Oryctolagus cuniculus #> 32 Ochotona Ochotona #> 33 Sciurini Sciurini #> 34 Gliridae Gliridae #> 35 Cavia porcellus Cavia porcellus #> 36 Mus musculus Mus musculus #> 37 Arvicolinae Arvicolinae #> 38 Thryonomys Thryonomys #> 39 Papio papio Papio papio #> 40 Homo sapiens Homo sapiens #> 41 Lorisinae Corixinae #> 42 Cebus Cebus #> 43 Felis catus Felis catus #> 44 Canis lupus familiaris Canis lupus familiaris #> 45 Phoca vitulina Phoca vitulina #> 46 Otariidae Otariidae #> 47 Halichoerus grypus Halichoerus grypus
Get a topology:
topology <- rotl::tol_induced_subtree(ott_ids = query$ott_ids, label_format = "id") #> Progress [---------------------------------] 0/350 ( 0) ?s Progress [==============================] 350/350 (100) 0s topology_names <- rotl::tol_induced_subtree(ott_ids = query$ott_ids, label_format = "name")$tip.label #> Progress [---------------------------------] 0/350 ( 0) ?s Progress [==============================] 350/350 (100) 0s topology <- ape::collapse.singles(topology) index <- match(topology$tip.label, paste0("ott", query$ott_ids)) data.frame(topology$tip.label, query$ott_ids[index]) #> topology.tip.label query.ott_ids.index. #> 1 ott542509 542509 #> 2 ott664070 664070 #> 3 ott744000 744000 #> 4 ott692681 692681 #> 5 ott649553 649553 #> 6 ott425409 425409 #> 7 ott864596 864596 #> 8 ott644237 644237 #> 9 ott513904 513904 #> 10 ott770315 770315 #> 11 ott217260 217260 #> 12 ott70819 70819 #> 13 ott490099 490099 #> 14 ott276851 276851 #> 15 ott226193 226193 #> 16 ott226190 226190 #> 17 ott510762 510762 #> 18 ott730013 730013 #> 19 ott906301 906301 #> 20 ott1068202 1068202 #> 21 ott1068218 1068218 #> 22 ott1034198 1034198 #> 23 ott1087496 1087496 #> 24 ott970126 970126 #> 25 ott61860 61860 #> 26 ott238431 238431 #> 27 ott813028 813028 #> 28 ott3613485 3613485 #> 29 ott698422 698422 #> 30 ott1040694 1040694 #> 31 ott749638 749638 #> 32 ott247333 247333 #> 33 ott563166 563166 #> 34 ott226394 226394 #> 35 ott222367 222367 #> 36 ott1027567 1027567 #> 37 ott541928 541928 #> 38 ott222356 222356 #> 39 ott542053 542053 #> 40 ott561087 561087 #> 41 ott273244 273244 #> 42 ott372367 372367 #> 43 ott323243 323243 #> 44 ott683256 683256 #> 45 ott919176 919176 #> 46 ott962377 962377 #> 47 ott571895 571895 a <- query$cleaned_names %in% query$cleaned_names[index] query$cleaned_names[!a] #> character(0) data.frame(query$cleaned_names[index], taxon_names[index]) #> query.cleaned_names.index. taxon_names.index. #> Mouse Mus musculus Mus musculus #> Vole Arvicolinae Arvicolinae #> GuineaPig Cavia porcellus Cavia porcellus #> CaneRat Thryonomys Thryonomys #> Squirrel Sciurini Sciurini #> Dormouse Gliridae Gliridae #> Rabbit Oryctolagus cuniculus Oryctolagus cuniculus #> Pika Ochotona Ochotona #> Baboon Papio papio Papio papio #> Human Homo sapiens Homo sapiens #> Cebus Cebus Cebus #> Sheep Ovis aries Ovis aries #> Cow Bos taurus Bos taurus #> SpermWhale Physeter macrocephalus Physeter macrocephalus #> FinWhale Balaenoptera physalus Balaenoptera physalus #> BlueWhale Balaenoptera musculus Balaenoptera musculus #> Hippo Hippopotamus amphibius Hippopotamus amphibius #> Pig Sus scrofa Sus scrofa #> Alpaca Vicugna pacos Vicugna pacos #> Donkey Equus asinus Equus asinus #> Horse Equus caballus Equus caballus #> WhiteRhino Ceratotherium simum Ceratotherium simum #> IndianRhin Rhinoceros unicornis Rhinoceros unicornis #> LongTBat Chalinolobus tuberculatus Chalinolobus tuberculatus #> Rbat Lasiurus borealis Lasiurus borealis #> FruitBat Desmodus rotundus Desmodus rotundus #> FlyingFox Pteropus alecto Pteropus alecto #> RyFlyFox Pteropus aruensis Pteropus aruensis #> HarbSeal Phoca vitulina Phoca vitulina #> GraySeal Halichoerus grypus Halichoerus grypus #> FurSeal Otariidae Otariidae #> Dog Canis lupus familiaris Canis lupus familiaris #> Cat Felis catus Felis catus #> Shrew Sorex Sorex #> Mole Talpa Talpa #> Gymnure Echinosorex gymnura Echinosorex gymnura #> Elephant Elephas maximus Elephas maximus #> Hedghog Echinops Echinops #> Tenrec Hemicentetes Hemicentetes #> Aardvark Orycteropus afer Orycteropus afer #> Armadillo Dasypodidae Dasypodidae #> Wallaroo Macropus robustus Macropus robustus #> Possum Trichosurus Trichosurus #> Bandicoot Perameles Perameles #> Opposum Didelphis marsupialis Didelphis marsupialis #> Platypus Ornithorhynchus anatinus Ornithorhynchus anatinus #> Loris Lorisinae Lorisinae topology$tip.label <- names(taxon_names[index])
Get an NJ tree:
# get NJ tree dm <- dist.hamming(Laurasiatherian) tree_nj <- NJ(dm) # parsimony(tree_nj, Laurasiatherian) plot(tree_nj)
Get branch lengths with ACCTRAN algorithm (deltran is not available in R):
# names(Laurasiatherian) tree_acctran <- phangorn::acctran(tree = tree_nj, data = Laurasiatherian) plot(tree_acctran, cex = 0.8)
Optimize branch lengths to get maximum likelihood:
pml <- phangorn::pml(tree_acctran, data = Laurasiatherian) tree_pml <- phangorn::optim.pml(pml, data = Laurasiatherian) #> optimize edge weights: -207130.4 --> -92073.04 #> optimize edge weights: -92073.04 --> -60586.86 #> optimize edge weights: -60586.86 --> -54303.67 #> optimize edge weights: -54303.67 --> -54303.67 plot(tree_pml, cex = 0.8)
data.frame(nj = ape::branching.times(tree_nj), acctran = ape::branching.times(tree_acctran), optim_pml = ape::branching.times(tree_pml$tree)) #> nj acctran optim_pml #> 48 0.075159106 321.0 0.107064350 #> 49 0.074663666 284.0 0.102593327 #> 50 0.073681576 244.0 0.097146781 #> 51 0.072808761 230.0 0.092561163 #> 52 0.071237434 160.0 0.080246326 #> 53 0.070852888 184.0 0.085875992 #> 54 0.072402082 222.0 0.094843463 #> 55 0.070159654 175.0 0.086331259 #> 56 0.070158809 149.0 0.079683763 #> 57 0.068402201 71.5 0.061969166 #> 58 0.066330555 143.0 0.074316747 #> 59 0.066063222 116.5 0.072728795 #> 60 0.063112914 79.0 0.062921450 #> 61 0.062316865 89.5 0.058590132 #> 62 0.060984052 27.5 0.053168778 #> 63 0.059516822 -1.0 0.049363834 #> 64 0.060010969 70.0 0.060093467 #> 65 0.058745717 34.5 0.052868434 #> 66 0.068287715 121.0 0.079088325 #> 67 0.067707842 131.5 0.070542031 #> 68 0.057316264 -25.0 0.038478291 #> 69 0.055307172 -85.0 0.029719556 #> 70 0.055854838 -11.0 0.041133191 #> 71 0.053485916 -141.0 0.021404320 #> 72 0.061528553 91.0 0.054118927 #> 73 0.054099932 18.5 0.045794155 #> 74 0.052307954 -68.5 0.035347826 #> 75 0.052921072 -39.0 0.034727448 #> 76 0.062130184 205.0 0.072165304 #> 77 0.061680356 98.5 0.056499524 #> 78 0.053837166 24.0 0.045532404 #> 79 0.038484053 -124.0 0.015848828 #> 80 0.039374093 -119.5 0.013503246 #> 81 0.036273689 -103.0 0.008657362 #> 82 0.034213936 -249.5 -0.009260506 #> 83 0.047525047 -53.0 0.034485225 #> 84 0.042017855 -74.5 0.020045102 #> 85 0.024609363 -154.0 0.005832687 #> 86 0.025461698 -60.0 0.011005829 #> 87 0.037238301 70.0 0.030054865 #> 88 0.016952218 -311.0 -0.027997263 #> 89 0.016785633 -195.0 -0.010363564 #> 90 0.011732705 -253.5 -0.017033589 #> 91 0.009098229 -317.5 -0.028687772 #> 92 0.001053604 -385.5 -0.048094863
# ape::comparePhylo(x = tree_nj, y = tree_acctran) otol_acctran <- phangorn::acctran(tree = ape::unroot(topology), data = Laurasiatherian) # get likelihood of acctran branch lengths and alignment otol_pml <- phangorn::pml(otol_acctran, data = Laurasiatherian) # optimize branch lengths otol_optim <- phangorn::optim.pml(otol_pml) #> optimize edge weights: -207130.4 --> -106097.5 #> optimize edge weights: -106097.5 --> -106096.6 #> optimize edge weights: -106096.6 --> -106096.6 data.frame(otol_acctran = ape::branching.times(otol_acctran), otol_optim_pml = ape::branching.times(otol_optim$tree)) #> otol_acctran otol_optim_pml #> ott244265 136.0 8.100226e-02 #> ott229558 -48.0 6.705178e-02 #> ott683263 -163.0 6.705177e-02 #> ott5334778 -205.0 6.345339e-02 #> ott392222 -264.5 4.802316e-02 #> mrcaott42ott29157 -317.5 4.802315e-02 #> mrcaott42ott10477 -363.0 4.802313e-02 #> mrcaott42ott38834 -426.5 3.757718e-02 #> mrcaott102ott739 -565.0 -1.873340e-02 #> mrcaott38834ott45520 -510.0 -5.291179e-02 #> ott44559 -412.5 3.042209e-02 #> ott644242 -407.0 -8.945198e+01 #> ott386195 -396.5 -1.319520e+02 #> mrcaott786ott83926 -491.0 -2.264520e+02 #> ott392223 -250.5 4.934326e-02 #> mrcaott1548ott4697 -290.0 3.508578e-02 #> mrcaott1548ott6790 -325.0 2.671321e-02 #> mrcaott1548ott3021 -353.0 1.838742e-02 #> ott622916 -411.0 5.608031e-03 #> mrcaott1548ott21987 -457.0 -1.454455e-03 #> mrcaott1548ott5256 -511.0 -1.051769e-02 #> ott768677 -589.0 -4.083177e-02 #> ott7655791 -556.0 -2.006278e-02 #> mrcaott5256ott44568 -658.0 -5.017645e-02 #> mrcaott44568ott226190 -721.5 -6.958462e-02 #> ott541948 -397.0 6.043458e-03 #> ott541951 -476.5 -2.445224e-02 #> ott1034218 -458.5 -1.304954e-02 #> ott574724 -386.0 1.230833e-02 #> mrcaott6790ott6794 -462.5 -6.137077e-03 #> mrcaott10323ott61857 -508.0 -8.318380e-02 #> ott813030 -488.5 -2.606782e-02 #> mrcaott4697ott6940 -349.0 2.214181e-02 #> ott827263 -408.0 1.153504e-02 #> mrcaott22064ott95364 -486.0 -1.026178e-02 #> mrcaott22064ott6145546 -561.5 -3.342281e-02 #> mrcaott3285ott17250 -321.0 -5.802449e-02 #> mrcaott3285ott60434 -359.0 -1.005802e+01 #> mrcaott72667ott180375 -239.0 4.144542e-02 #> ott746703 -318.0 -2.134463e-03 #> mrcaott82081ott292026 -418.5 -1.005021e+02 #> ott922729 -508.5 -1.005021e+02 #> mrcaott6735ott29033 -259.0 -2.109329e+02 #> mrcaott6735ott70811 -321.0 -2.109329e+02 #> mrcaott6735ott44497 -385.0 -2.109329e+02
plot(otol_pml, main="ACCTRAN branches", cex = 0.8) # top = default
plot(otol_optim, main="optimized branches", cex = 0.8) # bottom = optimized branch lengths
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.