library(DAISIEprep)
library(DAISIEprepExtra)
# It is useful to know from the outset the requirements for this scripts
# it uses the madagascar_mammals dataset stored in the data/ folder. This is a
# two column data frame with the species names (tip labels), formatted as
# genus_species in the first column and the species endemicity status, formatted
# as all lowercase underscore separated (use DAISIEprep::translate_status()) to
# convert endemicity status to the correct format. Column names must be
# "tip_labels" and "tip_endemicity_status"
# here is the head of the madagascar mammals data
# tip_labels tip_endemicity_status
# 1 Echinops_telfairi endemic
# 2 Geogale_aurita endemic
# 3 Hemicentetes_nigriceps endemic
# 4 Hemicentetes_semispinosus endemic
# 5 Microgale_brevicaudata endemic
# 6 Microgale_cowani endemic
# the phylogeny of mammal species is also required and here we use the
# posterior distribution of 100 mammal phylogenetic trees from Upham et al.,
# (2019) https://doi.org/10.1371/journal.pbio.3000494 stored as
# Upham_dna_posterior_100.tre and Upham_complete_posterior_100.tre files loaded
# from extdata/
# the variables required to be given by the user for this script are:
# the island age (island_age) in million years, and number of species in the
# mainland pool (num_mainland_species)
# We set these at the start for our estimates for the mammals of Madagascar
island_age <- 88
num_mainland_species <- 1000
# load madagascar mammals species data table
data("madagascar_mammals", package = "DAISIEprepExtra")
# load the checklist
checklist <- utils::read.csv(
file = system.file(
"extdata", "madagascar_mammal_checklist.csv",
package = "DAISIEprepExtra"
),
header = TRUE,
)
missing_species <- DAISIEprep::count_missing_species(
checklist = checklist,
phylo_name_col = "Name_In_Tree",
genus_name_col = "Genus",
in_phylo_col = "Sampled",
endemicity_status_col = "DAISIE_Status_Species",
rm_species_col = "Remove_Species"
)
# load the complete tree
complete_phylos <- ape::read.nexus(
system.file(
"extdata", "Upham_complete_posterior_100.nex",
package = "DAISIEprepExtra"
)
)
# for this example we subset the posterior distribution of 100 phylogenetic
# trees to 3 trees to make the script faster to run. If you want to run the
# script for the entire set of 100 trees you can not run the next line
complete_phylos <- complete_phylos[1:3]
# convert trees to phylo4 objects
complete_phylos <- lapply(complete_phylos, phylobase::phylo4)
# create endemicity status data frame
endemicity_status <- lapply(
complete_phylos,
DAISIEprep::create_endemicity_status,
island_species = madagascar_mammals
)
# combine tree and endemicity status
multi_phylod <- mapply(
phylobase::phylo4d,
complete_phylos,
endemicity_status
)
# reconstruct geographic ancestral states for extraction with asr
multi_phylod <- lapply(
multi_phylod,
DAISIEprep::add_asr_node_states,
asr_method = "mk",
tie_preference = "mainland"
)
# extract island community using asr algorithm
multi_island_tbl <- DAISIEprep::multi_extract_island_species(
multi_phylod = multi_phylod,
extraction_method = "asr",
verbose = TRUE
)
# determine which island clade the missing species should be assigned to
missing_genus <- lapply(
multi_island_tbl,
DAISIEprep::unique_island_genera
)
# add missing species that match genera found in the island tbl
multi_island_tbl <- mapply(
DAISIEprep::add_multi_missing_species,
missing_genus,
multi_island_tbl,
missing_species = list(missing_species)
)
# remove missing species that have already been inserted into the island tbl
no_island_tbl_missing_species <- mapply(
DAISIEprep::rm_multi_missing_species,
missing_genus,
multi_island_tbl,
missing_species = list(missing_species),
SIMPLIFY = FALSE
)
# add the Archaeoindris, Babakotia, Hadropithecus, Mesopropithecus, Pachylemur
# as a missing species of the clade with
# Megaladapis_edwardsi in it as it is a extinct lemur species
multi_island_tbl <- lapply(
multi_island_tbl,
DAISIEprep::add_missing_species,
num_missing_species = 8,
species_to_add_to = "Megaladapis_edwardsi"
)
# add the Plesiorycteropus as a missing_species of the clade with
# Tenrec_ecaudatus in it as it is a tenrec species
multi_island_tbl <- lapply(
multi_island_tbl,
DAISIEprep::add_missing_species,
num_missing_species = 2,
species_to_add_to = "Tenrec_ecaudatus"
)
# add Chaerephon species as separate colonisation
Chaerephon_stem_age <- list() # nolint
for (i in seq_along(multi_phylod)) {
Chaerephon_stem_age[[i]] <- DAISIEprep::extract_stem_age( # nolint
genus_name = "Chaerephon",
phylod = multi_phylod[[i]],
stem = "genus"
)
}
# add the Chaerephon as an stem age max age given the stem age in the tree
multi_island_tbl <- mapply(
DAISIEprep::add_island_colonist,
multi_island_tbl,
clade_name = list("Chaerephon_leucogaster"),
status = list("nonendemic"),
missing_species = list(0),
col_time = Chaerephon_stem_age,
col_max_age = list(TRUE),
branching_times = list(NA_real_),
min_age = list(NA_real_),
species = list("Chaerephon_leucogaster"),
clade_type = list(1)
)
# add the Macronycteris as a missing species of the clade with
# Hipposideros_commersoni in it as it is a bat species
multi_island_tbl <- lapply(
multi_island_tbl,
DAISIEprep::add_missing_species,
num_missing_species = 2,
species_to_add_to = "Hipposideros_commersoni"
)
# convert to daisie data list
daisie_data_list <- lapply(
multi_island_tbl,
DAISIEprep::create_daisie_data,
island_age = island_age,
num_mainland_species = num_mainland_species
)
# to run the maximum likelihood DAISIE inference model we now have the data
# required by the model, but now need the model settings to ensure we are
# fitting the correct model
# the initparsopt is the initial parameter estimates that are used by the
# optimisation algorithm as a starting point from which to optimise. In an
# ideal situation the maximum likelihood parameter estimates and model
# likelihood would be independent of the starting conditions by finding the
# global optimum. However, this is not always the case so we pick sensible
# values that should prevent us getting trapped in local optima. To avoid local
# optima in empirical analyses run the model with different initial parameter
# values to check whether a global optimum has likely been found. The parameters
# in the vector are, in order: cladogenesis, extinction, carrying capacity,
# colonisation and anagenesis
# idparsopt is selecting which parameters in the we want to optimise and here
# we chosen the five parameters listed above and thus idparsopt = 1:5
# parsfix and idparsfix are the parameters that are to be fixed and thus not
# optimised, however, here we optimise all five parameters and so these are both
# NULL
# ddmodel is set to 11 to set both cladogenesis and colonisation to diversity-
# dependent given the carrying capacity parameter.
# the jitter is set to 1e-5 to help initial likelihood calculations but is not
# important for the model fitting and can be left as 1e-5 for all model runs
# run the DAISIE model for all data sets
mls <- list()
for (i in seq_along(multi_phylod)) {
message(
"Fitting DAISIE model to data set ", i, " of ", length(daisie_data_list)
)
mls[[i]] <- DAISIE::DAISIE_ML_CS(
datalist = daisie_data_list[[i]],
initparsopt = c(1, 1, 200, 0.1, 1),
idparsopt = 1:5,
parsfix = NULL,
idparsfix = NULL,
ddmodel = 11,
jitter = 1e-5
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.