R/get_large_trees.R

Defines functions get_tree_plant_n100_Carruthers get_tree_shark_ray_n100 get_tree_reptile_n100 get_tree_mammal_n100_vertlife get_tree_mammal_n100_phylacine get_tree_fish_32k_n50 get_tree_bird_n100 get_tree_bee_n100 get_tree_amphibian_n100 .pb_download_cached

Documented in get_tree_amphibian_n100 get_tree_bee_n100 get_tree_bird_n100 get_tree_fish_32k_n50 get_tree_mammal_n100_phylacine get_tree_mammal_n100_vertlife get_tree_plant_n100_Carruthers get_tree_reptile_n100 get_tree_shark_ray_n100

#' @import ape
NULL

# Accessor functions for large multi-tree datasets stored as GitHub release
# assets. Each function downloads the file on first use and caches it locally.

# Load a dataset from the local cache, downloading it from the GitHub release
# first when it is missing (or when a re-download is forced).
#
# Arguments:
#   filename   Name of the release asset, an .rda file. The object returned is
#              the one whose name equals the file name without its extension.
#   size_hint  Human-readable size string; used only in the progress message.
#   force      If TRUE, download again even when a cached copy exists.
#
# Returns the object named after `filename` (sans extension) stored in the
# .rda file.
#
# Errors when a download is needed but 'piggyback' is not installed, when the
# download does not produce the file, when the cached file cannot be loaded,
# or when the file does not contain the expected object.
.pb_download_cached <- function(filename, size_hint, force) {
  cache_dir <- tools::R_user_dir("megatrees", which = "data")
  local_file <- file.path(cache_dir, filename)

  if (!file.exists(local_file) || force) {
    # piggyback is only needed for the download itself, so a cache hit keeps
    # working on machines where it is not installed.
    if (!requireNamespace("piggyback", quietly = TRUE)) {
      stop(
        "Package 'piggyback' is required to download this dataset.\n",
        "Install it with: install.packages('piggyback')",
        call. = FALSE
      )
    }
    message("Downloading ", filename, " (", size_hint, ") to local cache...")
    dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
    piggyback::pb_download(
      file     = filename,
      repo     = "daijiang/megatrees",
      tag      = "large_multiPhylo",
      dest     = cache_dir,
      overwrite = TRUE
    )
    # Fail here with a clear message instead of letting load() complain about
    # a file that never arrived.
    if (!file.exists(local_file)) {
      stop(
        "Failed to download '", filename, "' from the 'large_multiPhylo' ",
        "release of daijiang/megatrees.\n",
        "Check your internet connection and try again.",
        call. = FALSE
      )
    }
  }

  # Load into a throwaway environment so nothing leaks into the caller's scope.
  e <- new.env(parent = emptyenv())
  tryCatch(
    load(local_file, envir = e),
    error = function(err) {
      stop(
        "Could not load cached file '", local_file, "': ",
        conditionMessage(err), "\n",
        "The file may be corrupted; call again with `force = TRUE` to ",
        "download a fresh copy.",
        call. = FALSE
      )
    }
  )

  obj_name <- tools::file_path_sans_ext(filename)
  if (!exists(obj_name, envir = e, inherits = FALSE)) {
    stop(
      "Cached file '", local_file, "' does not contain an object named '",
      obj_name, "'.\n",
      "Call again with `force = TRUE` to download a fresh copy.",
      call. = FALSE
    )
  }
  e[[obj_name]]
}


#' Get 100 randomly selected mega-trees of Amphibians from VertLife
#'
#' Tree 1 is the "consensus" tree and trees 2 to 100 are posterior
#' phylogenies. The file is large (~12 MB), so it does not ship with the
#' package; it is downloaded the first time it is requested and read from a
#' local cache afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{http://vertlife.org/data/amphibians/}
#' @references Jetz, W., & Pyron, R. A. (2018). The interplay of past
#'   diversification and evolutionary isolation with present imperilment across
#'   the amphibian tree of life. Nature ecology & evolution, 2(5), 850-858.
#' @export
get_tree_amphibian_n100 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_amphibian_n100.rda",
    size_hint = "~12 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Bees from Bee Tree of Life
#'
#' The file is large (~5 MB), so it does not ship with the package; it is
#' downloaded the first time it is requested and read from a local cache
#' afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{http://beetreeoflife.org}
#' @references Henríquez-Piskulich, P., Hugall, A. F., & Stuart-Fox, D. (2023).
#'   A supermatrix phylogeny of the world's bees (Hymenoptera: Anthophila).
#'   bioRxiv 2023.06.16.545281. \doi{10.1101/2023.06.16.545281}
#' @seealso \code{\link{tree_bee}}
#' @export
get_tree_bee_n100 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_bee_n100.rda",
    size_hint = "~5 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Birds from Bird Tree
#'
#' Trees 1 to 50 use the Ericson backbone and trees 51 to 100 use the Hackett
#' backbone (Jetz et al. 2012). The file is large (~18 MB), so it does not
#' ship with the package; it is downloaded the first time it is requested and
#' read from a local cache afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://data.vertlife.org/}
#' @references Jetz, W., Thomas, G. H., Joy, J. B., Hartmann, K., & Mooers,
#'   A. O. (2012). The global diversity of birds in space and time. Nature,
#'   491(7424), 444.
#' @seealso \code{\link{tree_bird_McTavish}}
#' @export
get_tree_bird_n100 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_bird_n100.rda",
    size_hint = "~18 MB",
    force     = force
  )
}


#' Get 50 randomly selected mega-trees of 31516 Fish from The Fish Tree of Life
#'
#' These trees were generated by Rabosky et al. (2018). Each has 31516 tips;
#' species without sequence data were added using birth-death models. The
#' authors suggest not using these trees for trait evolution analyses. The
#' file is large (~23 MB), so it does not ship with the package; it is
#' downloaded the first time it is requested and read from a local cache
#' afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://fishtreeoflife.org/downloads/actinopt_full.trees.xz}
#' @references Rabosky, D. L., Chang, J., Title, P. O., Cowman, P. F., Sallan,
#'   L., Friedman, M., ... & Alfaro, M. E. (2018). An inverse latitudinal
#'   gradient in speciation rate for marine fishes. Nature, 559(7714), 392.
#' @seealso \code{\link{tree_fish_12k}}
#' @export
get_tree_fish_32k_n50 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_fish_32k_n50.rda",
    size_hint = "~23 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Mammals from PHYLACINE V1.2
#'
#' The 100 phylogenies are a random subset of the 1000 trees generated by
#' Faurby et al. (2018). The file is large (~6 MB), so it does not ship with
#' the package; it is downloaded the first time it is requested and read from
#' a local cache afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://github.com/MegaPast2Future/PHYLACINE_1.2/blob/master/Data/Phylogenies/Complete_phylogeny.nex}
#' @references Faurby, S., Davis, M., Pedersen, R. Ø., Schowanek, S. D.,
#'   Antonelli, A., & Svenning, J. C. (2018). PHYLACINE 1.2: The phylogenetic
#'   atlas of mammal macroecology. Ecology, 99(11), 2626-2626.
#' @seealso \code{\link[megatrees]{get_tree_mammal_n100_vertlife}}
#' @export
get_tree_mammal_n100_phylacine <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_mammal_n100_phylacine.rda",
    size_hint = "~6 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Mammals from VertLife
#'
#' Trees 1 to 50 use the Node Dating Exponential backbone and trees 51 to 100
#' use the Fossil Birth Death backbone (Upham et al. 2019). The file is large
#' (~11 MB), so it does not ship with the package; it is downloaded the first
#' time it is requested and read from a local cache afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://vertlife.org/data/mammals/}
#' @references Upham, N. S., Esselstyn, J. A., & Jetz, W. (2019). Inferring
#'   the mammal tree: species-level sets of phylogenies for questions in
#'   ecology, evolution, and conservation. PLoS biology, 17(12), e3000494.
#' @seealso \code{\link[megatrees]{get_tree_mammal_n100_phylacine}}
#' @export
get_tree_mammal_n100_vertlife <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_mammal_n100_vertlife.rda",
    size_hint = "~11 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Reptiles (Squamates) from VertLife
#'
#' The file is large (~15 MB), so it does not ship with the package; it is
#' downloaded the first time it is requested and read from a local cache
#' afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://vertlife.org/data/squamates/}
#' @references Tonini, J. F. R., Beard, K. H., Ferreira, R. B., Jetz, W., &
#'   Pyron, R. A. (2016). Fully-sampled phylogenies of squamates reveal
#'   evolutionary patterns in threat status. Biological Conservation, 204, 23-31.
#' @export
get_tree_reptile_n100 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_reptile_n100.rda",
    size_hint = "~15 MB",
    force     = force
  )
}


#' Get 100 randomly selected mega-trees of Sharks, Rays, and Chimaeras from VertLife
#'
#' The file is large (~2 MB), so it does not ship with the package; it is
#' downloaded the first time it is requested and read from a local cache
#' afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny.
#' @source \url{https://vertlife.org/data/sharks/}
#' @references Stein, R. W., Mull, C. G., Kuhn, T. S., Aschliman, N. C.,
#'   Davidson, L. N., Joy, J. B., ... & Mooers, A. O. (2018). Global priorities
#'   for conserving the evolutionary history of sharks, rays and chimaeras.
#'   Nature ecology & evolution, 2(2), 288-298.
#' @export
get_tree_shark_ray_n100 <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_shark_ray_n100.rda",
    size_hint = "~2 MB",
    force     = force
  )
}


#' Get 100 randomly selected plant mega-trees (Carruthers et al.)
#'
#' These trees are based on Carruthers et al. (2026), which is itself an
#' update of Smith and Brown (2018); 100 plant mega-trees were randomly
#' selected from that dataset. The file is large (~135 MB), so it does not
#' ship with the package; it is downloaded the first time it is requested and
#' read from a local cache afterwards.
#'
#' @param force Logical. If \code{TRUE}, download the file again even when a
#'   cached copy already exists. Defaults to \code{FALSE}.
#' @return A \code{multiPhylo} object. Every phylogeny carries a
#'   \code{genus_family_root} data frame holding the root node information of
#'   each unique genus and family, which can be used to insert new tips onto
#'   the phylogeny later.
#' @source \url{https://www.biorxiv.org/content/10.64898/2026.01.06.695000v1}
#' @references Carruthers et al. (2026). A large phylogenetic tree for
#'   euphyllophytes. bioRxiv.
#' @seealso \code{\link{tree_plant_Carruthers}}
#' @export
get_tree_plant_n100_Carruthers <- function(force = FALSE) {
  .pb_download_cached(
    filename  = "tree_plant_n100_Carruthers.rda",
    size_hint = "~135 MB",
    force     = force
  )
}

Try the megatrees package in your browser

Any scripts or data that you put into this service are public.

megatrees documentation built on May 22, 2026, 5:07 p.m.