#' Rboretum Rooted Tree Reader
#'
#' This function simultaenously reads in and roots one or more trees at a common root clade
#' @param to_root Where to find tree files. Options include:
#' \itemize{
#' \item A character vector of one or more tree file paths
#' \item A path to a single directory containing all tree files
#' }
#' @param root_taxa Outgroup species IDs. Must be in tree(s) and monophyletic. Can be provided as:
#' \itemize{
#' \item A character vector of one or more tip labels
#' \item A semicolon-separated list of tip labels
#' }
#' @param tree_names OPTIONAL: If multiple tree paths are provided, a character vector of names to assign to trees. Length must equal the number of trees. [Default: Trees will be autonamed based on the filename]
#' @param dummy_names OPTIONAL: If TRUE, and multiple tree paths are provdied, trees will be named with placeholder names (e.g. Tree_1, Tree_2, etc.) [Default: Trees will be autonamed based on the filename]
#' @param prefix OPTIONAL: If 'to_root' is a directory, provide a character vector of file prefixes (e.g. all trees start with "RAxML")
#' @param suffix OPTIONAL: If 'to_root' is a directory, provide a character vector of file suffixes (e.g. all trees end with ".nwk")
#' @return A phylo object, rooted at specified taxa, or a named, rooted multiPhlyo
#' @examples
#' # Read in one tree
#' root_taxa = c('Species_1','Species_2')
#' myTree <- readRooted('/path/to/tree.nwk',root_taxa)
#'
#' # Read in multiple trees
#' tree_paths <- c('/path/to/tree1.nwk','/path/to/tree2.nwk')
#' tree_names <- c('Tree1','Tree2')
#' myTrees <- readRooted(tree_paths,root_taxa,tree_names=tree_names)
#'
#' # Read all trees from a directory
#' myTrees <- readRooted('/path/to/tree/dir/',root_taxa) # Trees will be named based off their filenames
#'
#' # Read all .nwk files from a directory
#' myTrees <- readRooted('/path/to/tree/dir/',root_taxa,suffix=".nwk") # Trees will be named based off their filenames
#'
#' @export
readRooted <- function(to_root,root_taxa,tree_names,dummy_names,prefix,suffix){
# Ensure that a path and root taxa are provided as character vectors
if(missing(to_root)){
stop("No tree file or directories indicated with 'to_root'")
} else if(!is.character(to_root)){
stop("'to_root' should be a character vector of file paths or the path to a tree directory.")
}
if(missing(root_taxa)){
stop("No root taxa provided")
} else if(!is.character(root_taxa)){
stop("'root_taxa' should be a character vector of tip labels")
} else if(length(root_taxa)==1 & semiChecker(root_taxa)){ # If root taxa are provided as a semi-colon separated list
root_taxa <- semiVector(root_taxa)
}
# Use dummy names for multiPhylo?
if(missing(dummy_names)){
dummy_names <- FALSE
} else if(!is.logical(dummy_names)){
dummy_names <- FALSE
}
# Create regex search pattern in case a directory is given
if(missing(prefix)){
prefix <- c()
} else if(is.null(prefix)){
prefix <- c()
} else if(!is.character(prefix)){
stop("'prefix' must be a character vector")
} else{
prefix <- unlist(purrr::map(.x=prefix,.f=function(x){paste(c("^",x),collapse = '')}))
prefix <- paste(c("(",paste(prefix,collapse = "|"),")"),collapse = '')
}
if(missing(suffix)){
suffix <- c()
} else if(is.null(suffix)){
suffix <- c()
} else if(!is.character(suffix)){
stop("'suffix' must be a character vector")
} else{
suffix <- unlist(purrr::map(.x=suffix,.f=function(x){ifelse(substr(x,start = 1,stop = 1)==".",paste(c("\\",x,"$"),collapse = ''),paste(c(x,"$"),collapse = ''))}))
suffix <- paste(c("(",paste(suffix,collapse = "|"),")"),collapse = '')
}
if(length(prefix)==0 & length(suffix)==0){
tree_regex <- ''
} else if(length(prefix)>0 & length(suffix)==0){
tree_regex <- prefix
} else if(length(prefix)==0 & length(suffix)>0){
tree_regex <- suffix
} else if(length(prefix)>0 & length(suffix)>0){
tree_regex <- paste(paste(c(prefix,"(.*)",suffix),collapse = ""))
}
isFile <- file.exists(to_root) & !dir.exists(to_root)
isDir <- dir.exists(to_root) & !isFile
# Ensure files all exist
if(length(to_root)==1){
if(Rboretum::checkValidFiles(to_root)){ # 'to_root' points to a single valid file
tree_count <- 1
to_root <- Rboretum::checkValidFiles(to_root,return_full_path = TRUE)
default_name <- basename(to_root)
} else if(isDir){ # 'to_root' points to a valid directory
if(has_error(silent=TRUE,list.files(path=to_root,pattern=tree_regex,full.names = TRUE,include.dirs = FALSE))){
stop("Can't process file fetch. Check path or regex?")
} else{
to_root <- list.files(path=to_root,pattern=tree_regex,full.names = TRUE,include.dirs = FALSE)
if(length(to_root)==0){
stop("Directory found, but no files identified in 'to_root'. Check regex?")
} else if(length(to_root)==1){
tree_count <- 1
default_name <- basename(to_root)
} else{
tree_count <- length(to_root)
default_name <- purrr::map(to_root,.f = function(x){basename(x)}) %>% unlist() %>% as.character()
}
}
} else{ stop("'to_root' points to neither a valid file or directory.") }
} else{ # 'to_root' is a list of file paths
file_check <- Rboretum::checkValidFiles(to_root) # Check that all paths in 'to_root' point to valid files
if(!file_check){
invalid_paths <- Rboretum::checkValidFiles(to_root,return_invalid = TRUE)
print(invalid_paths)
stop("The above paths from 'to_root' do not point to a valid file...")
} else{
to_root <- Rboretum::checkValidFiles(to_root,return_full_path = TRUE)
tree_count <- length(to_root)
default_name <- purrr::map(to_root,.f = function(x){basename(x)}) %>% unlist() %>% as.character()
}
}
# If a single tree path is provided, return a phylo
if(tree_count == 1){
tree <- Rboretum::readRooted_Worker(to_root,root_taxa)
if(!Rboretum::isPhylo(tree)){
stop("'to_root' cannot be rooted with 'root_taxa'")
} else{
return(tree)
}
} else if(tree_count > 1){ # If multiple tree paths are provided, return a named multiPhylo
if(missing(tree_names)){
tree_names <- default_name
} else if(length(tree_names) != tree_count){
print(paste(c("'tree_names' (",length(tree_names),") and number of trees (",tree_count,") do not match...using default names..."),collapse = ''))
tree_names <- default_name
}
trees <- purrr::map(.x = to_root,.f = function(x){Rboretum::readRooted_Worker(x,root_taxa)})
if(any(is.na(unlist(trees)))){
stop("At least one tree from 'to_root' could not be rooted with 'root_taxa'")
} else{
class(trees) <- "multiPhylo"
names(trees) <- tree_names
if(dummy_names){
print("Applying dummy tree names as requested...")
trees <- Rboretum::treeNamer(trees)
}
return(trees)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.