R/TreeUtils.R

# Author: Walter Xie
# Accessed on 12 Oct 2016

#' @name TreeUtils
#' @title Utils to manipulate or visualise phylogenetic tree 
#'
#' @description The \strong{tree} object here is \code{\link{ape}} tree.
#'
#' @details \code{annotateRAXMLTree} annotates a \strong{RAxML} tree to visualise in \strong{FigTree}.
#' 
#' @param tree.file The file path to load a tree generated by \strong{RAxML}.
#' @param taxa.map The data.frame to map tree tips to traits. 
#' It must have at least 2 columns, where the 1st column is tree tip label.
#' 
#' For example,
#' \tabular{rrrr}{
#'   tip.label \tab trait1 \tab trait2 \tab ...\cr
#'   tip1 \tab x \tab a \tab ...\cr
#'   tip2 \tab x \tab b \tab ...\cr
#'   ... \tab y \tab a \tab ...
#' } 
#' 
#' @param anno.tree.file The file path to load a tree 
#' that can be imported into \strong{FigTree} including annotations.
#' If NA, as default, to overwrite the \code{tree.file}.
#' @export
#' @keywords tree
#' @examples 
#' tree <- read.tree("RAxML_bestTree.16s.txt")
#' tree1 <- renameTips(tree, taxa.map, "16s_RAxML.nex")
#' 
#' @rdname TreeUtils
annotateRAXMLTree <- function(tree, taxa.map, anno.tree.file="annotated-tree.nex", no.match="unknown") {
  require(ape)
  if (!is(tree, "phylo")) 
    stop("The input tree has to be 'ape' 'phylo' class !")
  if (ncol(taxa.map) < 2) 
    stop("taxa.map must have at least 2 columns, where the 1st column is tree tip label !")

  # match tips
  row.id <- match(tree$tip.label, taxa.map[,1])
  if (all(is.na(row.id))) 
    stop("Cannot match tree tips to the 1st column of taxa.map !")
  # annotate
  for (r in 1:length(row.id)) {
    traits <- ifelse(is.na(row.id[r]), paste0(colnames(taxa.map)[-1], "=", no.match, collapse=","),
                    paste0(colnames(taxa.map)[-1], "=", taxa.map[row.id[r], colnames(taxa.map)[-1]], collapse=",") )
    tree$tip.label[r] <- paste0(tree$tip.label[r],"[&",traits,"]")   
  }
  if (is.na(anno.tree.file))
    anno.tree.file = tree.file
  
  # modify checkLabel to allow comma in write.tree
  # http://stackoverflow.com/questions/23279904/modifying-an-r-package-function-for-current-r-session-assigninnamespace-not-beh
  tmpfun <- get("checkLabel", envir = asNamespace("ape"))
  environment(checkLabel2) <- environment(tmpfun)
  assignInNamespace("checkLabel", checkLabel2, ns="ape")
  
  write.tree(tree, file=anno.tree.file)
  # assume 1 tree 1 line
  trees <- ComMA::readFileLineByLine(anno.tree.file)
  # make file to suit figtree
  cat("#NEXUS", "BEGIN TREES;", file=anno.tree.file, sep = "\n")
  for (t in 1:length(trees)) {
    cat("  tree", paste0("tree_",t), "=", trees[t], "\n", file=anno.tree.file, append=TRUE)
  }
  cat("END;\n", file=anno.tree.file, append=TRUE)
  return(tree)
}

#' @details \code{renameTips} renames tree tips. It also can be used to insert annotations
#' using regular expression. 
#' 
#' @param tree The \pkg{\link{ape}} tree object required. 
#' @param pattern,replacement,... Arguments of \code{\link{gsub}}.
#' @export
#' @keywords tree
#' @examples 
#' # remove "_Viridiplantae" postfix from tips
#' tree2 <- renameTips(tree, "_Viridiplantae", "")
#' 
#' # AB1_Soil.123_Metazoa:0.016 => AB1_Soil.123_Metazoa[&taxon=Metazoa]:0.016
#' tree2 <- renameTips(tree, "([a-zA-Z0-9\\_\\.]+)_([[a-zA-Z]+):", "\\1_\\2[\\&taxon=\\2]:")
#' 
#' @rdname TreeUtils
renameTips <- function(tree, pattern, replacement, ...) {
  require(ape)
  tree$tip.label <- gsub(pattern, replacement, tree$tip.label, ...)
  #sbj.des <- taxa.map[match(tree$tip.label, taxa.map$seqid), "description"]
  #tree$tip.label <- ifelse(is.na(sbj.des), tree$tip.label, sbj.des)
  
  return(tree)
}

checkLabel2 <- function(x, ...) {
  ## delete all leading and trailing spaces and tabs, and
  ## the leading left and trailing right parentheses:
  ## (the syntax will work with any mix of these characters,
  ##  e.g., "    ( ( ((  " will correctly be deleted)
  x <- gsub("^[[:space:]\\(]+", "", x)
  x <- gsub("[[:space:]\\)]+$", "", x)
  ## replace all spaces and tabs by underscores:
  x <- gsub("[[:space:]]", "_", x)
  ## remove all commas, colons, and semicolons
  #x <- gsub("[,:;]", "", x)
  ## replace left and right parentheses with dashes:
  x <- gsub("[\\(\\)]", "-", x)
  ## delete extra underscores and extra dashes:
  x <- gsub("_{2,}", "_", x)
  x <- gsub("-{2,}", "-", x)
  x
}
walterxie/ComMA documentation built on May 3, 2019, 11:51 p.m.