R/tip_labels.R

Defines functions change_tip_labels

#' Change tip labels on a tree based on an OTU information file.
#'
#' Reads a newick tree and a tab-separated OTU information table, looks up
#' every tip label among the row names of the table (its first column), and
#' replaces it with the matching taxon name, with spaces turned into
#' underscores.
#'
#' @param treefile A character vector with the path to the tree you want to
#'   change tip labels in newick format.
#' @param otufile A character vector with the path to the OTU information csv
#'   file generated by Physcraper.
#' @param label A character string naming the kind of label to use. Only
#'   `"ott"` is supported; it takes the labels from the `X.ot.ottTaxonName`
#'   column of the OTU table as read by `utils::read.csv()`.
#' @author Luna L. Sanchez Reyes
#' @return a phylo object, tips can be duplicated. Tips that have no row (or
#'   an empty taxon name) in the OTU file keep their original label.
#' @examples
#' \dontrun{
#' treefile <- 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile <- 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
#' phy <- change_tip_labels(treefile, otufile)
#' }
change_tip_labels <- function(treefile, otufile, label = "ott") {
  # validate the label argument before touching any file
  label <- match.arg(label, c("ott"))

  # read the tree in; anything other than a single tree (NULL for an empty
  # file, a multiPhylo for several trees) has no `tip.label` to rewrite
  phy <- ape::read.tree(treefile)
  if (!inherits(phy, "phylo")) {
    stop("`treefile` must contain a single tree in newick format.",
         call. = FALSE)
  }

  # read the OTU file in; its first column becomes the row names
  tip_info <- utils::read.csv(
    otufile,
    sep = "\t",
    row.names = 1,
    stringsAsFactors = FALSE
  )

  # column of the OTU table that holds the new labels for each `label` choice
  label_column <- switch(label, ott = "X.ot.ottTaxonName")
  if (!label_column %in% colnames(tip_info)) {
    stop("Column `", label_column, "` not found in `otufile`.", call. = FALSE)
  }

  # taxon names from the OTU info file, named by the OTU table row names
  spp <- as.character(tip_info[[label_column]])
  names(spp) <- rownames(tip_info)

  # index of each tip in the OTU info file (NA when the tip is not listed)
  ii <- match(phy$tip.label, names(spp))
  new_labels <- unname(spp[ii])

  # never hand back NA or empty tip labels: fall back to the original label
  keep_original <- is.na(new_labels) | !nzchar(new_labels)
  new_labels[keep_original] <- phy$tip.label[keep_original]

  # spaces for underscore
  phy$tip.label <- gsub(" ", "_", new_labels)

  phy
}
McTavishLab/physcraperex documentation built on April 10, 2021, 12:02 a.m.