R/plots.R

Defines functions compare summarize plot_branches plot_branches_method2 plot_branches_method1 get_tip_values sum_tips get_file_names trenex2pdf

Documented in get_file_names get_tip_values plot_branches plot_branches_method1 plot_branches_method2 sum_tips trenex2pdf

#' trenex2pdf
#'
#' Write nexus tree files to pdf.
#'
#'
#' @param treefile A character vector with the name of the nexus or newick file
#' @param studyid TreeBASE study id

trenex2pdf <- function(treefile,
                       treefilesdir = "../data-raw/trees/treebase/S1091/",
                       treepdfsdir = "../data-raw/trees/treebase/S1091-pdf/",
                       height = 7, ...){
  # all treebase trees are in nexus format
  tree <- ape::read.nexus(file= paste0(treefilesdir, "/", treefile))

  outputfile <- paste0(treepdfsdir, gsub(".nex", ".pdf", treefile))
  pdf(file = outputfile, height)
  ape::plot.phylo(ape::ladderize(tree), cex = 0.5, ...)
  graphics::mtext(paste("TreeBASE tree:", gsub(".nex", "", treefile)))
  dev.off()
}

#' get_file_names
#'
#' Write nexus or newick files to pdf or png.
#'
#' @param studyid TreeBASE study id
#' @param file_dir Character vector of a directory with one or more tree files

get_file_names <- function(studyid, file_dir = "data-raw/trees/treebase"){
  # dir_original <- getwd()
  print(getwd())
  treefiles <- paste0(studyid, "-treefiles.csv")
  system(paste0("ls ", studyid, " > ", treefiles))
  return(paste0(file_dir, "/", treefiles))
}

#' Cummulative sum of tip values for each node/branch down to the root
#'
#'
#' @param tree a phylo object
#' @param values named numeric character vector with names corresponding to tip labels
#' @author Luna L. Sanchez Reyes
#' @return a vector
#'

sum_tips <- function(tree, values) {
    # tree <- ape::reorder.phylo(tree, "postorder")  # no need to reorder the whole tree
    res <- numeric(max(tree$edge))
    res[1:ape::Ntip(tree)] <- values[match(names(values), tree$tip.label)]
    for (i in ape::postorder(tree))  { # ape postorder doesn't include root
         tmp <- tree$edge[i,1]
         # print(i)
         res[tmp] <- res[tmp] + res[tree$edge[i, 2]]
         # print(res)
   }
   res
}


#' Assign tip values to a tree based on OTU information file.
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Emily Jane McTavish
#' @return a list of a phylo object and a vector of tip values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'

get_tip_values <- function(treefile, otufile){

  phytree = ape::read.tree(treefile)

  tip_info = utils::read.csv(otufile, sep = '\t', row.names = 1, stringsAsFactors = FALSE)

  inout <- as.vector(tip_info$X.physcraper.ingroup)
  names(inout) <- rownames(tip_info)

  outgroups = c()
  for (lab in phytree[["tip.label"]]) {
    status = inout[names(inout) == lab]
    if (status == "False")
    {outgroups = c(outgroups, lab)}
  }


  st <- as.vector(tip_info$X.physcraper.status)
  names(st) <- rownames(tip_info)

  spp <- as.vector(tip_info$X.ot.ottTaxonName)
  names(spp) <- rownames(tip_info)

  # doing a loop bc tip_info has more taxa than phytree ("not added" sequences)
  newtips = c()
  spp_labels = c()
  for (lab in phytree[["tip.label"]]) {
    status = st[names(st) == lab]
    val = !grepl("original", status, fixed = TRUE)
    val = 100*as.integer(val)
    names(val) <- lab
    newtips = c(newtips, val)
    tax = spp[names(spp) == lab]
    spp_labels = c(spp_labels, tax)
  }
  original <- spp_labels[newtips == 0]
  return(list(phytree=phytree,
              newtips=newtips,
              spp_labels=spp_labels,
              outgroups=outgroups,
              original = original))

}

#' Plot a tree with branches colored according to availability of molecular data, method 1
#'
#' @param x A list from get_tip_values
#' @param tip_label A character vector. Can be one of "otu" or "taxon"
#' @param drop_outgroup Boolean
#' @param ladderize_tree Boolean
#' @param type From plot.phylo
#' @param color Color to use for new tip labels. Old tip labels are always colored black.
#' @param edge_length Boolean. Whether to plot edge length values to branches or not.
#' @param font From plot.phylo
#' @return a plot
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'

plot_branches_method1 <- function(x,
                                  tip_label = "otu",
                                  drop_outgroup = TRUE,
                                  ladderize_tree = TRUE,
                                  type = "cladogram",
                                  color = "red",
                                  edge_length=TRUE,
                                  font = 3,
                                  ...){

  phytree <- x$phytree
  if (!inherits(phytree, "phylo"))
    stop("tree should be an object of class \"phylo\".")

  newtips <- x$newtips

  tip_label <- match.arg(tip_label, c("otu", "taxon"))
  if("taxon" %in% tip_label){
    # phytree[["tip.label"]] <- x$spp_labels
    # names(newtips) <- phytree[["tip.label"]]
    tip_labels_final <- x$spp_labels
  } else {
    tip_labels_final <- phytree[["tip.label"]]
  }

  if(ladderize_tree){
    phytree <- ape::ladderize(phytree)
    # ladderize does not change the order of tip labels, so if new tips have the
    # same ordering as phytree$tip.label, then tip_colors will have the correct
    # color assignation
  }

  tip_color <- ifelse(newtips == 0, "black", color)

  if(drop_outgroup){
    phytree <- ape::drop.tip(phytree, phytree$tip.label[match(x$outgroups, phytree$tip.label)])
    tip_labels_final <- tip_labels_final[!names(newtips) %in% x$outgroups]
    tip_color <- tip_color[!names(newtips) %in% x$outgroups]
    newtips <- newtips[!names(newtips) %in% x$outgroups]
  }

  if (hasArg(cex)){
    cex <- list(...)$cex
  } else {
    cex <- par("cex")
  }
  if (length(cex) == 1){
    cex <- rep(cex, 2)
  }
  # when there are duplicated taxon names, if we use those as tip labels, plotBranchbyTrait
  # will not work properly bc it matches names of newtips vector with phytree$tip.labels
  # so we have to run it using OTU ids only, and then adding taxon names as labels if we want
  phytools::plotBranchbyTrait(tree = phytree,
                              x = newtips,
                              mode = "tips",
                              legend=FALSE,
                              show.tip.label = TRUE,
                              palette=colorRampPalette(c("black", color)),
                              tip.color="#fcfcfc00",
                              label.offset=0.1,
                              cex=cex,
                              type=type,
                              ...)
  # the final tip labels are added here:
  ape::tiplabels(text=tip_labels_final,
                 col=tip_color,
                 cex=cex,
                 frame="none",
                 adj = c(-0.1,0.5),
                 font = font)
  # the bootstrap values are added here:
  if(!is.null(phytree$node.label)){
    ape::nodelabels(text=as.numeric(phytree$node.label)*100,
                    cex=cex*0.7,
                    frame="none",
                    adj = c(-0.1,0.5))
  }
  # the branch lengths are added here:
  if(edge_length){
    zz <- round(phytree$edge.length, digits=3)
    w <- zz>0.01
    ape::edgelabels(text=zz[w], edge=seq(length(zz))[w],
                    cex=cex*0.5, frame="none", col = "gray", adj = c(0.5, -0.4))
  }
}
#
# phytools::plotBranchbyTrait(tree = phytree,
#                             x = newtips,
#                             mode = "tips",
#                             legend=FALSE,
#                             show.tip.label = TRUE,
#                             palette=colorRampPalette(c("black", color)),
#                             tip.color="blue",
#                             label.offset=0,
#                             cex=cex,
#                             edge.width = 1)



#' Plot a tree with ggtree branches and tips colored according to molecular data, method 2
#' It is giving an error with ggtree, make an issue or contact the author.
#'
#' @param x A list from get_tip_values
#' @param tip_label A character vector. Can be one of "otu" or "taxon"
#' @param drop_outgroup Boolean
#' @param ladderize_tree Boolean
#' @author Luna L. Sanchez Reyes
#' @return a plot
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'

plot_branches_method2 <- function(x, tip_label = "otu", drop_outgroup = TRUE, ladderize_tree = TRUE, color = "red", ...){
  tree <- x$phytree
  tree2 <- ggtree::groupOTU(tree, .node=names(newtips[newtips==0]))
  ggtree::ggtree(tree2, aes(color=group)) + ggtree::geom_tiplab()
  # Error in get(x, envir = ns, inherits = FALSE) :
    # object 'mutate.tbl_df' not found
}


#' Get a plot of a tree with tips and branches colored following a tip value.
#'
#' @inheritParams get_tip_values
#'
#' @return a plot

plot_branches <- function(treefile, otufile, ...){
  x <- get_tip_values(treefile, otufile)
  plot_branches_method1(x, ...)

}


#' summary of new taxa and tips added to a tree based on OTU information file.
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Luna L. Sanchez Reyes
#' @return a list of summary values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'

summarize <- function(treefile, otufile){
  x <- get_tip_values(treefile, otufile)


  new_taxa <- x$spp_labels[x$newtips==100][!x$spp_labels[x$newtips==100]
            %in% unique(x$spp_labels[x$newtips==0])]

  length(unique(x$spp_labels)) == length(unique(new_taxa)) + length(unique(x$spp_labels[x$newtips==0]))

  xx <- list(all_taxa= unique(x$spp_labels),
            new_taxa=unique(new_taxa),
            original_taxa=unique(x$spp_labels[x$newtips==0]),
            new_tips= x$spp_labels[x$newtips==100],
            original_tips=x$spp_labels[x$newtips==0])

  message("This updated tree has ", length(x$spp_labels[x$newtips==0]), " original tips, representing ",
          length(unique(x$spp_labels[x$newtips==0])), " unique OTT taxa.")

  message("And ", length(x$spp_labels[x$newtips==100]), " new tips representing ",
          length(unique(new_taxa)), " new OTT taxa added to the original tree.")

  message("It is ", round(x$phytree$Nnode/ape::Ntip(x$phytree), digits = 2),
          "% resolved.")

  return(xx)
}

#' summary of new taxa on updated tree and any other tree, compared to the original tree
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Luna L. Sanchez Reyes
#' @return a list of summary values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'


compare <- function(treefile, otufile, tree){
  #everything to space
  x <- get_tip_values(treefile, otufile)

  if (!inherits(tree, "phylo")){
    stop("tree should be an object of class \"phylo\".")
  }
  tree$tip.label <- gsub("_", " ", tree$tip.label)

  outersect <- function(x, y) {
    sort(c(setdiff(x, y), setdiff(y, x)))
  } # function from https://www.r-bloggers.com/outersect-the-opposite-of-rs-intersect-function/

  all_new <- unique(c(tree$tip.label[!tree$tip.label %in% x$original],
                      x$spp_labels[!x$spp_labels %in% x$original]))

  shared_new <- intersect(tree$tip.label[!tree$tip.label %in% x$original],
                      x$spp_labels[!x$spp_labels %in% x$original])

  # test that none of the original are in shared_new
  # all(!x$original %in% shared_new)

  # new taxa that are only in treefile and not in tree
  not_in_tree <- x$spp_labels[!x$spp_labels %in% tree$tip.label]
  not_in_tree_new <- not_in_tree[!not_in_tree %in% x$original]
  names(not_in_tree_new) <- NULL

  # new taxa that are only in tree and not in treefile
  # it will always exclude all original, bc they are all in x$spp_labels
  not_in_x <- tree$tip.label[!tree$tip.label %in% x$spp_labels]

  original_in_tree <- tree$tip.label[tree$tip.label %in% x$original]

  # tests:
  any(not_in_x %in% not_in_tree)
  any(not_in_x %in% x$original)
  any(not_in_tree_new %in% not_in_x)
  any(not_in_tree_new %in% x$original)
  any(duplicated(not_in_tree_new))
  any(duplicated(not_in_x))

  xx <- list(all_new = all_new,
             shared_new= shared_new,
             only_in_updated = unique(not_in_tree_new),
             only_in_tree = not_in_x,
             original_in_tree = unique(original_in_tree)
           )

  message("The updated tree (from treefile) and the tree to compare (from tree)
  contribute jointly with ", length(xx$all_new),
  " distinct taxa that are in not in the original tree.", "\n From this, ",
  length(shared_new), " taxa are present in both trees.\n")

  message("The updated tree contributes ", length(xx$only_in_updated),
  " distinct taxa that are neither in original tree
  nor in tree to compare.\n")

  message("The tree to compare has ", sum(!tree$tip.label %in% xx$original),
  " new tips and contributes ", length(unique(tree$tip.label[!tree$tip.label %in% xx$original])),
  " distinct taxa \n from which ",
  length(xx$only_in_tree),
  " are neither in original tree nor in the updated tree.\n")

  message("The tree to compare has ", length(xx$original_in_tree),
  " taxa that are in original tree.")

  message("And ", sum(!tree$tip.label %in% xx$original),
          " tips different from original, representing ",
          length(unique(xx$only_in_tree)), " new OTT taxa added to the original tree.")

  message("It is ", round(tree$Nnode/ape::Ntip(tree), digits = 2),
          "% resolved.")

  return(xx)
}

# To get a color blind safe palette:
# help from https://stackoverflow.com/questions/57153428/r-plot-color-combinations-that-are-colorblind-accessible
# library(rcartocolor)
# display_carto_all(colorblind_friendly = TRUE)
# rcartocolor::display_carto_pal(3, "ag_Sunset")
# mycol <- rcartocolor::carto_pal(3, "ag_Sunset")

# scales::show_col(safe_colorblind_palette)

# phytools::plotBranchbyTrait(tree = phytree,
#                               x = newtips,
#                               mode = "tips",
#                               legend=FALSE,
#                               show.tip.label = FALSE,
#                               palette=colorRampPalette(c("black", "red")),
#                               type='phylogram',
#                               edge.width = 1,
#                               show.tip.label = TRUE,
#                               label.offset= 0,
#                             align.tip.label = TRUE)

## set working directory to path to https://github.com/McTavishLab/physcraper_example

#treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'

#treefile = 'pg_55_local_new/outputs_pg_55tree5864_ndhf/physcraper_pg_55tree5864_ndhf.tre'
#otufile = 'pg_55_local_new/outputs_pg_55tree5864_ndhf/otu_info_pg_55tree5864_ndhf.csv'


#treefile = 'ot_350/outputs_ot_350Tr53297/physcraper_ot_350Tr53297.tre'
#otufile = 'ot_350/outputs_ot_350Tr53297/otu_info_ot_350Tr53297.csv'
McTavishLab/physcraperex documentation built on April 10, 2021, 12:02 a.m.