#' trenex2pdf
#'
#' Write nexus tree files to pdf.
#'
#'
#' @param treefile A character vector with the name of the nexus or newick file
#' @param studyid TreeBASE study id
trenex2pdf <- function(treefile,
treefilesdir = "../data-raw/trees/treebase/S1091/",
treepdfsdir = "../data-raw/trees/treebase/S1091-pdf/",
height = 7, ...){
# all treebase trees are in nexus format
tree <- ape::read.nexus(file= paste0(treefilesdir, "/", treefile))
outputfile <- paste0(treepdfsdir, gsub(".nex", ".pdf", treefile))
pdf(file = outputfile, height)
ape::plot.phylo(ape::ladderize(tree), cex = 0.5, ...)
graphics::mtext(paste("TreeBASE tree:", gsub(".nex", "", treefile)))
dev.off()
}
#' get_file_names
#'
#' Write nexus or newick files to pdf or png.
#'
#' @param studyid TreeBASE study id
#' @param file_dir Character vector of a directory with one or more tree files
get_file_names <- function(studyid, file_dir = "data-raw/trees/treebase"){
# dir_original <- getwd()
print(getwd())
treefiles <- paste0(studyid, "-treefiles.csv")
system(paste0("ls ", studyid, " > ", treefiles))
return(paste0(file_dir, "/", treefiles))
}
#' Cummulative sum of tip values for each node/branch down to the root
#'
#'
#' @param tree a phylo object
#' @param values named numeric character vector with names corresponding to tip labels
#' @author Luna L. Sanchez Reyes
#' @return a vector
#'
sum_tips <- function(tree, values) {
# tree <- ape::reorder.phylo(tree, "postorder") # no need to reorder the whole tree
res <- numeric(max(tree$edge))
res[1:ape::Ntip(tree)] <- values[match(names(values), tree$tip.label)]
for (i in ape::postorder(tree)) { # ape postorder doesn't include root
tmp <- tree$edge[i,1]
# print(i)
res[tmp] <- res[tmp] + res[tree$edge[i, 2]]
# print(res)
}
res
}
#' Assign tip values to a tree based on OTU information file.
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Emily Jane McTavish
#' @return a list of a phylo object and a vector of tip values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
get_tip_values <- function(treefile, otufile){
phytree = ape::read.tree(treefile)
tip_info = utils::read.csv(otufile, sep = '\t', row.names = 1, stringsAsFactors = FALSE)
inout <- as.vector(tip_info$X.physcraper.ingroup)
names(inout) <- rownames(tip_info)
outgroups = c()
for (lab in phytree[["tip.label"]]) {
status = inout[names(inout) == lab]
if (status == "False")
{outgroups = c(outgroups, lab)}
}
st <- as.vector(tip_info$X.physcraper.status)
names(st) <- rownames(tip_info)
spp <- as.vector(tip_info$X.ot.ottTaxonName)
names(spp) <- rownames(tip_info)
# doing a loop bc tip_info has more taxa than phytree ("not added" sequences)
newtips = c()
spp_labels = c()
for (lab in phytree[["tip.label"]]) {
status = st[names(st) == lab]
val = !grepl("original", status, fixed = TRUE)
val = 100*as.integer(val)
names(val) <- lab
newtips = c(newtips, val)
tax = spp[names(spp) == lab]
spp_labels = c(spp_labels, tax)
}
original <- spp_labels[newtips == 0]
return(list(phytree=phytree,
newtips=newtips,
spp_labels=spp_labels,
outgroups=outgroups,
original = original))
}
#' Plot a tree with branches colored according to availability of molecular data, method 1
#'
#' @param x A list from get_tip_values
#' @param tip_label A character vector. Can be one of "otu" or "taxon"
#' @param drop_outgroup Boolean
#' @param ladderize_tree Boolean
#' @param type From plot.phylo
#' @param color Color to use for new tip labels. Old tip labels are always colored black.
#' @param edge_length Boolean. Whether to plot edge length values to branches or not.
#' @param font From plot.phylo
#' @return a plot
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
plot_branches_method1 <- function(x,
tip_label = "otu",
drop_outgroup = TRUE,
ladderize_tree = TRUE,
type = "cladogram",
color = "red",
edge_length=TRUE,
font = 3,
...){
phytree <- x$phytree
if (!inherits(phytree, "phylo"))
stop("tree should be an object of class \"phylo\".")
newtips <- x$newtips
tip_label <- match.arg(tip_label, c("otu", "taxon"))
if("taxon" %in% tip_label){
# phytree[["tip.label"]] <- x$spp_labels
# names(newtips) <- phytree[["tip.label"]]
tip_labels_final <- x$spp_labels
} else {
tip_labels_final <- phytree[["tip.label"]]
}
if(ladderize_tree){
phytree <- ape::ladderize(phytree)
# ladderize does not change the order of tip labels, so if new tips have the
# same ordering as phytree$tip.label, then tip_colors will have the correct
# color assignation
}
tip_color <- ifelse(newtips == 0, "black", color)
if(drop_outgroup){
phytree <- ape::drop.tip(phytree, phytree$tip.label[match(x$outgroups, phytree$tip.label)])
tip_labels_final <- tip_labels_final[!names(newtips) %in% x$outgroups]
tip_color <- tip_color[!names(newtips) %in% x$outgroups]
newtips <- newtips[!names(newtips) %in% x$outgroups]
}
if (hasArg(cex)){
cex <- list(...)$cex
} else {
cex <- par("cex")
}
if (length(cex) == 1){
cex <- rep(cex, 2)
}
# when there are duplicated taxon names, if we use those as tip labels, plotBranchbyTrait
# will not work properly bc it matches names of newtips vector with phytree$tip.labels
# so we have to run it using OTU ids only, and then adding taxon names as labels if we want
phytools::plotBranchbyTrait(tree = phytree,
x = newtips,
mode = "tips",
legend=FALSE,
show.tip.label = TRUE,
palette=colorRampPalette(c("black", color)),
tip.color="#fcfcfc00",
label.offset=0.1,
cex=cex,
type=type,
...)
# the final tip labels are added here:
ape::tiplabels(text=tip_labels_final,
col=tip_color,
cex=cex,
frame="none",
adj = c(-0.1,0.5),
font = font)
# the bootstrap values are added here:
if(!is.null(phytree$node.label)){
ape::nodelabels(text=as.numeric(phytree$node.label)*100,
cex=cex*0.7,
frame="none",
adj = c(-0.1,0.5))
}
# the branch lengths are added here:
if(edge_length){
zz <- round(phytree$edge.length, digits=3)
w <- zz>0.01
ape::edgelabels(text=zz[w], edge=seq(length(zz))[w],
cex=cex*0.5, frame="none", col = "gray", adj = c(0.5, -0.4))
}
}
#
# phytools::plotBranchbyTrait(tree = phytree,
# x = newtips,
# mode = "tips",
# legend=FALSE,
# show.tip.label = TRUE,
# palette=colorRampPalette(c("black", color)),
# tip.color="blue",
# label.offset=0,
# cex=cex,
# edge.width = 1)
#' Plot a tree with ggtree branches and tips colored according to molecular data, method 2
#' It is giving an error with ggtree, make an issue or contact the author.
#'
#' @param x A list from get_tip_values
#' @param tip_label A character vector. Can be one of "otu" or "taxon"
#' @param drop_outgroup Boolean
#' @param ladderize_tree Boolean
#' @author Luna L. Sanchez Reyes
#' @return a plot
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
plot_branches_method2 <- function(x, tip_label = "otu", drop_outgroup = TRUE, ladderize_tree = TRUE, color = "red", ...){
tree <- x$phytree
tree2 <- ggtree::groupOTU(tree, .node=names(newtips[newtips==0]))
ggtree::ggtree(tree2, aes(color=group)) + ggtree::geom_tiplab()
# Error in get(x, envir = ns, inherits = FALSE) :
# object 'mutate.tbl_df' not found
}
#' Get a plot of a tree with tips and branches colored following a tip value.
#'
#' @inheritParams get_tip_values
#'
#' @return a plot
plot_branches <- function(treefile, otufile, ...){
x <- get_tip_values(treefile, otufile)
plot_branches_method1(x, ...)
}
#' summary of new taxa and tips added to a tree based on OTU information file.
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Luna L. Sanchez Reyes
#' @return a list of summary values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
summarize <- function(treefile, otufile){
x <- get_tip_values(treefile, otufile)
new_taxa <- x$spp_labels[x$newtips==100][!x$spp_labels[x$newtips==100]
%in% unique(x$spp_labels[x$newtips==0])]
length(unique(x$spp_labels)) == length(unique(new_taxa)) + length(unique(x$spp_labels[x$newtips==0]))
xx <- list(all_taxa= unique(x$spp_labels),
new_taxa=unique(new_taxa),
original_taxa=unique(x$spp_labels[x$newtips==0]),
new_tips= x$spp_labels[x$newtips==100],
original_tips=x$spp_labels[x$newtips==0])
message("This updated tree has ", length(x$spp_labels[x$newtips==0]), " original tips, representing ",
length(unique(x$spp_labels[x$newtips==0])), " unique OTT taxa.")
message("And ", length(x$spp_labels[x$newtips==100]), " new tips representing ",
length(unique(new_taxa)), " new OTT taxa added to the original tree.")
message("It is ", round(x$phytree$Nnode/ape::Ntip(x$phytree), digits = 2),
"% resolved.")
return(xx)
}
#' summary of new taxa on updated tree and any other tree, compared to the original tree
#'
#' @param treefile A character vector with the path to the tree you want to plot in newick format.
#' @param otufile A character vector with the path to the OTU information csv file generated by Physcraper
#' @author Luna L. Sanchez Reyes
#' @return a list of summary values
#' @examples
#' treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#' otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
compare <- function(treefile, otufile, tree){
#everything to space
x <- get_tip_values(treefile, otufile)
if (!inherits(tree, "phylo")){
stop("tree should be an object of class \"phylo\".")
}
tree$tip.label <- gsub("_", " ", tree$tip.label)
outersect <- function(x, y) {
sort(c(setdiff(x, y), setdiff(y, x)))
} # function from https://www.r-bloggers.com/outersect-the-opposite-of-rs-intersect-function/
all_new <- unique(c(tree$tip.label[!tree$tip.label %in% x$original],
x$spp_labels[!x$spp_labels %in% x$original]))
shared_new <- intersect(tree$tip.label[!tree$tip.label %in% x$original],
x$spp_labels[!x$spp_labels %in% x$original])
# test that none of the original are in shared_new
# all(!x$original %in% shared_new)
# new taxa that are only in treefile and not in tree
not_in_tree <- x$spp_labels[!x$spp_labels %in% tree$tip.label]
not_in_tree_new <- not_in_tree[!not_in_tree %in% x$original]
names(not_in_tree_new) <- NULL
# new taxa that are only in tree and not in treefile
# it will always exclude all original, bc they are all in x$spp_labels
not_in_x <- tree$tip.label[!tree$tip.label %in% x$spp_labels]
original_in_tree <- tree$tip.label[tree$tip.label %in% x$original]
# tests:
any(not_in_x %in% not_in_tree)
any(not_in_x %in% x$original)
any(not_in_tree_new %in% not_in_x)
any(not_in_tree_new %in% x$original)
any(duplicated(not_in_tree_new))
any(duplicated(not_in_x))
xx <- list(all_new = all_new,
shared_new= shared_new,
only_in_updated = unique(not_in_tree_new),
only_in_tree = not_in_x,
original_in_tree = unique(original_in_tree)
)
message("The updated tree (from treefile) and the tree to compare (from tree)
contribute jointly with ", length(xx$all_new),
" distinct taxa that are in not in the original tree.", "\n From this, ",
length(shared_new), " taxa are present in both trees.\n")
message("The updated tree contributes ", length(xx$only_in_updated),
" distinct taxa that are neither in original tree
nor in tree to compare.\n")
message("The tree to compare has ", sum(!tree$tip.label %in% xx$original),
" new tips and contributes ", length(unique(tree$tip.label[!tree$tip.label %in% xx$original])),
" distinct taxa \n from which ",
length(xx$only_in_tree),
" are neither in original tree nor in the updated tree.\n")
message("The tree to compare has ", length(xx$original_in_tree),
" taxa that are in original tree.")
message("And ", sum(!tree$tip.label %in% xx$original),
" tips different from original, representing ",
length(unique(xx$only_in_tree)), " new OTT taxa added to the original tree.")
message("It is ", round(tree$Nnode/ape::Ntip(tree), digits = 2),
"% resolved.")
return(xx)
}
# To get a color blind safe palette:
# help from https://stackoverflow.com/questions/57153428/r-plot-color-combinations-that-are-colorblind-accessible
# library(rcartocolor)
# display_carto_all(colorblind_friendly = TRUE)
# rcartocolor::display_carto_pal(3, "ag_Sunset")
# mycol <- rcartocolor::carto_pal(3, "ag_Sunset")
# scales::show_col(safe_colorblind_palette)
# phytools::plotBranchbyTrait(tree = phytree,
# x = newtips,
# mode = "tips",
# legend=FALSE,
# show.tip.label = FALSE,
# palette=colorRampPalette(c("black", "red")),
# type='phylogram',
# edge.width = 1,
# show.tip.label = TRUE,
# label.offset= 0,
# align.tip.label = TRUE)
## set working directory to path to https://github.com/McTavishLab/physcraper_example
#treefile = 'data/pg_2827_tree6577/run_pg_2827tree6577_run4/RAxML_bestTree.2020-07-31'
#otufile = 'data/pg_2827_tree6577/outputs_pg_2827tree6577/otu_info_pg_2827tree6577.csv'
#treefile = 'pg_55_local_new/outputs_pg_55tree5864_ndhf/physcraper_pg_55tree5864_ndhf.tre'
#otufile = 'pg_55_local_new/outputs_pg_55tree5864_ndhf/otu_info_pg_55tree5864_ndhf.csv'
#treefile = 'ot_350/outputs_ot_350Tr53297/physcraper_ot_350Tr53297.tre'
#otufile = 'ot_350/outputs_ot_350Tr53297/otu_info_ot_350Tr53297.csv'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.