####
#Functions To Plot Out The Species Phylogenetic Tree With Trait Distributions and Their Node and Branch Ancestral State Annotations
#Inferred using recpd_calc().
####
#plt_recpd() - requires the 'ggtree' package.
#' Plot feature RecPD ancestral lineage reconstruction onto a species
#' phylogenetic tree.
#'
#' By default trees are plotted in 'willow tree' format, where the evolutionary
#' distance between nodes/tips is mapped as branch width, instead of length.
#'
#' @param res A results data.frame generated by recpd_calc().
#' @param feat A user-provided array of at least length 2, containing named
#' features, or indices, corresponding to the feature column/rows of the
#' recpd_calc() results data.frame.
#' @param map Use finalized RecPD ancestral lineages ('anc_new') or preliminary
#' state mappings ('anc_old').
#' @param tree.format Plot tree as phylogram ('phylo') or in willow-tree
#' ('willow') format.
#' @param direction Plotting direction of the tree: either vertical ('v') or
#' horizontal ('h').
#'
#' @author Cedoljub Bundalovic-Torma
#'
#' @return A ggtree object.
#' @export
#'
#' @examples
#' library(ape)
#' library(dplyr)
#' library(ggplot2)
#'
#' ##Test case for a randomly generated tree (10 tips) and 10 randomly generated
#' ###feature presence/absence (1, 0) data.frame.
#'
#' #Generate a random phylogenetic tree:
#' n_tip <- 10
#' tree <- rtree(n_tip)
#'
#' #Generate 10 randomly distributed features (rows = features, columns =
#' #phylogenetic tree tips):
#' tab <- replicate(10,
#' sapply(10, function(x) sample(c(0,1), x, replace=TRUE)),
#' simplify='matrix') %>%
#' t() %>%
#' data.frame(check.names=FALSE) %>%
#' rename_with(function(x) tree$tip.label[as.numeric(x)])
#'
#'
#' #Perform ancestral lineage reconstruction with recpd_calc():
#' #res <- recpd_calc(tree, tab, calc=TRUE)
#'
#' #Plot the tree:
#' #plt_recpd(res, 1) +
#' # #Can also add ggplot2 elements, like a title:
#' # labs(title = res %>%
#' # filter(feature == 1) %>%
#' # paste(colnames(.), '=', ., collapse = '\n'))
#'
#' @importFrom rlang .data
#'
plt_recpd <- function(res,
feat,
map = c('anc_new', 'anc_old'),
tree.format = c('willow', 'phylo'),
direction = c('v', 'h')){
#Input arguments:
#res - a results data.frame generated by recpd_calc().
#feat - feature of interest for which the recpd ancestral reconstruction is to
#be plotted. Either the numeric index of character string of the feature found
#in the resulting data.frame produced by recpd_calc().
#Conditional statements to check/test if input arguments are proper:
try(if(class(res) != 'data.frame') stop('Results from recpd_calc() cannot be found.'))
#Check if selected features can be found in the recpd results data.frame:
stopifnot('Please provide a valid feature name/index.'=feat %in% names(attr(res, map[1])))
#The phylogenetic tree:
tree <- attr(res, 'tree')
#The tip/node/branch state mappings assigned by recpd_calc():
br_l <- attr(res, map[1])[[feat]]
#Assign labels for node/branch/tip states:
labs <- array(c('Absence', 'Loss', 'Gain', 'Split'),
dimnames=list(c('-1', '0', '1', '2')))
#And their corresponding colours:
col <- array(c('#C0C0C0','#F8766D', '#00B81F', '#00C0B8'),
dimnames=list(c('-1', '0', '1', '2')))
lab <- as.character(br_l$branch_state)
branchcol <- dplyr::tibble(data.frame(parent=tree$edge[,1],
node=tree$edge[,2]),
branch_col=as.numeric(br_l$branch_state))
nodecol <- dplyr::tibble(node=c(1:ape::Ntip(tree), as.numeric(names(br_l$node_state))),
node_col=as.numeric(c(br_l$tip_state, br_l$node_state)))
tr <- tidytree::as_tibble(tree)
tr <- dplyr::full_join(tr, branchcol, by=c('parent' = 'parent', 'node' = 'node'))
tr <- dplyr::full_join(tr, nodecol, by=c('node'='node'))
#Assign branch lengths to stroke width aesthetic:
#Group branch lengths into bins based on quantiles.
branch_bins <- cut(tr$branch.length,
unique(stats::quantile(tr$branch.length, na.rm=TRUE)),
include.lowest=TRUE)
b_size <- lapply(strsplit(as.character(levels(branch_bins)), ','), function(x) as.numeric(sub('[\\(\\)\\[\\]]', '', x, perl=TRUE)))
#Format the branch length bins to be more legible.
bin_level <- stringr::str_match_all(levels(branch_bins), '([\\(\\[])(.*),(.*)(])')
names(bin_level) <- levels(branch_bins)
relab <- unlist(lapply(bin_level, function(x) paste0(x[2], formatC(as.numeric(x[3]), format='e', digits=2), ', ', formatC(as.numeric(x[4]), format='e', digits=2), x[5])))
branch_bins <- factor(as.vector(relab[branch_bins]), levels=relab)
#Set the branch lengths to a unit size.
tr2 <- tr
tr2$branch.length.new <- 0.001
#Plot the tree with state mappings:
#Do not add branch.length size aesthetic mappings if tree.format == 'phylo':
if(tree.format[1] == 'phylo'){
gt <- ggtree::ggtree(tidytree::as.treedata(tr2),
ggtree::aes(color = factor(as.character(.data$branch_col), levels = c('0', '1', '-1', '2'))),
branch.length = ifelse(tree.format[1] == 'willow', 'branch.length.new', 'branch.length'),
layout= ifelse(tree.format[1] == 'willow', 'slanted', 'rectangular'))
}
else{
gt <- ggtree::ggtree(tidytree::as.treedata(tr2),
ggtree::aes(color = factor(as.character(.data$branch_col), levels = c('0', '1', '-1', '2')),
size = as.numeric(branch_bins)),
branch.length = ifelse(tree.format[1] == 'willow', 'branch.length.new', 'branch.length'),
layout= ifelse(tree.format[1] == 'willow', 'slanted', 'rectangular'))
}
#Vertically orient the tree:
if(direction[1] == 'v'){
gt <- gt + ggtree::layout_dendrogram()
}
#Add Tip and Node gain/loss/absence states:
gt <- gt +
ggtree::geom_tippoint(ggtree::aes(subset=(!is.na(.data$node_col) & grepl('\\w', .data$label)),
fill=factor(as.character(.data$node_col), levels=c('0', '1', '-1', '2'))),
size=1) +
#Internal node gain/loss/absence:
ggtree::geom_nodepoint(ggtree::aes(subset=!is.na(.data$node_col),
fill=factor(as.character(.data$node_col), levels=c('0', '1', '-1', '2'))),
shape=24, color='black', size=2) +
#Specify the aesthetics values:
ggplot2::scale_color_manual(name='Node and Edge',
labels=labs[sort(unique(as.character(tr$node_col)))],
values=col[sort(unique(as.character(tr$node_col)))],
na.translate=FALSE,
guide=ggtree::guide_legend(order=2)) +
ggplot2::scale_fill_manual(name='Node and Edge',
labels=labs[sort(unique(as.character(tr$node_col)))],
values=col[sort(unique(as.character(tr$node_col)))],
na.translate=FALSE,
guide=ggtree::guide_legend(order=2)) +
ggtree::theme(legend.position='right')
#If tree.format = 'willow', then add branch-width aesthetics:
if(tree.format[1] == 'willow'){
gt <- gt +
ggplot2::scale_size_continuous(name='Branch Length',
labels = levels(branch_bins),
breaks = sort(unique(as.numeric(branch_bins))),
range=c(0.25, 1),
guide=ggtree::guide_legend(order=1))
}
#Have to do the same for absent state branches...
#For older node mappings with split nodes:
#Old split color: '#00C0B8', also have to add na.value='#00C0B8' to ensure consistent color mapping for trait distributions
#where no splits occur, and set na.translate=TRUE.
#Add The Y-Axis Which Gives Branch Length From Root To Tip?
if(0){
gt <- gt +
ggtree::geom_treescale() +
ggplot2::coord_flip() +
ggplot2::scale_x_reverse() +
ggtree::theme(axis.line.y = ggplot2::element_line(color='black'),
axis.text.y = ggplot2::element_text(),
axis.ticks.y = ggplot2::element_line())
}
gt
}
#plt_recpd_ape() - The Equivalent Function As plt_tree_f(), but using plot.phylo from the ape package (base R graphics):
#' Plot feature RecPD ancestral lineage reconstruction onto a species phylogenetic tree.
#'
#' The version of \code{\link{plt_recpd}} which uses tree plotting functions from \code{\link[ape]{plot.phylo}}.
#'
#' @param res A results data.frame generated by recpd_calc().
#' @param feat A user-provided array of at least length 2, containing named features, or indices,
#' corresponding to the feature column/rows of the recpd_calc() results data.frame.
#' @param test Tree mappings to plot. An array: all ('a'), none ('-a'); or combinations of tips ('t'), nodes ('n'), or branches ('b').
#' @param branch.lengths logical. Use branch.lengths (TRUE), or plot in willow tree format (FALSE).
#' @param axis logical. Add a y-axis of branch length to root distance.
#' @param map Use finalized RecPD ancestral lineages ('anc_new') or preliminary state mappings ('anc_old')
#' @param direction Plot tree in vertical ('downwards') or horizontal ('rightwards') orientation.
#'
#'
#' @return A list returned by \code{\link[ape]{plot.phylo}}.
#' @export
#'
#' @author Cedoljub Bundalovic-Torma
#'
#' @seealso \code{\link{plt_recpd}} for the ggtree version.
#'
#' @examples
#' library(ape)
#' library(dplyr)
#' library(ggplot2)
#'
#' ##Test case for a randomly generated tree (10 tips) and 10 randomly generated
#' ##feature presence/absence (1, 0) data.frame.
#'
#' #Generate a random phylogenetic tree:
#' n_tip <- 10
#' tree <- rtree(n_tip)
#'
#' #Generate 10 randomly distributed features (rows = features, columns =
#' #phylogenetic tree tips):
#' tab <- replicate(10,
#' sapply(10, function(x) sample(c(0,1), x, replace=TRUE)),
#' simplify='matrix') %>%
#' t() %>%
#' data.frame(check.names=FALSE) %>%
#' rename_with(function(x) tree$tip.label[as.numeric(x)])
#'
#'
#' #Perform ancestral lineage reconstruction with recpd_calc():
#' #res <- recpd_calc(tree, tab, calc=TRUE)
#'
#' #plt_recpd_ape(res, 1)
#'
plt_recpd_ape <- function(res,
feat,
test = 'a',
branch.lengths = TRUE,
axis = FALSE,
map = c('anc_new', 'anc_old'),
direction = c('rightwards', 'downwards')){
#Input arguments:
#res - a results data.frame generated by recpd_calc().
#feat - feature of interest for which the recpd ancestral reconstruction is to
#be plotted. Either the numeric index of character string of the feature found
#in the resulting data.frame produced by recpd_calc().
#Test = Which mappings to add:
#-a - none
#t - tips
#n - internal nodes
#b - branches
#a - all
#branch.lengths = TRUE/FALSE - use tree branch lengths (TRUE), otherwise set
#all branch lengths to the same length. axis = TRUE/FALSE - add a y-axis
#representing tree branch lengths to the root.
#The phylogenetic tree:
tree <- attr(res, 'tree')
#The tip/node/branch state mappings assigned by recpd_calc():
br_l <- attr(res, map[1])[[feat]]
#Node colorings by presence/absence of descendants
n_col <- array(c('grey', 'red', 'green', 'blue'), dimnames=list(as.character(c(-1, 0, 1, 2))))
br_col <- array(c('grey', 'red', 'green'), dimnames=list(as.character(c(-1, 0 ,1))))
if('b' %in% test || 'a' %in% test){
brcol <- br_col[as.character(br_l$branch_state)]
}
else{
brcol <- rep('black', length(tree$edge.length))
}
#If branch.lengths == FALSE,
#Then set all of the tree edge lengths to the same size,
#and do not show the branch length axis.
if(branch.lengths == FALSE) tree$edge.length <- rep(1e-6, length(tree$edge.length))
plt <-ape::plot.phylo(tree,
use.edge.length = TRUE,
direction = direction[1],
show.tip.label=FALSE,
edge.color=brcol,
plot=TRUE)
# plot(tree,
# use.edge.length=plt$use.edge.length,
# direction=plt$direction,
# show.tip.label=plt$show.tip.label,
# edge.color=brcol)
#Plot y-axis: branch length to root? (axis == TRUE):
if(axis){
axis(2, at=c(seq(plt$y.lim[2], 0, -1), 0), labels=c(0:floor(plt$y.lim[2]), signif(plt$y.lim[2], 3)), las=2, line=1, cex.axis=0.8)
graphics::mtext('Branch Length to Root', 2, line=3.5, cex=0.8)
}
if('t' %in% test || 'a' %in% test){
if(ape::Ntip(tree) < 100) cex <- 1 else cex <- 0.5
ape::tiplabels('', 1:ape::Ntip(tree),
frame='none',
pch=16,
cex=cex,
#pch=ifelse(br_l[[prev_c]][[i]]$tip_col == 1, 16, 1),
col=n_col[as.character(br_l$tip_state)],
adj=c(0.5,0.5)
)
}
if('n' %in% test || 'a' %in% test){
ape::nodelabels('',
as.numeric(names(br_l$node_state)),
frame='none',
pch=24,
cex=1.25,
bg=n_col[as.character(br_l$node_state)],
col='black')
}
}
#plt_recpdcor() - Visualizes The Overlap of Ancestral States For A Selected Pair of Trait Lineages.
#* requires the 'ggtree' package.
#' Plot RecPD ancestral lineage reconstructions for a pair of features onto a species phylogenetic tree.
#'
#' Combines the RecPD lineage reconstructions of two features, incidating ancestral branches which
#' are shared, unique, or absent between them.
#'
#' @param res A results data.frame generated by recpd_calc().
#' @param feats A user-provided array of length 2, containing named features, or indices,
#' corresponding to the feature column/rows of the recpd_calc() results data.frame.
#' @param lab An array of labels for the presence/absence strips (features).
#' @param colnames.angle Angle of feature labels.
#' @param colnames.offset Offset of feature labels.
#' @param sep.col Separator line colour of feature presence/absence strips. Set to 'transparent' to remove.
#' @param offset Offset of feature presence/absence strips from the phylogenetic tree.
#' @param width Relative width of the feature presence/absence strips to the phylogenetic trree.
#' @param tree.format Plot tree as phylogram ('phylo') or in willow-tree
#' ('willow') format.
#' @param direction Plotting direction of the tree: either vertical ('v') or
#' horizontal ('h').
#'
#' @return A ggtree object.
#' @export
#'
#' @author Cedoljub Bundalovic-Torma
#'
#' @seealso \code{\link{plt_recpd}}
#' @examples
#' library(ape)
#' library(dplyr)
#' library(ggplot2)
#'
#' ##Test case for a randomly generated tree (10 tips) and 10 randomly generated
#' ##feature presence/absence (1, 0) data.frame.
#'
#' #Generate a random phylogenetic tree:
#' n_tip <- 10
#' tree <- rtree(n_tip)
#'
#' #Generate 10 randomly distributed features (rows = features, columns =
#' #phylogenetic tree tips):
#' tab <- replicate(10,
#' sapply(10, function(x) sample(c(0,1), x, replace=TRUE)),
#' simplify='matrix') %>%
#' t() %>%
#' data.frame(check.names=FALSE) %>%
#' rename_with(function(x) tree$tip.label[as.numeric(x)])
#'
#'
#' #Perform ancestral lineage reconstruction with recpd_calc():
#' #res <- recpd_calc(tree, tab, calc=TRUE)
#'
#' #Plot the combined lineages of features 1 and 2:
#' #plt_recpdcor(res, c(1,2))
#'
#' @importFrom rlang .data
#'
plt_recpdcor <- function(res,
feats,
lab=NULL,
colnames.angle=45,
colnames.offset=0,
sep.col='transparent',
offset = 0,
width=0.1,
tree.format = c('willow', 'phylo'),
direction = c('v', 'h')){
#Input arguments:
#res - a results data.frame generated by recpd_calc()
#feats - an array of length 2, which contains two named features, or indices
#corresponding to the feature presence/absence table provided to recpd_calc()
#lab - Labels for the presence/absence strips (features)
#Check that two features are provided:
stopifnot(length(feats) == 2)
#The phylogenetic tree:
tree <- attr(res, 'tree')
#Annotation state list (tip, node, branch) for one trait distribution:
br_x <- attr(res, 'anc_new')[[feats[1]]]
#Annotation state list (tip, node, branch) for a second trait distribution
br_y <- attr(res, 'anc_new')[[feats[2]]]
#Bind together tip locus presence absence state
pa <- cbind(as.character(ifelse(br_x$tip_state == 1, 1, 0)),
as.character(ifelse(br_y$tip_state == 1, 1, 0)))
rownames(pa) <- tree$tip.label
#Add column labels, if present:
if(length(lab) != 0) colnames(pa) <- lab
#rownames(pa) <- rowSums(pa)
#pa <- apply(pa, 1, function(x) ifelse(x != 0, 'Present', 'Absent'))
#bind together the edge mappings:
b <- rbind(ifelse(br_x$branch_state == 1, 1, 0),
ifelse(br_y$branch_state == 1, 1, 0))
b_t <- data.frame(cbind(node=tree$edge[,2], branch_col=apply(b, 2, sum)))
tr <- dplyr::full_join(tidytree::as_tibble(tree), b_t)
#Assign phylogenetic tree branch lengths into bins (by quantiles):
#br_bins <- cut(tr$branch.length, seq(floor(min(signif(tr$branch.length, 1), na.rm=TRUE)), ceiling(max(signif(tr$branch.length, 1), na.rm=TRUE)), 0.1), include.lowest=TRUE)
br_bins <- cut(tr$branch.length,
signif(unique(stats::quantile(tr$branch.length, na.rm=TRUE), 2)),
include.lowest=TRUE)
#Format the branch length bins to be more legible.
bin_level <- stringr::str_match_all(levels(br_bins), '([\\(\\[])(.*),(.*)(])')
names(bin_level) <- levels(br_bins)
relab <- unlist(lapply(bin_level, function(x) paste0(x[2], formatC(as.numeric(x[3]), format='e', digits=2), ', ', formatC(as.numeric(x[4]), format='e', digits=2), x[5])))
br_bins <- factor(as.vector(relab[br_bins]), levels=relab)
#Extract the lower bound of the branch length bins for size mapping.
br_size <- sapply(sapply(br_bins, function(x) as.numeric(unlist(strsplit(gsub('[\\(\\)\\[\\]]', '', as.character(x), perl=TRUE), split=',')))), function(x) x[1])
#Branch colouring if they are shared between the two locus distributions
branch_cols <- RColorBrewer::brewer.pal(3, 'Set1')
names(branch_cols) <- c('0', '1', '2')
branch_labs <- c('None', 'Unique', 'Shared')
names(branch_labs) <- c('0', '1', '2')
tr$branch_col <- factor(tr$branch_col, levels=c(0,1,2), exclude=NA)
#Normalize branch lengths:
tr <- tr %>% dplyr::mutate(branch.length.std=1e-6)
#Make the tree: Willow tree format - add branch widths as aesthetic size
#mapping, and use standardized branch lengths:
if(tree.format[1] == 'willow'){
gt <- ggtree::ggtree(tidytree::as.treedata(tr),
ggtree::aes(color=.data$branch_col, size=br_bins), #Note: exponentially transform branch lengths...?
branch.length = 'branch.length.std',
#lineend='square',
layout = 'slanted') +
ggplot2::scale_color_manual(values=branch_cols,
labels=branch_labs,
na.translate=FALSE) +
ggplot2::scale_size_manual(breaks=levels(br_bins),
values=(1/2)*1:length(levels(br_bins)),
labels=levels(br_bins)) +
ggplot2::labs(size='Branch Length',
color='Trait Branch Overlap') +
ggtree::theme(line=ggplot2::element_line(lineend='square'))
}
#Phylogram layout:
else{
gt <- ggtree::ggtree(tidytree::as.treedata(tr),
ggtree::aes(color=.data$branch_col),
branch.length = 'branch.length',
#lineend='square',
layout = 'rectangular') +
ggplot2::scale_color_manual(values=branch_cols,
labels=branch_labs,
na.translate=FALSE) +
ggplot2::labs(color='Trait Branch Overlap') +
ggtree::theme(line=ggplot2::element_line(lineend='square'))
}
#Vertically orient the tree:
if(direction[1] == 'v'){
gt <- gt + ggtree::layout_dendrogram()
}
#Append the presence/absence strips for the pair of traits:
##Plot column labels if they are supplied:
if(is.null(lab)) cn <- FALSE else cn <- TRUE
ggtree::gheatmap(gt, data.frame(pa),
width=width,
offset=offset,
color=sep.col,
colnames_position='top',
colnames=cn,
colnames_offset_y=colnames.offset,
colnames_angle=colnames.angle,
hjust=0) +
ggtree::scale_fill_manual(name='Tip State',
labels=c('Absent', 'Present'),
values=c('white', 'black')) +
ggplot2::ylim(0, NA) +
ggplot2::labs(
size='Branch Length',
fill='Trait Presence')
#Note: column names might not fit in the plot for large trees...
#Can be adjusted by changing colnames_offset_y and expanding plot limits.
}
###
#plt_faith() - tree plotting function for feature ancestry from Faith's PD.
#' Plot feature Faith's PD ancestral lineages onto a species phylogenetic tree.
#'
#' @param res A results data.frame generated by recpd_calc().
#' @param feat A user-provided array of length 2, containing named features, or
#' indices, corresponding to the feature column/rows of the recpd_calc()
#' results data.frame.
#' @param include.root logical. Include branches from the root of the tree
#' (TRUE) or the MRCA of the presence state tips (FALSE).
#'
#' @return A ggtree object.
#' @export
#'
#' @seealso \code{\link{plt_recpd}}
#' @examples
#'
plt_faith <- function(res,
feat,
include.root=TRUE){
#Input arguments:
#res - a results data.frame generated by recpd_calc().
#feat - feature of interest for which the recpd ancestral reconstruction is to
#be plotted. Either the numeric index of character string of the feature found
#in the resulting data.frame produced by recpd_calc().
#The phylogenetic tree:
tree <- attr(res, 'tree')
#The tip/node/branch state mappings assigned by recpd_calc():
node_state <- attr(res, 'anc_new')[[feat]]
#Get the tips presence/absence states:
t <- node_state$tip_state
#Corresponding normalized Faith's PD values:
f <- picante::pd(matrix(t, 1, ape::Ntip(tree), dimnames=list(NULL, tree$tip.label[as.numeric(names(t))])), tree, include.root)
f <- f$PD/sum(tree$edge.length)
#Faith branch length mappings - if include.root=TRUE for the PD calculation:
n <- unique(unlist(ape::nodepath(tree)[which(t == 1)]))
#If include.root=FALSE, then remove nodes between the root and the MRCA of the present tips.
if(include.root == FALSE){
m <- ape::getMRCA(tree, which(t == 1))
#If the MRCA is not the root
if(m != tree$edge[1,1]){
nr <- ape::nodepath(tree, n[1], m)
nr <- nr[-length(nr)]
n <- n[-which(n %in% nr)]
}
}
#Make a temporary node and edge mapping list, with branch lengths coded as those which are included for Faith's PD calculation.
ns_tmp <- list()
ns_tmp[[as.character(feat)]]$tip_state <- node_state$tip_state
ns_tmp[[as.character(feat)]]$branch_state <- ifelse(tree$edge[,1] %in% n & tree$edge[,2] %in% n, 1, 0)
ns_tmp[[as.character(feat)]]$node_state <- array(rep(NA, length(node_state$node_state)), dimnames=list(names(node_state$node_state)))
#return(ns_tmp)
#Now assign the new Faith's PD-based tip/node/branch state mappings, along
#with the tree as attributes of a new object and send it to plt_tree_f:
new_res <- structure(res[feat, ], anc_new = ns_tmp, tree=tree, class='data.frame')
gt <- plt_recpd(new_res, as.character(feat)) +
ggplot2::labs(title=paste0('Faith\'s PD = ', signif(f, 3)))
#labs(title=paste0('Faith\'s PD\n', 'With Root = ', signif(f[1,1], 3), '\n', 'Without Root = ', signif(f[2,1], 3)))
gt
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.