#' Convert from a phylo object to a set of source, value, and target for a Sankey diagram
#'
#' This will create information for a Sankey diagram. The output is a dataframe: one column for the name of the starting node, one column for the flow from that node to the third column, the name of the ending node.
#' If nodelabels exist, they will be used for node names, otherwise numbers will be used.
#'
#' @param phy The tree in ape phylo format
#' @param tip.weights Optionally a vector of the number of taxa each tip represents. If present, the final width matches this. It should have labels matching the taxon names
#' @param erase.labels If TRUE, erase any node labels; otherwise, they will be used for naming nodes if present
#' @param convert.numbers If TRUE, convert node numbers to labels if possible
#' @return Data.frame with columns for source, value, and target
#' @export
convert_phylo_to_sankey <- function(phy, tip.weights=NULL, erase.labels=FALSE, convert.numbers = TRUE) {
if(!inherits(phy, "phylo")) {
stop("The input must be a phylo class object (see documentation for the package ape)")
}
if(erase.labels) {
phy$node.label <- NULL
}
if(!is.null(phy$node.label)) {
actual.labels <- phy$node.label[which(nchar(phy$node.label)>0)]
if(any(duplicated(actual.labels))) {
warning(paste("There are some duplicate node.labels on your tree: ", paste(actual.labels[duplicated(actual.labels)], collapse=", "), " so we are dropping them all"))
phy$node.label <- NULL
}
}
if(is.null(tip.weights)) {
tip.weights <- rep(1, ape::Ntip(phy))
names(tip.weights) <- phy$tip.label
}
if(is.null(names(tip.weights))) {
warning("The tip.weights vector should have taxon names. We're assuming the names match those in the phy object $tip.label, but be careful")
names(tip.weights) <- phy$tip.label
}
result <- data.frame(source=phy$edge[,1], value=rep(0, length(phy$edge[,1])), target=phy$edge[,2])
for(node.row in sequence(length(phy$edge[,1]))) {
desc <- phangorn::Descendants(phy, result$target[node.row], type="tips")[[1]]
result$value[node.row] <- sum(tip.weights[phy$tip.label[desc]])
}
if(convert.numbers) {
result$source <- as.character(result$source)
result$target <- as.character(result$target)
for(node.id in sequence(max(phy$edge))) {
if(node.id <= ape::Ntip(phy)) { # we're at a tip
result$target[which(result$target==node.id)] <- phy$tip.label[node.id]
} else {
if(!is.null(phy$node.label)) {
local.label <- phy$node.label[node.id - ape::Ntip(phy)]
if(nchar(local.label)>0) {
result$target[which(result$target==node.id)] <- local.label
result$source[which(result$source==node.id)] <- local.label
}
}
}
}
}
return(result)
}
#' Convert value data.frame to input for http://sankeymatic.com
#'
#' Takes a data.frame from convert_phylo_to_sankey() or similar and creates a text block suitable for pasting into http://sankeymatic.com/build
#' You can then cat() the final object to see it.
#'
#' @param sankey The data.frame with source, value, and target
#' @return A string suitable for putting into sankeymatic to get a diagram
#' @export
convert_to_sankeymatic <- function(sankey) {
convert_one_row <- function(x) {
return(paste0(x["source"], " [", x["value"], "] ", x["target"]))
}
return(paste(apply(sankey, 1, convert_one_row), collapse="\n"))
}
#' Convert from a phylo object to a set of Source, Target, Value, Node Label for plotly sankey diagram
#' convert data to data frame for use in plotly sankey diagram
#'
#' @param phy The tree in ape phylo format
#' @param tip.weights Optionally a vector of the number of taxa each tip represents. If present, the final width matches this. It should have labels matching the taxon names
#' @param erase.labels If TRUE, erase any node labels; otherwise, they will be used for naming nodes if present
#' @return A list of links and nodes for sankey plot
#' @export
convert_phylo_to_plotly <- function(phy, tip.weights=NULL, erase.labels=FALSE) {
links.df <- convert_phylo_to_sankey(phy, tip.weights=tip.weights, erase.labels=FALSE, convert.numbers=FALSE)
links <- list(source=links.df$source, target=links.df$target, value=links.df$value)
node.labels <- rep("", ape::Ntip(phy) + ape::Nnode(phy))
node.labels[1:ape::Ntip(phy)] <- phy$tip.label #node.label produces replacement of zero length
if(!is.null(phy$node.label)) {
node.labels[(ape::Ntip(phy)+1) : (ape::Ntip(phy) + ape::Nnode(phy)) ] <- phy$node.label
}
#internal <- node.labels[(ape::Ntip(phy)+1) : (ape::Ntip(phy) + ape::Nnode(phy)) ] <- phy$node.label # error: replacement has length zero because no internal nodes have labels, need if else?
#nodes <- tips + internal
result_pl <- list(links=links,nodes=node.labels) # problem may be here, this is a list of lists but tips is shorter than the links...need all of the nodes but not all are labeled
return(result_pl)
}
#' Convert plotly object from convert_phylo_to_plotly to a sankey diagram
#'
#' @param phy The tree in ape phylo format
#' @param color A vector of colors for the nodes
#' @return A plotly santree
#' @export
convert_to_plotly_santree <- function(phy, color="") {
result_pl <- convert_phylo_to_plotly(phy)
my.plot <- plot_ly(
type = "sankey",
orientation = "h",
node = list(
label = result_pl$nodes, #maybe the problem is here - there are fewer labels than nodes
color = color,
pad = 15,
thickness = 20,
line = list(
color = "black",
width = 0.5
)
),
link = result_pl$links
)
return(my.plot)
}
#' Convert from a phylo object to an object containing Node, xpos, and edges (ID1, ID2, and Value)
#'
#' @param phy The tree in ape phylo format
#' @return A riverplot object
#' @export
convert_phylo_to_river <- function(phy, tip.weights=NULL, erase.labels=FALSE) {
plotly <- convert_phylo_to_plotly(phy, tip.weights=tip.weights, erase.labels=FALSE)
Nodes <- as.character(1:(Ntip(phy) + Nnode(phy)))
xpos <- rep(NA, ape::Ntip(phy) + ape::Nnode(phy))
for (node.index in sequence(ape::Ntip(phy) + ape::Nnode(phy))) {
xpos[node.index] <- phytools::nodeheight(phy, node.index)
}
#node_labels <- as.character(1:(tip.label(phy) + node.label(phy))) # this doesn't work.
ID1 <- as.character(plotly$links$source)
ID2 <- as.character(plotly$links$target)
Value <- plotly$links$value
Edges <- data.frame(N1=ID1,N2=ID2,Value=Value)
ape::plot.phylo(phy, plot=FALSE) # plotting to get yy
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv) # trick learned from phytools
river_phy <- riverplot::makeRiver(nodes = Nodes, edges = Edges, node_xpos = xpos, node_ypos = obj$yy)
return(river_phy)
}
#' Uses the Paleobiology Database to get diversity of a lineage through time
#' @param lineage Lineage name
#' @param rank What rank to count at; one of species, genera, genera_plus, families, orders
#' @param time_reso What time resolution to use; one of stage, epoch, period, era
#' @return A data.frame of diversity from PBDB, or NULL if the lineage is not found
get_diversity_for_lineage <- function(lineage, rank="genera", time_reso="stage") {
diversities <- read.csv(url(paste0("https://paleobiodb.org/data1.2/occs/diversity.txt?base_name=",utils::URLencode(lineage) ,"&recent&count=", utils::URLencode(rank), "&time_reso=", utils::URLencode(time_reso))), stringsAsFactors=FALSE)
if(ncol(diversities) < 5) {
diversities <- NULL
}
return(diversities)
}
#' Combine diversity of lineages
#'
#' When moving down a tree, merge the diversity of lineages (i.e., common ancestor of acrogymnosperms and angiosperms).
#' @param diversity list List of diversity data.frames
#' @return A data.frame of diversity from PBDB, or NULL if no lineage is not found
merge_diversities_for_lineages <- function(diversity_list) {
diversities <- diversity_list[[1]]
if(length(diversity_list)>1) {
for (additional_lineage_index in sequence(length(diversity_list)-1)) {
new.diversities <- diversity_list[[additional_lineage_index + 1]]
diversities <- merge(diversities,new.diversities, by=c('interval_name', 'interval_no', "max_ma", "min_ma"))
columns <- c("X_Ft","X_bL", "X_FL", "X_bt", "sampled_in_bin", "implied_in_bin", "n_occs")
for(i in seq_along(columns)) {
diversities[,columns[i]] <- diversities[,paste0(columns[i], ".x")] + diversities[,paste0(columns[i], ".y")]
}
diversities <- diversities[,-grep("\\.x", colnames(diversities))]
diversities <- diversities[,-grep("\\.y", colnames(diversities))]
}
}
return(diversities)
}
#' Get spindle diagram
#'
#' A spindle diagram shows the diversity at different time intervals.
#'
#' The Paleobiology Database has information about the number of taxa originating in an interval and persisting after it (X_Ft), the number of taxa originating before the interval and going extinct within it (X_bL), the number of taxa found only within an interval (X_FL), and the number of taxa that originate before an interval and persist after but which are not sampled in an interval (X_bt) [the odd capitalization is in PBDB].
#'
#' We want to use this information for a spindle diagram showing diversity through time. X_bL+X_bt are the number of taxa alive at the interval's start; X_Ft+X_bt are the number of taxa alive at the interval's end. The number of taxa alive at any point within the interval could be as low as X_bt or as high as X_bt+X_FL+max(X_Ft,X_bL). As a compromise, at the midpoint of an interval the width is X_bt+X_FL
#'
#' rank is what rank to count: number of species, genera, families, or orders (there is also a genera_plus option that counts subgenera as genera). While in the study of modern diversity species is the rank most commonly used, in paleontology genera or higher are more usual.
#'
#' time_reso is what resolution to use: stage is finest, era is coarsest
#'
#' This function assumes all taxon names have been resolved to match PBDB's names and that the tree is a chronogram
#' @param phy a chronogram of the groups you want
#' @inheritParams get_diversity_for_lineage
#' @param youngest_taxon How many years ago the youngest tip lives
#' @return a riverplot object
#' @export
convert_phylo_to_spindle_river <- function(phy, rank="genera", time_reso="stage", youngest_taxon=0) {
phy <- ape::reorder.phylo(phy, "postorder")
ape::plot.phylo(phy, plot=FALSE, use.edge.length=TRUE) # plotting to get yy and xx
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv) # trick learned from phytools
#lineage_names <- list()
lineage_diversities <- list()
lineage_heights <- phytools::nodeHeights(phy)
lineage_ranges <- youngest_taxon + max(lineage_heights) - lineage_heights # get depths from present
nodes.df <- data.frame()
diversities_list <- lapply(phy$tip.label, merge_diversities_for_lineages) # in node number order
for (edge_index in sequence(nrow(phy$edge))) {
my_descendants <- phytools::getDescendants(phy, phy$edge[edge_index,2])
tip_descendants <- my_descendants[which(my_descendants<=ape::Ntip(phy))]
if(phy$edge[,2]>ape::Ntip(phy)) {
diversities_list[[phy$edge[,2]]] <- merge_diversities_for_lineages[tip_descendants]
}
}
# Now traverse the tree, storing heights of each segment and diversity at segment end and midpoint
# Now convert the x dimensions from time to x positions if needed
xx_range <- range(obj$xx)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.