Nothing
## ----load-trees---------------------------------------------------------------
# Load required libraries
library("TreeTools", quietly = TRUE)
library("TreeDist")
library("TreeSearch")
# Load Sun et al. 2018 trees from TreeSearch package
treeFile <- system.file("datasets/Sun2018.nex", package = "TreeSearch")
# Load trees from your own file with
# treeFile <- "Path/to/my.file"
#
# To check and set working directory, use:
# getwd() # Should match location of data / tree files
# setwd(".") # Replace . with desired/directory to change
# Read all trees from file
trees <- read.nexus(treeFile)
# Down-sample to a maximum of 48 trees
trees <- trees[unique(as.integer(seq.int(1, length(trees), length.out = 48)))]
## ----exploring-consensus, fig.keep = "none"-----------------------------------
# Select an appropriate majority value
# When analysing parsimony results, this value should be 1.
majority <- 0.5
# Identify rogue taxa
exclude <- Rogue::QuickRogue(trees, p = majority)$taxon[-1]
exclude
# Select a rogue whose positions should be depicted
plottedRogue <- exclude[1]
# Remove other excluded taxa from tree
consTrees <- lapply(trees, DropTip, setdiff(exclude, plottedRogue))
# Colour tip labels according to their original 'instability' (Smith 2022a)
tipCols <- Rogue::ColByStability(trees)
# Our plotted rogue will not appear on the tree
tipCols <- tipCols[setdiff(consTrees[[1]]$tip.label, plottedRogue)]
# Set up plotting area
par(
mar = c(0, 0, 0, 0), # Zero margins
cex = 0.8 # Smaller font size
)
# Plot the reduced consensus tree, showing position of our plotted rogue
plotted <- RoguePlot(
trees = consTrees,
tip = plottedRogue,
p = majority,
edgeLength = 1,
tip.color = tipCols
)
# Calculate split concordance
concordance <- SplitFrequency(plotted$cons, trees) / length(trees)
# Annotate splits by concordance
LabelSplits(
tree = plotted$cons,
labels = signif(concordance, 3),
col = SupportColor(concordance),
frame = "none",
pos = 3
)
## ----tree-distances, fig.keep = "none"----------------------------------------
# Compute distances between pairs of trees
dists <- TreeDist::ClusteringInfoDistance(trees)
## ----quartet-distance, eval = FALSE-------------------------------------------
# # The quartet distance is a slower alternative:
# library("Quartet")
# dists <- as.dist(Quartet::QuartetDivergence(
# Quartet::ManyToManyQuartetAgreement(trees), similarity = FALSE)
# )
## ----compute-clusters, fig.keep = "none"--------------------------------------
# Try K-means clustering (Hartigan & Wong 1979):
kClusters <- lapply(
2:15, # Minimum 2 clusters; maximum 15
function (k) kmeans(dists, k)
)
kSils <- vapply(kClusters, function (kCluster) {
mean(cluster::silhouette(kCluster$cluster, dists)[, 3])
}, double(1))
bestK <- which.max(kSils)
kSil <- kSils[bestK] # Best silhouette coefficient
kCluster <- kClusters[[bestK]]$cluster # Best solution
# Try partitioning around medoids (Maechler et al. 2019):
pamClusters <- lapply(2:15, function (k) cluster::pam(dists, k = k))
pamSils <- vapply(pamClusters, function (pamCluster) {
mean(cluster::silhouette(pamCluster)[, 3])
}, double(1))
bestPam <- which.max(pamSils)
pamSil <- pamSils[bestPam] # Best silhouette coefficient
pamCluster <- pamClusters[[bestPam]]$cluster # Best solution
# Try hierarchical clustering with minimax linkage (Bien & Tibshirani 2011):
hTree <- protoclust::protoclust(dists)
hClusters <- lapply(2:15, function (k) cutree(hTree, k = k))
hSils <- vapply(hClusters, function (hCluster) {
mean(cluster::silhouette(hCluster, dists)[, 3])
}, double(1))
bestH <- which.max(hSils)
hSil <- hSils[bestH] # Best silhouette coefficient
hCluster <- hClusters[[bestH]] # Best solution
# Set threshold for recognizing meaningful clustering
# no support < 0.25 < weak < 0.5 < good < 0.7 < strong
threshold <- 0.5
# Compare silhouette coefficients of each method
bestMethodId <- which.max(c(threshold, pamSil, hSil, kSil))
bestCluster <- c("none", "pam", "hmm", "kmn")[bestMethodId]
# Best clustering:
c("Structure below threshold",
"Partitioning around medoids",
"Hierarchical-minimax",
"K-means")[bestMethodId]
# Best silhouette coefficient:
max(c(threshold, pamSil, hSil, kSil))
# Store the cluster to which each tree is optimally assigned:
clustering <- switch(bestCluster, pam = pamCluster, hmm = hCluster, kmn = kCluster, 1)
nClusters <- length(unique(clustering))
## ----map-space, fig.keep = "none"---------------------------------------------
nDim <- 3 # Number of dimensions to plot
# Generate first dimensions of tree space using PCoA
map <- cmdscale(dists, k = nDim)
# Prepare plot layout
nPanels <- nDim * (nDim - 1L) / 2L # Lower-left triangle
plotSeq <- matrix(0, nDim, nDim)
plotSeq[upper.tri(plotSeq)] <- seq_len(nPanels)
plotSeq[nDim - 1, 2] <- max(plotSeq) + 1L
layout(t(plotSeq[-nDim, -1]))
# Set plot margins
par(mar = rep(0.2, 4))
# Set up tree plotting symbols
treePch <- 0 # Square
clusterCol <- palette.colors(nClusters, "Alphabet", alpha = 0.8)
treeCols <- clusterCol[clustering]
for (i in 2:nDim) for (j in seq_len(i - 1)) {
# Set up blank plot
plot(
x = map[, j],
y = map[, i],
ann = FALSE, # No annotations
axes = FALSE, # No axes
frame.plot = TRUE, # Border around plot
type = "n", # Don't plot any points yet
asp = 1, # Fix aspect ratio to avoid distortion
xlim = range(map), # Constant X range for all dimensions
ylim = range(map) # Constant Y range for all dimensions
)
# Plot minimum spanning tree (Gower 1969) to depict distortion (Smith 2022)
mst <- MSTEdges(as.matrix(dists))
segments(
x0 = map[mst[, 1], j],
y0 = map[mst[, 1], i],
x1 = map[mst[, 2], j],
y1 = map[mst[, 2], i],
col = "#bbbbbb", # Light grey
lty = 1 # Solid lines
)
# Add points
points(
x = map[, j],
y = map[, i],
pch = treePch,
col = treeCols,
cex = 1.7, # Point size
lwd = 2 # Line width
)
# Mark clusters
for (clI in seq_len(nClusters)) {
inCluster <- clustering == clI
clusterX <- map[inCluster, j]
clusterY <- map[inCluster, i]
hull <- chull(clusterX, clusterY)
polygon(
x = clusterX[hull],
y = clusterY[hull],
lty = 1, # Solid line style
lwd = 2, # Wider line width
border = clusterCol[clI]
)
}
}
## ----cluster-consensus, fig.keep = "none"-------------------------------------
# Select proportion for majority rule consensus trees
# 1 is the appropriate value when parsimony trees are included
majority <- 1
# Plot each consensus tree in turn:
for (i in seq_len(nClusters)) {
clusterTrees <- trees[clustering == i]
# Identify rogue taxa for this cluster
clusterRogues <- Rogue::QuickRogue(clusterTrees, p = majority)$taxon[-1]
# Colour tree labels based on stability across cluster
tipCols <- Rogue::ColByStability(clusterTrees)
cons <- ConsensusWithout(
trees = clusterTrees,
tip = clusterRogues,
p = majority
)
# Root tree
cons <- RootTree(cons, trees[[1]]$tip.label[1])
# Set unit edge lengths for viewing
cons$edge.length <- rep.int(1, nrow(cons$edge))
# Rotate nodes, to display clades in order of size
cons <- SortTree(cons, order = names(trees[[1]]$tip.label))
# Plot tree
par(mar = c(0, 0, 0, 0), cex = 0.8) # Set up plotting area
plot(
cons,
edge.width = 2, # Widen lines
font = 3, # Italicize labels
cex = 0.83, # Shrink tip font size
edge.color = clusterCol[i], # Colour tree according to previous plot
tip.color = tipCols[cons$tip.label]
)
legend(
"bottomright",
paste("Cluster", i),
pch = 15, # Filled circle icon
pt.cex = 1.5, # Increase icon size
col = clusterCol[i],
bty = "n" # Don't plot legend in box
)
if (length(clusterRogues)) {
# Mark omitted taxa, if any rogues present
legend(
"topright",
clusterRogues,
col = tipCols[clusterRogues],
text.col = tipCols[clusterRogues],
pch = "-", # Mark as excluded
cex = 0.8, # Smaller font
text.font = 3, # Italicize
bty = "n" # Don't plot legend in box
)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.