#' @title Drawing a metatree (i.e. tree + data + metadata) to either the screen or a file.
#' @description This script is originally developed by Dr Kathryn Holt in the University of Melbourne.
#' The original script (plotTree.R) was acquired from https://github.com/katholt/plotTree/tree/master on 15 Sept 2015.
#' There are two important changes in my version:
#' (1) The function name was changed from plotTree to plotMetaTree to avoid the conflict with the plotTree function in the phytools package.
#' (2) Options "w" and "h" were renamed to "img.width" and "img.height" to avoid the problem:
#' Error in switch(units, in = res, cm = res/2.54, mm = res/25.4, px = 1) * :
#' non-numeric argument to binary operator
#'
#' @author Yu Wan (\email{wanyuac@@gmail.com})
#' @export
# Copyright 2018 Yu Wan
# Licensed under the Apache License, Version 2.0
# Editions: 16 Sept, 13 Oct 2015, 27 Feb 2016, 14 Nov 2016, 12 Apr 2017, 5 Aug 2018
plotMetaTree <- function(tree = NULL, heatmapData = NULL, barData = NULL,
infoFile = NULL, blockFile = NULL, snpFile = NULL,
encodedSNPMatrix = NULL, snp.pos = NULL, snp.pch = "|", bar.cex = 0.25,
ladderise = NULL, ancestral.reconstruction = FALSE,
heatmap.colours = rev(gray(seq(from = 0, to = 1, by = 0.1))),
heatmapBreaks = NULL, heatmapDecimalPlaces = 1, heatmap.blocks = NULL,
cluster = FALSE, cluster.method = "ward.D", dist.method = "euclidean",
treeWidth = 20, infoWidth = 10, dataWidth = 50,
barDataWidth = 10, blockPlotWidth = 10, edgeWidth = 1,
labelHeight = 10, mainHeight = 100, barDataCol = 2,
show.tipLabels = FALSE, tipLabelSize = 1, offset = 0,
tip.colour.cex = 0.5, tipColours = NULL,
colourNodesBy = NULL, infoCols = NULL, blwd = 5,
block.colour = "black", snp.colour = "red", gapChar = "?",
genome.size = 5E6, genome.offset = 0,
img.fmt = "png", img.name = "img", img.width = 640, img.height = 480, img.units = "px", img.res = 72,
legend = TRUE, legend.pos = "bottomleft", pie.cex = 0.5,
axis = FALSE, axisPos = 3, edge.color = "black",
infoCex = 0.8, colLabelCex = 0.8,
vlines.heatmap = NULL, vlines.heatmap.col = "gray75", vlines.heatmap.lty = 1,
hlines.heatmap = NULL, hlines.heatmap.col = "gray75", hlines.heatmap.lty = 1) {
require(ape)
# Prepare the tree, which may be either a tree file name or a variable name.
if (is.character(tree)) {
t <- read.tree(tree)
} else {
t <- tree
}
# Choose whether to ladderise the tree or not. ladderize is a function in the package ape.
if (is.null(ladderise)) {
tl <- t
}
else if (ladderise == "descending") {
tl <-ladderize(phy = t, right = TRUE) # specifies whether the smallest clade is on the right-hand side
}
else if (ladderise == "ascending") {
tl <- ladderize(phy = t, right = FALSE)
}
else {
print("Ladderisation option should be exactly 'ascending' or 'descending'.\nAny other command will raise this error.\nLeave this option empty to order branches as per input tree.")
}
# Get the ladderised tip order
tips <- tl$edge[, 2] # An edge is defined as a two-element vector from x to y. Here, "tips" actually comprises internal nodes.
# Extract the order of tips from the ladderised tree, excluding internal nodes
tip.order <- tips[tips <= length(tl$tip.label)] # retain only labels that are not more than the sample size - they represent samples (tips) on the tree
tip.label.order <- tl$tip.label[tip.order] # for ordering data. note that for tiplabel(), the order is the same as in t$tip (= tl$tip)
# Prepare the heat map
if (!is.null(heatmapData)) {
# read a heatmap data set and convert it into to data frame
x <- .readMatrix(heatmapData)
# match rows of the heatmap matrix with tips of the tree
# Variables x and the tree must have the same number of taxa.
y.ordered <- x[tip.label.order, ] # Match taxa on the heat map with those on the tree. y.ordered is a matrix ordered by rows.
# whether to perform clustering on columns or not?
if (cluster) {
if (cluster.method == "square" & ncol(y.ordered) == nrow(y.ordered)) {
# reorder columns to follow the row order in the matrix for the heat map
original.order <- 1 : nrow(x)
names(original.order) <- rownames(x)
reordered <- original.order[tip.label.order]
y.ordered <- y.ordered[, rev(as.numeric(reordered))]
} else {
h <- hclust(d = dist(x = t(na.omit(y.ordered)), method = dist.method), method = cluster.method)
y.ordered <- y.ordered[, h$order] # recorder columns (genes) of the heatmap matrix
# The order of rows (tips) remain the unchanged.
}
}
}
# Prepare the bar plot
if (!is.null(barData)) {
b <- .readMatrix(barData)
barData <- b[, 1]
names(barData) <- rownames(b)
}
# Prepare sample information
if (!is.null(infoFile)) {
info <- .readMatrix(infoFile)
info.ordered <- info[rev(tip.label.order),]
} else {
info.ordered <- NULL
}
# Colouring tips by sample information
# Specify a categorical trait in the info matrix to colour nodes and infer ancestral states
ancestral <- NULL
nodeColourSuccess <- NULL
if (!is.null(colourNodesBy) & !is.null(infoFile)) {
if (colourNodesBy %in% colnames(info.ordered)) {
nodeColourSuccess <- TRUE
loc1 <- info.ordered[, which(colnames(info.ordered) == colourNodesBy)]
# assign values
tipLabelSet <- character(length(loc1))
names(tipLabelSet) <- rownames(info.ordered)
groups <- table(loc1, exclude = "") # find out categories in the subject column of the info matrix
n <- length(groups) # number of categories
groupNames<-names(groups)
# set colours
colours <- ifelse(is.null(tipColours), rainbow(n), tipColours)
# assign colours based on values
for (i in 1 : n) {
g <- groupNames[i]
tipLabelSet[loc1 == g] <- colours[i]
}
tipLabelSet <- tipLabelSet[tl$tip]
# ancestral reconstruction
if (ancestral.reconstruction) {
ancestral <- ace(loc1, tl, type = "discrete") # a function in the ape package for ancestral character estimation
}
}
}
# Initialise an external device for plotting
if (img.fmt == "png") {
png(filename = paste(img.name, img.fmt, sep = "."), width = img.width, height = img.height,
units = img.units, res = img.res, type = "cairo-png")
} else {
img.fmt <- "pdf"
pdf(filename = paste(img.name, img.fmt, sep = "."), width = img.width, height = img.height,
units = img.units, res = img.res)
}
# Set up a plotting layout
doBlocks <- !is.null(blockFile) | !is.null(snpFile) | !is.null(encodedSNPMatrix)
l <- .getLayout(infoFile = infoFile, heatmapData = heatmapData, barData = barData, doBlocks = doBlocks,
treeWidth = treeWidth, infoWidth = infoWidth, dataWidth = dataWidth, edgeWidth = edgeWidth,
labelHeight = labelHeight, mainHeight = mainHeight, barDataWidth = barDataWidth,
blockPlotWidth = blockPlotWidth)
layout(l$m, widths = l$w, heights = l$h) # specifying complex plot arrangements for the current image device
print(l$m)
# Draw the tree
par(mar = c(0, 0, 0, 0)) # no margin around the tree
tlp <- plot.phylo(tl, no.margin = TRUE, show.tip.label = show.tipLabels, label.offset = offset,
edge.width = edgeWidth, edge.color = edge.color, xaxs = "i", yaxs = "i",
y.lim = c(0, length(tl$tip) + 0.5), cex = tipLabelSize) # The distance between two neighbouring taxa equals 1.
# colour tips by the selected categorical trait
if (!is.null(nodeColourSuccess)) {
tiplabels(col= tipLabelSet, pch = 16, cex = tip.colour.cex)
if (ancestral.reconstruction) {
nodelabels(pie = ancestral$lik.anc, cex = pie.cex, piecol = colours)
}
if (legend) {
legend(legend.pos, legend = groupNames, fill = colours)
}
}
if (axis) {
axisPhylo(axisPos)
}
# Plot sample information
if (!is.null(infoFile)) { # if info is provided
par(mar = rep(0, 4))
if (!is.null(infoCols)) {
infoColNumbers <- which(colnames(info.ordered) %in% infoCols)
} else {
infoColNumbers <- 1:ncol(info.ordered)
}
plot(NA, axes = FALSE, pch = "",
xlim = c(0, length(infoColNumbers) + 1.5), ylim = c(0.5, length(tl$tip) + 0.5),
xaxs = "i", yaxs = "i")
# plot all info columns
for (i in 1 : length(infoColNumbers)) {
j <- infoColNumbers[i]
text(x = rep(i + 1, nrow(info.ordered) + 1), y = c((nrow(info.ordered)) : 1),
info.ordered[, j], cex = infoCex)
}
}
# Draw the heat map
if (!is.null(heatmapData)) {
# set levels to of values to which colours will be designated
# Colours scale across the whole range of values in the y.ordered matrix, not across single columns.
if (is.null(heatmapBreaks)) {
heatmapBreaks <- seq(min(y.ordered, na.rm = TRUE), max(y.ordered, na.rm = TRUE),
length.out = length(heatmap.colours) + 1) # the minimum and maximum of the y.ordered matrix
} # heatmapBreaks specifies boundaries between colour segments.
# plot heatmap
par(mar = rep(0, 4), xpd = TRUE)
# nrow(t(y.ordered)) = ncol(y.ordered), which equals the number of variables for every taxon.
# Hence, both x and y specify midpoints between coloured cells. Namely, colour boxes are X-centred and Y -centred at
# middle points. Each box has a width and height of 1.
image(x = (1 : ncol(y.ordered)) - 0.5, y = (1 : nrow(y.ordered)) - 0.5, z = as.matrix(t(y.ordered)),
col = heatmap.colours, breaks = heatmapBreaks, axes = FALSE, xaxs = "i", yaxs = "i", xlab = "", ylab = "") # heatmapBreaks works for z values
print("Image has drawn")
# draw vertical lines between columns on the heat map
# Note that the left-most of the heatmap is v = 0 and the right-most of it is v = ncol(matrix).
if (!is.null(vlines.heatmap)) {
for (line.v in vlines.heatmap) {
abline(v = line.v, col = vlines.heatmap.col, lty = vlines.heatmap.lty)
}
}
# draw horizontal lines defining rows over heatmap
# Note that the bottom of the heatmap is h = 0 and the top of it is h = nrow(matrix).
if (!is.null(hlines.heatmap)) {
for (line.h in hlines.heatmap) {
abline(h = line.h, col = hlines.heatmap.col, lty = hlines.heatmap.lty)
}
}
# overlay blocks on the heatmap
if (!is.null(heatmap.blocks)) {
for (coords in heatmap.blocks) {
rect(xleft = coords[1], 0, coords[2], ncol(y.ordered), col = vlines.heatmap.col, border = NA)
}
}
# draw labels on the heat map
par(mar = rep(0, 4))
plot(NA, axes = FALSE, xaxs = "i", yaxs = "i", ylim = c(0, 2), xlim = c(0.5, ncol(y.ordered) + 0.5))
text(1 : ncol(y.ordered) - 0.5, rep(0, ncol(x)), colnames(y.ordered), srt = 90, cex = colLabelCex, pos = 4)
# Finally, add a bar to illustrate the colour scale of the heatmap at the top-left corner
if (img.height < 5000) {
par(mar = c(5, 0, 5, 2)) # otherwise, an error occurs: "figure margins too large"
} else {
print("Set margins")
par(mar = c(10, 0, 30, 2))
}
# Function as.matrix(heatmapBreaks) returns an n-by-1 matrix, where n is the number of breaks.
image(z = as.matrix(heatmapBreaks), breaks = heatmapBreaks, col = heatmap.colours,
yaxt = "n", axes = FALSE) # By default, boundaries range from 0 to 1 on the X axis.
axis(1, at = seq(0, 1, length.out = length(heatmapBreaks)),
labels = round(heatmapBreaks, digits = heatmapDecimalPlaces))
}
# bar plot
if (!is.null(barData)) {
par(mar = rep(0, 4))
barplot(barData[tip.label.order], horiz = TRUE, axes = FALSE, xaxs = "i",
yaxs = "i", xlab = "", ylab = "", ylim = c(0.25, length(barData) + 0.25),
xlim = c((-1) * max(barData, na.rm = TRUE) / 20, max(barData, na.rm = TRUE)),
col = barDataCol, border = 0, width = 0.5, space = 1, names.arg = NA)
# scale for barData plot
par(mar = c(2, 0, 0, 0))
plot(NA, yaxt = "n", xaxs = "i", yaxs = "i", xlab = "", ylab = "",
ylim = c(0,2), xlim = c((-1) * max(barData, na.rm = TRUE) / 20, max(barData, na.rm = TRUE)),
frame.plot = FALSE)
}
# draw SNP and recombination blocks
if (doBlocks) {
# According to xlim, the range of data points on the X axis is the genome size + 1.5 when
# genome.offset = 0. SNPs are plotted at their genomic positions and hence reveal their
# actual distribution on the reference genome.
par(mar = rep(0, 4))
plot(NA, axes = FALSE, pch = "",
xlim = c(genome.offset, genome.offset + genome.size + 1.5),
ylim = c(0.5, length(tl$tip) + 0.5), xaxs = "i", yaxs = "i") # create an empty plotting area
# Plot snps as vertical bars
# Expected format of the CSV file:
# First column: SNP positions (integers). This column will become row names of the data frame "snps".
# Excluding the 1st column, namely, contents of "snps".
# First row: strain names, where the first one is the reference strain.
# Everyone of rest of rows: allele (nucleotides or gaps) of a SNP
# Assuming L SNPs found in N strains, the dimension of "snps" should be (L+1)-by-N.
if (!is.null(snpFile)) {
encodedSNPMatrix <- NULL # The option snpFile overrides encodedSNPMatrix to avoid plotting SNP information twice.
# read SNP genotypes
snps <- read.csv(snpFile, header = FALSE , row.names = 1, stringsAsFactors = FALSE) # in case colnames start with numbers or contain dashes, which R does not like as column headers
snps.strainCols <- snps[1, ] # values of the first row, or column names in the CSV file: strain names
snps <- snps[-1, ] # rest of rows: a matrix of every SNP
ref.alleles <- snps[, 1] # alleles across L loci in the reference genome
for (strain in tip.label.order){
# print SNPs compared to reference (ancestral) alleles in column 1
query.alleles <- snps[, which(snps.strainCols == strain)] # alleles of L loci in the query genome
# identify SNPs which are neither all the same to the reference nor are gaps
s <- rownames(snps)[ref.alleles != query.alleles & query.alleles != gapChar & ref.alleles != gapChar]
y <- which(tip.label.order == strain)
if (length(s) > 0) { # when there are different alleles present in the query genome comparing to the reference genome
for (x in s) {
points(x, y, pch = "|", col = snp.colour, cex = bar.cex)
}
}
}
}
# Draw a SNP matrix of encoded genotypes (e.g., minor allele = 1 and major allele = 0)
# This feature only displays positions of SNPs, regardless how many alleles are present at each SNP (They will all be coloured the same).
# Structure of the encodedSNPMatrix: n(sample)-by-n(SNPs). It must be a matrix.
# rownames: sample names; colnames: SNP names.
# snp.pos is a named vector of integers (by SNP names) that must be supplied.
if (is.matrix(encodedSNPMatrix)) {
if (is.null(snp.pos)) {
print("Error: A named integer vector of SNP positions must be provided.")
dev.off()
stop()
} else {
strains <- rownames(encodedSNPMatrix)
for (snp in colnames(encodedSNPMatrix)) {
strains.2plot <- strains[encodedSNPMatrix[, snp] != 0] # strains having the current minor allele
pos <- snp.pos[[snp]]
for (s in strains.2plot) {
points(x = pos, y = which(tip.label.order == s), pch = snp.pch, col = snp.colour, cex = bar.cex) # "s" must present on the tree
}
}
}
}
# plot blocks
if (!is.null(blockFile)){
blocks <- read.delim(blockFile, header = FALSE)
for (i in 1 : nrow(blocks)) {
if (as.character(blocks[i, 1]) %in% tip.label.order) {
y <- which(tip.label.order == as.character(blocks[i, 1]))
x1 <- blocks[i, 2]
x2 <- blocks[i, 3]
lines(c(x1, x2), c(y, y), lwd = blwd, lend = 2, col = block.colour)
}
}
}
}
dev.off() # close the plotting device
# Return ordered info and ancestral reconstruction object
if (!is.null(heatmapData)){
mat <- as.matrix(y.ordered)
mat <- mat[nrow(mat) : 1, ] # reverse the order of rows to match the phylogeny as illustrated in the plot
}
else {
mat <- NULL
}
return(list(OTUs = tip.label.order, mat = mat, info = info.ordered, anc = ancestral))
}
.readMatrix <- function(m) {
# Read a CSV file when a matrix or data frame is not provided.
# The CSV file should have at least a column for row names, namely, taxon names.
# m: a name of a matrix/data frame, or a name of a CSV file
if (is.matrix(m)) {
x <- data.frame(m)
}
else if (is.data.frame(m)) {
x <- m
}
else {
x <- read.csv(m, row.names = 1) # take the first column as row names
}
return(x)
}
.getLayout <- function(infoFile, heatmapData, barData, doBlocks,
treeWidth = 10, infoWidth = 10, dataWidth = 30,
edgeWidth = 1, barDataWidth = 10, blockPlotWidth = 10,
labelHeight = 10, mainHeight = 100) {
# tree
w <- c(edgeWidth, treeWidth)
m <- cbind(c(0, 0, 0), c(0, 1, 0)) # first two columns, edge + tree
x <- 1
# info
if (!is.null(infoFile)) { # when info is provided
x <- x + 1
m <- cbind(m, c(0, x, 0))
w <- c(w, infoWidth)
}
# heatmap
if (!is.null(heatmapData)) {
x <- x + 1
m <- cbind(m, c(x + 1, x, 0)) # add heatmap & labels
x <- x + 2
m[1, 2] <- x # add heatmap scale above tree
w <- c(w, dataWidth)
}
# barplot
if (!is.null(barData)) {
x <- x + 1
m <- cbind(m, c(0, x, x + 1)) # barplot and scale bar
x <- x + 1
w <- c(w, barDataWidth)
}
if (doBlocks) {
x <- x + 1
m <- cbind(m, c(0, x, 0)) # recomb blocks
w <- c(w, blockPlotWidth)
}
# empty edge column
m <- cbind(m, c(0, 0, 0))
w <- c(w, edgeWidth)
if (!is.null(heatmapData) | !is.null(barData)) {
h <- c(labelHeight, mainHeight, labelHeight)
}
else {
h <- c(edgeWidth, mainHeight, edgeWidth)
}
return(list(m = as.matrix(m), w = w, h = h))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.