## Methods for representing differentially abundant (across conditions) otus on a grid or on a tree.
## Also useful for general sets of interesting otus.
## Small multiple plot: each otu in set daOTUS has its own plot and each
## its (mean) relative abundance in different levels of condition variable
## is represented as a point. If replicates are available for a given level,
## SEM are represented as error bars.
daOTU_plot <- function(physeq, daOTUS, variable, title = NULL, plot = TRUE) {
## Args:
## - physeq: phyloseq class object
## - daOTUS: set of OTUS to plot
## - variable: x axis, used for plotting and aggregating samples
## - title: optional, plot title
##
## Returns,
## - p: invisible, ggplot object
##
## check variable and otu set
numberOfProvidedTaxa <- length(daOTUS)
daOTUS <- intersect(daOTUS, taxa_names(physeq))
stopifnot(variable %in% sample_variables(physeq), length(daOTUS) > 0)
if ( numberOfProvidedTaxa != length(daOTUS) ) {
cat(paste(length(daOTUS), "otus out of", numberOfProvidedTaxa,
"provided were found, use only those for plotting"), sep = "\n")
}
## Get count data set
tdf <- otu_table(physeq)
if (!taxa_are_rows(physeq)) { tdf <- t(tdf) }
## Normalize counts to relative abundances
tdf <- apply(tdf, 2, function(x) x/sum(x))
tdf <- tdf[daOTUS, ]
## sort OTUs by overall abundance
otuAbun <- sort(rowSums(tdf), TRUE)
tdf <- data.frame(otu = rownames(tdf), tdf[daOTUS, ])
## Get within group mean and sem
tdf <- melt(tdf)
colnames(tdf)[2] <- "Sample"
tdf <- merge(tdf,
data.frame(Sample = sample_names(physeq), Replicate = get_variable(physeq, variable)),
by.y = "Sample")
meandf <- ddply(tdf, .(otu, Replicate), summarize,
mean = mean(value), sem = sd(value)/sqrt(length(value)))
colnames(meandf)[ colnames(meandf) == "Replicate"] <- variable
## Get best taxonomic annotation
bestAnn <- function(x) {
x <- x[!is.na(x)]
return(ifelse(length(x), x[length(x)], ""))
}
taxdf <- as(tax_table(physeq), "matrix")
ann <- apply(taxdf, 1, bestAnn)
taxdf <- data.frame(otu = rownames(taxdf), taxdf, tax = ann)[daOTUS, ]
taxdf$otu2 <- paste(taxdf$otu, taxdf$tax)
mdf <- merge(meandf, taxdf, by.x = "otu")
## Sort results by overall abundance
mdf$otu2 <- factor(mdf$otu2, levels = taxdf[names(otuAbun), "otu2"])
mdf[[variable]] <- factor(mdf[[variable]], levels = levels(get_variable(physeq, variable)))
## Plot results
p <- ggplot(mdf, aes_string(x = variable, y = "mean", color = variable))
p <- p + geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem), width=.1) + geom_point()
## Add custom title if provided
if (is.null(title)) {
title.text <- paste("Differentially Abundant OTUs (", variable, ")", sep = "")
} else {
title.text <- title
}
p <- p + facet_wrap(~otu2, scales = "free_y") + ggtitle(title)
if (plot) {
plot(p)
}
return(invisible(p))
}
## subsidiary function to daOTU_tree_plot
## Add thermometer to figure
addThermo <- function(lastPP, meandf, adj.x, width.thermo = ncol(meandf)*8*strwidth(" ")) {
## Add Thermometer
BOTHlabels(XX = max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.3*8*strwidth(" ") + 0.5*width.thermo,
YY = lastPP$yy[1:lastPP$Ntip], thermo = meandf,
horiz = TRUE, height = 0.9, adj = c(0.5, 0.5), pch = NULL,
width = width.thermo, piecol = gg_color_hue(ncol(meandf)),
frame = "none", pie = NULL, color = "black")
return(invisible(width.thermo))
}
## subsidiary function to daOTU_tree_plot
## Add Histogram to figure
addHist <- function(lastPP, meandf, adj.x, width.thermo = ncol(meandf)*8*strwidth(" "), cex = CEX) {
## Add squares
nSquares <- ncol(meandf)
XX <- max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.3*8*strwidth(" ")
YY = lastPP$yy[1:lastPP$Ntip] - 1/2 * strheight(" ", cex = cex)
base.size <- width.thermo/nSquares ## baseline for histogram bin width
height.size <- 0.9
xl <- rep(XX + (seq(nSquares) - 1)*base.size, times = nrow(meandf)) ## left x positions
xr <- xl + base.size ## right x positions
yb <- rep(YY, each = ncol(meandf)) ## bottom y positions
yt <- as.vector(yb + t(height.size*meandf)) ## top y positions
rect(xl, yb, xr, yt, col = gg_color_hue(ncol(meandf)))
return(invisible(width.thermo))
}
## subsidiary function to daOTU_tree_plot
## Add Rectangles to figure
addRectangle <- function(lastPP, meandf, adj.x, width.thermo = ncol(meandf)*8*strwidth(" ")) {
## Add rectangles
nRectangles <- ncol(meandf)
XX <- max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.3*8*strwidth(" ")
YY = lastPP$yy[1:lastPP$Ntip]
base.size <- width.thermo/nRectangles ## baseline for rectangles side length
height.size <- 0.9
scaling <- sqrt(meandf)
xcenter <- rep(XX + (seq(nRectangles) - 1/2)*base.size, times = nrow(scaling))
ycenter <- rep(YY, each = ncol(scaling))
rectangles <- cbind(as.vector(base.size*t(scaling)), as.vector(height.size*t(scaling)))
symbols(xcenter, ycenter, rectangles = rectangles,
inches = FALSE, bg = gg_color_hue(ncol(scaling)), add = TRUE)
return(invisible(widh.thermo))
}
## subsidiary function to daOTU_tree_plot
## Add circles to figure
addCircle <- function(lastPP, meandf, adj.x, width.thermo = ncol(meandf)*8*strwidth(" ")) {
## Add circles
## Requires plotrix
nCircles <- ncol(meandf)
XX <- max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.3*8*strwidth(" ")
YY = lastPP$yy[1:lastPP$Ntip]
## baseline for circles radii
ratio <- diff(lastPP$y.lim)/diff(lastPP$x.lim)
base.size <- min(width.thermo/nCircles, 0.95/(ratio * sqrt(2)))
scaling <- sqrt(meandf)
xcenter <- rep(XX + (seq(nCircles) - 1/2)*base.size, times = nrow(scaling))
ycenter <- rep(YY, each = ncol(scaling))
## Area proportional to frequency, sqrt(2) used to maximize space occupation while
## avoiding overlap (worst case scenario is two neighbor modalities with frequencies of
## 0.5 and 0.5)
circles <- 0.5 * as.vector(base.size*t(scaling))
symbols(xcenter, ycenter, circles = circles,
inches = FALSE, bg = gg_color_hue(ncol(scaling)), add = TRUE)
width.thermo <- min(width.thermo, nCircles * base.size)
return(invisible(width.thermo))
}
## subsidiary function to daOTU_tree_plot
## Add squares to figure
addSquare <- function(lastPP, meandf, adj.x, width.thermo = ncol(meandf)*8*strwidth(" ")) {
## Add squares
## Requires plotrix
nSquares <- ncol(meandf)
XX <- max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.3*8*strwidth(" ")
YY = lastPP$yy[1:lastPP$Ntip]
## baseline for squares radii
ratio <- diff(lastPP$y.lim)/diff(lastPP$x.lim)
base.size <- min(width.thermo/nSquares, 0.95/ratio)
scaling <- sqrt(meandf)
xcenter <- rep(XX + (seq(nSquares) - 1/2)*base.size, times = nrow(scaling))
ycenter <- rep(YY, each = ncol(scaling))
squares <- as.vector(base.size*t(scaling)) / sqrt(2)
symbols(xcenter, ycenter, squares = squares,
inches = FALSE, bg = gg_color_hue(ncol(scaling)), add = TRUE)
width.thermo <- min(width.thermo, nSquares * base.size)
return(invisible(width.thermo))
}
## subsidiary function to daOTU_tree_plot
## Add abundance, automatically calibrate scale to roughly match 1:4
addAbundance <- function(otuAbun, lastPP, adj.x, width.thermo,
abundance.type = c("circle", "bar"),
add.ruler = FALSE) {
scaling <- log(otuAbun, base = 10)
minAbun <- floor(min(scaling))
maxAbun <- floor(max(scaling)) ## Only to construct scale and breaks
if (maxAbun==minAbun) {
scale <- minAbun
} else {
scale <- seq(minAbun, maxAbun, by = floor((maxAbun - minAbun)/4) + 1)
}
breaks <- 10^scale
maxAbun <- max(scaling) ## to properly scale all quantities
abundance.type <- match.arg(abundance.type)
if (abundance.type == "circle") {
## Maximum and minimum circle radius
ratio <- diff(lastPP$y.lim)/diff(lastPP$x.lim)
max.size <- 0.95 /(ratio * 2 * sqrt(2))
min.size <- max.size / 20
## Scaling radius
scaling <- sqrt( (scaling - minAbun) / (maxAbun - minAbun))
scaling <- scaling * (max.size - min.size) + min.size
## Construct scale
scale <- sqrt( (scale - minAbun) / (maxAbun - minAbun))
scale <- scale * (max.size - min.size) + min.size
## Centre coordinates
adj.radii <- max(scaling)
xcenter <- rep(max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.5*8*strwidth(" ") + width.thermo + adj.radii,
lastPP$Ntip)
ycenter <- lastPP$yy[1:lastPP$Ntip]
## Plot circles or segments
symbols(xcenter, ycenter, circles = scaling,
inches = FALSE, bg = "grey40", fg = "grey40", add = TRUE)
} else {
## Min and max widths
if (width.thermo >= 4*8*strwidth(" ")) {
max.size <- width.thermo / 2
} else {
max.size <- width.thermo
}
min.size <- max.size / 20
## Scaling widths
scaling <- (scaling - minAbun) / (maxAbun - minAbun) * (max.size - min.size) + min.size
## Construct scale
scale <- (scale - minAbun) / (maxAbun - minAbun) * (max.size - min.size) + min.size
XX <- rep(max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.5*8*strwidth(" ") + width.thermo,
lastPP$Ntip) + scaling / 2
YY <- lastPP$yy[1:lastPP$Ntip]
symbols(XX, YY, inches = FALSE, rectangles = cbind(scaling, 0.5),
add = TRUE, fg = "grey40", bg = "grey40")
if (add.ruler) {
offset <- max(lastPP$xx[1:lastPP$Ntip] + adj.x) + 0.5 * 8 * strwidth(" ") + width.thermo
segments(x0 = offset + scale,
y0 = rep(lastPP$y.lim[1] - 0.5, length(scale)),
x1 = offset + scale,
y1 = rep(lastPP$y.lim[2] + 0.5, length(scale)), col = "grey80")
text(x = offset + scale,
y = lastPP$y.lim[1] - 1,
adj = c(0, 0.5),
labels = prettyNum(breaks),
srt = -90, cex = 0.7)
}
}
return(invisible(list(scale = scale, type = abundance.type,
breaks = breaks)))
}
## subsidiary function to daOTU_tree_plot
## Add legend
addLegend <- function(meandf, variable, scale, type, breaks) {
leg <- legend(x = "topleft",
xjust = 0,
legend = colnames(meandf),
fill = gg_color_hue(ncol(meandf)),
border = NA,
title = variable,
bty = "n")
leg.abundance <- legend(x = leg$rect$left,
y = leg$rect$top - 1.1*leg$rect$h,
col = "grey40",
legend = prettyNum(breaks),
bty = "n",
title = "Relative abundance",
plot = TRUE)
if (type == "circle") {
xcenter <- leg.abundance$text$x - max(scale) - strwidth(" ")
ycenter <- leg.abundance$text$y
symbols(xcenter, ycenter, circles = scale,
inches = FALSE, bg = "grey40", fg = "grey40", add = TRUE)
} else {
xcenter <- leg.abundance$text$x - strwidth(" ") - scale / 2
ycenter <- leg.abundance$text$y
symbols(xcenter, ycenter, rectangles = cbind(scale, 0.5),
inches = FALSE, bg = "grey40", fg = "grey40", add = TRUE)
}
}
## Tree plot with thermometer: plot a tree with all otus in set daOTUS and add
## overall abundance and distribution between levels of variable at tips of tree.
## Samples are first agreggated by level of variable to compute relative abundance
## within this level and different levels are then agreggated (and eventually weighted)
## to determine which levels contribute most to the overall relative abundance. Overall
## abundance is also plotted (in logarithmic scale) next to the thermometer
daOTU_tree_plot <- function(physeq, daOTUS, variable, colorTip = FALSE,
type = c("thermo", "circle", "square", "rectangle", "hist"),
abundance.type = c("bar", "circle"),
add.ruler = TRUE,
weight = NULL, ...) {
## Args:
## - physeq: phyloseq class object
## - daOTUS: set of OTUS to plot
## - variable: x axis, used for plotting and aggregating samples
## - type: type of graphic to plot on leaves of tree
## - abundance.type: type of symbol to use for abundance plotting
## - add.ruler: logical, should a ruler be added to abundance (only when abundance.type is bar)
## - colorTip: logical, should tips be colored according to dominant condition
## (defaults to FALSE)
## - weight: optional, contribution of each factor level of variable to the whole
## - ...: Additional arguments passed to ‘plot.phylo’.
##
## Returns,
## - plot of tree, thermometer and relative abundance
##
## check variable and otu set
numberOfProvidedTaxa <- length(daOTUS)
daOTUS <- intersect(daOTUS, taxa_names(physeq))
stopifnot(variable %in% sample_variables(physeq),
length(daOTUS) > 0)
if ( numberOfProvidedTaxa != length(daOTUS) ) {
cat(paste(length(daOTUS), "otus out of", numberOfProvidedTaxa,
"provided were found, use only those for plotting"), sep = "\n")
}
## Get count data frame
tdf <- otu_table(physeq)
if (!taxa_are_rows(physeq)) { tdf <- t(tdf) }
## Normalize counts to relative abundances
tdf <- apply(tdf, 2, function(x) x/sum(x))
tdf <- data.frame(tdf[daOTUS, ])
## sort OTUs by overall abundance
otuAbun <- sort(rowMeans(tdf), TRUE)
## Get within group mean
meandf <- aggregate(t(tdf), by = list(variable = get_variable(physeq, variable)), FUN = mean)
groupLevels <- rownames(meandf) <- meandf[ , 1]
meandf <- meandf[ , -1]
## Normalize by otu (use weight if necessary)
if (is.null(weight)) {
meandf <- t(apply(meandf, 2, function(x) x/sum(x)))
} else {
if (!is.null(names(weight)) && all(groupLevels %in% names(weight))) {
w <- weight[groupLevels]
} else {
cat("No name or non-matching names for weight, using the first weights.",
paste("Group names are", groupLevels), sep = "\n")
w <- weight[length(groupLevels)]
}
meandf <- t(apply(meandf, 2, function(x) w*x/sum(w*x)))
}
## Get best taxonomic annotation
bestAnn <- function(x) {
x <- x[!is.na(x)]
return(ifelse(length(x), x[length(x)], ""))
}
taxdf <- as(tax_table(physeq), "matrix")
ann <- apply(taxdf, 1, bestAnn)
taxdf <- data.frame(otu = rownames(taxdf), tax = paste(rownames(taxdf), ann))
rownames(taxdf) <- taxdf$otu
## get pruned tree
phy <- phy_tree(physeq)
phy <- drop.tip(phy, tip = setdiff(phy$tip.label, daOTUS))
## Reorder taxdf, meandf, otuAbun to match phy tips and add trailing
## spaces at end of phy tip labels
taxdf <- taxdf[phy$tip.label, ]
meandf <- meandf[phy$tip.label, ]
otuAbun <- otuAbun[phy$tip.label]
trailing.space <- paste(rep(" ", 8*ncol(meandf)*ifelse(ncol(meandf) >= 4, 1.5, 2)), collapse = "")
phy$tip.label <- paste(taxdf$tax, trailing.space, sep = "")
## Plot results
plot.phylo(phy, tip.color = "white", ...)
args <- list(...)
CEX <- ifelse("cex" %in% names(args), args$cex, par("cex"))
if (colorTip) {
tipColor <- gg_color_hue(ncol(meandf))[ apply(meandf, 1, which.max) ]
tiplabels(paste("", taxdf$tax), frame = "n", adj = c(0, 0.5), col = tipColor, cex = CEX)
} else {
tiplabels(paste("", taxdf$tax), frame = "n", adj = c(0, 0.5), cex = CEX)
}
width.thermo <- strwidth(paste(rep(" ", 8*ncol(meandf)), collapse = ""))
adj.x <- strwidth(as.character(taxdf$tax), units = "user", cex = CEX)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
## Add Graphics at leaves of tree
type <- match.arg(type)
width.thermo <- switch(type,
thermo = addThermo(lastPP, meandf, adj.x, width.thermo),
hist = addHist(lastPP, meandf, adj.x, width.thermo, CEX),
square = addSquare(lastPP, meandf, adj.x, width.thermo),
rectangle = addRectangle(lastPP, meandf, adj.x, width.thermo),
circle = addCircle(lastPP, meandf, adj.x, width.thermo)
)
## Add overall average abundance
abundance.type <- match.arg(abundance.type)
abunScale <- addAbundance(otuAbun, lastPP, adj.x, width.thermo, abundance.type, add.ruler)
## Add segments
XX <- lastPP$xx[1:lastPP$Ntip] + strwidth(as.character(taxdf$tax), cex = CEX)
YY <- lastPP$yy[1:lastPP$Ntip]
segments(x0 = XX, x1 = max(XX),
y0 = YY, y1 = YY, col = "grey80", lty = 2)
## Add legend
addLegend(meandf, variable, abunScale[[1]], abunScale[[2]], abunScale[[3]])
}
#' ggplot-like implementation of plot.phylo from the ape package for \code{phyloseq} class
#' objects. Allows more representation that ggphylo and plot_tree. Returns a data frame representing
#' the tree that is useful for ggplot graphical function.
#'
#' @title ggtree
#' @param physeq Required. An instance of a \code{phyloseq} class object that has
#' tree slot.
#' @param group Optional. Either a single character string matching a variable
#' name in the corresponding sam_data of `physeq`, or a factor with
#' the same length as the number of taxa in `physeq`. If provided, the samples
#' are aggregated by factor levels before being plotted
#' @param freq Logical. Should counts be replaced by frequencies (TRUE) or kept as such (FALSE).
#' Defaults to TRUE. TRUE corresponding to weighting all samples equally while FALSE
#' weights them with their library size.
#' @param method Optional. Edge fattening method; either 'linear' or
#' 'logarithmic' (or an abbrevation of these). Defaults to "linear"
#' @param width.lim Optional. Numeric. Minimum and maximal edge after fattening.
#' Defaults to c(0.1, 4). Set to c(0, x) for true linear scaling.
#' @param base Optional. Numeric. Base used for logarithmic scaling. Defaults to 10.
#' @param edge.group Optional. Either a single character string matching a variable
#' name in the corresponding tax_table of `physeq`, or a factor with
#' the same length as the number of taxa in `physeq`. Defaults to "Phylum"
#' @param edge.method Optional. Ancestral group reconstruction method; either 'majority' or
#' 'ace' (or an abbrevation of these). Defaults to "majority"
#' @param ... Optional. Additional arguments passed on to plot.phylo to determine
#' the shape of the tree.
#' @note ggtree assumes that the tree is rooted.
#' @return A list of two data frames properly formatted for ggplot.
ggtree <- function(physeq, group = NULL, freq = TRUE, method = "linear", base = 10,
edge.group = "Phylum", edge.method = "majority",
width.lim = c(0.1, 4), ...) {
## Exception handling
if (is.null(phy_tree(physeq, FALSE))) {
stop("Object \"physeq\" does not have a tree slot")
} else {
tree <- phy_tree(physeq)
}
if (!is.rooted(tree)) {
stop("Tree must be rooted, consider using midpoint rooting")
}
## Potentially normalize counts to frequencies
if (freq) {
physeq <- transform_sample_counts(physeq, function(x) x /sum(x))
}
## Merge samples by grouping factor.
if (!is.null(group)) {
physeq <- merge_samples(physeq, group)
}
## Get edge group factor
if (!is.null(edge.group)) {
x <- color_edges(physeq, edge.group, edge.method)
edge.group <- factor(names(x$palette)[match(x$edge, x$palette)])
tip.group <- factor(names(x$palette)[match(x$tip, x$palette)])
}
## Fatten edges and compute average
fattened.edges <- fattenEdges(physeq, method, width.lim, base)$edge.raw.width
mean.fattened.edges <- colMeans(fattened.edges)
edge.names <- colnames(fattened.edges)
fattened.edges <- data.frame(edge.name = edge.names,
t(fattened.edges))
mean.fattened.edges <- data.frame(edge.name = edge.names,
mean.width = mean.fattened.edges)
fattened.edges <- melt(fattened.edges, id.vars = "edge.name")
colnames(fattened.edges)[2:3] <- c("Sample", "width")
fattened.edges <- merge(fattened.edges, mean.fattened.edges)
## Call phylo.plot
plot.phylo(phy_tree(physeq), plot = FALSE, ...)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
ggdata <- data.frame(edge.name = edge.names,
edge.x1 = lastPP$xx[ lastPP$edge[, 1]],
edge.x2 = lastPP$xx[ lastPP$edge[, 2]],
edge.y1 = lastPP$yy[ lastPP$edge[, 1]],
edge.y2 = lastPP$yy[ lastPP$edge[, 2]])
ggdata$edge <- lastPP$edge
if (!is.null(edge.group)) {
edge.group <- data.frame(edge.name = edge.names, edge.group = edge.group)
ggdata <- merge(ggdata, edge.group)
}
ggdata <- merge(ggdata, fattened.edges)
if (lastPP == "cladogram") {
p <- ggplot(ggdata) + geom_segment(aes(x = edge.x1, y = edge.y1, xend = edge.x2, yend = edge.y2))
}
if (lastPP == "phylogram") {
p <- ggplot(ggdata) + geom_segment(aes(x = edge.x1, y = edge.y1, xend = edge.x1, yend = edge.y2))
p <- p + geom_segment(aes(x = edge.x1, y = edge.y2, xend = edge.x2, yend = edge.y2))
}
if (lastPP == "radial") {
}
if (lastPP == "") {}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.