# These code and documentation in this file is taken from phyloseq
# (https://github.com/joey711/phyloseq) with minimal mofication to work within
# the speedyseq package. These functions are included so that users can take
# advantage of speedyseq's faster `psmelt()` function when using phyloseq's
# plotting functions.
#' A flexible, informative barplot phyloseq data
#'
#' There are many useful examples of phyloseq barplot graphics in the
#' \href{http://joey711.github.io/phyloseq/plot_bar-examples}{phyloseq online
#' tutorials}. This function wraps \code{ggplot2} plotting, and returns a
#' \code{ggplot2} graphic object that can be saved or further modified with
#' additional layers, options, etc. The main purpose of this function is to
#' quickly and easily create informative summary graphics of the differences in
#' taxa abundance between samples in an experiment.
#'
#' @usage plot_bar(physeq, x="Sample", y="Abundance", fill=NULL,
#' title=NULL, facet_grid=NULL)
#'
#' @param physeq (Required). An \code{\link{otu_table-class}} or
#' \code{\link{phyloseq-class}}.
#'
#' @param x (Optional). Optional, but recommended, especially if your data
#' is comprised of many samples. A character string.
#' The variable in the melted-data that should be mapped to the x-axis.
#' See \code{\link{psmelt}}, \code{\link{melt}},
#' and \code{\link{ggplot}} for more details.
#'
#' @param y (Optional). A character string.
#' The variable in the melted-data that should be mapped to the y-axis.
#' Typically this will be \code{"Abundance"}, in order to
#' quantitatively display the abundance values for each OTU/group.
#' However, alternative variables could be used instead,
#' producing a very different, though possibly still informative, plot.
#' See \code{\link{psmelt}}, \code{\link{melt}},
#' and \code{\link{ggplot}} for more details.
#'
#' @param fill (Optional). A character string. Indicates which sample variable
#' should be used to map to the fill color of the bars.
#' The default is \code{NULL}, resulting in a gray fill for all bar segments.
#'
#' @param facet_grid (Optional). A formula object.
#' It should describe the faceting you want in exactly the same way as for
#' \code{\link[ggplot2]{facet_grid}},
#' and is ulitmately provided to \code{\link{ggplot}}2 graphics.
#' The default is: \code{NULL}, resulting in no faceting.
#'
#' @param title (Optional). Default \code{NULL}. Character string.
#' The main title for the graphic.
#'
#' @return A \code{\link[ggplot2]{ggplot}}2 graphic object -- rendered in the
#' graphical device as the default
#' \code{\link[base]{print}}/\code{\link[methods]{show}} method.
#'
#' @seealso
#' \href{http://joey711.github.io/phyloseq/plot_bar-examples}{phyloseq online
#' tutorials}.
#'
#' \code{\link{psmelt}}
#'
#' \code{\link{ggplot}}
#'
#' \code{\link{qplot}}
#'
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes_string
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 ggtitle
#'
#' @export
#'
#' @examples
#' data("GlobalPatterns")
#' gp.ch = subset_taxa(GlobalPatterns, Phylum == "Chlamydiae")
#' plot_bar(gp.ch)
#' plot_bar(gp.ch, fill="Genus")
#' plot_bar(gp.ch, x="SampleType", fill="Genus")
#' plot_bar(gp.ch, "SampleType", fill="Genus", facet_grid=~Family)
#' # See additional examples in the plot_bar online tutorial. Link above.
plot_bar = function(physeq, x="Sample", y="Abundance", fill=NULL,
title=NULL, facet_grid=NULL){
# Start by melting the data in the "standard" way using psmelt.
mdf = psmelt(physeq, as = "data.frame")
# Build the plot data structure
p = ggplot(mdf, aes_string(x=x, y=y, fill=fill))
# Add the bar geometric object. Creates a basic graphic. Basis for the rest.
# Test weather additional
p = p + geom_bar(stat="identity", position="stack", color="black")
# By default, rotate the x-axis labels (they might be long)
p = p + theme(axis.text.x=element_text(angle=-90, hjust=0))
# Add faceting, if given
if( !is.null(facet_grid) ){
p <- p + facet_grid(facet_grid)
}
# Optionally add a title to the plot
if( !is.null(title) ){
p <- p + ggtitle(title)
}
return(p)
}
#' Plot a phylogenetic tree with optional annotations
#'
#' There are many useful examples of phyloseq tree graphics in the
#' \href{http://joey711.github.io/phyloseq/plot_tree-examples}{phyloseq online
#' tutorials}. This function is intended to facilitate easy graphical
#' investigation of the phylogenetic tree, as well as sample data. Note that
#' for phylogenetic sequencing of samples with large richness, some of the
#' options in this function will be prohibitively slow to render, or too dense
#' to be interpretable. A rough ``rule of thumb'' is to use subsets of data
#' with not many more than 200 OTUs per plot, sometimes less depending on the
#' complexity of the additional annotations being mapped to the tree. It is
#' usually possible to create an unreadable, uninterpretable tree with modern
#' datasets. However, the goal should be toward parameter settings and data
#' subsets that convey (honestly, accurately) some biologically relevant
#' feature of the data. One of the goals of the \code{\link{phyloseq-package}}
#' is to make the determination of these features/settings as easy as possible.
#'
#' This function received an early development contribution from the work of
#' Gregory Jordan via \href{https://github.com/gjuggler/ggphylo}{the ggphylo
#' package}. \code{plot_tree} has since been re-written. For details see
#' \code{\link{tree_layout}}.
#'
#' @param physeq (Required). The data about which you want to
#' plot and annotate a phylogenetic tree, in the form of a single instance of
#' the \code{\link{phyloseq-class}}, containing at minimum a phylogenetic
#' tree component (try \code{\link{phy_tree}}). One of the major advantages
#' of this function over basic tree-plotting utilities in the
#' \code{\link{ape}}-package is the ability to easily annotate the tree with
#' sample variables and taxonomic information. For these uses, the
#' \code{physeq} argument should also have a \code{\link{sample_data}} and/or
#' \code{\link{tax_table}} component(s).
#'
#' @param method (Optional). Character string. Default \code{"sampledodge"}.
#' The name of the annotation method to use. This will be expanded in future
#' versions. Currently only \code{"sampledodge"} and \code{"treeonly"} are
#' supported. The \code{"sampledodge"} option results in points drawn next
#' to leaves if individuals from that taxa were observed, and a separate
#' point is drawn for each sample.
#'
#' @param nodelabf (Optional). A function. Default \code{NULL}.
#' If \code{NULL}, the default, a function will be selected for you based
#' upon whether or not there are node labels in \code{phy_tree(physeq)}. For
#' convenience, the phyloseq package includes two generator functions for
#' adding arbitrary node labels (can be any character string),
#' \code{\link{nodeplotdefault}}; as well as for adding bootstrap values in a
#' certain range, \code{\link{nodeplotboot}}. To not have any node labels in
#' the graphic, set this argument to \code{\link{nodeplotblank}}.
#'
#' @param color (Optional). Character string. Default \code{NULL}.
#' The name of the variable in \code{physeq} to map to point color.
#' Supported options here also include the reserved special variables of
#' \code{\link{psmelt}}.
#'
#' @param shape (Optional). Character string. Default \code{NULL}.
#' The name of the variable in \code{physeq} to map to point shape.
#' Supported options here also include the reserved special variables of
#' \code{\link{psmelt}}.
#'
#' @param size (Optional). Character string. Default \code{NULL}. The name of
#' the variable in \code{physeq} to map to point size. A special argument
#' \code{"abundance"} is reserved here and scales point size using abundance
#' in each sample on a log scale. Supported options here also include the
#' reserved special variables of \code{\link{psmelt}}.
#'
#' @param min.abundance (Optional). Numeric. The minimum number of individuals
#' required to label a point with the precise number. Default is \code{Inf},
#' meaning that no points will have their abundance labeled. If a vector,
#' only the first element is used.
#'
#' @param label.tips (Optional). Character string. Default is \code{NULL},
#' indicating that no tip labels will be printed. If \code{"taxa_names"},
#' then the name of the taxa will be added to the tree; either next to the
#' leaves, or next to the set of points that label the leaves. Alternatively,
#' if this is one of the rank names (from \code{rank_names(physeq)}), then
#' the identity (if any) for that particular taxonomic rank is printed
#' instead.
#'
#' @param text.size (Optional). Numeric. Should be positive. The
#' size parameter used to control the text size of taxa labels. Default is
#' \code{NULL}. If left \code{NULL}, this function will automatically
#' calculate a (hopefully) optimal text size given the vertical constraints
#' posed by the tree itself. This argument is included in case the
#' automatically-calculated size is wrong, and you want to change it. Note
#' that this parameter is only meaningful if \code{label.tips} is not
#' \code{NULL}.
#'
#' @param sizebase (Optional). Numeric. Should be positive.
#' The base of the logarithm used to scale point sizes to graphically
#' represent abundance of species in a given sample. Default is 5.
#'
#' @param base.spacing (Optional). Numeric. Default is \code{0.02}.
#' Should be positive. This defines the base-spacing between points at each
#' tip/leaf in the the tree. The larger this value, the larger the spacing
#' between points. This is useful if you have problems with overlapping
#' large points and/or text indicating abundance, for example. Similarly, if
#' you don't have this problem and want tighter point-spacing, you can shrink
#' this value.
#'
#' @param ladderize (Optional). Boolean or character string (either
#' \code{FALSE}, \code{TRUE}, or \code{"left"}). Default is \code{FALSE}.
#' This parameter specifies whether or not to \code{\link[ape]{ladderize}}
#' the tree (i.e., reorder nodes according to the depth of their enclosed
#' subtrees) prior to plotting. This tends to make trees more aesthetically
#' pleasing and legible in a graphical display. When \code{TRUE} or
#' \code{"right"}, ``right'' ladderization is used. When set to
#' \code{FALSE}, no ladderization is applied. When set to \code{"left"}, the
#' reverse direction (``left'' ladderization) is applied. This argument is
#' passed on to \code{\link{tree_layout}}.
#'
#' @param plot.margin (Optional). Numeric. Default is \code{0.2}. Should be
#' positive. This defines how much right-hand padding to add to the tree
#' plot, which can be required to not truncate tip labels. The margin value
#' is specified as a fraction of the overall tree width which is added to the
#' right side of the plot area. So a value of \code{0.2} adds twenty percent
#' extra space to the right-hand side of the plot.
#'
#' @param title (Optional). Default \code{NULL}. Character string. The main
#' title for the graphic.
#'
#' @param treetheme (Optional). A custom \code{\link{ggplot}}2
#' \code{\link[ggplot2]{theme}} layer to use for the tree. Supplants any
#' default theme layers used within the function. A value of \code{NULL}
#' uses a default, minimal-annotations theme. If anything other than a them
#' or \code{NULL}, the current global ggplot2 theme will result.
#'
#' @param justify (Optional). A character string indicating the type of
#' justification to use on dodged points and tip labels. A value of
#' \code{"jagged"}, the default, results in these tip-mapped elements being
#' spaced as close to the tips as possible without gaps. Currently, any
#' other value for \code{justify} results in a left-justified arrangement of
#' both labels and points.
#'
#' @return A \code{\link{ggplot}}2 plot.
#'
#' @seealso
#' \code{\link{plot.phylo}}
#'
#' There are many useful examples of phyloseq tree graphics in the
#' \href{http://joey711.github.io/phyloseq/plot_tree-examples}{phyloseq online
#' tutorials}.
#'
#' @importFrom scales log_trans
#'
#' @importFrom data.table data.table
#' @importFrom data.table setkey
#' @importFrom data.table setkeyv
#'
#' @importFrom ggplot2 geom_segment
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 geom_text
#' @importFrom ggplot2 scale_x_continuous
#' @importFrom ggplot2 scale_size_continuous
#' @importFrom ggplot2 element_blank
#'
#' @export
#' @examples
#' # Using plot_tree() with the esophagus dataset.
#' # Please note that many more interesting examples are shown in the online
#' # tutorials at http://joey711.github.io/phyloseq/plot_tree-examples
#' data(esophagus)
#' # plot_tree(esophagus)
#' # plot_tree(esophagus, color="Sample")
#' # plot_tree(esophagus, size="Abundance")
#' # plot_tree(esophagus, size="Abundance", color="samples")
#' plot_tree(esophagus, size="Abundance", color="Sample", base.spacing=0.03)
#' plot_tree(esophagus, size="abundance", color="samples", base.spacing=0.03)
plot_tree = function(physeq, method="sampledodge", nodelabf=NULL,
color=NULL, shape=NULL, size=NULL,
min.abundance=Inf, label.tips=NULL, text.size=NULL,
sizebase=5, base.spacing = 0.02,
ladderize=FALSE, plot.margin=0.2, title=NULL,
treetheme=NULL, justify="jagged"){
########################################
# Support mis-capitalization of reserved variable names in color, shape, size
# This helps, for instance, with backward-compatibility where "abundance"
# was the reserved variable name for mapping OTU abundance entries
fix_reserved_vars = function(aesvar){
aesvar <- gsub("^abundance[s]{0,}$", "Abundance", aesvar, ignore.case=TRUE)
aesvar <- gsub("^OTU[s]{0,}$", "OTU", aesvar, ignore.case=TRUE)
aesvar <- gsub("^taxa_name[s]{0,}$", "OTU", aesvar, ignore.case=TRUE)
aesvar <- gsub("^sample[s]{0,}$", "Sample", aesvar, ignore.case=TRUE)
return(aesvar)
}
if(!is.null(label.tips)){label.tips <- fix_reserved_vars(label.tips)}
if(!is.null(color)){color <- fix_reserved_vars(color)}
if(!is.null(shape)){shape <- fix_reserved_vars(shape)}
if(!is.null(size) ){size <- fix_reserved_vars(size)}
########################################
if( is.null(phy_tree(physeq, FALSE)) ){
stop("There is no phylogenetic tree in the object you have provided.\n",
"Try phy_tree(physeq) to see for yourself.")
}
if(!inherits(physeq, "phyloseq")){
# If only a phylogenetic tree, then only tree available to overlay.
method <- "treeonly"
}
# Create the tree data.table
treeSegs <- phyloseq::tree_layout(phy_tree(physeq), ladderize=ladderize)
edgeMap = aes(x=xleft, xend=xright, y=y, yend=y)
vertMap = aes(x=x, xend=x, y=vmin, yend=vmax)
# Initialize phylogenetic tree.
# Naked, lines-only, unannotated tree as first layers. Edge (horiz) first, then vertical.
p = ggplot(data=treeSegs$edgeDT) + geom_segment(edgeMap) +
geom_segment(vertMap, data=treeSegs$vertDT)
# If no text.size given, calculate it from number of tips ("species", aka taxa)
# This is very fast. No need to worry about whether text is printed or not.
if(is.null(text.size)){
text.size <- phyloseq:::manytextsize(ntaxa(physeq))
}
# Add the species labels to the right.
if(!is.null(label.tips) & method!="sampledodge"){
# If method is sampledodge, then labels are added to the right of points, later.
# Add labels layer to plotting object.
labelDT = treeSegs$edgeDT[!is.na(OTU), ]
if(!is.null(tax_table(object=physeq, errorIfNULL=FALSE))){
# If there is a taxonomy available, merge it with the label data.table
taxDT = data.table(tax_table(physeq), OTU=taxa_names(physeq), key="OTU")
# Merge with taxonomy.
labelDT = merge(x=labelDT, y=taxDT, by="OTU")
}
if(justify=="jagged"){
# Tip label aesthetic mapping.
# Aesthetics can be NULL, and that aesthetic gets ignored.
labelMap <- aes_string(x="xright", y="y", label=label.tips, color=color)
} else {
# The left-justified version of tip-labels.
labelMap <- aes_string(x="max(xright, na.rm=TRUE)", y="y", label=label.tips, color=color)
}
p <- p + geom_text(labelMap, data=labelDT, size=I(text.size), hjust=-0.1, na.rm=TRUE)
}
# Node label section.
#
# If no nodelabf ("node label function") given, ask internal function to pick one.
# Is NULL by default, meaning will dispatch to `howtolabnodes` to select function.
# For no node labels, the "dummy" function `nodeplotblank` will return tree plot
# object, p, as-is, unmodified.
if(is.null(nodelabf)){
nodelabf = phyloseq:::howtolabnodes(physeq)
}
#### set node `y` as the mean of the vertical segment
# Use the provided/inferred node label function to add the node labels layer(s)
# Non-root nodes first
p = nodelabf(p, treeSegs$edgeDT[!is.na(label), ])
# Add root label (if present)
p = nodelabf(p, treeSegs$vertDT[!is.na(label), ])
# Theme specification
if(is.null(treetheme)){
# If NULL, then use the default tree theme.
treetheme <- theme(axis.ticks = element_blank(),
axis.title.x=element_blank(), axis.text.x=element_blank(),
axis.title.y=element_blank(), axis.text.y=element_blank(),
panel.background = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
}
if(inherits(treetheme, "theme")){
# If a theme, add theme layer to plot.
# For all other cases, skip this, which will cause default theme to be used
p <- p + treetheme
}
# Optionally add a title to the plot
if(!is.null(title)){
p <- p + ggtitle(title)
}
if(method!="sampledodge"){
# If anything but a sampledodge tree, return now without further decorations.
return(p)
}
########################################
# Sample Dodge Section
# Special words, c("Sample", "Abundance", "OTU")
# See psmelt()
########################################
# Initialize the species/taxa/OTU data.table
dodgeDT = treeSegs$edgeDT[!is.na(OTU), ]
# Merge with psmelt() result, to make all co-variables available
dodgeDT = merge(
x=dodgeDT,
y=data.table(psmelt(physeq, as = "data.table"), key="OTU"),
by="OTU"
)
if(justify=="jagged"){
# Remove 0 Abundance value entries now, not later, for jagged.
dodgeDT <- dodgeDT[Abundance > 0, ]
}
# Set key. Changes `dodgeDT` in place. OTU is first key, always.
if( !is.null(color) | !is.null(shape) | !is.null(size) ){
# If color, shape, or size is chosen, setkey by those as well
setkeyv(dodgeDT, cols=c("OTU", color, shape, size))
} else {
# Else, set key by OTU and sample name.
setkey(dodgeDT, OTU, Sample)
}
# Add sample-dodge horizontal adjustment index. In-place data.table assignment
dodgeDT[, h.adj.index := 1:length(xright), by=OTU]
# `base.spacing` is a user-input parameter.
# The sampledodge step size is based on this and the max `x` value
if(justify=="jagged"){
dodgeDT[, xdodge:=(xright + h.adj.index * base.spacing * max(xright, na.rm=TRUE))]
} else {
# Left-justified version, `xdodge` always starts at the max of all `xright` values.
dodgeDT[, xdodge := max(xright, na.rm=TRUE) + h.adj.index * base.spacing * max(xright, na.rm=TRUE)]
# zeroes removed only after all sample points have been mapped.
dodgeDT <- dodgeDT[Abundance > 0, ]
}
# The general tip-point map. Objects can be NULL, and that aesthetic gets ignored.
dodgeMap <- aes_string(x="xdodge", y="y", color=color, fill=color,
shape=shape, size=size)
p <- p + geom_point(dodgeMap, data=dodgeDT, na.rm=TRUE)
# Adjust point size transform
if( !is.null(size) ){
p <- p + scale_size_continuous(trans=log_trans(sizebase))
}
# Optionally-add abundance value label to each point.
# User controls this by the `min.abundance` parameter.
# A value of `Inf` implies no labels.
if( any(dodgeDT$Abundance >= min.abundance[1]) ){
pointlabdf = dodgeDT[Abundance>=min.abundance[1], ]
p <- p + geom_text(mapping=aes(xdodge, y, label=Abundance),
data=pointlabdf, size=text.size, na.rm=TRUE)
}
# If indicated, add the species labels to the right of dodged points.
if(!is.null(label.tips)){
# `tiplabDT` has only one row per tip, the farthest horizontal
# adjusted position (one for each taxa)
tiplabDT = dodgeDT
tiplabDT[, xfartiplab:=max(xdodge), by=OTU]
tiplabDT <- tiplabDT[h.adj.index==1, .SD, by=OTU]
if(!is.null(color)){
if(color %in% sample_variables(physeq, errorIfNULL=FALSE)){
color <- NULL
}
}
labelMap <- NULL
if(justify=="jagged"){
labelMap <- aes_string(x="xfartiplab", y="y", label=label.tips, color=color)
} else {
labelMap <- aes_string(x="max(xfartiplab, na.rm=TRUE)", y="y", label=label.tips, color=color)
}
# Add labels layer to plotting object.
p <- p + geom_text(labelMap, tiplabDT, size=I(text.size), hjust=-0.1, na.rm=TRUE)
}
# Plot margins.
# Adjust the tree graphic plot margins.
# Helps to manually ensure that graphic elements aren't clipped,
# especially when there are long tip labels.
min.x <- -0.01 # + dodgeDT[, min(c(xleft))]
max.x <- dodgeDT[, max(xright, na.rm=TRUE)]
if("xdodge" %in% names(dodgeDT)){
max.x <- dodgeDT[, max(xright, xdodge, na.rm=TRUE)]
}
if(plot.margin > 0){
max.x <- max.x * (1.0 + plot.margin)
}
p <- p + scale_x_continuous(limits=c(min.x, max.x))
return(p)
}
################################################################################
#' Create an ecologically-organized heatmap using ggplot2 graphics
#'
#' There are many useful examples of phyloseq heatmap graphics in the
#' \href{http://joey711.github.io/phyloseq/plot_heatmap-examples}{phyloseq online tutorials}.
#' In a 2010 article in BMC Genomics, Rajaram and Oono show describe an
#' approach to creating a heatmap using ordination methods to organize the
#' rows and columns instead of (hierarchical) cluster analysis. In many cases
#' the ordination-based ordering does a much better job than h-clustering.
#' An immediately useful example of their approach is provided in the NeatMap
#' package for R. The NeatMap package can be used directly on the abundance
#' table (\code{\link{otu_table-class}}) of phylogenetic-sequencing data, but
#' the NMDS or PCA ordination options that it supports are not based on ecological
#' distances. To fill this void, phyloseq provides the \code{plot_heatmap()}
#' function as an ecology-oriented variant of the NeatMap approach to organizing
#' a heatmap and build it using ggplot2 graphics tools.
#' The \code{distance} and \code{method} arguments are the same as for the
#' \code{\link{plot_ordination}} function, and support large number of
#' distances and ordination methods, respectively, with a strong leaning toward
#' ecology.
#' This function also provides the options to re-label the OTU and sample
#' axis-ticks with a taxonomic name and/or sample variable, respectively,
#' in the hope that this might hasten your interpretation of the patterns
#' (See the \code{sample.label} and \code{taxa.label} documentation, below).
#' Note that this function makes no attempt to overlay hierarchical
#' clustering trees on the axes, as hierarchical clustering is not used to
#' organize the plot. Also note that each re-ordered axis repeats at the edge,
#' and so apparent clusters at the far right/left or top/bottom of the
#' heat-map may actually be the same. For now, the placement of this edge
#' can be considered arbitrary, so beware of this artifact of this graphical
#' representation. If you benefit from this phyloseq-specific implementation
#' of the NeatMap approach, please cite both our packages/articles.
#'
#' This approach borrows heavily from the \code{heatmap1} function in the
#' \code{NeatMap} package. Highly recommended, and we are grateful for their
#' package and ideas, which we have adapted for our specific purposes here,
#' but did not use an explicit dependency. At the time of the first version
#' of this implementation, the NeatMap package depends on the rgl-package,
#' which is not needed in phyloseq, at present. Although likely a transient
#' issue, the rgl-package has some known installation issues that have further
#' influenced to avoid making NeatMap a formal dependency (Although we love
#' both NeatMap and rgl!).
#'
#' @param physeq (Required). The data, in the form of an instance of the
#' \code{\link{phyloseq-class}}. This should be what you get as a result
#' from one of the
#' \code{\link{import}} functions, or any of the processing downstream.
#' No data components beyond the \code{\link{otu_table}} are strictly
#' necessary, though they may be useful if you want to re-label the
#' axis ticks according to some observable or taxonomic rank, for instance,
#' or if you want to use a \code{\link{UniFrac}}-based distance
#' (in which case your \code{physeq} data would need to have a tree included).
#'
#' @param method (Optional).
#' The ordination method to use for organizing the
#' heatmap. A great deal of the usefulness of a heatmap graphic depends upon
#' the way in which the rows and columns are ordered.
#'
#' @param distance (Optional). A character string.
#' The ecological distance method to use in the ordination.
#' See \code{\link{distance}}.
#'
#' @param sample.label (Optional). A character string.
#' The sample variable by which you want to re-label the sample (horizontal) axis.
#'
#' @param taxa.label (Optional). A character string.
#' The name of the taxonomic rank by which you want to
#' re-label the taxa/species/OTU (vertical) axis.
#' You can see available options in your data using
#' \code{\link{rank_names}(physeq)}.
#'
#' @param low (Optional). A character string. An R color.
#' See \code{?\link{colors}} for options support in R (there are lots).
#' The color that represents the lowest non-zero value
#' in the heatmap. Default is a dark blue color, \code{"#000033"}.
#'
#' @param high (Optional). A character string. An R color.
#' See \code{\link{colors}} for options support in R (there are lots).
#' The color that will represent the highest
#' value in the heatmap. The default is \code{"#66CCFF"}.
#' Zero-values are treated as \code{NA}, and set to \code{"black"}, to represent
#' a background color.
#'
#' @param na.value (Optional). A character string. An R color.
#' See \code{\link{colors}} for options support in R (there are lots).
#' The color to represent what is essentially the background of the plot,
#' the non-observations that occur as \code{NA} or
#' \code{0} values in the abundance table. The default is \code{"black"}, which
#' works well on computer-screen graphics devices, but may be a poor choice for
#' printers, in which case you might want this value to be \code{"white"}, and
#' reverse the values of \code{high} and \code{low}, above.
#'
#' @param trans (Optional). \code{"trans"}-class transformer-definition object.
#' A numerical transformer to use in
#' the continuous color scale. See \code{\link[scales]{trans_new}} for details.
#' The default is \code{\link{log_trans}(4)}.
#'
#' @param max.label (Optional). Integer. Default is 250.
#' The maximum number of labeles to fit on a given axis (either x or y).
#' If number of taxa or samples exceeds this value,
#' the corresponding axis will be stripped of any labels.
#'
#' This supercedes any arguments provided to
#' \code{sample.label} or \code{taxa.label}.
#' Make sure to increase this value if, for example,
#' you want a special label
#' for an axis that has 300 indices.
#'
#' @param title (Optional). Default \code{NULL}. Character string.
#' The main title for the graphic.
#'
#' @param sample.order (Optional). Default \code{NULL}.
#' Either a single character string matching
#' one of the \code{\link{sample_variables}} in your data,
#' or a character vector of \code{\link{sample_names}}
#' in the precise order that you want them displayed in the heatmap.
#' This overrides any ordination ordering that might be done
#' with the \code{method}/\code{distance} arguments.
#'
#' @param taxa.order (Optional). Default \code{NULL}.
#' Either a single character string matching
#' one of the \code{\link{rank_names}} in your data,
#' or a character vector of \code{\link{taxa_names}}
#' in the precise order that you want them displayed in the heatmap.
#' This overrides any ordination ordering that might be done
#' with the \code{method}/\code{distance} arguments.
#'
#' @param first.sample (Optional). Default \code{NULL}.
#' A character string matching one of the \code{\link{sample_names}}
#' of your input data (\code{physeq}).
#' It will become the left-most sample in the plot.
#' For the ordination-based ordering (recommended),
#' the left and right edges of the axes are adjaacent in a continuous ordering.
#' Therefore, the choice of starting sample is meaningless and arbitrary,
#' but it is aesthetically poor to have the left and right edge split
#' a natural cluster in the data.
#' This argument allows you to specify the left edge
#' and thereby avoid cluster-splitting, emphasize a gradient, etc.
#'
#' @param first.taxa (Optional). Default \code{NULL}.
#' A character string matching one of the \code{\link{taxa_names}}
#' of your input data (\code{physeq}).
#' This is equivalent to \code{first.sample} (above),
#' but for the taxa/OTU indices, usually the vertical axis.
#'
#' @param ... (Optional). Additional parameters passed to \code{\link{ordinate}}.
#'
#' @return
#' A heatmap plot, in the form of a \code{\link{ggplot}2} plot object,
#' which can be further saved and modified.
#'
#' @references
#' Because this function relies so heavily in principle, and in code, on some of the
#' functionality in NeatMap, please site their article if you use this function
#' in your work.
#'
#' Rajaram, S., & Oono, Y. (2010).
#' NeatMap--non-clustering heat map alternatives in R. BMC Bioinformatics, 11, 45.
#'
#' Please see further examples in the
#' \href{http://joey711.github.io/phyloseq/plot_heatmap-examples}{phyloseq online tutorials}.
#'
#' @importFrom vegan scores
#'
#' @importFrom scales log_trans
#'
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 scale_y_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 geom_raster
#' @importFrom ggplot2 theme
#'
#' @export
#' @examples
#' data("GlobalPatterns")
#' gpac <- subset_taxa(GlobalPatterns, Phylum=="Crenarchaeota")
#' # FYI, the base-R function uses a non-ecological ordering scheme,
#' # but does add potentially useful hclust dendrogram to the sides...
#' gpac <- subset_taxa(GlobalPatterns, Phylum=="Crenarchaeota")
#' # Remove the nearly-empty samples (e.g. 10 reads or less)
#' gpac = prune_samples(sample_sums(gpac) > 50, gpac)
#' # Arbitrary order if method set to NULL
#' plot_heatmap(gpac, method=NULL, sample.label="SampleType",
#' taxa.label="Family")
#' # Use ordination
#' plot_heatmap(gpac, sample.label="SampleType", taxa.label="Family")
#' # Use ordination for OTUs, but not sample-order
#' plot_heatmap(gpac, sample.label="SampleType", taxa.label="Family",
#' sample.order="SampleType")
#' # Specifying both orders omits any attempt to use ordination. The following
#' # should be the same.
#' p0 = plot_heatmap(gpac, sample.label="SampleType", taxa.label="Family",
#' taxa.order="Phylum", sample.order="SampleType")
#' p1 = plot_heatmap(gpac, method=NULL, sample.label="SampleType",
#' taxa.label="Family", taxa.order="Phylum", sample.order="SampleType")
#' #expect_equivalent(p0, p1)
#' # Example: Order matters. Random ordering of OTU indices is difficult to
#' # interpret, even with structured sample order
#' rando = sample(taxa_names(gpac), size=ntaxa(gpac), replace=FALSE)
#' plot_heatmap(gpac, method=NULL, sample.label="SampleType",
#' taxa.label="Family", taxa.order=rando, sample.order="SampleType")
#' # # Select the edges of each axis.
#' # First, arbitrary edge, ordering
#' plot_heatmap(gpac, method=NULL)
#' # Second, biological-ordering (instead of default ordination-ordering), but
#' # arbitrary edge
#' plot_heatmap(gpac, taxa.order="Family", sample.order="SampleType")
#' # Third, biological ordering, selected edges
#' plot_heatmap(gpac, taxa.order="Family", sample.order="SampleType",
#' first.taxa="546313", first.sample="NP2")
#' # Fourth, add meaningful labels
#' plot_heatmap(gpac, sample.label="SampleType", taxa.label="Family",
#' taxa.order="Family", sample.order="SampleType", first.taxa="546313",
#' first.sample="NP2")
plot_heatmap <- function(physeq, method="NMDS", distance="bray",
sample.label=NULL, taxa.label=NULL,
low="#000033", high="#66CCFF", na.value="black", trans=log_trans(4),
max.label=250, title=NULL, sample.order=NULL, taxa.order=NULL,
first.sample=NULL, first.taxa=NULL, ...){
# User-override ordering
if( !is.null(taxa.order) & length(taxa.order)==1 ){
# Assume `taxa.order` is a tax_table variable. Use it for ordering.
rankcol = which(rank_names(physeq) %in% taxa.order)
taxmat = as(tax_table(physeq)[, 1:rankcol], "matrix")
taxa.order = apply(taxmat, 1, paste, sep="", collapse="")
names(taxa.order) <- taxa_names(physeq)
taxa.order = names(sort(taxa.order, na.last=TRUE))
}
if( !is.null(sample.order) & length(sample.order)==1 ){
# Assume `sample.order` is a sample variable. Use it for ordering.
sample.order = as.character(get_variable(physeq, sample.order))
names(sample.order) <- sample_names(physeq)
sample.order = names(sort(sample.order, na.last=TRUE))
}
if( !is.null(method) & (is.null(taxa.order) | is.null(sample.order)) ){
# Only attempt NeatMap if method is non-NULL & at least one of
# taxa.order and sample.order is not-yet defined.
# If both axes orders pre-defined by user, no need to perform ordination...
# Copy the approach from NeatMap by doing ordination on samples, but use
# phyloseq-wrapped distance/ordination procedures.
# Reorder by the angle in radial coordinates on the 2-axis plane.
# In case of NMDS iterations, capture the output so it isn't dumped on standard-out
junk = utils::capture.output(ps.ord <- ordinate(physeq, method, distance, ...), file=NULL)
if( is.null(sample.order) ){
siteDF = NULL
# Only define new ord-based sample order if user did not define one already
trash1 = try({siteDF <- scores(ps.ord, choices = c(1, 2), display="sites", physeq=physeq)},
silent = TRUE)
if(inherits(trash1, "try-error")){
# warn that the attempt to get ordination coordinates for ordering failed.
warning("Attempt to access ordination coordinates for sample ordering failed.\n",
"Using default sample ordering.")
}
if(!is.null(siteDF)){
# If the score accession seemed to work, go ahead and replace sample.order
sample.order <- sample_names(physeq)[order(phyloseq:::RadialTheta(siteDF))]
}
}
if( is.null(taxa.order) ){
# re-order species/taxa/OTUs, if possible,
# and only if user did not define an order already
specDF = NULL
trash2 = try({specDF <- scores(ps.ord, choices=c(1, 2), display="species", physeq=physeq)},
silent = TRUE)
if(inherits(trash2, "try-error")){
# warn that the attempt to get ordination coordinates for ordering failed.
warning("Attempt to access ordination coordinates for feature/species/taxa/OTU ordering failed.\n",
"Using default feature/species/taxa/OTU ordering.")
}
if(!is.null(specDF)){
# If the score accession seemed to work, go ahead and replace sample.order
taxa.order = taxa_names(physeq)[order(phyloseq:::RadialTheta(specDF))]
}
}
}
# Now that index orders are determined, check/assign edges of axes, if specified
if( !is.null(first.sample) ){
sample.order = phyloseq:::chunkReOrder(sample.order, first.sample)
}
if( !is.null(first.taxa) ){
taxa.order = phyloseq:::chunkReOrder(taxa.order, first.taxa)
}
# melt physeq with the standard user-accessible data melting function
# for creating plot-ready data.frames, psmelt.
# This is also used inside some of the other plot_* functions.
adf = psmelt(physeq, as = "data.frame")
# Coerce the main axis variables to character.
# Will reset them to factor if re-ordering is needed.
adf$OTU = as(adf$OTU, "character")
adf$Sample = as(adf$Sample, "character")
if( !is.null(sample.order) ){
# If sample-order is available, coerce to factor with special level-order
adf$Sample = factor(adf$Sample, levels=sample.order)
} else {
# Make sure it is a factor, but with default order/levels
adf$Sample = factor(adf$Sample)
}
if( !is.null(taxa.order) ){
# If OTU-order is available, coerce to factor with special level-order
adf$OTU = factor(adf$OTU, levels=taxa.order)
} else {
# Make sure it is a factor, but with default order/levels
adf$OTU = factor(adf$OTU)
}
## Now the plotting part
# Initialize p.
p = ggplot(adf, aes(x = Sample, y = OTU, fill=Abundance)) +
geom_raster()
# # Don't render labels if more than max.label
# Samples
if( nsamples(physeq) <= max.label ){
# Add resize layer for samples if there are fewer than max.label
p <- p + theme(
axis.text.x = element_text(
size=phyloseq:::manytextsize(nsamples(physeq), 4, 30, 12),
angle=-90, vjust=0.5, hjust=0
)
)
} else {
# Remove the labels from any rendering.
p = p + scale_x_discrete("Sample", labels="")
}
# OTUs
if( ntaxa(physeq) <= max.label ){
# Add resize layer for OTUs if there are fewer than max.label
p <- p + theme(
axis.text.y = element_text(
size=phyloseq:::manytextsize(ntaxa(physeq), 4, 30, 12)
)
)
} else {
# Remove the labels from any rendering.
p = p + scale_y_discrete("OTU", labels="")
}
# # Axis Relabeling (Skipped if more than max.label):
# Re-write sample-labels to some sample variable...
if( !is.null(sample.label) & nsamples(physeq) <= max.label){
# Make a sample-named char-vector of the values for sample.label
labvec = as(get_variable(physeq, sample.label), "character")
names(labvec) <- sample_names(physeq)
if( !is.null(sample.order) ){
# Re-order according to sample.order
labvec = labvec[sample.order]
}
# Replace any NA (missing) values with "" instead. Avoid recycling labels.
labvec[is.na(labvec)] <- ""
# Add the sample.label re-labeling layer
p = p + scale_x_discrete(sample.label, labels=labvec)
}
if( !is.null(taxa.label) & ntaxa(physeq) <= max.label){
# Make a OTU-named vector of the values for taxa.label
labvec <- as(tax_table(physeq)[, taxa.label], "character")
names(labvec) <- taxa_names(physeq)
if( !is.null(taxa.order) ){
# Re-order according to taxa.order
labvec <- labvec[taxa.order]
}
# Replace any NA (missing) values with "" instead. Avoid recycling labels.
labvec[is.na(labvec)] <- ""
# Add the taxa.label re-labeling layer
p = p + scale_y_discrete(taxa.label, labels=labvec)
}
# Color scale transformations
if( !is.null(trans) ){
p = p + scale_fill_gradient(low=low, high=high, trans=trans, na.value=na.value)
} else {
p = p + scale_fill_gradient(low=low, high=high, na.value=na.value)
}
# Optionally add a title to the plot
if( !is.null(title) ){
p = p + ggtitle(title)
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.