#' Customisable ggplot2 ordination plot
#'
#' Draw ordination plot. Utilises psExtra object produced by of \code{\link{ord_calc}}.
#' - For an extensive tutorial see \href{https://david-barnett.github.io/microViz/articles/web-only/ordination.html}{the ordination vignette}.
#' - For interpretation see the the relevant pages on PCA, PCoA, RDA, or CCA on the GUide to STatistical Analysis in Microbial Ecology (GUSTA ME) website: \url{https://sites.google.com/site/mb3gustame/}
#'
#' How to specify the plot_taxa argument (when using PCA, CCA or RDA):
#' - FALSE --> plot no taxa vectors or labels
#' - integer vector e.g. 1:3 --> plot labels for top 3 taxa (by longest line length)
#' - single numeric value e.g. 0.75 --> plot labels for taxa with line length > 0.75
#' - character vector e.g. c('g__Bacteroides', 'g__Veillonella') --> plot labels for the exactly named taxa
#'
#' @param data
#' psExtra object with ordination attached, i.e. output from ord_calc
#' @param axes
#' which axes to plot: numerical vector of length 2, e.g. 1:2 or c(3,5)
#' @param plot_taxa
#' if ord_calc method was "PCA/RDA" draw the taxa loading vectors (see details)
#' @param tax_vec_length taxon arrow vector scale multiplier.
#' NA = auto-scaling, or provide a numeric multiplier yourself.
#' @param tax_vec_style_all
#' list of named aesthetic attributes for all (background) taxon vectors
#' @param tax_vec_style_sel
#' list of named aesthetic attributes for taxon vectors for the taxa selected by plot_taxa
#' @param tax_lab_length
#' scale multiplier for label distance/position for any selected taxa
#' @param tax_lab_style
#' list of style options for the taxon labels, see tax_lab_style() function.
#' @param taxon_renamer
#' function that takes any plotted taxon names and returns modified names for labels
#' @param plot_samples
#' if TRUE, plot sample points with geom_point
#' @param constraint_vec_length
#' constraint arrow vector scale multiplier.
#' NA = auto-scaling, or provide a numeric multiplier yourself.
#' @param constraint_vec_style
#' list of aesthetics/arguments (colour, alpha etc) for the constraint vectors
#' @param constraint_lab_length label distance/position for any constraints
#' (relative to default position which is proportional to correlations with each axis)
#' @param constraint_lab_style
#' list of aesthetics/arguments (colour, size etc) for the constraint labels
#' @param var_renamer
#' function to rename constraining variables for plotting their labels
#' @param scaling
#' Type 2, or type 1 scaling. For more info,
#' see \url{https://sites.google.com/site/mb3gustame/constrained-analyses/redundancy-analysis}.
#' Either "species" or "site" scores are scaled by (proportional) eigenvalues,
#' and the other set of scores is left unscaled (from ?vegan::scores.cca)
#' @param auto_caption
#' size of caption with info about the ordination, NA for none
#' @param center expand plot limits to center around origin point (0,0)
#' @param clip clipping of labels that extend outside plot limits?
#' @param expand expand plot limits a little bit further than data range?
#' @param interactive
#' creates plot suitable for use with ggiraph (used in ord_explore)
#' @param ...
#' pass aesthetics arguments for sample points,
#' drawn with geom_point using aes_string
#'
#' @return ggplot
#' @export
#' @seealso \code{\link{tax_lab_style}} / \code{\link{tax_lab_style}} for styling labels
#' @seealso \code{\link{ord_explore}} for interactive ordination plots
#' @seealso \code{\link{ord_calc}} for calculating an ordination to plot with ord_plot
#'
#' @examples
#' library(ggplot2)
#' data("dietswap", package = "microbiome")
#'
#' # create a couple of numerical variables to use as constraints or conditions
#' dietswap <- dietswap %>%
#' ps_mutate(
#' weight = dplyr::recode(bmi_group, obese = 3, overweight = 2, lean = 1),
#' female = dplyr::if_else(sex == "female", true = 1, false = 0)
#' )
#'
#' # unconstrained PCA ordination
#' unconstrained_aitchison_pca <- dietswap %>%
#' tax_transform("clr", rank = "Genus") %>%
#' ord_calc() # method = "auto" --> picks PCA as no constraints or distances
#'
#' unconstrained_aitchison_pca %>%
#' ord_plot(colour = "bmi_group", plot_taxa = 1:5) +
#' stat_ellipse(aes(linetype = bmi_group, colour = bmi_group))
#'
#' # you can generate an interactive version of the plot by specifying
#' # interactive = TRUE, and passing a variable name to another argument
#' # called `data_id` which is required for interactive point selection
#' interactive_plot <- unconstrained_aitchison_pca %>%
#' ord_plot(
#' colour = "bmi_group", plot_taxa = 1:5,
#' interactive = TRUE, data_id = "sample"
#' )
#'
#' # to start the html viewer, and allow selecting points, we must use a
#' # ggiraph function called girafe and set some options and css
#' ggiraph::girafe(
#' ggobj = interactive_plot,
#' options = list(
#' ggiraph::opts_selection(
#' css = ggiraph::girafe_css(
#' css = "fill:orange;stroke:black;",
#' point = "stroke-width:1.5px"
#' ),
#' type = "multiple", # this activates lasso selection (click top-right)
#' only_shiny = FALSE # allows interactive plot outside of shiny app
#' )
#' )
#' )
#'
#'
#' # remove effect of weight with conditions arg
#' # scaling weight with scale_cc is not necessary as only 1 condition is used
#' dietswap %>%
#' tax_transform("clr", rank = "Genus") %>%
#' ord_calc(conditions = "weight", scale_cc = FALSE) %>%
#' ord_plot(colour = "bmi_group") +
#' stat_ellipse(aes(linetype = bmi_group, colour = bmi_group))
#'
#' # alternatively, constrain variation on weight and female
#' constrained_aitchison_rda <- dietswap %>%
#' tax_transform("clr", rank = "Genus") %>%
#' ord_calc(constraints = c("weight", "female")) # constraints --> RDA
#'
#' constrained_aitchison_rda %>%
#' ord_plot(colour = "bmi_group", constraint_vec_length = 2) +
#' stat_ellipse(aes(linetype = bmi_group, colour = bmi_group))
#'
#' # ggplot allows additional customisation of the resulting plot
#' p <- constrained_aitchison_rda %>%
#' ord_plot(colour = "bmi_group", plot_taxa = 1:3) +
#' lims(x = c(-5, 6), y = c(-5, 5)) +
#' scale_colour_brewer(palette = "Set1")
#'
#' p + stat_ellipse(aes(linetype = bmi_group, colour = bmi_group))
#' p + stat_density2d(aes(colour = bmi_group))
#'
#' # you can rename the taxa on the labels with any function that
#' # takes and modifies a character vector
#' constrained_aitchison_rda %>%
#' ord_plot(
#' colour = "bmi_group",
#' plot_taxa = 1:3,
#' taxon_renamer = function(x) stringr::str_extract(x, "^.")
#' ) +
#' lims(x = c(-5, 6), y = c(-5, 5)) +
#' scale_colour_brewer(palette = "Set1")
#'
#' # You can plot PCoA and constrained PCoA plots too.
#' # You don't typically need/want to use transformed taxa variables for PCoA
#' # But it is good practice to call tax_transform("identity") so that
#' # the automatic caption can record that no transformation was applied
#' dietswap %>%
#' tax_agg("Genus") %>%
#' tax_transform("identity") %>%
#' # so caption can record (lack of) transform
#' dist_calc("bray") %>%
#' # bray curtis
#' ord_calc() %>%
#' # guesses you want an unconstrained PCoA
#' ord_plot(colour = "bmi_group")
#'
#' # it is possible to facet these plots
#' # (although I'm not sure it makes sense to)
#' # but only unconstrained ordination plots and with plot_taxa = FALSE
#' unconstrained_aitchison_pca %>%
#' ord_plot(color = "sex", auto_caption = NA) +
#' facet_wrap("sex") +
#' theme(line = element_blank()) +
#' stat_density2d(aes(colour = sex)) +
#' guides(colour = "none")
#'
#' unconstrained_aitchison_pca %>%
#' ord_plot(color = "bmi_group", plot_samples = FALSE, auto_caption = NA) +
#' facet_wrap("sex") +
#' theme(line = element_blank(), axis.text = element_blank()) +
#' stat_density2d_filled(show.legend = FALSE) +
#' geom_point(size = 1, shape = 21, colour = "black", fill = "white")
ord_plot <-
function(data,
axes = 1:2,
plot_taxa = FALSE,
tax_vec_length = NA,
tax_vec_style_all = vec_tax_all(),
tax_vec_style_sel = vec_tax_sel(),
tax_lab_length = tax_vec_length * 1.1,
tax_lab_style = list(),
taxon_renamer = function(x) identity(x),
constraint_vec_length = NA,
constraint_vec_style = vec_constraint(),
constraint_lab_length = constraint_vec_length * 1.1,
constraint_lab_style = list(),
var_renamer = function(x) identity(x),
plot_samples = TRUE,
scaling = 2, # or "species" scaling in vegan lingo
auto_caption = 8,
center = FALSE,
clip = "off",
expand = !center,
interactive = FALSE,
...) {
check_is_psExtra(data, argName = "data")
ps <- ps_get(data)
ordination <- ord_get(data)
# check ordination and phyloseq size (should never fail if ord_calc used)
stopifnot(stats::nobs(ordination) == phyloseq::nsamples(ps))
# check input data object class and extract the most used objects to function env
if (identical(ordination, NULL)) stop("data must be psExtra output of ord_calc")
info <- info_get(data)
ordInfo <- info[["ord_info"]]
isConstrained <- length(ordInfo$constraints) > 0
# return named list of arguments matching either phyloseq variable names or
# numbers/colors/ggplot2_shapes (throws error if any are invalid)
ellipses <- checkValidEllipsesOrdPlot(..., ps = ps)
# get and transform aesthetic metadata ------------------------------------
meta <- samdatAsDataframe(ps)
# set variable and fixed ggplot aesthetics based on metadata names check
aestheticArgs <- ellipses[ellipses %in% colnames(meta)]
fixed_aesthetics <- ellipses[!ellipses %in% colnames(meta)]
# set colour variables to factors, if they're not null or numeric-like
if (!is.null(aestheticArgs$colour)) {
if (inherits(meta[[aestheticArgs$colour]], c("numeric", "difftime"))) {
meta[[aestheticArgs$colour]] <- as.numeric(meta[[aestheticArgs$colour]])
} else {
meta[[aestheticArgs$colour]] <- as.factor(meta[[aestheticArgs$colour]])
}
}
# and coerce shape variable to factor if it is a non-fixed variable
if (!is.null(aestheticArgs$shape)) {
meta[[aestheticArgs$shape]] <- as.factor(meta[[aestheticArgs$shape]])
}
# get data point positions ------------------------------------------------
# NMDS and DCA ordinations needs alternative handling
if (inherits(ordination, c("decorana", "metaMDS"))) {
siteScoresDf <- as.data.frame(
vegan::scores(ordination, display = "sites", choices = axes)
)
axesLabs <- axesNames <- colnames(siteScoresDf)
} else {
# get all scores of ordination, to ensure consistent scaling
ordsum <- vegan::scores(
x = ordination, scaling = scaling,
choices = seq_len(max(axes)), display = "all"
)
# retrieve scores from model object
siteScoresDf <- as.data.frame(ordsum[["sites"]][, axes, drop = FALSE])
# if RDA/PCA method: get species scores (aka feature loadings)
if (info$ord_info$method %in% c("RDA", "PCA", "CCA")) {
speciesScoresDf <- as.data.frame(ordsum[["species"]][, axes, drop = FALSE])
}
# if constrained model: get constraints coordinates for plotting
if (isConstrained) {
constraintDf <- as.data.frame(ordsum[["biplot"]][, axes, drop = FALSE])
}
# extract "explained variation" for labelling axes
eigVals <- vegan::eigenvals(ordination)
explainedVar <- eigVals[axes] / sum(eigVals)
axesNames <- colnames(siteScoresDf)
axesLabs <- paste0(axesNames, " [", sprintf("%.1f", 100 * explainedVar), "%]")
}
# bind ordination axes vectors to metadata subset for plotting
df <- dplyr::bind_cols(siteScoresDf, meta)
# build ggplot ------------------------------------------------------------
## samples ----------------------------------------------------------------
p <- ggplot2::ggplot(data = df, mapping = ggplot2::aes(
x = .data[[axesNames[1]]], y = .data[[axesNames[2]]]
)) +
ggplot2::theme_minimal() +
ggplot2::labs(x = axesLabs[1], y = axesLabs[2]) +
ggplot2::coord_cartesian(clip = clip, default = TRUE, expand = expand)
# set geom_point variable aesthetics
aesthetics <- buildAesFromListOfStrings(aestheticArgs)
# gather all args for use in geom_point (sample data)
geompointArgs <- c(list(mapping = aesthetics), fixed_aesthetics)
# add sample/site points, sized dynamically or fixed size
if (plot_samples) {
if (isTRUE(interactive)) {
p <- p + do.call(ggiraph::geom_point_interactive, args = geompointArgs)
} else {
p <- p + do.call(ggplot2::geom_point, args = geompointArgs)
}
}
## taxa -------------------------------------------------------------------
# add loadings/ species-scores arrows for RDA/PCA methods
if (ordInfo[["method"]] %in% c("RDA", "CCA", "PCA")) {
# return subselection of taxa for which to draw labels on plot
selectSpeciesScoresDf <- subsetTaxaDfLabel(
speciesScoresDf = speciesScoresDf, plot_taxa = plot_taxa
)
# if a selection of species scores (for labelling) was calculated,
# add taxa lines and labels to plot
if (!identical(selectSpeciesScoresDf, NULL)) {
# automatic taxa vector length setting
if (identical(tax_vec_length, NA)) {
tax_vec_length <- computeAutoVecLength(
vecDf = speciesScoresDf, pointsDf = siteScoresDf, prop = 0.85
)
}
# (semi-transparent) lines for all taxa
p <- ord_arrows(
p = p, data = speciesScoresDf * tax_vec_length,
axesNames = axesNames, styleList = tax_vec_style_all,
defaultStyles = vec_tax_all()
)
# (opaque) lines for selected taxa
p <- ord_arrows(
p = p, data = selectSpeciesScoresDf * tax_vec_length,
axesNames = axesNames, styleList = tax_vec_style_sel,
defaultStyles = vec_tax_sel()
)
# add taxa labels
p <- ord_labels(
p = p, data = selectSpeciesScoresDf * tax_lab_length,
axesNames = axesNames, renamer = taxon_renamer,
styleList = tax_lab_style, defaultStyles = tax_lab_style()
)
}
}
## constraints -----------------------------------------------------------
# if constrained ordination, plot constraints
if (isConstrained) {
# automatic constraint length setting
if (identical(constraint_vec_length, NA)) {
constraint_vec_length <- computeAutoVecLength(
vecDf = constraintDf, pointsDf = siteScoresDf, prop = 0.45
)
}
# draw vector segments at length set by constraint_vec_length argument
# (proportion of original length)
p <- ord_arrows(
p = p, data = constraintDf * constraint_vec_length,
axesNames = axesNames, styleList = constraint_vec_style,
defaultStyles = vec_constraint()
)
# draw vector tip labels at length set by constraint_lab_length argument
p <- ord_labels(
p = p, data = constraintDf * constraint_lab_length,
axesNames = axesNames, renamer = var_renamer,
styleList = constraint_lab_style,
defaultStyles = constraint_lab_style()
)
}
## caption and center ----------------------------------------------------
# add automated caption if requested (default size = 8)
p <- ord_caption(
p = p, ps = ps, cap_size = auto_caption, info = info, scaling = scaling
)
# center the plot if requested using helper function
if (isTRUE(center)) p <- center_plot(p, clip = clip, expand = expand)
return(p)
}
# helper functions ------------------------------------------------------------
# https://stackoverflow.com/a/74424353/9005116
buildAesFromListOfStrings <- function(args) {
args <- lapply(X = args, FUN = function(x) {
if (rlang::is_string(x)) rlang::data_sym(x) else x
})
return(do.call(what = ggplot2::aes, args = args))
}
#' Add caption text to ordination ggplot
#'
#' @param p ggplot
#' @param ps phyloseq object to assess dimensions
#' @param cap_size caption font size (or NA for no caption addition)
#' @param info psExtraInfo list containing most info for caption
#' @param scaling type of scaling used
#'
#' @return ggplot
#' @noRd
ord_caption <- function(p, ps, cap_size, info, scaling) {
if (identical(NA, cap_size)) {
return(p) # return unchanged
} else {
o <- info$ord_info$method
# some ordinations should have scaling type reported, when not the default
if (o %in% c("PCA", "RDA", "CCA", "CAP") && scaling != 2) {
o <- paste0(o, " (scaling=", scaling, ")")
}
if (length(info$ord_info$constraints) > 0) {
o <- paste0(o, " constraints=", info$ord_info$constraints)
}
if (length(info$ord_info$conditions) > 0) {
o <- paste0(o, " conditions=", info$ord_info$conditions)
}
# caption gets n taxa and samples info
caption <- paste0(
nrow(p[["data"]]), " samples & ", phyloseq::ntaxa(ps),
" taxa (", info$tax_agg, "). ", o
)
# any transformations and distances should be listed
if (length(info$tax_trans) > 0) {
caption <- paste0(caption, " tax_transform=", info$tax_trans)
}
if (length(info$dist_method) > 0) {
caption <- paste0(caption, " dist=", info$dist_method)
}
# add the caption
p <- p + ggplot2::labs(caption = caption) +
ggplot2::theme(plot.caption = ggplot2::element_text(size = cap_size))
return(p)
}
}
## centering plot ------------------------------------------------------------
center_plot <- function(plot, clip = "off", expand = TRUE) {
lims <- get_plot_limits(plot)
plot + ggplot2::coord_cartesian(
xlim = c(-max(abs(lims$x)), max(abs(lims$x))),
ylim = c(-max(abs(lims$y)), max(abs(lims$y))),
default = TRUE, clip = clip, expand = expand
)
}
get_plot_limits <- function(plot) {
gb <- ggplot2::ggplot_build(plot)
list(
x = c(
min = gb$layout$panel_params[[1]]$x.range[1],
max = gb$layout$panel_params[[1]]$x.range[2]
),
y = c(
min = gb$layout$panel_params[[1]]$y.range[1],
max = gb$layout$panel_params[[1]]$y.range[2]
)
)
}
## arrow length helpers ------------------------------------------------------
# rescale maximum length of vectors to be a proportion of maximum distance of
# points from origin
computeAutoVecLength <- function(vecDf, pointsDf, prop = 0.85) {
x <- max(rowVecNorms(pointsDf)) / max(rowVecNorms(vecDf))
tax_vec_length <- x * prop
return(tax_vec_length)
}
# calculate lengths of 2D vectors from rows of dataframe
rowVecNorms <- function(df, cols = 1:2) {
x <- apply(X = df[, cols, drop = FALSE], MARGIN = 1, FUN = vecNormEuclid)
return(x)
}
# finds the euclidean norm (length) of the vector given
# useful for adjusting the length of loading/constraint arrows
# (when given x value and y value in vec)
vecNormEuclid <- function(vec) norm(vec, type = "2")
## other helpers -------------------------------------------------------------
# Takes dataframe with rownames as taxa names, and data columns are ordination
# dimensions, the first two being the ones to be plotted.
# Returns subset of that dataframe with only taxa that will be labelled,
# subset returned depends on the plot_taxa argument, which is user supplied.
subsetTaxaDfLabel <- function(speciesScoresDf, plot_taxa) {
# calculate initial line length for taxa vectors
speciesLineLength <- rowVecNorms(df = speciesScoresDf, cols = 1:2)
# return subselection of taxa for which to draw labels on plot
selectSpeciesScoresDf <- switch(
EXPR = class(plot_taxa[[1]]),
# default plot_taxa == TRUE --> line length > 1
"logical" = {
if (isTRUE(plot_taxa)) {
speciesScoresDf[speciesLineLength > 1, , drop = FALSE]
} else {
NULL
}
},
# integer e.g. 1:3 --> plot labels for top 3 taxa (by line length)
"integer" = {
speciesScoresDf[rev(order(speciesLineLength)), ][plot_taxa, , drop = FALSE]
},
# numeric e.g. 0.75 --> plot labels for taxa with line length > 0.75
"numeric" = {
speciesScoresDf[speciesLineLength > plot_taxa[[1]], , drop = FALSE]
},
# character e.g. c('g__Bacteroides', 'g__Veillonella')
# --> plot labels for exactly named taxa
"character" = {
speciesScoresDf[rownames(speciesScoresDf) %in% plot_taxa, , drop = FALSE]
}
)
return(selectSpeciesScoresDf)
}
## helpers check aesthetic args ----------------------------------------------
# return named list of arguments matching either phyloseq variable names or
# numbers/colors/ggplot2_shapes (throws error if any are invalid)
checkValidEllipsesOrdPlot <- function(..., ps) {
# get ellipses optional arguments (aesthetics for geom_point)
ellipses <- list(...)
# properly delete any ellipses arguments set to NULL
if (length(ellipses) > 0) ellipses[sapply(ellipses, is.null)] <- NULL
# check there are STILL ellipses args left after removing nulls
if (length(ellipses) > 0) {
# check aesthetics colour, shape, size and alpha are all in dataset (or numeric-esque)
variables <- phyloseq::sample_variables(ps)
for (v in ellipses) {
if (
!is.null(v) && !inherits(v, c("logical", "numeric", "integer")) &&
!(v %in% c(variables, grDevices::colors(), ggplot2_shapes()))
) {
if (isFALSE(grepl("^\\#[0-9A-F]{3,6}", v, ignore.case = TRUE))) {
stop(v, " is not a variable in the sample metadata (or color / shape)")
}
}
}
}
return(ellipses)
}
# generates vector of ggplot2 shapes
ggplot2_shapes <- function() {
c(
"circle", paste("circle", c("open", "filled", "cross", "plus", "small")),
"bullet",
"square", paste("square", c("open", "filled", "cross", "plus", "triangle")),
"diamond", paste("diamond", c("open", "filled", "plus")),
"triangle", paste("triangle", c("open", "filled", "square")),
paste("triangle down", c("open", "filled")),
"plus", "cross", "asterisk"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.