#' Calculate distances between pairs of samples in phyloseq object
#'
#' @description
#' Can compute various sample-sample distances using the microbiota composition of your samples:
#'
#' - Bray Curtis ('bray') or any other ecological distance from phyloseq::distance() / vegan::vegdist()
#' - UniFrac distances (using the GUniFrac package)
#' - generalised: 'gunifrac' (optionally set weighting alpha in gunifrac alpha)
#' - unweighted: 'unifrac'
#' - weighted: 'wunifrac'
#' - Aitchison distance (Euclidean distance after centered log ratio transform clr, see details)
#' - Euclidean distance
#'
#' Use dist_calc with psExtra output of tax_transform (or tax_agg).
#' It returns a psExtra object containing the phyloseq and the name of the distance used
#' in addition to the distance matrix itself.
#' The resulting object is intended to be piped into ord_calc or dist_permanova functions.
#' Alternatively you can directly access the distance matrix with dist_get().
#'
#' @section Aitchison distance note:
#'
#' You should EITHER:
#' 1. skip the dist_calc function and call ord_calc(method = "PCA") directly on an object with taxa transformed with tax_transform(trans = "clr")
#' 2. pass an object with untransformed (or 'identity' transformed) taxa to the data argument of dist_calc() and specify dist = "aitchison".
#'
#' If ordination plots with taxon loading vectors are desired, users require option 1.
#' If the distance matrix is required for permanova, users require option 2.
#'
#' @section Binary Jaccard distance note:
#'
#' Jaccard distance can be computed on abundances, but often in microbiome
#' research it is the Binary Jaccard distance that is desired. So remember to
#' first perform a "binary" transformation with `tax_transform("binary")`,
#' OR pass an additional argument to `dist_calc("jaccard", binary = TRUE)`
#'
#' @param data psExtra object, e.g. output from tax_transform()
#' @param dist name of distance to calculate between pairs of samples
#' @param gunifrac_alpha
#' setting alpha value only relevant if gunifrac distance used
#' @param ...
#' optional distance-specific named arguments passed to phyloseq::distance()
#'
#' @return psExtra object including distance matrix and name of distance used
#' @export
#'
#' @seealso \code{\link{tax_transform}} for the function to use before dist_calc
#' @seealso \code{\link{ord_calc}}
#' @seealso \code{\link{ord_plot}}
#' @seealso \code{\link{dist_permanova}}
#' @seealso \code{phyloseq::\link[phyloseq:distance]{distance}}
#' @seealso \code{vegan::\link[vegan:vegdist]{vegdist}}
#'
#' @examples
#' # bray curtis distance on genera-level features
#' data("dietswap", package = "microbiome")
#' bc <- dietswap %>%
#' tax_agg("Genus") %>%
#' dist_calc("bray")
#' bc
#' class(bc)
#'
#' # gunifrac distance using phyloseq input
#' data("esophagus", package = "phyloseq")
#' gunifrac <- esophagus %>%
#' dist_calc("gunifrac") %>%
#' dist_get()
#' class(gunifrac)
dist_calc <- function(data,
dist = "bray",
gunifrac_alpha = 0.5,
...) {
# check valid distance name was supplied
rlang::arg_match(arg = dist, multiple = FALSE, values = union(c(
"bray", "gunifrac", "unifrac", "wunifrac",
"aitchison", "robust.aitchison", "euclidean"
), unlist(phyloseq::distanceMethodList)))
# check input data object class
isDistCalcDataPhyloseq(data)
# get components
ps <- ps_get(data)
info <- info_get(data)
# calculate distance matrix #
if (dist %in% c("aitchison", "robust.aitchison")) {
# aitchison distance (or robust aitchison)
distMat <- distMatAitchison(ps = ps, dist = dist, info = info)
} else if (grepl(pattern = "unifrac", dist)) {
# unifrac distances using GUniFrac package
# define unifrac result ID required by GUniFrac package
uniID <- switch(
EXPR = dist,
"unifrac" = "UW",
"wunifrac" = 1,
"gunifrac" = gunifrac_alpha
)
# update distance name
if (identical(dist, "gunifrac")) dist <- paste0(dist, "_", uniID)
# wunifrac is just setting gunifrac alpha to 1
if (identical(dist, "wunifrac")) gunifrac_alpha <- 1
distMat <- distMatUnifrac(
ps = ps, gunifrac_alpha = gunifrac_alpha, uniID = uniID
)
} else if (dist %in% unlist(phyloseq::distanceMethodList)) {
# calculate distance matrix if distance is supported in phyloseq
distMat <- phyloseq::distance(ps, method = dist, type = "samples", ...)
} else {
stop("Invalid distance measure named in dist argument: ", dist)
}
if (!is(data, "psExtra")) data <- psExtra(data, info = new_psExtraInfo())
info <- modify_psExtraInfo(info, dist_method = dist)
data@dist <- distMat
data@info <- info
return(data)
}
# calculates (robust.)aitchison distance matrix from phyloseq: ps
# (psExtra `info` required for transformation check)
#
distMatAitchison <- function(ps, dist, info) {
if (identical(info$tax_trans, "clr") || identical(info$tax_trans, "rclr")) {
rlang::abort(call = rlang::caller_env(1), message = c(
"dist_calc 'aitchison' distance requires count data",
i = paste0(
"your data are ", info$tax_trans,
"-transformed (according to psExtra info)"
),
i = "see the ?dist_calc details section for more info"
))
}
if (dist == "aitchison") trans <- "clr"
if (dist == "robust.aitchison") trans <- "rclr"
transformedOTUtable <- t(microbiome::abundances(x = ps, transform = trans))
distMat <- stats::dist(transformedOTUtable, method = "euclidean")
return(distMat)
}
# calculates any unifrac distance matrix from phyloseq: ps
# gunifrac_alpha
distMatUnifrac <- function(ps, gunifrac_alpha, uniID) {
if (identical(ps@phy_tree, NULL)) {
warning(
"unifrac distances require un-aggregated taxa and a phylogenetic tree."
)
}
rlang::check_installed("GUniFrac", reason = "to use unifrac distances")
# GUniFrac is much faster than phyloseq version of unifrac measures
# and results are the same (to floating point precision)
distMats <- GUniFrac::GUniFrac(
otu.tab = otu_get(ps),
tree = phyloseq::phy_tree(ps),
alpha = c(gunifrac_alpha),
verbose = FALSE
)[["unifracs"]]
distMat <- stats::as.dist(distMats[, , paste0("d_", uniID)])
return(distMat)
}
# data class checker, also used in dist_calc_seq()
isDistCalcDataPhyloseq <- function(data) {
if (!methods::is(data, "phyloseq")) {
rlang::abort(call = rlang::caller_env(), message = c(
"data for dist_calc must be of class 'psExtra'",
" " = "e.g. output of tax_agg or tax_transform",
"x" = paste("data is class:", paste(class(data), collapse = " "))
))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.