| bdiv_functions | R Documentation |
Beta Diversity Metrics
aitchison(
counts,
pseudocount = NULL,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
bhattacharyya(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
bray(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
canberra(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
chebyshev(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
chord(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
clark(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
divergence(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
euclidean(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
gower(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
hellinger(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
horn(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
jensen(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
jsd(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
lorentzian(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
manhattan(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
matusita(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
minkowski(
counts,
norm = "percent",
power = 1.5,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
morisita(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
motyka(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
psym_chisq(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
soergel(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
squared_chisq(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
squared_chord(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
squared_euclidean(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
topsoe(counts, norm = "percent", margin = 1L, pairs = NULL, cpus = n_cpus())
wave_hedges(
counts,
norm = "percent",
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
hamming(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
jaccard(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
ochiai(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
sorensen(counts, margin = 1L, pairs = NULL, cpus = n_cpus())
unweighted_unifrac(
counts,
tree = NULL,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
weighted_unifrac(
counts,
tree = NULL,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
normalized_unifrac(
counts,
tree = NULL,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
generalized_unifrac(
counts,
tree = NULL,
alpha = 0.5,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
variance_adjusted_unifrac(
counts,
tree = NULL,
margin = 1L,
pairs = NULL,
cpus = n_cpus()
)
counts |
A numeric matrix of count data where each column is a
feature, and each row is a sample. Any object coercible with
as.matrix() can be given here. |
pseudocount |
The value to add to all counts in counts before the log transform, so that zero counts do not produce ln(0). Default: NULL. |
margin |
If your samples are in the matrix's rows, set to 1L. If your samples are in the matrix's columns, set to 2L. Default: 1L. |
pairs |
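Which combinations of samples should distances be
calculated for? The default value (NULL) calculates distances for all pairs of samples. See the examples for restricting the calculation to a subset of pairs. |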
cpus |
How many parallel processing threads should be used. The
default is the value returned by n_cpus(). |
norm |
How to normalize the incoming counts before distances are calculated.
Default: "percent". |
power |
Scaling factor for the magnitude of differences between
communities (p in the Minkowski formula). Default: 1.5. |
tree |
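A phylogenetic tree (for example, a phylo object) whose tips correspond to the features in counts. Used by the UniFrac functions. Default: NULL. |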
alpha |
How much weight to give to relative abundances; a value
between 0 and 1, inclusive. Setting alpha = 1 is equivalent to normalized_unifrac(). Default: 0.5. |
A dist object.
Given:
n : The number of features.
X_i, Y_i : Absolute counts for the i-th feature in samples X and Y.
X_T, Y_T : Total counts in each sample. X_T = \sum_{i=1}^{n} X_i
P_i, Q_i : Proportional abundances of X_i and Y_i. P_i = X_i / X_T
X_L, Y_L : Mean log of abundances. X_L = \frac{1}{n}\sum_{i=1}^{n} \ln{X_i}
R_i : The range of the i-th feature across all samples (max - min).
Aitchison distance aitchison() | \sqrt{\sum_{i=1}^{n} [(\ln{X_i} - X_L) - (\ln{Y_i} - Y_L)]^2} |
Bhattacharyya distance bhattacharyya() | -\ln{\sum_{i=1}^{n}\sqrt{P_{i}Q_{i}}} |
Bray-Curtis dissimilarity bray() | \displaystyle \frac{\sum_{i=1}^{n} |P_i - Q_i|}{\sum_{i=1}^{n} (P_i + Q_i)} |
Canberra distance canberra() | \displaystyle \sum_{i=1}^{n} \frac{|P_i - Q_i|}{P_i + Q_i} |
Chebyshev distance chebyshev() | \max(|P_i - Q_i|) |
Chord distance chord() | \displaystyle \sqrt{\sum_{i=1}^{n} \left(\frac{X_i}{\sqrt{\sum_{j=1}^{n} X_j^2}} - \frac{Y_i}{\sqrt{\sum_{j=1}^{n} Y_j^2}}\right)^2} |
Clark's divergence distance clark() | \displaystyle \sqrt{\sum_{i=1}^{n}\left(\frac{P_i - Q_i}{P_i + Q_i}\right)^{2}} |
Divergence divergence() | \displaystyle 2\sum_{i=1}^{n} \frac{(P_i - Q_i)^2}{(P_i + Q_i)^2} |
Euclidean distance euclidean() | \sqrt{\sum_{i=1}^{n} (P_i - Q_i)^2} |
Gower distance gower() | \displaystyle \frac{1}{n}\sum_{i=1}^{n}\frac{|P_i - Q_i|}{R_i} |
Hellinger distance hellinger() | \sqrt{\sum_{i=1}^{n}(\sqrt{P_i} - \sqrt{Q_i})^{2}} |
Horn-Morisita dissimilarity horn() | \displaystyle 1 - \frac{2\sum_{i=1}^{n}P_{i}Q_{i}}{\sum_{i=1}^{n}P_i^2 + \sum_{i=1}^{n}Q_i^2} |
Jensen-Shannon distance jensen() | \displaystyle \sqrt{\frac{1}{2}\left[\sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right)\right]} |
Jensen-Shannon divergence (JSD) jsd() | \displaystyle \frac{1}{2}\left[\sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right)\right] |
Lorentzian distance lorentzian() | \sum_{i=1}^{n}\ln{(1 + |P_i - Q_i|)} |
Manhattan distance manhattan() | \sum_{i=1}^{n} |P_i - Q_i| |
Matusita distance matusita() | \sqrt{\sum_{i=1}^{n}\left(\sqrt{P_i} - \sqrt{Q_i}\right)^2} |
Minkowski distance minkowski() | \sqrt[p]{\sum_{i=1}^{n} |P_i - Q_i|^p} Where p is the exponent given by the power argument (p = 1 is Manhattan, p = 2 is Euclidean). |
|
Morisita dissimilarity * Integers Only morisita() | \displaystyle 1 - \frac{2\sum_{i=1}^{n}X_{i}Y_{i}}{\displaystyle \left(\frac{\sum_{i=1}^{n}X_i(X_i - 1)}{X_T(X_T - 1)} + \frac{\sum_{i=1}^{n}Y_i(Y_i - 1)}{Y_T(Y_T - 1)}\right)X_{T}Y_{T}} |
Motyka dissimilarity motyka() | \displaystyle \frac{\sum_{i=1}^{n} \max(P_i, Q_i)}{\sum_{i=1}^{n} (P_i + Q_i)} |
Probabilistic Symmetric \chi^2 distance psym_chisq() | \displaystyle 2\sum_{i=1}^{n}\frac{(P_i - Q_i)^2}{P_i + Q_i} |
Soergel distance soergel() | \displaystyle \frac{\sum_{i=1}^{n} |P_i - Q_i|}{\sum_{i=1}^{n} \max(P_i, Q_i)} |
Squared \chi^2 distance squared_chisq() | \displaystyle \sum_{i=1}^{n}\frac{(P_i - Q_i)^2}{P_i + Q_i} |
Squared Chord distance squared_chord() | \sum_{i=1}^{n}\left(\sqrt{P_i} - \sqrt{Q_i}\right)^2 |
Squared Euclidean distance squared_euclidean() | \sum_{i=1}^{n} (P_i - Q_i)^2 |
Topsoe distance topsoe() | \displaystyle \sum_{i=1}^{n}P_i\ln\left(\frac{2P_i}{P_i + Q_i}\right) + \sum_{i=1}^{n}Q_i\ln\left(\frac{2Q_i}{P_i + Q_i}\right) |
Wave Hedges distance wave_hedges() | \displaystyle \sum_{i=1}^{n} \frac{|P_i - Q_i|}{\max(P_i, Q_i)} |
Given:
A, B : Number of features in each sample.
J : Number of features in common.
Dice-Sorensen dissimilarity sorensen() | \displaystyle 1 - \frac{2J}{(A + B)} |
Hamming distance hamming() | \displaystyle (A + B) - 2J |
Jaccard distance jaccard() | \displaystyle 1 - \frac{J}{(A + B - J)} |
Otsuka-Ochiai dissimilarity ochiai() | \displaystyle 1 - \frac{J}{\sqrt{AB}} |
Given n branches with lengths L_i, and, for a pair of samples, either
binary presence/absence indicators (A_i and B_i) or proportional
abundances (P_i and Q_i) on each of those branches.
Unweighted UniFrac unweighted_unifrac() | \displaystyle \frac{\sum_{i=1}^{n} L_i|A_i - B_i|}{\sum_{i=1}^{n} L_i\max(A_i, B_i)} |
Weighted UniFrac weighted_unifrac() | \displaystyle \sum_{i=1}^{n} L_i|P_i - Q_i| |
Normalized Weighted UniFrac normalized_unifrac() | \displaystyle \frac{\sum_{i=1}^{n} L_i|P_i - Q_i|}{\sum_{i=1}^{n} L_i(P_i + Q_i)} |
Generalized UniFrac (GUniFrac) generalized_unifrac() | \displaystyle \frac{\sum_{i=1}^{n} L_i(P_i + Q_i)^{\alpha}\left|\displaystyle \frac{P_i - Q_i}{P_i + Q_i}\right|}{\sum_{i=1}^{n} L_i(P_i + Q_i)^{\alpha}} Where \alpha is a scalable weighting factor. |
Variance-Adjusted Weighted UniFrac variance_adjusted_unifrac() | \displaystyle \frac{\displaystyle \sum_{i=1}^{n} L_i\displaystyle \frac{|P_i - Q_i|}{\sqrt{(P_i + Q_i)(2 - P_i - Q_i)}} }{\displaystyle \sum_{i=1}^{n} L_i\displaystyle \frac{P_i + Q_i}{\sqrt{(P_i + Q_i)(2 - P_i - Q_i)}} } |
See vignette('unifrac') for detailed example UniFrac calculations.
Levy, A., Shalom, B. R., & Chalamish, M. (2024). A guide to similarity measures. arXiv.
Cha, S.-H. (2007). Comprehensive survey on distance/similarity measures between probability density functions. International Journal of Mathematical Models and Methods in Applied Sciences, 1(4), 300–307.
# Example counts matrix
t(ex_counts)
bray(ex_counts)
jaccard(ex_counts)
generalized_unifrac(ex_counts, tree = ex_tree)
# Only calculate distances for Saliva vs all.
bray(ex_counts, pairs = 1:3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.