Nothing
#' Compute similarity metrics between sites based on species composition
#'
#' This function generates a `data.frame` where each row provides one or
#' several similarity metrics between pairs of sites, based on a co-occurrence
#' `matrix` with sites as rows and species as columns.
#'
#' @param comat A co-occurrence `matrix` with sites as rows and species as
#' columns.
#'
#' @param metric A `character` vector or a single `character` string specifying
#' the metrics to compute (see Details). Available options are `"abc"`, `"ABC"`,
#' `"Jaccard"`, `"Jaccardturn"`, `"Sorensen"`, `"Simpson"`, `"Bray"`,
#' `"Brayturn"`, and `"Euclidean"`. If `"all"` is specified, all metrics will
#' be calculated. Can be set to `NULL` if `formula` is used.
#'
#' @param formula A `character` vector or a single `character` string specifying
#' custom formula(s) based on the `a`, `b`, `c`, `A`, `B`, and `C` quantities
#' (see Details). The default is `NULL`.
#'
#' @param method A `character` string specifying the method to compute `abc`
#' (see Details). The default is `"prodmat"`, which is more efficient but
#' memory-intensive. Alternatively, `"loops"` is less memory-intensive but
#' slower.
#'
#' @return
#' A `data.frame` with the additional class
#' `bioregion.pairwise.metric`, containing one or several similarity
#' metrics between pairs of sites. The first two columns represent the pairs of
#' sites. There is one column per similarity metric provided in `metric` and
#' `formula`, except for the `abc` and `ABC` metrics, which are stored in three
#' separate columns (one for each letter).
#'
#' @details
#' With `a` the number of species shared by a pair of sites, `b`
#' species only present in the first site and `c` species only present in
#' the second site.
#'
#' Jaccard = 1 - (b + c) / (a + b + c)
#'
#' Jaccardturn = 1 - 2min(b, c) / (a + 2min(b, c)) (Baselga, 2012)
#'
#' Sorensen = 1 - (b + c) / (2a + b + c)
#'
#' Simpson = 1 - min(b, c) / (a + min(b, c))
#'
#' If abundances data are available, Bray-Curtis and its turnover component can
#' also be computed with the following equation:
#'
#' Bray = 1 - (B + C) / (2A + B + C)
#'
#' Brayturn = 1 - min(B, C) / (A + min(B, C)) (Baselga, 2013)
#'
#' with `A` the sum of the lesser values for common species shared by a pair of
#' sites. `B` and `C` are the total number of specimens counted at both sites
#' minus `A`.
#'
#' `formula` can be used to compute customized metrics with the terms
#' `a`, `b`, `c`, `A`, `B`, and `C`. For example
#' `formula = c("1 - pmin(b,c) / (a + pmin(b,c))", "1 - (B + C) / (2*A + B + C)")`
#' will compute the Simpson and Bray-Curtis similarity metrics, respectively.
#' Note that `pmin` is used in the Simpson formula because `a`, `b`, `c`, `A`,
#' `B` and `C` are `numeric` vectors.
#'
#' Euclidean computes the Euclidean similarity between each pair of site
#' following this equation:
#'
#' Euclidean = 1 / (1 + d_ij)
#'
#' Where d_ij is the Euclidean distance between site i and
#' site j in terms of species composition.
#'
#' @references
#' Baselga A (2012) The Relationship between Species Replacement,
#' Dissimilarity Derived from Nestedness, and Nestedness.
#' \emph{Global Ecology and Biogeography} 21, 1223--1232.
#'
#' Baselga A (2013) Separating the two components of abundance-based
#' dissimilarity: balanced changes in abundance vs. abundance gradients.
#' \emph{Methods in Ecology and Evolution} 4, 552--557.
#'
#' @seealso
#' For more details illustrated with a practical example,
#' see the vignette:
#' \url{https://biorgeo.github.io/bioregion/articles/a3_pairwise_metrics.html}.
#'
#' Associated functions:
#' [dissimilarity] [similarity_to_dissimilarity]
#'
#' @author
#' Maxime Lenormand (\email{maxime.lenormand@inrae.fr}) \cr
#' Pierre Denelle (\email{pierre.denelle@gmail.com}) \cr
#' Boris Leroy (\email{leroy.boris@gmail.com})
#'
#' @examples
#' comat <- matrix(sample(0:1000, size = 50, replace = TRUE,
#' prob = 1 / 1:1001), 5, 10)
#' rownames(comat) <- paste0("Site", 1:5)
#' colnames(comat) <- paste0("Species", 1:10)
#'
#' sim <- similarity(comat, metric = c("abc", "ABC", "Simpson", "Brayturn"))
#'
#' sim <- similarity(comat, metric = "all",
#' formula = "1 - (b + c) / (a + b + c)")
#'
#' @export
similarity <- function(comat,
metric = "Simpson",
formula = NULL,
method = "prodmat"){
# List of metrics based on abc
lsmetricabc <- c("abc", "Jaccard", "Jaccardturn", "Sorensen", "Simpson")
# List of metrics based on ABC
lsmetricABC <- c("ABC", "Bray", "Brayturn")
# List of metrics based on other features
lsmetrico <- c("Euclidean")
# Control inputs
controls(args = method, data = NULL, type = "character")
if (!(method %in% c("prodmat", "loops"))) {
stop(paste0("Please choose method from the following:\n",
"prodmat or loops."),
call. = FALSE)
}
if (is.null(metric) & is.null(formula)) {
stop("metric or formula should be used.", call. = FALSE)
}
if(!is.null(metric)){
controls(args = metric, data = NULL, type = "character_vector")
if ("all" %in% metric) {
metric <- c(lsmetricabc, lsmetricABC, lsmetrico)
}
if (length(intersect(c(lsmetricabc, lsmetricABC, lsmetrico), metric)) !=
length(metric)) {
stop(paste0("One or several metric(s) chosen are not",
" available.\n",
"Please choose from the following:\n",
"abc, Jaccard, Jaccardturn, Sorensen, Simpson, ABC, ",
"Bray, Brayturn or Euclidean."),
call. = FALSE)
}
}
if(!is.null(formula)){
controls(args = formula, data = NULL, type = "character_vector")
}
abcinformula <- (sum(c(grepl("a", formula), grepl("b", formula),
grepl("c", formula))) > 0)
ABCinformula <- (sum(c(grepl("A", formula), grepl("B", formula),
grepl("C", formula))) > 0)
controls(args = NULL, data = comat, type = "input_matrix")
minco <- min(comat)
if (minco < 0) {
if("Euclidean" %in% metric){
message("Negative value(s) detected in comat!")
}else{
stop("Negative value detected in comat.",
call. = FALSE)
}
}
# Extract site id
siteid <- rownames(comat)
if(is.null(siteid)){
siteid <- as.character(1:dim(comat)[1])
message("No rownames detected, they have been assigned automatically.")
}
# Initialize output
res <- NULL
# abcp: compute abc for presence data
testabc <- ((length(intersect(lsmetricabc, metric)) > 0) |
abcinformula) # check if abc should be computed
if (testabc) {
comatp <- comat
comatp[comatp != 0] <- 1
if (method == "prodmat") {
# Compute the number of species in common "a" with matrix multiplication
# comatp%*%t(comatp)
sumrow <- apply(comatp, 1, sum)
# abcp <- prodmat(comatp,t(comatp))
abcp <- Matrix::tcrossprod(comatp)
rownames(abcp) <- siteid
colnames(abcp) <- siteid
# Create a data.frame from the matrix with mat_to_net (little trick to
# deal with 0s)
abcp[abcp == 0] <- -1
abcp[lower.tri(abcp, diag = TRUE)] <- 0
abcp <- mat_to_net(abcp, weight = TRUE, remove_zeroes = TRUE)
colnames(abcp) <- c("Site1", "Site2", "a")
abcp[abcp[, 3] == -1, 3] <- 0
# Compute b and c
abcp$b <- 0
abcp$c <- 0
abcp[, 4] <- sumrow[match(abcp[, 1], siteid)] - abcp[, 3]
abcp[, 5] <- sumrow[match(abcp[, 2], siteid)] - abcp[, 3]
}
if (method == "loops") {
# Use abc Rcpp function in src (three loops)
abcp <- abc(comatp)
abcp <- data.frame(Site1 = siteid[abcp[, 1]], Site2 = siteid[abcp[, 2]],
a = abcp[, 3], b = abcp[, 4], c = abcp[, 5])
}
# Update res if NULL
if (is.null(res)) {
res <- abcp[, 1:2]
}
# Compute metrics based on abc
if ("Jaccard" %in% metric) {
res$Jaccard <- 1 - (abcp$b + abcp$c) / (abcp$a + abcp$b + abcp$c)
}
if ("Jaccardturn" %in% metric) {
res$Jaccardturn <- 1 - 2 * pmin(abcp$b, abcp$c) /
(abcp$a + 2 * pmin(abcp$b, abcp$c))
}
if ("Sorensen" %in% metric) {
res$Sorensen <- 1 - (abcp$b + abcp$c) / (2 * abcp$a + abcp$b + abcp$c)
}
if ("Simpson" %in% metric) {
res$Simpson <- 1 - pmin(abcp$b, abcp$c) / (abcp$a + pmin(abcp$b, abcp$c))
}
# Attach abcp if abc in formula
if (abcinformula) {
a <- abcp$a
b <- abcp$b
c <- abcp$c
}
}
# abca: compute ABC for abundance data
testABC <- ((length(intersect(lsmetricABC, metric)) > 0) |
ABCinformula) # check if ABC should be computed
if (testABC) {
# Use abc Rcpp function in src (three loops)
abca <- abc(comat)
abca <- data.frame(Site1 = siteid[abca[, 1]], Site2 = siteid[abca[, 2]],
A = abca[, 3], B = abca[, 4], C = abca[, 5])
# Update res if NULL
if (is.null(res)) {
res <- abca[, 1:2]
}
# Compute metrics based on ABC
if ("Bray" %in% metric) {
res$Bray <- 1 - (abca$B + abca$C) / (2 * abca$A + abca$B + abca$C)
}
if ("Brayturn" %in% metric) {
res$Brayturn <- 1 - pmin(abca$B, abca$C) /
(abca$A + pmin(abca$B, abca$C))
}
# Attach abca if ABC in formula
if (ABCinformula) {
A <- abca$A
B <- abca$B
C <- abca$C
}
}
# Compute Euclidean similarity between site using dist()
if ("Euclidean" %in% metric) {
eucl <- as.matrix(stats::dist(comat))
rownames(eucl) <- siteid
colnames(eucl) <- siteid
eucl[eucl == 0] <- -1
eucl[lower.tri(eucl, diag = TRUE)] <- 0
eucl <- mat_to_net(eucl, weight = TRUE, remove_zeroes = TRUE)
colnames(eucl) <- c("Site1", "Site2", "Euclidean")
eucl[eucl[, 3] == -1, 3] <- 0
# Update res if NULL
if (is.null(res)) {
res <- eucl[, 1:2]
}
res$Euclidean <- 1 / (1 + eucl$Euclidean)
}
# Compute metrics based on abc
if ("abc" %in% metric) {
res$a <- abcp$a
res$b <- abcp$b
res$c <- abcp$c
}
if ("ABC" %in% metric) {
res$A <- abca$A
res$B <- abca$B
res$C <- abca$C
}
# Compute equation in formula
if (!is.null(formula)) {
for (k in 1:length(formula)) {
res <- cbind(res, eval(parse(text = formula[k])))
colnames(res)[dim(res)[2]] <- formula[k]
}
}
# Create output class
class(res) <- append("bioregion.pairwise.metric", class(res))
# Inform nature of the output
attr(res, "type") <- "similarity"
attr(res, "nb_sites") <- nrow(comat)
attr(res, "nb_species") <- ncol(comat)
# Return the output
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.