Nothing
#' Tsallis Entropy of a Community
#'
#' Estimate the entropy of species from abundance or probability data.
#' Several estimators are available to deal with incomplete sampling.
#'
#' Bias correction requires the number of individuals.
#' See [div_hill] for estimators.
#'
#' Entropy can be estimated at a specified level of interpolation or
#' extrapolation, either a chosen sample size or sample coverage
#' \insertCite{Chao2014}{divent}, rather than its asymptotic value.
#' See [accum_tsallis] for details.
#'
#' @inheritParams check_divent_args
#' @param x An object, that may be a numeric vector containing abundances or probabilities,
#' or an object of class [abundances] or [probabilities].
#' @param ... Unused.
#'
#' @returns A tibble with the site names, the estimators used and the estimated entropy.
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' # Entropy of each community
#' ent_tsallis(paracou_6_abd, q = 2)
#' # gamma entropy
#' ent_tsallis(paracou_6_abd, q = 2, gamma = TRUE)
#'
#' # At 80% coverage
#' ent_tsallis(paracou_6_abd, level = 0.8)
#'
#' @name ent_tsallis
NULL
#' @rdname ent_tsallis
#'
#' @export
ent_tsallis <- function(x, q = 1, ...) {
UseMethod("ent_tsallis")
}
#' @rdname ent_tsallis
#'
#' @param estimator An estimator of entropy.
#'
#' @export
ent_tsallis.numeric <- function(
x,
q = 1,
estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger",
"Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive",
"Bonachela", "Holste"),
level = NULL,
probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"),
unveiling = c("geometric", "uniform", "none"),
richness_estimator = c("jackknife", "iChao1", "Chao1", "rarefy", "naive"),
jack_alpha = 0.05,
jack_max = 10,
coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"),
sample_coverage = NULL,
as_numeric = FALSE,
...,
check_arguments = TRUE) {
# Check arguments
estimator <- match.arg(estimator)
probability_estimator <- match.arg(probability_estimator)
unveiling <- match.arg(unveiling)
richness_estimator <- match.arg(richness_estimator)
coverage_estimator <- match.arg(coverage_estimator)
if (any(check_arguments)) {
check_divent_args()
if (any(x < 0)) {
cli::cli_abort("Species probabilities or abundances must be positive.")
}
}
# Entropy of a vector of probabilities ----
if (abs(sum(x) - 1) < length(x) * .Machine$double.eps) {
if (!is.null(level)) {
cli::cli_abort("Entropy can't be estimated at a level without the abundance of species.")
}
# Probabilities sum to 1, allowing rounding error
prob <- x[x > 0]
# Avoid big numbers with this rather than prob * ln_q(1/prob, q)
ent_species <- -prob^q * ln_q(prob, q = q)
the_entropy <- sum(ent_species)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = "naive",
order = q,
entropy = the_entropy
)
)
}
}
# Eliminate 0
abd <- x[x > 0]
# Sample size
sample_size <- sum(abd)
# Probabilities
prob <- abd / sample_size
# Number of observed species
s_obs <- length(abd)
# Entropy of a vector of abundances ----
if (is.null(level)) {
## Exit if x contains no or a single species ----
if (length(abd) < 2) {
if (length(abd) == 0) {
if (as_numeric) {
return(NA)
} else {
return(
tibble::tibble_row(
estimator = "No Species",
order = q,
entropy = NA
)
)
}
} else {
if (as_numeric) {
return(0)
} else {
return(
tibble::tibble_row(
estimator = "Single Species",
order = q,
entropy = 0
)
)
}
}
} else {
# Probabilities instead of abundances
if (sample_size < 2) {
cli::cli_alert_warning(
"Entropy estimators can't apply to probability data.",
)
cli::cli_alert("{.code estimator} forced to 'naive'.")
estimator <- "naive"
}
}
## Metacommunity estimation ----
# abd may not be integers, then sample_coverage is given
# sample_coverage is between 0 and 1 (by check_arguments), sum(abd) must be an integer.
# estimator may be ChaoShen or Marcon (max(ChaoShen, Grassberger))
if (
!is.null(sample_coverage) &
is_integer_values(sample_size) &
(estimator == "ChaoShen" | estimator == "Marcon")
) {
cp <- sample_coverage * abd / sample_size
chao_shen <- -sum(cp^q * ln_q(cp, q = q) / (1 - (1 - cp)^sample_size))
if (estimator == "Marcon") {
# Calculate Grassberger's estimator
if (q == 1) {
grassberger <- sum(
abd / sample_size * (log(sample_size) - digamma(abd) -
(1 - round(abd) %% 2 * 2) / (abd + 1))
)
} else {
grassberger <- (1 - sample_size^(-q) * sum(e_n_q(abd, q = q))) / (q - 1)
}
} else grassberger <- 0
# Take the max
if (chao_shen > grassberger) {
if (as_numeric) {
return(chao_shen)
} else {
return(
tibble::tibble_row(
estimator = "ChaoShen",
order = q,
entropy = chao_shen
)
)
}
} else {
if (as_numeric) {
return(grassberger)
} else {
return(
tibble::tibble_row(
estimator = "Grassberger",
order = q,
entropy = grassberger
)
)
}
}
}
## Naive estimator ----
if (!is_integer_values(abd)) {
cli::cli_alert_warning("The estimator can't be applied to non-integer values.")
estimator <- "naive"
}
if (estimator == "naive") {
prob <- abd / sample_size
# Avoid big numbers with this rather than prob * ln_q(1/prob, q)
# Eliminate unobserved species. Useless because filtered before
# ent_species <- -prob^q * ln_q(prob, q)
# ent_species[prob == 0] <- 0
the_entropy <- -sum(prob^q * ln_q(prob, q))
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
# Common code for ZhangGrabchak. Useless if EntropyEstimation is used.
# if (estimator == "ZhangGrabchak" | estimator == "ChaoWangJost" | estimator == "ChaoJost") {
# prob <- abd / sample_size
# V <- seq_len(sample_size - 1)
# # p_V_abd is an array, containing (1 - (s_obs - 1) / (sample_size - j))
# # for each species (lines) and all j from 1 to sample_size - 1
# p_V_abd <- outer(abd, V, function(abd, j) 1- (abd - 1) / (sample_size - j))
# # Useful values are products from j = 1 to v, so prepare cumulative products
# p_V_abd <- t(apply(p_V_abd, 1, cumprod))
# # Sum of products weighted by w_v
# sum_prod <- function(s) {
# used_v <- seq_len(sample_size - abd[s])
# return(sum(w_v[used_v] * p_V_abd[s, used_v]))
# }
# }
# Specific code for q=1
# # Weights
# w_v <- 1/V
# the_entropy <- sum(prob * vapply(seq_along(abd), S_v, 0))
## Shannon ----
if (q == 1) {
return(
ent_shannon.numeric(
abd,
estimator = estimator,
probability_estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
as_numeric = as_numeric,
check_arguments = FALSE
)
)
}
## Not Shannon ----
if (estimator == "ZhangGrabchak" | estimator == "ChaoJost") {
# Weights. Useless here if package EntropyEstimation is used,
# but weights are necessary for ChaoJost
# i <- seq_len(abd)
# w_vi <- (i - q) / i
# w_v <- cumprod(w_vi)
# ZhangGrabchak <- sum(prob * vapply(seq_along(abd), sum_prod, 0)) / (1 - q)
# Use EntropyEstimation instead
if (q == 0) {
ent_ZhangGrabchak <- s_obs - 1
} else {
ent_ZhangGrabchak <- EntropyEstimation::Tsallis.z(abd, q)
}
# ZhangGrabchak stops here, but ChaoWangJost adds an estimator of the bias
if (estimator == "ZhangGrabchak") {
if (as_numeric) {
return(ent_ZhangGrabchak)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = ent_ZhangGrabchak
)
)
}
}
# Calculate abundance distribution
s_1 <- sum(abd == 1)
s_2 <- sum(abd == 2)
# Calculate A (Chao & Jost, 2015, eq. 6b)
A <- chao_A(abd)
# Eq 7d in Chao & Jost (2015).
# Terms for r in seq_len(sample_size-1) equal (-1)^r * w_v[r] * (A-1)^r.
# w_v is already available from ZhangGrabchak
i <- seq_len(sample_size)
# Weights: here only if EntropyEstimation is used. Else, they have been calculated above.
w_vi <- (i - q) / i
w_v <- cumprod(w_vi)
if (A == 1) {
# The general formula of Eq 7d has a 0/0 part that must be forced to 0
bias_chao_jost <- 0
} else {
eq7d_sum <- vapply(
seq_len(sample_size - 1),
function(r) {
w_v[r] * (1 - A)^r
},
FUN.VALUE = 0
)
# Calculate the estimator of the bias.
# eq7d_sum contains all terms of the sum except for r=0: the missing term equals 1.
# The bias in Chao & Jost (2015) is that of the Hill number.
# It must be divided by 1-q to be applied to entropy.
bias_chao_jost <- (
s_1 / sample_size *
(1 - A)^(1 - sample_size) * (A^(q - 1) - sum(eq7d_sum) - 1)
) / (1 - q)
}
the_entropy <- ent_ZhangGrabchak + bias_chao_jost
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
if (estimator == "ChaoShen" | estimator == "Marcon") {
sample_coverage <- coverage.numeric(
abd,
estimator = coverage_estimator,
as_numeric = TRUE,
check_arguments = FALSE
)
prob_cov <- sample_coverage * abd / sample_size
}
if (estimator == "GenCov") {
if (probability_estimator == "naive") {
cli::cli_alert_warning(
"{.code probability_estimator} can't be 'naive' with estimator 'GenCov'."
)
cli::cli_alert("It is forced to 'Chao2015'.")
}
if (unveiling != "none") {
cli::cli_alert_warning(
"{.code unveiling} is forced to 'none' with estimator 'GenCov'."
)
}
prob_cov <- probabilities.numeric(
abd,
estimator = probability_estimator,
unveiling = "none",
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
q = q,
as_numeric = TRUE,
check_arguments = FALSE
)
}
if (estimator == "ChaoShen" | estimator == "Marcon" | estimator == "GenCov") {
ent_cov <- -sum(prob_cov^q * ln_q(prob_cov, q) / (1 - (1 - prob_cov)^sample_size))
}
if (estimator == "ChaoShen" | estimator == "GenCov") {
if (as_numeric) {
return(ent_cov)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = ent_cov
)
)
}
}
if (estimator == "Grassberger" | estimator == "Marcon") {
ent_Grassberger <- (1 - sample_size^(-q) * sum(e_n_q(abd, q))) / (q - 1)
}
if (estimator == "Grassberger") {
if (as_numeric) {
return(ent_Grassberger)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = ent_Grassberger
)
)
}
}
if (estimator == "Marcon") {
the_entropy <- max(ent_cov, ent_Grassberger)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
if (estimator == "Holste") {
the_entropy <- 1 / (1 - q) *
(beta(s_obs + sample_size, q) * sum(1 / beta(abd + 1, q)) - 1)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
if (estimator == "Bonachela") {
the_entropy <- 1 / (1 - q) *
(beta(2 + sample_size, q) * sum(1 / beta(abd + 1, q)) - 1)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
if (estimator == "UnveilC" | estimator == "UnveiliC" | estimator == "UnveilJ") {
# Unveil the probabilities
prob_unv <- probabilities.numeric(
abd,
estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = switch(
estimator,
UnveilJ = "jackknife",
UnveilC = "Chao1",
UnveiliC = "iChao1"
),
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
q = q,
as_numeric = TRUE,
check_arguments = FALSE
)
s_est <- length(prob_unv)
# Naive estimator applied to unveiled distribution
the_entropy <- -sum(prob_unv^q * ln_q(prob_unv, q))
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = estimator,
order = q,
entropy = the_entropy
)
)
}
}
cli::cli_alert_warning("{.code estimator} was not recognized")
return(NA)
}
# Entropy at a level ----
# Single species.
if (s_obs == 1 | level == 1) {
if (as_numeric) {
return(0)
} else {
return(
tibble::tibble_row(
estimator = "Single Species",
order = q,
level = 1,
entropy = 0
)
)
}
}
# If level is coverage, get size
if (level < 1) {
level <- coverage_to_size.numeric(
abd,
sample_coverage = level,
estimator = coverage_estimator,
as_numeric = TRUE,
check_arguments = FALSE
)
}
if (level == sample_size) {
# No interpolation/extrapolation needed: return the observed entropy
the_entropy <- -sum(prob^q * ln_q(prob, q))
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = "Sample",
order = q,
level = level,
entropy = the_entropy
)
)
}
}
## Integer q ----
if (q == 0) {
# Richness - 1. Same result as general formula but faster
the_richness <- div_richness.numeric(
abd,
# Unused
estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
level = level,
probability_estimator = probability_estimator,
unveiling = unveiling,
coverage_estimator = coverage_estimator,
as_numeric = FALSE,
check_arguments = FALSE
)
# Calculate entropy and return
if (as_numeric) {
return(the_richness$diversity - 1)
} else {
the_richness <- dplyr::mutate(
the_richness,
entropy = .data$diversity - 1,
.keep = "unused"
)
return(the_richness)
}
} else if (q == 1) {
# Shannon. General formula is not defined at q=1
return(
ent_shannon(
x,
estimator = estimator,
level = level,
probability_estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
as_numeric = as_numeric,
check_arguments = FALSE
)
)
} else if (q == 2) {
# Simpson.
return(
ent_simpson(
x,
# No estimator needed. Avoid raising an error with an unknown one.
estimator = "naive",
level = level,
probability_estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
as_numeric = as_numeric,
check_arguments = FALSE
)
)
}
## Non-integer q ----
if (level <= sample_size) {
### Interpolation ----
# Obtain Abundance Frequency Count
abd_freq <- abd_freq_count(abd, level = level, check_arguments = FALSE)
# Calculate entropy (Chao et al., 2014, eq. 6)
the_entropy <- (
sum(
((seq_len(level)) / level)^q * abd_freq$number_of_species) - 1
)/(1 - q)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = "Interpolation",
order = q,
level = level,
entropy = the_entropy
)
)
}
} else {
### Extrapolation ----
# Unveil the full distribution that rarefies to the observed entropy
prob_unv <- probabilities.numeric(
abd,
estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
q = q,
as_numeric = TRUE,
check_arguments = FALSE
)
# Obtain Abundance Frequence Count at level (Chao et al., 2014, eq. 5)
s_level <- vapply(
seq_len(level),
function(nu) {
sum(
exp(
lchoose(level, nu) + nu * log(prob_unv) +
(level - nu) * log(1 - prob_unv)
)
)
},
FUN.VALUE = 0
)
# Estimate entropy (Chao et al., 2014, eq. 6)
the_entropy <- (sum((seq_len(level) / level)^q * s_level) - 1) / (1 - q)
if (as_numeric) {
return(the_entropy)
} else {
return(
tibble::tibble_row(
estimator = richness_estimator,
order = q,
level = level,
entropy = the_entropy
)
)
}
}
}
#' @rdname ent_tsallis
#'
#' @export
ent_tsallis.species_distribution <- function(
x,
q = 1,
estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger",
"Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive",
"Bonachela", "Holste"),
level = NULL,
probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"),
unveiling = c("geometric", "uniform", "none"),
richness_estimator = c("jackknife", "iChao1", "Chao1", "rarefy", "naive"),
jack_alpha = 0.05,
jack_max = 10,
coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"),
gamma = FALSE,
as_numeric = FALSE,
...,
check_arguments = TRUE) {
# Check arguments
estimator <- match.arg(estimator)
probability_estimator <- match.arg(probability_estimator)
unveiling <- match.arg(unveiling)
richness_estimator <- match.arg(richness_estimator)
coverage_estimator <- match.arg(coverage_estimator)
if (any(check_arguments)) {
check_divent_args()
if (any(x < 0)) {
cli::cli_abort("Species probabilities or abundances must be positive.")
}
}
if (gamma) {
return(
ent_gamma_tsallis(
species_distribution = x,
q = q,
estimator = estimator,
level = level,
probability_estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
as_numeric = as_numeric
)
)
} else {
# Apply ent_tsallis.numeric() to each site
# Creates a list if as_numeric is FALSE or a numeric vector else
ent_tsallis_sites <- apply(
# Eliminate site and weight columns
x[, !colnames(x) %in% non_species_columns],
# Apply to each row
MARGIN = 1,
FUN = ent_tsallis.numeric,
# Arguments
q = q,
estimator = estimator,
level = level,
probability_estimator = probability_estimator,
unveiling = unveiling,
richness_estimator = richness_estimator,
jack_alpha = jack_alpha,
jack_max = jack_max,
coverage_estimator = coverage_estimator,
as_numeric = as_numeric,
check_arguments = FALSE
)
if (as_numeric) {
return(ent_tsallis_sites)
} else {
return(
# Make a tibble with site, estimator and entropy
tibble::tibble(
# Restore non-species columns
x[colnames(x) %in% non_species_columns],
# Coerce the list returned by apply into a dataframe
do.call(rbind.data.frame, ent_tsallis_sites)
)
)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.