# ==============================================================================
# Diversity Metric
# ==============================================================================
#' Taxonomic Diversity
#' @description Calculate the percentage of each sample represented by the
#' specified taxon or taxa.
#' @param .dataframe A data frame where each row should represent the number of
#' individuals enumerated for a single taxon collected during a single sampling event.
#' @param .key_col One unquoted column name that represents a key (i.e., unique ID)
#' for a sampling event for which to group (i.e., aggregate) the data.
#' @param .counts_col One unquoted column name that represents taxonomic counts.
#' @param .filter A logical statement to subset the data frame prior to calculating
#' the metric of interest.
#' @param .unnest_col One unqouted column name that represents nested data.
#' If this column is NULL (default), then the data will not be unnested.
#' @param .group_col One unquoted column name that represents a taxomic rank
#' or group of interest.
#' @param .job A character string specifying the diversity metric of interest.
#' Below is a list of exceptable inputs:
##' \itemize{
##' \item{"shannon"}{Description needed}
##' \item{"effective_shannon"}{Description needed}
##' \item{"simpson""}{Description needed}
##' \item{"invsimpson"}{Description needed}
##' \item{"gini_simpson"}{Description needed}
##' \item{"effective_simpson"}{Description needed}
##' \item{"pielou"}{Description needed}
##' \item{"margalef"}{Description needed}
##' \item{"menhinick"}{Description needed}
##' \item{"hill"}{Description needed}
##' \item{"renyi"}{Description needed}
##' }
#' @param .base_log The base log value used during the calculation of
#' Shannon Diversity index ("shannon") or Effective Shannon Diversity ("effective_shannon").
#' The default value is two.
#' @param .q The exponent used during the calculation of Hill Numbers ("hill") and
#' Renyi Entropy ("renyi").
#' @return A numeric vector.
#' @importFrom rlang .data
#' @export
taxa_div <- function(.dataframe, .key_col, .counts_col,
.group_col,
.filter = NULL,
.job, .base_log = 2, .q,
.unnest_col = NULL) {
#------------------------------------------------------------------------------
prep.df <- .prep_div(
.dataframe = .dataframe,
.key_col = {{ .key_col }},
.counts_col = {{ .counts_col }},
.group_col = {{ .group_col }},
.filter = {{ .filter }},
.unnest_col = {{ .unnest_col }}
)
#------------------------------------------------------------------------------
if (.job %in% c("shannon", "effective_shannon")) {
final.vec <- prep.df %>%
dplyr::group_by({{ .key_col }}, {{ .group_col }}, .data$mmir_total) %>%
dplyr::summarize(count = sum({{ .counts_col }})) %>%
dplyr::mutate(p = .data$count / .data$mmir_total) %>%
dplyr::mutate(log_p = -.data$p * log(.data$p, .base_log)) %>%
dplyr::group_by({{ .key_col }}) %>%
dplyr::summarize(final = sum(.data$log_p, na.rm = TRUE)) %>%
original_order(.dataframe, {{ .key_col }}) %>%
dplyr::mutate(final = dplyr::if_else(!is.na(.data$final),
.data$final,
as.double(0))) %>%
dplyr::pull(.data$final)
if (.job == "effective_shannon") final.vec <- exp(final.vec)
}
#------------------------------------------------------------------------------
if (.job %in% c("simpson", "invsimpson", "gini_simpson", "effective_simpson")) {
final.vec <- prep.df %>%
dplyr::group_by({{ .key_col }}, {{ .group_col }}, .data$mmir_total) %>%
dplyr::summarize(count = sum({{ .counts_col }})) %>%
dplyr::mutate(
p = .data$count / .data$mmir_total,
p = .data$p^2
) %>%
dplyr::group_by({{ .key_col }}) %>%
dplyr::summarize(final = sum(.data$p, na.rm = TRUE)) %>%
original_order(.dataframe, {{ .key_col }}) %>%
dplyr::mutate(final = dplyr::if_else(
!is.na(.data$final),
as.double(.data$final),
as.double(0)
)) %>%
dplyr::pull(.data$final)
if (.job == "simpson") final.vec
if (.job == "gini_simpson") {
final.vec <- ifelse(final.vec > 0, 1 - final.vec, 0)
}
if (.job == "invsimpson") {
final.vec <- ifelse(final.vec != 0, 1 / final.vec, 0)
}
if (.job == "effective_simpson") {
final.vec <- ifelse(final.vec != 1, 1 / (1 - final.vec), 0)
}
}
#------------------------------------------------------------------------------
if (.job %in% c("pielou", "margalef", "menhinick")) {
rich.vec <- taxa_rich(
.dataframe = .dataframe,
.key_col = {{ .key_col }},
.counts_col = {{ .counts_col }},
.group_col = {{ .group_col }},
.filter = {{ .filter }},
.unnest_col = {{ .unnest_col }}
)
if (.job == "pielou") final.vec <- log10(rich.vec)
if (.job %in% c("margalef", "menhinick")) {
abund.vec <- taxa_abund(
.dataframe = .dataframe,
.key_col = {{ .key_col }},
.counts_col = {{ .counts_col }},
.filter = {{ .filter }},
.unnest_col = {{ .unnest_col }}
)
if (.job == "margalef") {
final.vec <- ifelse(rich.vec != 1 & abund.vec != 0,
(rich.vec - 1) / log10(abund.vec),
0
)
}
if (.job == "menhinick") {
final.vec <- ifelse(abund.vec != 0,
rich.vec / sqrt(abund.vec),
0
)
}
}
}
#------------------------------------------------------------------------------
if (.job %in% c("hill", "renyi")) {
final.vec <- prep.df %>%
dplyr::group_by({{ .key_col }}, {{ .group_col }}, .data$mmir_total) %>%
dplyr::summarize(
.counts_col = sum({{ .counts_col }}),
na.rm = TRUE
) %>%
dplyr::mutate(
p = {{ .counts_col }} / .data$mmir_total,
p = .data$p^.q
) %>%
dplyr::group_by({{ .key_col }}) %>%
dplyr::summarize(final = sum(.data$p, na.rm = TRUE)) %>%
original_order(.dataframe, {{ .key_col }}) %>%
dplyr::pull(.data$final)
if (.job == "hill") final.vec <- final.vec^(1 / (1 - .q))
if (.job == "renyi") final.vec <- (1 / (1 - .q)) * log(final.vec)
}
#------------------------------------------------------------------------------
return(final.vec)
}
.prep_div <- function(.dataframe, .key_col, .counts_col, .group_col,
.filter = NULL,
.unnest_col = NULL) {
prep.df <- prep_taxa_df(
.dataframe = .dataframe,
.key_col = {{ .key_col }},
.unnest_col = {{ .unnest_col }},
.filter = {{ .filter }}
)
final.df <- tidyr::complete(prep.df, {{ .key_col }}, {{ .group_col }}) %>%
dplyr::mutate({{ .counts_col }} := dplyr::if_else(
is.na({{ .counts_col }}),
as.double(0),
as.double({{ .counts_col }})
)) %>%
dplyr::group_by({{ .key_col }}) %>%
dplyr::mutate(mmir_total = sum({{ .counts_col }})) %>%
dplyr::ungroup()
return(final.df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.