Nothing
#' Screening of site by taxa matrix
#'
#' Screens site by taxa matrix for potential minimum occurrence and frequency
#' problems.
#'
#' This function provides a warning for low numbers of observations and data
#' sets with too few observations to make any reasonable interpretation. It
#' also warns users when 100 percent occurence is detected and how much of the
#' data set meets this criterion, as IndVal based analyses are not ideal for
#' this type of data and we might recommend other techniques (Baker and King
#' 2013). The function also assess whether occurrence frequencies for any taxa
#' fall below 3, and call attention to very small minimum split sizes.
#'
#' @param txa A site by taxon matrix of sampled counts at each sampling
#' location.
#' @param minSplt The minimum split size used for partitioning. The default is
#' to use the argument form the original TITAN function call.
#' @param messaging If \code{TRUE}, provide progress messages.
#' @return A site by taxon matrix of sampled counts at each sampling location.
#' @references Baker, ME and RS King. 2010. A new method for detecting and
#' interpreting biodiversity and ecological community thresholds. Methods in
#' Ecology and Evolution 1(1): 25:37.
#' @references Baker ME and RS King. 2013. Of TITAN and straw men: an appeal for
#' greater understanding of community data. Freshwater Science 32(2):489-506.
#' @author M. Baker and R. King
txa.screen <- function(txa, minSplt = minSplt, messaging = TRUE) {
if (messaging) cli_alert_info("Screening taxa...")
taxa <- data.matrix(txa)
numUnit <- nrow(txa)
numSpp <- ncol(txa)
if (numUnit < 10) cli_abort("Number of observations too small.")
if (numUnit < 20) cli_warn("Low number of observations.")
ocrnc <- colSums(taxa > 0, na.rm = TRUE)
minTaxa <- min(ocrnc)
if (max(ocrnc) == numUnit) {
detected_times <- sum(ocrnc == numUnit)
pct_of_taxa <- round((detected_times/numSpp) * 100, digits = 1)
if(messaging) cli_alert(
" 100% occurrence detected {detected_times} times ({pct_of_taxa}% of taxa),"
)
if(messaging) cli_alert(" use of TITAN less than ideal for this data type.")
}
if (minTaxa < 3) {
cli_abort("Minimum taxon occurrence frequency is 3, your data do not meet this criterion")
}
if (minSplt < 5) {
if (minSplt < 3) {
cli_abort("Minimum split size should be greater than 2, preferably 5 or more; please set minSplt to larger value and try again")
} else {
cli_warn("Minimum split size is preferably 5 or more; please double check minSplt settings")
}
}
if(messaging) cli_alert_success(" Taxa frequency screen complete.")
taxa
}
#' Partitions environmental gradient for analysis
#'
#' This function compares the number of records in the environmental vector and
#' the umber of rows in the taxa matrix to ensure they are compatible. It then
#' uses the rank order of environmental values at each sampling location to
#' determine group membership of all sampling sites at each level of
#' partitioning.
#'
#' @param env A vector of values for each sampling location along the
#' environmental gradient.
#' @param taxa A site by taxon matrix containing observed counts at each
#' sampling location.
#' @param minSplt The minimum split size used in binary partitioning. The
#' default is to use the argument from the original TITAN function call.
#' @param messaging message user?
#' @return A list of seven objects: \describe{
#'
#' \item{env}{an environmental vector}
#'
#' \item{numUnit}{the number of sample units in env}
#'
#' \item{numTxa}{the number of distinct taxonomic units in taxa}
#'
#' \item{numClass}{the number of candidate partitions (numUnit-2*minSplt)}
#'
#' \item{srtEnv}{a sorted version of the environmental vector}
#'
#' \item{envcls}{a vector of environmental values used to distinguish
#' partitions}
#'
#' \item{eclass}{a matrix of group membership relative to each partition in
#' envcls}
#'
#' }
#' @references Baker, ME and RS King. 2010. A new method for detecting and
#' interpreting biodiversity and ecological community thresholds. Methods in
#' Ecology and Evolution 1(1): 25:37.
#' @author M. Baker and R. King
#' @keywords TITAN
env.part <- function(env, taxa, minSplt = minSplt, messaging = TRUE) {
if(messaging) cli_alert_info("Partitioning along gradient...")
nUnit <- length(env)
numUnit <- nrow(taxa)
if (nUnit != numUnit) cli_abort("Number of sites not equal between env vector and taxa matrix")
env <- as.matrix(env)
rankEnv <- rank(env, ties.method = "random")
srtEnv <- sort(env)
srtEnv2 <- sort(rankEnv)
numTxa <- ncol(taxa)
envcls <- srtEnv[(minSplt):(numUnit - minSplt)]
numClass <- length(envcls)
eclass <- matrix(NA, numUnit, numClass)
for (c in 1:numClass) {
eclass[, c] <- ((rankEnv > srtEnv2[minSplt + (c - 1)]) * 1) + 1
}
list(env, numUnit, numTxa, numClass, srtEnv, envcls, eclass)
}
#' Summarizes results from TITAN's analaysis of observed data
#'
#' This function populates the first seven columns of the 'sppmax' output table
#' with results summarized for each taxon from the output of 'getivz'.
#'
#' The items summarized for each taxon include (1) the env value at the IndVal
#' maximum, (2) the env value at the z-score maximum, (3) the taxon's occurrence
#' frequency, (4) group membership (decreaser or increaser), (5) the observed
#' IndVal score, (6) the probability that the taxon's IndVal score could be
#' generated by random chance, (7) the z score. As the values 3-7 are computed
#' for every taxon at each candidate change point, values for the table are
#' determined by the maximum (IndVal or z score) indicated by the value of
#' 'imax'. For further detail regarding the advantages and disadvantages of
#' using imax=T or imax=F, see Baker and King (2013).
#'
#' @param ivzScores The product of the 'getivz' function. A data matrix
#' comprised of four submatrices including group membership, z scores,
#' IndVals, and p values.
#' @param taxa A site by taxon matrix with counts observed at each sampling
#' location.
#' @param srtEnv A sorted version of the environmental gradient.
#' @param minSplt The minimum split size used to determine partitions along the
#' environmental gradient. The defualt is to use the argument from the
#' original TITAN function call.
#' @param imax A logical indicating whether taxon-specific change points should
#' be determined using IndVal maxima or z-score maxima (as in TITAN v1.0).
#' @return The function output consists of a list of three objects: \describe{
#'
#' \item{obs1}{a logical indicating decreasing taxa}
#'
#' \item{obs2}{a logical indicating increasing taxa}
#'
#' \item{sppmax}{a partially completed summary output table for all taxa}
#'
#' }
#' @references Baker, ME and RS King. 2010. A new method for detecting and
#' interpreting biodiversity and ecological community thresholds. Methods in
#' Ecology and Evolution 1(1): 25:37.
#' @references Baker ME and RS King. 2013. Of TITAN and straw men: an appeal for
#' greater understanding of community data. Freshwater Science 32(2):489-506.
#' @author M. Baker and R. King
#' @seealso [getivz()], [titan()]
#' @keywords TITAN
obs.summ <- function(ivzScores, taxa, srtEnv, minSplt = minSplt, imax = imax) {
## Prep output table sppmax
numTxa = ncol(taxa)
sppmax <- matrix(NA, numTxa, 16)
rownames(sppmax) <- colnames(taxa)
colnames(sppmax) <- c("ienv.cp", "zenv.cp", "freq", "maxgrp",
"IndVal", "obsiv.prob", "zscore", "5%", "10%", "50%", "90%",
"95%", "purity", "reliability", "z.median", "filter")
cli_alert_info("Summarizing observed results...")
## Obtain env values at IndVal, z-score maxima
max.ival <- apply(ivzScores[((numTxa * 2) + 1):(numTxa * 3),], 1, which.max)
max.zval <- apply(ivzScores[(numTxa + 1):(numTxa * 2), ], 1,which.max)
## Store direction of observed IndVals
obs1 <- rep(NA, numTxa)
obs2 <- rep(NA, numTxa)
sppmax[, 16] = 0
## Write imax, zmax, freq, maxgrp, IndVal, iv.prob, zscores to
## table
for (i in 1:numTxa) {
sppmax[i, 1] <- (srtEnv[minSplt + (max.ival[i] - 1)] +
srtEnv[minSplt + max.ival[i]])/2
sppmax[i, 2] <- (srtEnv[minSplt + (max.zval[i] - 1)] +
srtEnv[minSplt + max.zval[i]])/2
sppmax[i, 3] <- sum(taxa[, i] > 0, na.rm = TRUE)
if (imax == FALSE) {
if (i == 1) {
cli_alert_info("Estimating taxa change points using z-score maxima...")
}
sppmax[i, 4] <- ivzScores[i, max.zval[i]]
sppmax[i, 5] <- ivzScores[((numTxa * 2) + i), max.zval[i]]
sppmax[i, 6] <- ivzScores[((numTxa * 3) + i), max.zval[i]]
sppmax[i, 7] <- ivzScores[((numTxa * 1) + i), max.zval[i]]
sppmax[, 7] <- round(sppmax[, 7], 2)
sppmax[, 5] <- round(sppmax[, 5], 2)
obs1[i] <- ivzScores[i, max.zval[i]] == 1
obs2[i] <- ivzScores[i, max.zval[i]] == 2
} else {
if (i == 1) {
cli_alert_info("Estimating taxa change points using raw IndVal maxima...")
}
sppmax[i, 4] <- ivzScores[i, max.ival[i]]
sppmax[i, 5] <- ivzScores[((numTxa * 2) + i), max.ival[i]]
sppmax[i, 6] <- ivzScores[((numTxa * 3) + i), max.ival[i]]
sppmax[i, 7] <- ivzScores[((numTxa * 1) + i), max.ival[i]]
sppmax[, 7] <- round(sppmax[, 7], 2)
sppmax[, 5] <- round(sppmax[, 5], 2)
obs1[i] <- ivzScores[i, max.ival[i]] == 1
obs2[i] <- ivzScores[i, max.ival[i]] == 2
}
}
list(obs1, obs2, sppmax)
}
#' Summarizes the results of the community-level sum(z) analysis
#'
#' This function populates the sumz.cp table using the resuls from function
#' 'ivzsums' and, if 'boot'=TRUE, calls 'ivsums.f' to compute ivz sums filtered
#' by pure and reliable taxa.
#'
#' The function locates the env values of sum(z) maxima, then if 'boot'=TRUE,
#' locates the the env value of the filtered sum(z) and provides bootstrap
#' quantiles of both filtered and unfiltered distributions.
#'
#' @param ivzScores The product of the 'getivz' function. A data matrix
#' comprised of four submatrices including group membership, z scores,
#' IndVals, and p values.
#' @param ivz The product of the 'ivzsums' function. A data matrix comprised of
#' two parallel vectors of sum(z-) and sum(z+) scores.
#' @param srtEnv A sorted version of the environmental gradient.
#' @param sppmax The taxon-specific summary output table from TITAN, passed to
#' 'ivzsums.f'.
#' @param maxSumz A vector of sum(z) maxima across bootstrap replicates.
#' @param maxFsumz A vector of sum(z) maxima filtered by pure and reliable taxa
#' across bootstrap replicates.
#' @param minSplt The minimum split size used to partition the environmental
#' gradient. The default is to use the argument specified by the original
#' TITAN function call.
#' @param boot A logical indicating whether the bootstrap procedure should be
#' implemented. The default is to use the argument specified by the original
#' TITAN function call.
#' @return A list with two objects:
#'
#' \describe{
#'
#' \item{sumz.cp}{A second summary output table from TITAN to accompany
#' 'sppmax'.}
#'
#' \item{ivz.f}{The product of the 'ivzsums.f' function. A data matrix
#' comprised of two parallel vectors of filtered sum(z-) and sum(z+) scores.}
#'
#' }
#' @references Baker, ME and RS King. 2010. A new method for detecting and
#' interpreting biodiversity and ecological community thresholds. Methods in
#' Ecology and Evolution 1(1): 25:37.
#' @references King, RS and ME Baker 2010. Considerations for identifying and
#' interpreting ecological community thresholds. Journal of the North American
#' Benthological Association 29(3):998-1008.
#' @author M. Baker and R. King
#' @seealso [ivzsums()], [ivzsums.f()], [getivz()], [titan()]
#' @keywords TITAN sum(z)
sumz.tab <- function(ivzScores, ivz, srtEnv, sppmax, maxSumz = maxSumz,
maxFsumz = maxFsumz, minSplt = minSplt, boot = boot) {
sumz.cp <- matrix(NA, 4, 6)
colnames(sumz.cp) <- c("cp", "0.05", "0.10", "0.50", "0.90",
"0.95")
rownames(sumz.cp) <- c("sumz-", "sumz+", "fsumz-", "fsumz+")
sumz.cp[1, 1] <- (srtEnv[minSplt + (which.max(ivz[, 1]) - 1)] +
srtEnv[minSplt + which.max(ivz[, 1])])/2
sumz.cp[2, 1] <- (srtEnv[minSplt + (which.max(ivz[, 2]) - 1)] +
srtEnv[minSplt + which.max(ivz[, 2])])/2
ivz.f <- 0
if (boot) {
ivz.f <- ivzsums.f(ivzScores, sppmax)
if (!is.na(sum(ivz.f[, 1]))) {
sumz.cp[3, 1] <- (srtEnv[minSplt + (which.max(ivz.f[,
1]) - 1)] + srtEnv[minSplt + which.max(ivz.f[,
1])])/2
}
if (!is.na(sum(ivz.f[, 2]))) {
sumz.cp[4, 1] <- (srtEnv[minSplt + (which.max(ivz.f[,
2]) - 1)] + srtEnv[minSplt + which.max(ivz.f[,
2])])/2
}
sumz.cp[1, 2:6] <- quantile(maxSumz[, 1], probs = c(0.05,
0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
sumz.cp[2, 2:6] <- quantile(maxSumz[, 2], probs = c(0.05,
0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
sumz.cp[3, 2:6] <- quantile(maxFsumz[, 1], probs = c(0.05,
0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
sumz.cp[4, 2:6] <- quantile(maxFsumz[, 2], probs = c(0.05,
0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
}
sumzTabList <- list(sumz.cp, ivz.f)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.