#' Synoptic tables and calculation of cluster-wise frequencies, fidelity and
#' differential species character
#'
#' @description
#' Synoptic tables are a tool for the visualization and interpretation of previously
#' defined plant species groups (clusters), e.g. from cluster analysis, classification methods or
#' pre-defined categories, e.g. spatial distribution units.
#' They help to determine characteristic patterning of species occurrences in plant communities
#' by calculating cluster-wise percentage or absolute frequencies, mean/median cover values, fidelity
#' (phi) or differential species character.
#'
#' \code{syntable} function calculates an unordered synoptic table for plant community analysis, using
#' an input species-sample data frame and a vector of cluster identity input.
#' The unordered output table can be sorted automatically with \code{\link[goeveg]{synsort}} function
#' in this package.
#'
#' @param matrix Species matrix or data frame with species in columns and samples in rows.
#' Missing values (NA) will be transformed to 0.
#' If non-numeric abundance values are present, the matrix will be transformed to presence/absence with all
#' non-zero values defined as 1.
#' Species and sample names must be defined as column- and row names, respectively.
#' @param cluster Integer or character vector/factor with classification cluster identity. Ensure matching order of
#' cluster identity and samples in matrix for correct allocation of cluster numbers to samples.
#' @param abund Type of abundances. Define whether input species matrix or data frame is percentage cover (\code{abund = "percentage"}, default)
#' or presence/absence data (\code{abund = "pa"}, with values 0/1). You may use function \code{\link{cov2per}} to transform
#' cover-abundance values from different scales into percentage cover.
#' @param type Type of synoptic table output \code{type = c("percfreq", "totalfreq", "mean",
#' "median", "diffspec", "phi")}. See Details.
#' @param digits Integer indicating the number of decimal places to be displayed in result tables (default 0)
#'
#' @section Details:
#' For synoptic table calculation, six types are available.
#' \itemize{
#' \item \code{type = "percfreq" } Creates a percentage frequency table \emph{(default)}
#' \item \code{type = "totalfreq" } Creates an absolute frequency table
#' \item \code{type = "mean" } Calculates mean of species values given in \code{matrix} per cluster
#' \item \code{type = "median" } Calculates median of species values given in \code{matrix} per
#' cluster
#' \item \code{type = "diffspec" } Calculates differential character of species according to
#' Tsiripidis et al. 2009, with resulting character p = positive, n = negative, pn = positive-
#' negative or no differential character (-). Consider that differential character is always
#' restricted to some and not necessarily all of the other units, thus considering percentage
#' frequency is essential for correct interpretation of the diagnostic species character.
#' This calculation needs at least three groups.
#' \item \code{type = "phi" } Calculates fidelity measure phi (algorithm basing on Sokal & Rohlf
#' 1995, Bruelheide 2000). Values are ranging between -1 and 1 with high values near 1 indicating
#' high fidelity.
#' }
#'
#' For sorting the output synoptic table, use \code{\link{synsort}} function, providing several
#' options.
#'
#' @return
#' The function returns an (invisible) list of result components.
#' \item{\code{$syntable }}{unordered synoptic table for given species and clusters}
#' \item{\code{$samplesize }}{total number of samples per cluster}
#'
#'Additionally for differential species character calculation:
#' \item{\code{$onlydiff }}{Synoptic table only with differential species}
#' \item{\code{$others }}{List of non-differential species}
#' \item{\code{$differentials }}{Lists differential species for each cluster}
#'
#'
#' @references
#' Bruelheide, H. (2000): A new measure of fidelity and its application to defining species groups.
#' \emph{Journal of Vegetation Science} \strong{11}: 167-178. \doi{https://doi.org/10.2307/3236796}
#'
#' Chytry, M., Tichy, L., Holt, J., Botta-Dukat, Z. (2002): Determination of diagnostic species with
#' statistical fidelity measures. \emph{Journal of Vegetation Science} \strong{13}: 79-90. \doi{https://doi.org/10.1111/j.1654-1103.2002.tb02025.x}
#'
#' Sokal, R.R. & Rohlf, F.J. (1995): Biometry. 3rd edition Freemann, New York.
#'
#' Tsiripidis, I., Bergmeier, E., Fotiadis, G. & Dimopoulos, P. (2009): A new algorithm for the
#' determination of differential taxa. \emph{Journal of Vegetation Science} \strong{20}: 233-240. \doi{https://doi.org/10.1111/j.1654-1103.2009.05273.x}
#'
#' @author Jenny Schellenberg (\email{jschell@gwdg.de}) and Friedemann von Lampe
#' @seealso \code{\link{synsort}}
#' @examples
#' ## Synoptic table of Scheden vegetation data
#' library(cluster)
#' pam1 <- pam(schedenveg, 4) # PAM clustering with 4 clusters output
#'
#' ## 1) Unordered synoptic percentage frequency table
#' percfreq <- syntable(schedenveg, pam1$clustering, abund = "percentage",
#' type = "percfreq")
#' percfreq # view results
#'
#' ## 2) Differential species analysis
#' differential <- syntable(schedenveg, pam1$clustering, abund = "percentage",
#' type = "diffspec")
#' # show complete table with differential character of species
#' differential$syntable
#' # list differential species for second cluster
#' differential$differentials[2]
#'
#' ## 3) Synoptic table with phi fidelity
#' phitable <- syntable(schedenveg, pam1$clustering, abund = "percentage",
#' type = "phi")
#' phitable
#'
#' ## 4) Synoptic percentage frequency table based on historical classification from 1997
#' percfreq <- syntable(schedenveg, schedenenv$comm, abund = "percentage",
#' type = "percfreq")
#' percfreq
#'
#' @export
syntable <- function(matrix, cluster, abund = "percentage", type = "percfreq", digits = 0) {
narep <- FALSE
# Check for "" values and replace by NA
if (length(matrix[matrix == ""]) != 0) {
matrix[matrix == ""] <- NA
narep <- TRUE
}
# Check for NA values
if (any(is.na(matrix))) {
matrix[is.na(matrix)] <- 0
narep <- TRUE
}
if(narep == TRUE) print("NA and/or empty character values replaced by 0.")
if (any(is.na(cluster))) {
stop("NA values in cluster not allowed.")
}
# Check if values are numeric
if(any(is.na(suppressWarnings(as.numeric(unlist(matrix)))))) {
warning("Non-numeric cover values transformed into 1. Using presence/absence scale.")
matrix[matrix != 0] <- 1
abund = "pa"
}
# Conversion to numeric dataframe
matrix <- as.data.frame(apply(matrix, 2, as.numeric))
# Define groups
cluster <- as.factor(cluster)
group <- levels(cluster)
# Create empty synoptic table
syntab <- data.frame(matrix(NA, nrow = length(names(matrix)), ncol = length(group)))
names(syntab) <- group
rownames(syntab) <- names(matrix)
# Calculate results depending on choices
if (abund != "percentage" & abund != "pa") {
stop("Argument 'abund' must be either percentages ('percentage') or
presence/absence data ('pa')")
} else {
if (type=="totalfreq") {
for (i in cluster) {syntab[,i] <- apply(matrix[cluster==i,]>0, 2 ,sum)}
samplesize <- tapply(rep(1, length(cluster)), cluster, sum)
results <- list("syntable" = syntab,
"samplesize" = samplesize)
} else if (type=="percfreq") {
samplesize <- tapply(rep(1,length(cluster)), cluster ,sum)
for (i in cluster) {
syntab[,i] <- round(apply(matrix[cluster==i,]>0, 2, sum) * (100/samplesize[i]), digits=digits)}
results <- list("syntable" = syntab, "samplesize" = samplesize)
} else if (type=="mean" & abund=="percentage") {
samplesize <- tapply(rep(1,length(cluster)),cluster,sum)
for (i in cluster) { syntab[,i] <- round(apply(matrix[cluster==i,],2,mean), digits=digits) }
results <- list("syntable" = syntab, "samplesize" = samplesize)
} else if (type=="mean" & abund=="pa") {
stop("Cannot calculate mean cover in clusters with presence/absence values.")
} else if (type=="median" & abund=="percentage") {
samplesize <- tapply(rep(1,length(cluster)),cluster,sum)
for (i in cluster) { syntab[,i] <- round(apply(matrix[cluster==i,],2,median), digits=digits) }
results <- list("syntable" = syntab, "samplesize" = samplesize)
} else if (type=="median" & abund=="pa") {
stop("Cannot calculate median cover in clusters with presence/absence values.")
} else if (type=="diffspec") {
# create progress bar
pb <- txtProgressBar(min = 1, max = 23, style = 3)
#Calculate samplesize and percentage frequency
samplesize <- tapply(rep(1,length(cluster)),cluster,sum)
for (i in cluster) {
syntab[,i] <- round(apply(matrix[cluster==i,]>0, 2, sum) * (100/samplesize[i]), digits=digits)}
syn <- syntab
setTxtProgressBar(pb, 2)
small <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
names(small) <- group
for (i in 1:length(rownames(syn))) { small[i,] <- sort(as.numeric(syn[i,])) }
setTxtProgressBar(pb, 3)
Avsmall <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avsmall[,1] <- small[,1]
for (i in 1:length(rownames(syn))) { for ( k in 2:length(group)) {
Avsmall[i,k] <- mean(c(as.numeric(small[i,1:k]))) } }
setTxtProgressBar(pb, 4)
CondAvsmall <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
CondAvsmall[,1] <- rep("", length(rownames(syn)))
for ( k in 1:length(rownames(syn))) {
if (small[k,2] >= 2*Avsmall[k,1]+20) {CondAvsmall[k,2] <- "1"}
else {CondAvsmall[k,2] <- "0"} }
for ( i in 3:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvsmall[k,i-1]=="1") {CondAvsmall[k,i] <- "1"}
else if (small[k,i] >= 2*Avsmall[k,i-1]+20) {CondAvsmall[k,i] <- "1"}
else {CondAvsmall[k,i] <- "0"} } }
setTxtProgressBar(pb, 5)
Avsmall2 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avsmall2[,1] <- Avsmall[,1]
for ( i in 2:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvsmall[k,i]=="1") {Avsmall2[k,i] <- Avsmall2[k,i-1]}
else {Avsmall2[k,i] <- Avsmall[k,i]} } }
setTxtProgressBar(pb, 6)
large <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
names(large) <- group
for (i in 1:length(rownames(syn))) { large[i,] <- sort(as.numeric(syn[i,]), decreasing = T) }
setTxtProgressBar(pb, 7)
Avlarge <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avlarge[,1] <- large[,1]
for (i in 1:length(rownames(syn))) { for ( k in 2:length(group)) {
Avlarge[i,k] <- mean(c(as.numeric(large[i,1:k]))) } }
setTxtProgressBar(pb, 8)
CondAvlarge <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
CondAvlarge[,1] <- rep("", length(rownames(syn)))
for ( k in 1:length(rownames(syn))) {
if (large[k,2] <= (Avlarge[k,1]/2)-10) {CondAvlarge[k,2] <- "1"}
else {CondAvlarge[k,2] <- "0"} }
for ( i in 3:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvlarge[k,i-1]=="1") {CondAvlarge[k,i] <- "1"}
else if (large[k,i] <= (Avlarge[k,i-1]/2)-10) {CondAvlarge[k,i] <- "1"}
else {CondAvlarge[k,i] <- "0"} } }
setTxtProgressBar(pb, 9)
Avlarge2 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avlarge2[,1] <- Avlarge[,1]
for ( i in 2:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvlarge[k,i]=="1") {Avlarge2[k,i] <- Avlarge2[k,i-1]}
else {Avlarge2[k,i] <- Avlarge[k,i]} } }
setTxtProgressBar(pb, 10)
CondAvsmall2 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
CondAvsmall2[,1] <- rep("", length(rownames(syn)))
for ( i in 2:length(group)) {for ( k in 1:length(rownames(syn))) {
if (small[k,i] > (Avlarge2[k,i-1]/2)-10) {CondAvsmall2[k,i] <- "1"}
else {CondAvsmall2[k,i] <- "0"} } }
setTxtProgressBar(pb, 11)
Avsmall3 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avsmall3[,1] <- Avsmall2[,1]
for ( i in 2:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvsmall2[k,i]=="1") {Avsmall3[k,i] <- Avsmall3[k,i-1]}
else {Avsmall3[k,i] <- Avsmall2[k,i]} } }
setTxtProgressBar(pb, 12)
CondAvlarge2 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
CondAvlarge2[,1] <- rep("", length(rownames(syn)))
for ( i in 2:length(group)) { for ( k in 1:length(rownames(syn))) {
if (large[k,i] < 2*Avsmall2[k,i-1]+20) {CondAvlarge2[k,i] <- "1"}
else {CondAvlarge2[k,i] <- "0"} } }
setTxtProgressBar(pb, 13)
Avlarge3 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
Avlarge3[,1] <- Avlarge2[,1]
for ( i in 2:length(group)) { for ( k in 1:length(rownames(syn))) {
if (CondAvlarge2[k,i]=="1") {Avlarge3[k,i] <- Avlarge3[k,i-1]}
else {Avlarge3[k,i] <- Avlarge2[k,i]} } }
setTxtProgressBar(pb, 14)
Small_Avsmall3 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
for ( i in 1:length(group)) { for ( k in 1:length(rownames(syn))) {
if (syn[k,1] == "") {Small_Avsmall3[k,i] <- ""}
else if (syn[k,i]>=2*max(Avsmall3[k,])+20) {Small_Avsmall3[k,i] <- "p"}
else if (syn[k,i] < 2*max(Avsmall3[k,])+20) {Small_Avsmall3[k,i] <- "n"}
else {stop("Calculation of differential species failed: Check synoptic table input")} } }
setTxtProgressBar(pb, 15)
Large_AvLarge3 <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
for ( i in 1:length(group)) { for ( k in 1:length(rownames(syn))) {
if (syn[k,1] == "") {Large_AvLarge3[k,i] <- ""}
else if (syn[k,i] > min(Avlarge3[k,])/2-10) {Large_AvLarge3[k,i] <- "p"}
else if (syn[k,i] <= min(Avlarge3[k,])/2-10) {Large_AvLarge3[k,i] <- "n"}
else {stop("Calculation of differential species failed: Check synoptic table input")} } }
setTxtProgressBar(pb, 16)
Sum_cond <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
for ( i in 1:length(group)) { for ( k in 1:length(rownames(syn))) {
Sum_cond[k,i] <- paste0(Small_Avsmall3[k,i], Large_AvLarge3[k,i]) } }
setTxtProgressBar(pb, 17)
Results <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
for ( i in 1:length(group)) { for ( k in 1:length(rownames(syn))) {
if (Sum_cond[k,i] == "pp") {Results[k,i] <- "p"}
else if (Sum_cond[k,i] == "nn") {Results[k,i] <- "n"}
else if (Sum_cond[k,i] == "pn") {Results[k,i] <- "pn"}
else {Results[k,i] <- "-"} } }
rownames(Results) <- rownames(syn)
names(Results) <- group
setTxtProgressBar(pb, 18)
subresult <- data.frame(matrix(NA, nrow=length(rownames(syn)), ncol=length(group)))
for ( i in 1:length(group)) { for ( k in 1:length(rownames(Results))) {
if (Results[k,i] == "-") {subresult[k,i] <- "0"}
else if (Results[k,i] == "n") {subresult[k,i] <- "1"}
else if (Results[k,i] == "p") {subresult[k,i] <- "1"}
else if (Results[k,i] == "pn") {subresult[k,i] <- "1"}
else {subresult[k,i] <- "0"} } }
subresult <- as.data.frame(apply(subresult,2,as.numeric))
setTxtProgressBar(pb, 19)
onlydiff <- Results[apply(subresult,1,sum)>0,]
others <- rownames(Results[apply(subresult,1,sum)==0,])
positivespec <- data.frame(as.matrix(NA, nrow=500, ncol=200))
for ( i in 1:length(group)) {
if (length(rownames(Results[Results[,i]=="p",]) > 0)) {
a <- rownames(Results[Results[,i]=="p",])
for ( k in 1:length(a)) {
positivespec[i,k] <- rownames(Results[Results[,i]=="p",])[k] }
} else {positivespec[i,1] <- NA} }
names(positivespec) <- rep("", length(positivespec[1,]))
setTxtProgressBar(pb, 20)
negativespec <- data.frame(as.matrix(NA, nrow=500, ncol=200))
for ( i in 1:length(group)) {
if (length(rownames(Results[Results[,i]=="n",]) > 0)) {
a <- rownames(Results[Results[,i]=="n",])
for ( k in 1:length(a)) {
negativespec[i,k] <- rownames(Results[Results[,i]=="n",])[k] }
} else {negativespec[i,1] <- NA} }
names(negativespec) <- rep("", length(negativespec[1,]))
setTxtProgressBar(pb, 21)
positivenegativespec <- data.frame(as.matrix(NA, nrow=500, ncol=200))
for ( i in 1:length(group)) {
if (length(rownames(Results[Results[,i]=="pn",]) > 0)) {
a <- rownames(Results[Results[,i]=="pn",])
for ( k in 1:length(a)) {
positivenegativespec[i,k] <- rownames(Results[Results[,i]=="pn",])[k] }
} else {positivenegativespec[i,1] <- NA} }
names(positivenegativespec) <- rep("", length(positivenegativespec[1,]))
setTxtProgressBar(pb, 22)
diffspeclist <- list()
for (i in 1:length(group)) {
pos <- positivespec[i, ]
pos <- pos[!is.na(pos)]
if (length(pos) > 0) {pos=pos} else {pos <- noquote("no positive diff species")}
neg <- negativespec[i,]
neg <- neg[!is.na(neg)]
if (length(neg) > 0) {neg=neg} else {neg <- noquote("no negative diff species")}
posneg <- positivenegativespec[i,]
posneg <- posneg[!is.na(posneg)]
if (length(posneg) > 0) {posneg=posneg} else {posneg <- noquote("no positive/negative diff species")}
name <- group[i]
tmp <- list("positive diff" = pos, "negative diff" = neg, "positive/negative diff" = posneg)
diffspeclist[[name]] <- tmp }
setTxtProgressBar(pb, 23)
# Close progress bar
close(pb)
results <- list("syntable" = Results,
"onlydiff" = onlydiff,
"others" = others,
"samplesize" = samplesize,
"differentials" = diffspeclist)
} else if (type == "phi") {
# Calculate total frequency and sample size
for (i in cluster) {syntab[,i] <- apply(matrix[cluster == i,]>0, 2 ,sum)}
samplesize <- tapply(rep(1, length(cluster)), cluster, sum)
N = length(matrix[,1])
n = rowSums(syntab)
phitab <- data.frame(matrix(NA, nrow=length(names(matrix)), ncol=length(group)))
for (i in 1:length(group)) {
for (k in 1:length(matrix[1,])) {
phitab[k,i] <- (N * syntab[k,i] - n[k] * samplesize[i]) / sqrt(n[k] * samplesize[i]*(N-n[k]) * (N-samplesize[i]))
}}
names(phitab) <- group
rownames(phitab) <- names(matrix)
results <- list("syntable" = phitab,
"samplesize" = samplesize)
} else {stop("Cannot calculate synoptic table. Define correct type of species matrix values to use (abund = c('percentage', 'pa')). \nCheck correct type of synoptic table output type (type = c('totalfreq', 'percfreq', 'mean', 'median', 'diffspec')).")
}
}
return(invisible(results))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.