#' @title Assess Trait Species Coverage
#'
#' @description Calculate the number of individuals and taxa per survey that
#' have any information available for selected species traits.
#'
#' @param tree.data the data frame with tree abundance data in the TreeCo format
#' @param trait.data the data frame with edited and averaged trait data in the
#' TreeCo format
#' @param group vector with the name of the column that should be used to
#' aggregate the data. Defaults to 'ordem'.
#' @param spp.name the name of the column containing the (morpho)species names.
#' Defaults to 'Name_submitted'.
#' @param trait.list a vector with the names of the traits that should be
#' included in the calculations.
#' @param rm.flora should the records of (morpho)species not found within the
#' main sampling but found in the floristics or natural regeneration of the
#' same survey be removed? Defaults to TRUE.
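#' @param trait.miss the minimum number of missing traits for a species to be
#' listed in the table of poorly covered species printed by the function.
#' Defaults to 3.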
#'
#' @importFrom data.table as.data.table merge.data.table setnames setkey CJ := .EACHI
#' @importFrom knitr kable
#'
#' @return a data frame with the number of individuals (N) and taxa (S) for each
#' group and the percentage of N and S with all selected traits (columns
#' 'N.traits' and 'S.traits'), as well as a metric of the completeness of the
#' traits within each group (columns 'miss.traits.N' and 'miss.traits.S').
#'
#' @details For each selected group, the function calculates the percentage of
#' individuals and species that have information for all the traits listed in
#' `trait.list`. This can be used as a measure of the trait coverage within
#' each group.
#'
#' The function also returns a metric of how complete the trait information is
#' for each group (i.e. survey). This metric is obtained by assigning to each
#' taxon the number of missing traits, which goes from zero (all traits are
#' available) to the maximum number of traits listed in `trait.list`. These
#' numbers of missing traits are standardized by the maximum number of traits,
#' so they vary between 0 and 1. Finally, these values are weighted by the number
#' of individuals/taxa in each level of missing traits. That is, this metric
#' is the weighted average of the number of missing traits per taxon. The closer
#' to zero, the more complete the taxa are in terms of the traits available.
#'
#' Some TreeCo surveys contain extra species lists from outside the sampling
#' plots/transects. These lists are often obtained from floristic surveys in
#' the same sites where the sampling was conducted. Other surveys stored in
#' TreeCo present the records for species present in the adult and natural
#' regeneration components of the forest. If a species is present in the
#' floristic survey but not in the plots, the species enters the database with
#' an abundance value equal to zero. Similarly, if a species is present in the
#' natural regeneration but not in the adult component, the species enters the
#' database with an abundance between zero and one. These records are excluded
#' by default, using the argument `rm.flora`.
#'
#'
#' @seealso \link[TreeCo]{indetSpecies}
#'
#' @author Renato A. F. de Lima
#'
#' @export traitCover
#'
traitCover <- function(tree.data = NULL,
                       trait.data = NULL,
                       group = "ordem",
                       spp.name = "Name_submitted",
                       trait.list = c("wsg_gcm3", "MaxHeight_m",
                                      "SeedMass_g",
                                      "extinction", "endemism",
                                      "LeafArea", "LeafType",
                                      "dispersal.syndrome",
                                      "ecological.group"),
                       rm.flora = TRUE,
                       trait.miss = 3) {
  # Summarises, per level of `group`, the number of individuals (N) and taxa
  # (S), the percentage of each having no missing value for the traits in
  # `trait.list`, and a 0-1 index of missing traits weighted by individuals
  # and by taxa. Also prints a table of taxa with `trait.miss` or more
  # missing traits. Returns a data.frame with one row per group.

  # Escaping R CMD check notes from using data.table syntax
  spp.names <- group.by <- N <- S <- V1 <- NULL
  S.traits <- N.traits <- na.traits <- . <- NULL

  ## Checking input
  # Scalar condition: use the short-circuit operator
  if (is.null(tree.data) || is.null(trait.data))
    stop("Please provide the input species abundance and/or trait data")

  if (length(trait.list) == 0)
    stop("Please provide at least one trait name in 'trait.list'")

  # Work on plain data frames so that column extraction below behaves the
  # same whether the caller passed a data.frame, a tibble or a data.table
  tree.data <- as.data.frame(tree.data)
  trait.data <- as.data.frame(trait.data)

  # Fail early, naming the missing columns
  miss.cols <- setdiff(c(group, spp.name, "N"), names(tree.data))
  if (length(miss.cols) > 0)
    stop("Column(s) not found in 'tree.data': ",
         paste(miss.cols, collapse = ", "))
  miss.cols <- setdiff(c(spp.name, trait.list), names(trait.data))
  if (length(miss.cols) > 0)
    stop("Column(s) not found in 'trait.data': ",
         paste(miss.cols, collapse = ", "))

  ## Removing species not found in the main tree sampling (natural
  ## regeneration or floristics), i.e. records with abundance below one
  if (rm.flora)
    tree.data <- tree.data[tree.data$N >= 1, ]

  ## Defining the column with the grouping variable and taxon names
  tree.data$group.by <- tree.data[[group]]
  tree.data$spp.names <- tree.data[[spp.name]]
  DT.trees <- data.table::as.data.table(
    tree.data[, c("group.by", "spp.names", "N")])

  ## Counting the missing traits of each taxon
  trait.data$spp.names <- trait.data[[spp.name]]
  # drop = FALSE keeps a data frame even when a single trait is requested
  trait.data$na.traits <- as.integer(
    rowSums(is.na(trait.data[, trait.list, drop = FALSE])))
  DT.traits <- data.table::as.data.table(
    trait.data[, c("spp.names", trait.list, "na.traits")])

  # Left join: taxa without any trait record get NA in `na.traits`
  DT <- data.table::merge.data.table(DT.trees, DT.traits, by = "spp.names",
                                     all.x = TRUE, sort = FALSE)

  ## Reporting the taxa at or above the missing-trait cutoff
  low.trait.taxa <- DT.traits[na.traits >= trait.miss, .(spp.names, na.traits)]
  # Only names containing a space are listed
  low.trait.taxa <- low.trait.taxa[grepl(" ", spp.names), ]
  # The reported cutoff follows `trait.miss` instead of a fixed "3"
  cat(paste0("Species with ", trait.miss,
             " or more missing traits (may hinder functional diversity analyses): \n"),
      knitr::kable(low.trait.taxa, row.names = FALSE,
                   col.names = c("Species", "NA.traits")),
      "", sep = "\n")

  ## Obtaining the total number of individuals and species per group
  data.table::setkey(DT, group.by)
  summ.DT <- DT[, .(N = sum(N), S = data.table::uniqueN(spp.names)),
                by = group.by]

  ## Obtaining the number of species and individuals within each class of
  ## missing traits; the cross join creates every group x class combination
  data.table::setkey(DT, group.by, na.traits)
  tmp.DT <- DT[data.table::CJ(group.by, na.traits, unique = TRUE),
               sum(N), by = .EACHI]
  # Taxa without trait records miss every trait, so the scale runs from 0 to
  # the number of requested traits (not to the largest value observed)
  maximo <- length(trait.list)
  tmp.DT[is.na(na.traits), na.traits := maximo]
  # Combinations absent from the data carry no individuals
  tmp.DT[is.na(V1), V1 := 0]
  data.table::setnames(tmp.DT, c("V1"), c("N.traits"))
  tmp.DT[, S.traits := DT[data.table::CJ(group.by, na.traits, unique = TRUE),
                          data.table::uniqueN(spp.names), by = .EACHI]$V1, ]
  # An unmatched combination counts one (missing) name; reset it to zero
  tmp.DT[N.traits == 0 & S.traits == 1, S.traits := 0]

  ## Calculating the number of individuals and species with all traits
  data.table::setkey(tmp.DT, group.by)
  all.DT <- tmp.DT[na.traits == 0, lapply(.SD, sum),
                   by = group.by, .SDcols = c("N.traits", "S.traits")]
  summ.DT <- data.table::merge.data.table(summ.DT, all.DT, by = "group.by",
                                          all.x = TRUE, sort = FALSE)

  ## Calculating the weighted average of the standardized classes of trait
  ## coverage (0 = all traits, 1 = all traits missing)
  tmp.DT[, na.traits := na.traits / maximo]
  # NOTE(review): `weighted.stats` is defined elsewhere in the package;
  # presumably it returns the weighted mean of its first argument - confirm
  wmean.N <- tmp.DT[, lapply(.SD, weighted.stats, w = N.traits),
                    by = group.by, .SDcols = c("na.traits")]
  data.table::setnames(wmean.N, c("na.traits"), c("miss.traits.N"))
  summ.DT <- data.table::merge.data.table(summ.DT, wmean.N, by = "group.by",
                                          all.x = TRUE, sort = FALSE)
  wmean.S <- tmp.DT[, lapply(.SD, weighted.stats, w = S.traits),
                    by = group.by, .SDcols = c("na.traits")]
  data.table::setnames(wmean.S, c("na.traits"), c("miss.traits.S"))
  summ.DT <- data.table::merge.data.table(summ.DT, wmean.S, by = "group.by",
                                          all.x = TRUE, sort = FALSE)

  ## Getting the percentages
  summ.DT[, N.traits := round(100 * N.traits / N, 4)]
  summ.DT[, S.traits := round(100 * S.traits / S, 4)]

  ## Preparing to return
  data.table::setnames(summ.DT, c("group.by"), c(group))
  return(as.data.frame(summ.DT))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.