#' @title Aggregate TreeCo Species Abundance Table per Group
#'
#' @description by ordem or SiteCode
#'
#' @param tree.data a data frame with tree abundance data.
#' @param site.data a data frame with site and plot metadata (optional).
#' @param spp.group a column with the species names to be used for aggregation.
#' @param site.group a column with the site or plot codes/names to be used for aggregation.
#' @param cols.to.sum a vector with the names of the columns in `tree.data` to
#' be summed over species and groups (generally the numerical variables)
#' @param cols.to.paste a vector with the names of the columns in `tree.data` to
#' be be pasted over species and groups (generally the categorical variables)
#' @param effort the name of the columns in `site.data` containg the effort of
#' each inventory in hectares
#'
#' @importFrom dplyr left_join
#' @importFrom utils head
#' @importFrom data.table data.table setnames setkeyv merge.data.table uniqueN
#'
#' @return a data frame with the aggregated information per species name per
#' group (e.g. plot ID or site code)
#'
#' @details Users ...
#'
#' @author Renato A. F. de Lima
#'
#' @export aggregateAbundances
#'
aggregateAbundances <- function(tree.data = NULL,
site.data = NULL,
spp.group = "species.correct",
site.group = "ordem",
cols.to.sum = c("N", "AB", "DA", "DoA", "DR",
"DoR", "F", "IVC", "IVI"),
cols.to.paste = c("ordem", "refID", "SiteCode",
"SubSite", "Name_submitted", "family",
"genus", "epiteth", "infra.epiteth",
"authorship", "species.correct",
"taxon.rank", "family.original",
"genus.original", "species.original",
"author.original","notes",
"accession","coletor","number",
"DetBy","DetDate", "obs"),
effort = "effort_ha"
) {
## Checking input
if (is.null(tree.data))
stop("Please provide the input species abundance table")
if (!"data.frame" %in% class(tree.data))
stop("Input needs to be a data frame or equivalent")
if (!spp.group %in% colnames(tree.data) |
!site.group %in% colnames(tree.data))
stop("Columns with the species and groups not found")
if (any(!cols.to.sum %in% colnames(tree.data))) {
miss.cols <- cols.to.sum[!cols.to.sum %in% colnames(tree.data)]
cols.to.sum <- cols.to.sum[!cols.to.sum %in% miss.cols]
warning("Some columns to sum were not found in the input data frame: ",
paste(miss.cols, collapse = ", "), call. = FALSE)
}
if (any(!cols.to.paste %in% colnames(tree.data))) {
miss.cols <- cols.to.paste[!cols.to.paste %in% colnames(tree.data)]
cols.to.paste <- cols.to.paste[!cols.to.paste %in% miss.cols]
warning("Some columns to paste were not found in the input data frame: ",
paste(miss.cols, collapse = ", "), call. = FALSE)
}
## Obtaining the total sums and unique characters per species per inventory
DT <- data.table::data.table(tree.data)
data.table::setnames(DT, spp.group, "spp.group.by")
data.table::setnames(DT, site.group, "site.group.by")
data.table::setkeyv(DT, c("site.group.by", "spp.group.by"))
to_numeric <- function(x) { suppressWarnings(as.numeric(x)) }
DT[ , (cols.to.sum) := lapply(.SD, to_numeric), .SDcols = cols.to.sum]
soma <- DT[ , lapply(.SD, sum, na.rm = TRUE), by = c("site.group.by", "spp.group.by"),
.SDcols = cols.to.sum]
cols.total <- paste0(head(cols.to.sum, 2), "total")
N.AB.cols <- head(cols.to.sum, 2)
somaTotal <- DT[ , lapply(.SD, sum, na.rm = TRUE), by = c("site.group.by"),
.SDcols = N.AB.cols]
data.table::setnames(somaTotal, N.AB.cols, cols.total)
soma <- data.table::merge.data.table(soma, somaTotal)
cols.to.paste1 <- cols.to.paste[cols.to.paste %in% names(DT)]
squish <- function (x) {
x <- gsub("\\s\\s+", " ", x, perl = TRUE)
gsub("^ | $", "", x, perl = TRUE)
}
DT[ , (cols.to.paste1) := lapply(.SD, squish), .SDcols = cols.to.paste1]
to_paste <- function(x) { paste(unique(x), collapse="|") }
cola <- DT[ , lapply(.SD, to_paste), by = c("site.group.by", "spp.group.by"),
.SDcols = cols.to.paste1]
rm_na <- function (x) {
x <- gsub("^NA\\|NA\\|NA$", "", x, perl = TRUE)
x <- gsub("^NA\\|NA$", "", x, perl = TRUE)
x <- gsub("^NA\\|NA\\|", "", x, perl = TRUE)
x <- gsub("\\|NA\\|NA$", "", x, perl = TRUE)
gsub("^NA\\||\\|NA$", "", x, perl = TRUE)
}
cola[ , (cols.to.paste1) := lapply(.SD, rm_na), .SDcols = cols.to.paste1]
trees1 <- data.table::merge.data.table(soma, cola,
by = c("site.group.by", "spp.group.by"))
data.table::setnames(trees1, "spp.group.by", spp.group)
data.table::setnames(trees1, "site.group.by", site.group)
## Correcting DR and DoR for each site --------------------------------
## Variable names still hardcoded
trees2 <- as.data.frame(trees1)
trees2 <-
trees2[, match(c(names(tree.data), cols.total), names(trees2), nomatch = 0)]
#Missing DR
check_these <- trees2$DR %in% 0 & !trees2$N %in% 0
if (any(check_these))
trees2$DR[check_these] <-
round(100*trees2$N[check_these]/trees2$Ntotal[check_these], 5)
check_these <- trees2$DoR %in% 0 & !trees2$AB %in% 0
if (any(check_these))
trees2$DoR[check_these] <-
round(100*(trees2$AB[check_these]/trees2$ABtotal[check_these]), 7)
##Fixing problems for multiple-plots inventories
multi.sites <- unique(trees2[, site.group][trees2$DR > 100 |
trees2$DoR > 100|
trees2$IVC > 200|
trees2$IVI > 300])
check_these <- trees2[, site.group] %in% multi.sites
if (any(check_these)) {
trees2$DR[check_these] <-
round(100*trees2$N[check_these]/trees2$Ntotal[check_these], 5)
trees2$DoR[check_these] <-
round(100*(trees2$AB[check_these]/trees2$ABtotal[check_these]), 7)
trees2$IVC[check_these] <-
trees2$DR[check_these] + trees2$DoR[check_these]
}
if ("DR" %in% names(trees2))
trees2$DR[is.nan(trees2$DR)] <- NA
if ("DoR" %in% names(trees2))
trees2$DoR[is.nan(trees2$DoR)] <- NA
if ("IVC" %in% names(trees2))
trees2$IVC[is.nan(trees2$IVC)] <- NA
#Fixing mising data and other problems
check_these <- trees2$DoR %in% 0 &
!trees2$DR %in% 0 &
!trees2$IVC %in% 0
if (any(check_these)) trees2$DoR[check_these] <- 0.001
### NAO ENTENDI MAIS A CORREÇÃO AQUI: SUBTITUI AB POR DOR??? ANULANDO
# tmp <- unique(trees2[, site.group][trees2$AB %in% 0 &
# !trees2$DoR %in% 0 &
# !is.na(trees2$DoR)])
# check_these <- trees2[, site.group] %in% tmp & trees2$AB %in% 0
# trees2$AB[check_these] <- trees2$DoR[check_these]
## Getting DA and DoA based on total effort -----------------------------
if (!is.null(site.data)) {
## Getting the inventory list
tmp <-
unique(trees2[, site.group][trees2[, site.group] %in% site.data[,site.group]])
tmp1 <-
as.character(unique(trees2[, site.group][!trees2[, site.group] %in%
site.data[,site.group]]))
tmp1 <- strsplit(tmp1[grepl("\\|",tmp1)],"\\|")
tmp2 <- c(as.list(tmp),tmp1)
lista <- c(tmp, tmp1)
## Getting the effort per inventory and merging with the abundance data
effort.tab <-
cbind.data.frame(toto = sapply(lista, function(x) paste(x, collapse="|")),
toto1 = sapply(lista,
function(x)
sum(site.data[ ,effort][site.data[, site.group] %in%
x], na.rm=TRUE)))
names(effort.tab) <- c(site.group, effort)
trees2 <- dplyr::left_join(trees2, effort.tab, by = site.group)
## Calculating DA (trees ha-1) e DoA (basal area ha-1)
trees2$DA1 <- trees2$N/trees2[ ,effort]
trees2$DoA1 <- trees2$AB/trees2[ ,effort]
}
return(trees2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.