#==============================================================================
#Tolerance Metrics
#==============================================================================
#'Compute Tolerance Indices
#'
#'@param long.df Taxonomic counts arrange in a long data format (i.e., each
#'row represents a unique sample and taxon).
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@param tolerance.value The name of the column in the master taxon list
#'(specified using the master variable) that contains tolerance values on
#'a scale of 0-10.
#'@param taxa.rank The taxonomic rank used to perform the analysis. You must
#'sepecify either "FAMILY" or "GENUS" (Defualt = "FAMILY").
#'@param remove_na If taxa are missing tolerance assigned tolerance values
#' (tolerance.value == NA) then the scores could be skewed downward. Setting this
#' parameter to TRUE will exclude these taxa and provide a better estimate of the
#' tolerance measure being calculated.
#'@return The average tolerance score per individual for each unique sampling event.
#'This metric is calculated at the family or genus level. If taxon does not
#'have an assigned tolerance value it will not contribute to the final score. The
#'count representing each taxon is multiplied by the taxon's tolerance value.
#'If the tolerance value is missing it would be treated effectively as a zero,
#'which would reduce the average tolerance score; to avoid this issue, it is
#'more effective to remove the taxa without any tolerance values. However, if a
#'large portion of the sample is composed of a taxon or taxa without
#'assigned tolerance values the final value may be a poor representation
#'of the sample. Every effort should be made to assign each taxon a tolerance value.
#'If you have tolerance values that you would like to contribute and share
#'through the BIBI package please email Zachary M. Smith (zsmith@icprb.org).
#'@export
tol_index <- function(long.df, master.df, tolerance.value = "BIBI_TV",
taxa.rank = "FAMILY", remove_na = TRUE) {
keep.cols <- c("UNIQUE_ID", "STATION_ID", "AGENCY_CODE",
"DATE", "METHOD", "SAMPLE_NUMBER",
"CONDITION", taxa.rank,
"REPORTING_VALUE")
#keep.cols[!keep.cols %in% names(long.df)]
long.df <- long.df[, keep.cols]
#test <- aggregate(REPORTING_VALUE ~ . , data = long.df, sum)
sub.master <- unique(master.df[, c("FINAL_ID", tolerance.value)])
test.master <- sub.master[!is.na(sub.master[, tolerance.value]), ]
test3 <- test.master[duplicated(test.master$FINAL_ID), ]
if(nrow(test3) > 0) stop("FINAL_ID duplicated for tolerance.value")
merged <- merge(long.df, sub.master, by.x = taxa.rank, by.y = "FINAL_ID", all.x = TRUE)
merged$MULT <- merged$REPORTING_VALUE * merged[, tolerance.value]
merged$E2 <- apply(merged[, c("UNIQUE_ID", "SAMPLE_NUMBER")], 1, function(x) paste0(x, collapse = "_"))
if(remove_na == TRUE){
na.check <- by(merged[, tolerance.value], merged$E2, function(x){
all(is.na(x))
})
long.unique <- unique(long.df[, c("UNIQUE_ID", "SAMPLE_NUMBER")])
long.unique <- long.unique[order(long.unique$UNIQUE_ID, long.unique$SAMPLE_NUMBER),]
all.na <- long.unique[na.check, ]
if(nrow(long.unique[na.check, ]) > 0){
all.na$E2 <- apply(all.na[, c("UNIQUE_ID", "SAMPLE_NUMBER")], 1, function(x) paste0(x, collapse = "_"))
just.na <- merged[merged$E2 %in% all.na$E2, ]
just.na <- just.na[, !names(just.na) %in% "E2"]
just.na$FINAL <- NA
just.na[, c("MULT", "REPORTING_VALUE")] <- NA
final.just.na <- unique(just.na[, c("UNIQUE_ID", "STATION_ID", "AGENCY_CODE",
"DATE", "METHOD", "SAMPLE_NUMBER",
"CONDITION",
"MULT", "REPORTING_VALUE", "FINAL")])
}
if(nrow(merged[!merged$E2 %in% all.na$E2, ]) > 0){
non.na <- merged[!merged$E2 %in% all.na$E2, ]
non.na <- non.na[!is.na(non.na$MULT), ]
agg.mult <- aggregate(MULT ~ UNIQUE_ID + STATION_ID + AGENCY_CODE +
DATE + METHOD + SAMPLE_NUMBER + CONDITION,
data = non.na, FUN = sum,
na.rm = TRUE, na.action = NULL)
agg.total <- aggregate(REPORTING_VALUE ~ UNIQUE_ID + STATION_ID + AGENCY_CODE +
DATE + METHOD + SAMPLE_NUMBER + CONDITION,
data = non.na, FUN = sum,
na.rm = TRUE, na.action = NULL)
final.non.na <- merge(agg.mult, agg.total, by = c("UNIQUE_ID", "STATION_ID",
"AGENCY_CODE", "DATE",
"METHOD",
"SAMPLE_NUMBER",
"CONDITION"))
final.non.na$FINAL <- final.non.na$MULT / final.non.na$REPORTING_VALUE
}
if(nrow(long.unique[na.check, ]) > 0 & nrow(merged[!merged$E2 %in% all.na$E2, ]) > 0){
final.df <- rbind(final.non.na, final.just.na)
}
if(nrow(long.unique[na.check, ]) > 0 & nrow(merged[!merged$E2 %in% all.na$E2, ]) == 0){
final.df <- final.just.na
}
if(nrow(long.unique[na.check, ]) == 0 & nrow(merged[!merged$E2 %in% all.na$E2, ]) > 0){
final.df <- final.non.na
}
final.vec <- final.df$FINAL
}else{
agg.mult <- aggregate(MULT ~ UNIQUE_ID + STATION_ID + AGENCY_CODE +
DATE + METHOD + SAMPLE_NUMBER + CONDITION, data = merged, FUN = sum,
na.rm = TRUE, na.action = NULL)
agg.total <- aggregate(REPORTING_VALUE ~ UNIQUE_ID + STATION_ID + AGENCY_CODE +
DATE + METHOD + SAMPLE_NUMBER + CONDITION, data = merged, FUN = sum,
na.rm = TRUE, na.action = NULL)
final.df <- merge(agg.mult, agg.total, by = c("UNIQUE_ID", "STATION_ID", "AGENCY_CODE",
"DATE", "METHOD", "SAMPLE_NUMBER",
"CONDITION"))
final.df$FINAL <- final.df$MULT / final.df$REPORTING_VALUE
final.vec <- final.df$FINAL
}
return(final.vec)
}
#==============================================================================
#'Tolerance Index for Presence/Absence
#'
#'@param long.df Taxonomic counts arrange in a long data format (i.e., each
#'row represents a unique sample and taxon).
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@param tolerance.value The name of the column in the master taxon list
#'(specified using the master variable) that contains tolerance values on
#'a scale of 0-10.
#'@param taxa.rank The taxonomic rank used to perform the analysis. You must
#'sepecify either "FAMILY" or "GENUS" (Defualt = "FAMILY").
#'@return The average tolerance score using only presence/absence data for
#'each unique sampling event. This metric is calculated at the family or
#'genus level. If taxon does not have an assigned tolerance value it will
#'not contribute to the final score. The each taxon is represented by the
#'taxon's tolerance value. If the tolerance value is
#'missing it would be treated effectively as a zero, which would reduce
#'the average tolerance score; to avoid this issue, it is more effective to remove
#'the taxa without any tolerance values. However, if a large portion of
#'the sample is composed of a taxon or taxa without assigned tolerance
#'values the final value may be a poor representation of the sample.
#'Every effort should be made to assign each taxon a tolerance value.
#'If you have tolerance values that you would like to contribute and share
#'through the BIBI package please email Zachary M. Smith (zsmith@icprb.org).
#'@export
tol_pres_abs <- function(long.df, taxa.rank = "FAMILY", master.df, tolerance.value = "BIBI_TV") {
tol_am <- aggregate(master.df[, tolerance.value] ~ master.df[, colnames(master.df) == taxa.rank],
FUN = mean, na.rm = TRUE)
colnames(tol_am) <- c(taxa.rank, "TOL_VAL")
merge_am1 <- merge(tol_am, long.df, by.x = taxa.rank, by.y = taxa.rank, all.y = TRUE)
frame <- unique(merge_am1[, c("UNIQUE_ID", "STATION_ID")])
merge_am2 <- merge(frame, merge_am1, by.x = "UNIQUE_ID",
by.y = "UNIQUE_ID", all.x = TRUE)
merge_am3 <- with(merge_am2, merge_am2[order(UNIQUE_ID), ])
merge_am3 <- merge_am3[,c("UNIQUE_ID", "STATION_ID.x", taxa.rank, "TolVal")]
agg_am <- aggregate(merge_am3$TolVal ~ merge_am3$UNIQUE_ID +
merge_am3$STATION_ID.x + merge_am3[, taxa.rank], FUN = mean)
colnames(agg_am) <- c("UNIQUE_ID", "STATION_ID", taxa.rank, "TOLVAL")
sprd_am <- tidyr::spread(agg_am, taxa.rank, TOLVAL)
sprd_am[, 3:ncol(sprd_am)] <- sprd_am[, 8:ncol(sprd_am)] + 1
sprd_am[is.na(sprd_am)] <- 0
sum_am <- rowSums(sprd_am[, 8:ncol(sprd_am)])
rich_am <- rowSums(ifelse(sprd_am[, 8:ncol(sprd_am)] > 0, 1, 0))
final.vec <- (sum_am / rich_am) - 1
return(final.vec)
}
#==============================================================================
#'Percentage of Intolerant Individuals
#'
#'@param taxa.wide Taxonomic counts aggregated at the specific taxonomic
#' classification (e.g., Order, Family, or Genus) in a wide data format.
#' Use the wide function to prepare the data.
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@param tolerance.value The name of the column in the master taxon list
#'(specified using the master variable) that contains tolerance values on
#'a scale of 0-10.
#'@param lower.value The lower tolerance value bound.
#'@param upper.value The upper tolerance value bound.
#'@return The percentage of individuals that fall within the specified
#'tolerance value bounds. Recommendations: Intolerant taxa
#'(lower.value = 0, upper.value = 3), facultative taxa
#'(lower.value = 4, upper.value = 6), and tolerant taxa
#' (lower.value = 7, upper.value = 10).
#'@export
pct_tol_val <- function(taxa.wide, master.df, tolerance.value = "BIBI_TV",
lower.value = 0, upper.value = 3) {
master.df$TOLERANCE <- ifelse(master.df[, tolerance.value] >= lower.value &
master.df[, tolerance.value] <= upper.value,
"TOL_VAL", "EXCLUDE")
tol <- split(master.df[, "FINAL_ID"], master.df$TOLERANCE)
name.list <- list(unique(tol$TOL_VAL))
group.taxa <- group_taxa(name.list, taxa.wide)
final.vec <- (group.taxa / rowSums(taxa.wide[, 8:ncol(taxa.wide)])) * 100
return(final.vec)
}
#==============================================================================
#'Richness of Tolerance Group
#'
#'@param taxa.wide Taxonomic counts aggregated at the specific taxonomic
#' classification (e.g., Order, Family, or Genus) in a wide data format.
#' Use the wide function to prepare the data.
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@param tolerance.value The name of the column in the master taxon list
#'(specified using the master variable) that contains tolerance values on
#'a scale of 0-10.
#'@param lower.value The lower tolerance value bound.
#'@param upper.value The upper tolerance value bound.
#'@return The number of taxa that fall within the specified
#'tolerance value bounds. Recommendations: Intolerant taxa
#'(lower.value = 0, upper.value = 3), facultative taxa
#'(lower.value = 4, upper.value = 6), and tolerant taxa
#' (lower.value = 7, upper.value = 10).
#'@export
rich_tolerance <- function(taxa.wide, master.df, tolerance.value = "BIBI_TV",
lower.value, upper.value) {
master.df$TOLERANCE <- ifelse (master.df[, tolerance.value] >= lower.value &
master.df[, tolerance.value] <= upper.value,
1, 0)
tol <- split(master.df[, "FINAL_ID"], master.df$TOLERANCE)
name.list <- list(tol$`1`)
ID <- c("UNIQUE_ID", "STATION_ID", "AGENCY_CODE", "DATE", "METHOD",
"SAMPLE_NUMBER", "CONDITION")
taxa.list <- as.character(unlist(name.list))
taxa_list.df <- data.frame(taxa.wide[, names(taxa.wide) %in% c(ID, taxa.list)])
taxa_list.df[is.na(taxa_list.df)] <- 0 #NA = zero
if(ncol(taxa_list.df) < 8) {
final.vec <- 0
} else {
final.vec <- vegan::specnumber(taxa_list.df[, 8:ncol(taxa_list.df)])
}
return(final.vec)
}
#==============================================================================
#'***Percentage of Urban Intolerant Individuals***
#'
#'@param long.df Taxonomic counts arrange in a long data format (i.e., each
#'row represents a unique sample and taxon).
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@return The percentage of indviduals that are classified as urban intolerant
#'by... (Where did this come from? Was in the 2011 BIBI.). Only available
#'at the family level.
#'@export
pct_urban_intol <- function(long.df, master.df) {
fam.wide <- wide(long.df, "FAMILY")
urban <- split(master.df$FAMILY, master.df$INTOLERANT_URBAN)
name.list <- list(urban$'1')
group.taxa <- group_taxa(name.list, fam.wide)
final.vec <- (group.taxa / rowSums(fam.wide[, 8:ncol(fam.wide)])) * 100
return(final.vec)
}
#==============================================================================
#'Beck's Index
#'
#'@param taxa.wide Taxonomic counts aggregated at the specific taxonomic
#' classification (e.g., Order, Family, or Genus) in a wide data format.
#' Use the wide function to prepare the data.
#'@param taxa.rank The taxonomic rank used to perform the analysis. You must
#'sepecify either "FAMILY" or "GENUS" (Defualt = "FAMILY").
#'@param master.df A master taxa list including taxonomic ranks Phylum through
#'the specified taxonomic rank (Family or Genus) and the an
#'associated list of tolerance values. The default is set to the master taxa
#'list included in the BIBI package. The master taxa list can be viewed with
#'the following script: master.df <- data(master)
#'@param beck.colum The name of the column in the master.df that contains
#'Beck values.
#'@param beck.version The version of Beck's Index specified as 1 or 3. The
#'defualt is 1, the orginal Beck's Index:
#'2(S1) + 1(S2) + 3(S3)
#'where:
#'S# = Taxa richness at each level of Beck's classifications (0-3).
#'Beck's Index version 3 is calculated
#'as specified by PADNR:
#'3(S0) + 2(S1) + 1(S2) + 3(S3)
#'where:
#'S# = Taxa richness at each level of Beck's classifications (0-3).
#'@return Beck's Biotic Index produces weighted richness values, which favors
#'organisms sensitive to degradation (Beck Classes 0 and 1).
#'@export
becks <- function(taxa.wide, taxa.rank, master.df, beck.column, beck.version = 1) {
split.beck <- split(master.df[, taxa.rank], master.df[, beck.column])
name.list.1 <- list(split.beck$`1`)
vec.1 <- unlist(unique(name.list.1))
beck.1 <- taxa.wide[, c(1:7, which(names(taxa.wide) %in% vec.1))]
if(ncol(beck.1) < 8){
final.vec <- rep(0, nrow(taxa.wide))
} else {
if(ncol(beck.1) == 8) rich.beck.1 <- ifelse(beck.1[, 8] > 0, 1, 0)
if(ncol(beck.1) > 8) rich.beck.1 <- vegan::specnumber(beck.1[, 8:ncol(beck.1)])
name.list.2 <- list(split.beck$`2`)
vec.2 <- unlist(unique(name.list.2))
beck.2 <- taxa.wide[, c(1:7, which(names(taxa.wide) %in% vec.2))]
rich.beck.2 <- vegan::specnumber(beck.2[, 8:ncol(beck.2)])
if(beck.version == 3){
name.list.0 <- list(split.beck$`0`)
vec.0 <- unlist(unique(name.list.0))
beck.0 <- taxa.wide[, c(1:7, which(names(taxa.wide) %in% vec.0))]
if(ncol(beck.0) < 8){
rich.beck.0 <- 0
}else{
rich.beck.0 <- vegan::specnumber(beck.0[, 8:ncol(beck.0)])
}
final.vec <- (3 * rich.beck.0) + (2 * rich.beck.1) + rich.beck.2
}
if(beck.version == 1) final.vec <- (2 * rich.beck.1) + rich.beck.2
}
return(final.vec)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.