########################################################
#Utility functions
#
########################################################
paste.rep<-function(x,y){ # Utility function needed for describing decision rules
if (length(x)==1){
paste0(x,"; ",y)
} else {
paste(x,rep(y,times=length(x)),sep="; ")
}
}
print.benth.taxnames<-function(x,...){
print(as.data.frame(attr(x,"Name_match")[,c(1,2,6)],row.names=1:nrow(attr(x,"Name_match"))))
}
print.benth.taxroll<-function(x,...){
print(x$decisions[,grepl("Decision",colnames(x$decisions))])
}
print.twogroup_comparision<-function(x,...){
print(x$table)
}
transform <- function(x, type = c("none","log","squared","square root","forth root",
"logit","arcsine")) {
type <- match.arg(type)
switch(type,
none = x,
log = log10(x+1),
squared = x^2,
'square root' = x^0.5,
'forth root' = x^0.25,
logit = suppressWarnings(car::logit(x)),
arcsine = suppressWarnings(asin(sqrt(x)))
)
}
inv.logit <- function(f,a=0.025) {
a <- (1-2*a)
(a*(1+exp(f))+(exp(f)-1))/(2*a*(1+exp(f)))
}
untransform <- function(x, type = c("none","log","squared","square root","forth root",
"logit","arcsine")) {
type <- match.arg(type)
switch(type,
none = x,
log = 10^(x+1),
squared = x^0.5,
'square root' = x^2,
'forth root' = x^4,
logit = suppressWarnings(inv.logit(x)),
arcsine = suppressWarnings(sin(x)^2)
)
}
########################################################
#Function to clean taxonomic names, match with ITIS (secondary matched to NCBI and EOL).
#
#
########################################################
benth.taxnames<- function(x,
use.invalid=F, # use taxa not currenly valid in ITIS? Not currently implimented
use.NCBI=T,
oligo.hairs=T, # Treat oligochaete taxa with and without hairs as different taxa
write.output=T # Write output to an .rds file?
)
{
if (!require(taxize,quietly = T)) install.packages('taxize')
require(taxize,quietly = T)
x<-as.character(x)
if(!is.vector(x)) stop("x must be a vector of taxon names")
if (any(grepl("indeterminate|immature",x))) stop("taxon names cannot contain: indeterminate or immature")
tax.names<-as.character(x)
tax.names.orig<-as.character(x)
# Section 1: remove useless text from taxa names
tax.names <- gsub("|", "", tax.names, fixed = T)
tax.names <-
gsub("Phylum: |Subphylum: | Class: | Order: | Family: | Subfamily: | Tribe: ",
"",
tax.names)
tax.names <-
gsub("spp.| group| complex| grp| grp.|larvae|Larvae|(larvae)|(Larvae)
|adult|Adult|(adult)|(Adult)
|immature|Immature|(immature)|(Immature)|
indet|Indet|pupae|Pupae",
"",
tax.names,
fixed = F)
taxa.groups<-tax.names.orig[grepl(" group| complex| grp| grp.| spp.",tax.names.orig)]
taxa.slash<-tax.names.orig[grepl("/",tax.names.orig)]
tax.names <- gsub(" sp.", "", tax.names, fixed = T)
tax.names <- gsub("S.O. ", "", tax.names, fixed = T)
tax.names <- gsub("S.F. ", "", tax.names, fixed = T)
tax.names <- gsub("P. ", "", tax.names, fixed = T)
tax.names <- gsub("O. ", "", tax.names, fixed = T)
tax.names <- gsub("F. ", "", tax.names, fixed = T)
tax.names <- gsub("Cl. ", "", tax.names, fixed = T)
tax.names<-gsub("\\s*\\([^\\)]+\\)","",tax.names)
tax.names <- gsub(" | ", " ", tax.names, fixed = F)
tax.names<-trimws(tax.names)
# Section 2: Check to see if any duplicates remain
if (any(duplicated(tax.names))) stop(paste0("Duplicated taxa not permitted. Please consolidate: ",
paste0(tax.names[duplicated(tax.names)], collapse=", ")))
# Section 3: Validate each taxon name
tax.names1<-data.frame(matrix(nrow=length(tax.names),ncol=18))
rownames(tax.names1)<-tax.names
message("Retrieving taxonomic information...")
pb <- txtProgressBar(min=1,max=length(tax.names),style = 3)
for (i in 1:length(tax.names)){
setTxtProgressBar(pb, i)
# a. Check if the name UNIQUELY exists in ITIS
temp1<-resolve(tax.names[i],with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")
if(nrow(temp1$gnr)==1) { #If 1 match
if(!grepl("Animalia|Bilateria",temp1$gnr$classification_path)){ #make sure its an animal (some plants or fungi have identical names as some animals)
temp1<-resolve(tax.names[i],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=3,fields="all")
if (any(grepl("Animalia|Bilateria",temp1$gnr$classification_path))){ #This will select the one that is an animal
tax.names1[i,]<-temp1$gnr[which(grepl("Animalia|Bilateria",temp1$gnr$classification_path))[1],]
}
} else {
tax.names1[i,]<-temp1$gnr[1,]
}
} else { # If no match is found, and use.NCBI is an option, look there for the taxon
if (use.NCBI) {
temp1<-resolve(tax.names[i],with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=4,fields="all")
if(nrow(temp1$gnr)==1) {
if(!grepl("Bilateria",temp1$gnr$classification_path)){
temp1<-resolve(tax.names[i],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=4,fields="all")
if (any(grepl("Bilateria",temp1$gnr$classification_path))){
tax.names1[i,]<-temp1$gnr[which(grepl("Bilateria",temp1$gnr$classification_path))[1],]
}
} else {
tax.names1[i,]<-temp1$gnr[1,]
}
}
}
}
}
colnames(tax.names1)<-colnames(temp1$gnr)
# b. Check any taxa that could not be matched UNIQUELY in ITIS
if (any(is.na(tax.names1$matched_name2))){
no.match<-which(is.na(tax.names1$matched_name2))
error<-0
for (i in no.match) { #For each that couldnt be found
#browser()
if (grepl("/",tax.names[i],fixed=T)){# Check if taxa contain a "/"
#If it does, separate the taxa and evaluate them individually
#browser()
error<-error+1
two.tax<-strsplit(tax.names[i],"/")[[1]]
two.tax<-trimws(two.tax)
if (any(grepl(paste0(two.tax,collapse="|"),tax.names))){ #If any other taxa are in the dataset, treat undetermiend fraction as own taxa
temp2<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")
if(nrow(temp2$gnr)==2) {
if(!any(grepl("Animalia|Bilateria",temp2$gnr$classification_path))){
temp2.1<-resolve(two.tax[1],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=3,fields="all")
temp2.2<-resolve(two.tax[2],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=3,fields="all")
if (any(grepl("Animalia|Bilateria",c(temp2.1$gnr$classification_path,temp2.2$gnr$classification_path)))){
temp2$gnr<-rbind(temp2.1$gnr[which(grepl("Animalia|Bilateria",temp2$gnr$classification_path))[1],],
temp2.2$gnr[which(grepl("Animalia|Bilateria",temp2$gnr$classification_path))[1],])
higher.two.tax1<-as.data.frame(strsplit(temp2$gnr$classification_path,"|",fixed=T))
higher.two.rank1<-as.data.frame(strsplit(temp2$gnr$classification_path_ranks,"|",fixed=T))
higher.shared.rank<-match(higher.two.rank1[,1], higher.two.rank1[,2])[length(higher.two.rank1[,1])-1]
tax.names1[i,]<-resolve(as.character(higher.two.tax1[higher.shared.rank,1]),with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")$gnr
tax.names1[i,c(1,2,6,7,8,9,10)]<-c(tax.names[i],NA,tax.names[i],
paste0(tax.names1$classification_path[i],"|",tax.names[i]),
paste0(tax.names1$classification_path_ranks[i],"|artificial_",error),
NA,
NA)
other.i<-grep(paste0(two.tax,collapse="|"),tax.names)
other.i<-other.i[which(!tax.names[i]==rownames(tax.names1[other.i,]))]
for(n in other.i){
template<-unlist(strsplit(tax.names1$classification_path[i],"|",fixed=T))
class.path<-unlist(strsplit(tax.names1$classification_path[n],"|",fixed=T))
new<-which(is.na(match(class.path,template)))
new.class.path<-paste0(c(class.path[1:(new[1]-1)],tax.names[i],class.path[new]),collapse="|")
template1<-unlist(strsplit(tax.names1$classification_path_ranks[i],"|",fixed=T))
class.path1<-unlist(strsplit(tax.names1$classification_path_ranks[n],"|",fixed=T))
new1<-which(is.na(match(class.path,template)))
new.class.path1<-paste0(c(class.path1[1:(new1[1]-1)],template1[length(template1)],class.path1[new1]),collapse="|")
tax.names1[n,c(7,8,9)]<-c(new.class.path,
new.class.path1,
NA)
}
}
} else {
higher.two.tax1<-as.data.frame(strsplit(temp2$gnr$classification_path,"|",fixed=T))
higher.two.rank1<-as.data.frame(strsplit(temp2$gnr$classification_path_ranks,"|",fixed=T))
higher.shared.rank<-match(higher.two.rank1[,1], higher.two.rank1[,2])[length(higher.two.rank1[,1])-1]
tax.names1[i,]<-resolve(as.character(higher.two.tax1[higher.shared.rank,1]),with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")$gnr
tax.names1[i,c(1,2,6,7,8,9,10)]<-c(tax.names[i],NA,tax.names[i],
paste0(tax.names1$classification_path[i],"|",tax.names[i]),
paste0(tax.names1$classification_path_ranks[i],"|artificial_",error),
NA,
NA)
other.i<-grep(paste0(two.tax,collapse="|"),tax.names)
other.i<-other.i[which(!tax.names[i]==rownames(tax.names1[other.i,]))]
for(n in other.i){
template<-unlist(strsplit(tax.names1$classification_path[i],"|",fixed=T))
class.path<-unlist(strsplit(tax.names1$classification_path[n],"|",fixed=T))
new<-which(is.na(match(class.path,template)))
new.class.path<-paste0(c(class.path[1:(new[1]-1)],tax.names[i],class.path[new]),collapse="|")
template1<-unlist(strsplit(tax.names1$classification_path_ranks[i],"|",fixed=T))
class.path1<-unlist(strsplit(tax.names1$classification_path_ranks[n],"|",fixed=T))
new1<-which(is.na(match(class.path,template)))
new.class.path1<-paste0(c(class.path1[1:(new1[1]-1)],template1[length(template1)],class.path1[new1]),collapse="|")
tax.names1[n,c(7,8,9)]<-c(new.class.path,
new.class.path1,
NA)
}
}
} else {
if (use.NCBI) {
temp2<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=4,fields="all")
if(nrow(temp2$gnr)==2) {
if(!any(grepl("Bilateria",temp2$gnr$classification_path))){
temp2.1<-resolve(two.tax[1],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=4,fields="all")
temp2.2<-resolve(two.tax[2],with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=4,fields="all")
if (any(grepl("Bilateria",c(temp2.1$gnr$classification_path,temp2.2$gnr$classification_path)))){
temp2$gnr<-rbind(temp2.1$gnr[which(grepl("Bilateria",temp2$gnr$classification_path))[1],],
temp2.2$gnr[which(grepl("Bilateria",temp2$gnr$classification_path))[1],])
higher.two.tax1<-as.data.frame(strsplit(temp2$gnr$classification_path,"|",fixed=T))
higher.two.rank1<-as.data.frame(strsplit(temp2$gnr$classification_path_ranks,"|",fixed=T))
higher.shared.rank<-match(higher.two.rank1[,1], higher.two.rank1[,2])[length(higher.two.rank1[,1])-1]
tax.names1[i,]<-resolve(as.character(higher.two.tax1[higher.shared.rank,1]),with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=4,fields="all")$gnr
tax.names1[i,c(1,2,6,7,8,9,10)]<-c(tax.names[i],NA,tax.names[i],
paste0(tax.names1$classification_path[i],"|",tax.names[i]),
paste0(tax.names1$classification_path_ranks[i],"|artificial_",error),
NA,
NA)
other.i<-grep(paste0(two.tax,collapse="|"),tax.names)
other.i<-other.i[which(!tax.names[i]==rownames(tax.names1[other.i,]))]
for(n in other.i){
template<-unlist(strsplit(tax.names1$classification_path[i],"|",fixed=T))
class.path<-unlist(strsplit(tax.names1$classification_path[n],"|",fixed=T))
new<-which(is.na(match(class.path,template)))
new.class.path<-paste0(c(class.path[1:(new[1]-1)],tax.names[i],class.path[new]),collapse="|")
template1<-unlist(strsplit(tax.names1$classification_path_ranks[i],"|",fixed=T))
class.path1<-unlist(strsplit(tax.names1$classification_path_ranks[n],"|",fixed=T))
new1<-which(is.na(match(class.path,template)))
new.class.path1<-paste0(c(class.path1[1:(new1[1]-1)],template1[length(template1)],class.path1[new1]),collapse="|")
tax.names1[n,c(7,8,9)]<-c(new.class.path,
new.class.path1,
NA)
}
}
} else {
higher.two.tax1<-as.data.frame(strsplit(temp2$gnr$classification_path,"|",fixed=T))
higher.two.rank1<-as.data.frame(strsplit(temp2$gnr$classification_path_ranks,"|",fixed=T))
higher.shared.rank<-match(higher.two.rank1[,1], higher.two.rank1[,2])[length(higher.two.rank1[,1])-1]
tax.names1[i,]<-resolve(as.character(higher.two.tax1[higher.shared.rank,1]),with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")$gnr
tax.names1[i,c(1,2,6,7,8,9,10)]<-c(tax.names[i],NA,tax.names[i],
paste0(tax.names1$classification_path[i],"|",tax.names[i]),
paste0(tax.names1$classification_path_ranks[i],"|artificial_",error),
NA,
NA)
other.i<-grep(paste0(two.tax,collapse="|"),tax.names)
other.i<-other.i[which(!tax.names[i]==rownames(tax.names1[other.i,]))]
for(n in other.i){
template<-unlist(strsplit(tax.names1$classification_path[i],"|",fixed=T))
class.path<-unlist(strsplit(tax.names1$classification_path[n],"|",fixed=T))
new<-which(is.na(match(class.path,template)))
new.class.path<-c(class.path[1:(new[1]-1)],tax.names[i],class.path[new])
template1<-unlist(strsplit(tax.names1$classification_path_ranks[i],"|",fixed=T))
class.path1<-unlist(strsplit(tax.names1$classification_path_ranks[n],"|",fixed=T))
new1<-which(is.na(match(class.path,template)))
new.class.path1<-c(class.path1[1:(new1[1]-1)],template1[length(template1)],class.path1[new1])
tax.names1[n,c(7,8,9)]<-c(new.class.path,
new.class.path1,
NA)
}
}
}
}
}
} else if (!all(tax.names==two.tax[1],tax.names==two.tax[2])) {#If there are no other taxa in the pair in the dataset, just pick one and use it
temp1<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")
if(nrow(temp1$gnr)>=1) {
if(!any(grepl("Animalia|Bilateria",temp1$gnr$classification_path))){
temp1<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=3,fields="all")
if (any(grepl("Animalia|Bilateria",temp1$gnr$classification_path))){
tax.names1[i,]<-temp1$gnr[which(grepl("Animalia|Bilateria",temp1$gnr$classification_path))[1],]
}
} else {
tax.names1[i,]<-temp1$gnr[1,]
}
} else {
if (use.NCBI) {
temp1<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=4,fields="all")
if(nrow(temp1$gnr)>=1) {
if(!any(grepl("Bilateria",temp1$gnr$classification_path))){
temp1<-resolve(two.tax,with_canonical_ranks=T,with_context = T,best_match_only=F,preferred_data_sources=4,fields="all")
if (any(grepl("Bilateria",temp1$gnr$classification_path))){
tax.names1[i,]<-temp1$gnr[which(grepl("Bilateria",temp1$gnr$classification_path))[1],]
}
} else {
tax.names1[i,]<-temp1$gnr[1,]
}
}
}
}
} else {#If only 1 other taxa in the pair is present in the dataset
}
}
}
}
if (oligo.hairs){
#browser()
hair<-grep("hair|chaetae|Hair|Chaetae",tax.names1$user_supplied_name)
if (length(hair)>0){
hair.dat<-tax.names1[hair,]
for (i in 1:nrow(hair.dat)) {
tax.names1[hair[i],c(6,7,8,9,10)]<-c(hair.dat$submitted_name[i],
paste0(hair.dat$classification_path[i],"|",hair.dat$submitted_name[i]),
paste0(hair.dat$classification_path_ranks[i],"|artificial_hair"),
NA,NA)
}
}
}
l.out<-list()
for (i in 1:nrow(tax.names1)){
tax<-as.character(strsplit(tax.names1$classification_path[i],"|",fixed=T)[[1]])
rank<-tolower(as.character(strsplit(tax.names1$classification_path_ranks[i],"|",fixed=T)[[1]]))
id<-as.character(strsplit(tax.names1$classification_path_ids[i],"|",fixed=T)[[1]])
if(is.na(tax[1])) next
if (tax[length(tax)]!=tax.names1$matched_name2[i]){
tax.names1$matched_name2[i]<-tax[length(tax)]
}
template<-data.frame(name=tax,
rank=rank,
id=id,
stringsAsFactors=F
)
template[template=="Not assigned"]<-NA
l.out[[i]]<-template
names(l.out)[i]<-tax.names1$matched_name2[i]
}
attr(l.out, 'Name_match')<-tax.names1
class(l.out)<-"benth.taxnames"
#browser()
mis.match<-tax.names1[,colnames(tax.names1)%in%c("user_supplied_name","matched_name2")]
mis.match<-mis.match[mis.match[,1]!=mis.match[,2],]
if (nrow(mis.match)>0){
rownames(mis.match)<-1:nrow(mis.match)
message("")
message("The following taxonomic name changes were made:")
print(mis.match)
message("Please review original data matrix, especially if species and genera names are now different, ")
message("i.e. - Hydropsyche morosa became Ceratopsyche morosa, but Hydropsyche stayed Hydropsyche")
message("")
}
if (length(taxa.slash)>0){
message("")
message("Input data contains '/' taxa:")
print(data.frame(taxa.slash))
message("Please review these taxa")
message("Functional traits (feeding and habitat) may not properly be calculated for these taxa")
message("")
}
if (length(taxa.groups)>0){
message("")
message("The following taxa groups were in the original dataset:")
print(data.frame(taxa.groups))
message("Manually confirm there are no redundant taxa in these groups")
message("")
}
if (write.output){
saveRDS(l.out,file=paste0(Sys.Date(),"_tax_names(IMPORTANT).rds"))
}
return(l.out)
}
########################################################
#Function to roll up benthic taxonomy.
#
#
########################################################
benth.taxroll<-function(taxa, #taxa by site matrix
taxa.names=NA, #output of benth.taxnames
roll.down=T, #apply roll downs in cases of unresolved taxonomy / otherwise will delete higher taxa
fam.roll.down=T, #apply roll downs in cases of unresolved taxonomy for family level output
assign.undet=T, #if rolling down, sites with no lower level taxa will be assigned to the most abundant taxon in dataset
fam.assign.undet=T, #same as assign.undet but for family level output
Criteria3.percent=0.2, #critical limit for criteria 3
Criteria5a.percent=0.5, #critical limit for criteria 5a
Criteria5b.numb=2, #critical limit for criteria 5b
output.taxonomy=T, #add full taxonomy to output matricesS
CABIN.taxa=F
) {
if (!require(plyr,quietly = T)) install.packages('plyr')
#require(plyr,quietly = T)
if (!require(taxize,quietly = T)) install.packages('taxize')
require(taxize,quietly = T)
if (!is.data.frame(taxa)) stop("taxa must be a data frame")
if (!all(apply(taxa[,-c(1)],2,is.numeric))) stop("taxa can only contain numberic values past row 1")
CABIN.omit.taxa<-c("Cladocera","Rotifera","Copepoda","Ostracoda","Nematoda","Porifera","Platyhelminthes")
taxa[,1]<-as.character(taxa[,1])
if(class(taxa.names)!="benth.taxnames"){
message("Starting name search")
taxa.names<-benth.taxnames(taxa[,1])
message("Finished name search")
} else {
taxa.names<-taxa.names
}
taxa[is.na(taxa)]<-0
tax<-taxa.names[unique(names(taxa.names))]
tax.full <-
c("kingdom",
"subkingdom",
"infrakingdom",
"superphylum",
"phylum",
"subphylum",
"class",
"subclass",
"infraclass",
"superorder",
"order",
"suborder",
"infraorder",
"section",
"superfamily",
"family",
"subfamily",
"tribe",
"genus",
"subgenus",
"species",
"subspecies"
)
if (any(!grepl(paste0(tax.full,collapse="|"),unique(unlist(lapply(tax, "[[", 2)))))){
numb.miss<-unique(unlist(lapply(tax, "[[", 2)))[which(!grepl(paste0(tax.full,collapse="|"),unique(unlist(lapply(tax, "[[", 2)))))]
for(i in numb.miss){
if (i=="") next
has.artif<-which(grepl(i,lapply(tax, "[[", 2)))
has.artif<-tax[[has.artif[which.max(lapply(tax[has.artif],nrow))]]][,2]
has.artif<-has.artif[(grep(i,has.artif)-1):(grep(i,has.artif)+1)]
tax.full<-append(tax.full,has.artif[2],which(match(tax.full,has.artif,nomatch=0)==1))
}
}
tax.full<-tax.full[!tax.full%in%c("kingdom",
"subkingdom",
"infrakingdom",
"superphylum")]
tax.full <-
tax.full[tax.full %in% unique(unlist(lapply(lapply(tax, "[[", 2), tail, n =
1), use.names = F))]
tax.full.ranks <- lapply(tax, "[[", 2)
tax.lowest.rank <-
unlist(lapply(lapply(tax, "[[", 2), tail, n = 1), use.names = T)
tax.full.ranks.ID <- lapply(tax, "[[", 3)
tax.lowest.rank.ID <-
unlist(lapply(lapply(tax, "[[", 3), tail, n = 1), use.names = T)
tax.full.ranks.name <- lapply(tax, "[[", 1)
tax.lowest.rank.name <-
unlist(lapply(lapply(tax, "[[", 1), tail, n = 1), use.names = T)
#browser()
dat.out<-aggregate(taxa[,-c(1)],by=list(names(taxa.names)),sum)
dat.out<-dat.out[match(unique(names(taxa.names)),dat.out$Group.1),]
dat.out<-dat.out[dat.out$Group.1%in%unique(paste0(as.character(tax.lowest.rank.name))),-c(1)]
row.names(dat.out) <- unique(paste0(as.character(tax.lowest.rank.name)))
dat.orig<-dat.out
tax.final <- tax
tax.full1 <-
c(
"kingdom",
"subkingdom",
"infrakingdom",
"superphylum",
"phylum",
"subphylum",
"class",
"subclass",
"infraclass",
"superorder",
"order",
"suborder",
"infraorder",
"superfamily",
"family",
"subfamily",
"tribe",
"genus",
"subgenus",
"species",
"subspecies"
)
if (any(!grepl(paste0(tax.full1,collapse="|"),unique(unlist(lapply(tax, "[[", 2)))))){
numb.miss<-unique(unlist(lapply(tax, "[[", 2)))[which(!grepl(paste0(tax.full1,collapse="|"),unique(unlist(lapply(tax, "[[", 2)))))]
for(i in numb.miss){
if (i=="") next
has.artif<-which(grepl(i,lapply(tax, "[[", 2)))
has.artif<-tax[[has.artif[which.max(lapply(tax[has.artif],nrow))]]][,2]
has.artif<-has.artif[(grep(i,has.artif)-1):(grep(i,has.artif)+1)]
tax.full1<-append(tax.full1,has.artif[2],which(match(tax.full1,has.artif,nomatch=0)==1))
}
}
tax.full1 <-
tax.full1[tax.full1 %in% as.character(unique(unlist(lapply(tax.final, "[[", 2), use.names =
F)))]
mat.out <- matrix(nrow = nrow(taxa), ncol = length(tax.full))
colnames(mat.out) <- tax.full
rownames(mat.out) <- rownames(attr(taxa.names,"Name_match"))
for (i in tax.final) {
if (any(rownames(mat.out) %in% i$name[nrow(i)])){
mat.out[rownames(mat.out) %in% i$name[nrow(i)], colnames(mat.out) %in% i$rank] <-
as.character(i$name[i$rank %in% colnames(mat.out)])
} else {
t1<-which(attr(taxa.names,"Name_match")$matched_name2==i$name[nrow(i)])
for (n in 1:length(t1)){
mat.out[rownames(mat.out) %in% rownames(attr(taxa.names,"Name_match"))[t1[n]], colnames(mat.out) %in% i$rank] <-
as.character(i$name[i$rank %in% colnames(mat.out)])
}
}
}
decisions.out<-data.frame(Orig.Taxa=rownames(attr(taxa.names,"Name_match")),Matched.Taxa=attr(taxa.names,"Name_match")$matched_name2,Decision="",stringsAsFactors = F)
decisions.out <- suppressWarnings(cbind(mat.out[match(rownames(attr(taxa.names,"Name_match")),rownames(mat.out)),],decisions.out))
decisions.out <- cbind(attr(taxa.names,"Name_match")[,c("data_source_title","taxon_id")],decisions.out)
decisions.out$Decision<-as.character(decisions.out$Decision)
if (any(rowSums(is.na(mat.out))==ncol(mat.out))){
decisions.out$Decision[which(rowSums(is.na(mat.out))==ncol(mat.out))]<-paste0("Could not match with valid taxa - see matched name column")
}
decisions.out$Decision[duplicated(decisions.out$Matched.Taxa)]<-paste0("Duplicated taxa summed")
decisions.out$Decision[is.na(decisions.out$Matched.Taxa)]<-paste0("Could not match with valid taxa - see matched name column")
decisions.out$Matched.Taxa[is.na(decisions.out$Matched.Taxa)]<-"no match"
message("Starting taxa redundnacy screen:")
for (i in tax.full) {
#if(i=="genus") stop()
message(paste0("Screening ",i))
#if(i=="artificial_2") browser()
at.i <-
tax.lowest.rank[names(tax.lowest.rank) %in% rownames(dat.out)][which(tax.lowest.rank[names(tax.lowest.rank) %in% rownames(dat.out)] == i)]
for (n in names(at.i)) {
over.n <- tax[[n]]
if (any(is.na(over.n))& !any(grepl("artificial",over.n))) {
stop("NA error in taxonomy.")
}
#criteria 1 - taxon at lowest level
other.at.n <-
which(sapply(tax.full.ranks.name[!names(tax.full.ranks.name) %in% n &
names(tax.full.ranks.name) %in% rownames(dat.out)], function(x)
any(as.character(x) == as.character(over.n$name[nrow(over.n)]))))
if (length(other.at.n) == 0) {
decisions.out$Decision[decisions.out$Matched.Taxa == n] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == n], paste0("0A - Taxon is at lowest level. From ",n))
suppressWarnings(
rm(
other.at.n,
other.below.n,
at.and.below.n,
abund.at.n,
abund.below.n,
prop.below,
which.max.below.n
)
)
next
}
#criteria 2 - taxon contains only 1 lower-level identification
other.below.n <-other.at.n
if (length(other.below.n) == 1) {
dat.out[over.n$name[nrow(over.n)], ] <-
colSums(dat.out[c(over.n$name[nrow(over.n)], names(other.below.n)), ])
dat.out <- dat.out[!row.names(dat.out) %in% names(other.below.n), ]
decisions.out$Decision[decisions.out$Matched.Taxa == n] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == n],paste0(
"1A - Only 1 lower level taxa. Recieved abundance from: ", names(other.below.n)))
decisions.out$Decision[decisions.out$Matched.Taxa %in% names(other.below.n)] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% names(other.below.n)],paste0(
"1A - Only 1 lower level taxa. Taxa was rolled up to: ",n))
suppressWarnings(
rm(
over.n,
other.at.n,
other.below.n,
at.and.below.n,
abund.at.n,
abund.below.n,
prop.below,
which.max.below.n
)
)
next
}
#criteria 3 - Abundance at higher level is <20% of
# the total abundance within that taxon and lower
at.and.below.n <-
c(over.n$name[nrow(over.n)], names(other.below.n))
abund.at.n <-
sum(dat.out[rownames(dat.out) %in% at.and.below.n[1], ])
abund.below.n <-
sum(rowSums(dat.out[rownames(dat.out) %in% at.and.below.n[-c(1)], ]))
which.max.below.n<-names(which.max(rowSums(dat.out[rownames(dat.out) %in% at.and.below.n[-c(1)], ])))
if (abund.at.n / sum(abund.at.n, abund.below.n) <= Criteria3.percent) {
if (roll.down) {
for (x in 1:ncol(dat.out)) {
prop.below <-
dat.out[at.and.below.n[-c(1)], x] / sum(dat.out[at.and.below.n[-c(1)], x])
if (all(is.nan(prop.below))) {
if (!assign.undet) {
next
} else {
dat.out[rownames(dat.out) %in% which.max.below.n, x]<-dat.out[rownames(dat.out) %in% at.and.below.n[1], x]
next
#assign undetermined taxa to most abundant taxon
}
}
dat.out[rownames(dat.out) %in% at.and.below.n[-c(1)], x] <-
rowSums(cbind(prop.below * dat.out[at.and.below.n[1], x], dat.out[rownames(dat.out) %in%
at.and.below.n[-c(1)], x]))
}
}
dat.out <- dat.out[!row.names(dat.out) %in% at.and.below.n[1], ]
if (roll.down) {
if (assign.undet){
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"3A - Abundance less than ",
Criteria3.percent*100,
"% of total abundance within taxon and lower. Proportionally assigned abundance to: ",
paste(at.and.below.n[-c(1)],collapse=", "),
". If no lower level taxa identified at the site, abundances assigned to: ", paste0(which.max.below.n)
))
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]],paste(
"3A - Abundance of higher level taxa less than ",
Criteria3.percent*100,
"% of total abundance within ",n," and lower. Recieved abundance from ",n
))
} else {
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"3A - Abundance less than ",
Criteria3.percent*100,
"% of total abundance within taxon and lower. Proportionally assigned abundance to: ", paste(at.and.below.n[-c(1)],collapse=", ")
))
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]],paste(
"3A - Abundance of higher level taxa less than ",
Criteria3.percent*100,
"% of total abundance within ",n," and lower. Recieved abundance from ",n
))
}
} else {
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"3A - Abundance less than ",
Criteria3.percent*100,
"% of total abundance within taxon and lower. Taxon deleted."
))
}
suppressWarnings(
rm(
over.n,
other.at.n,
other.below.n,
at.and.below.n,
abund.at.n,
abund.below.n,
prop.below,
which.max.below.n
)
)
next
}
#criteria 5a - Higher-level taxon contains 2 lower-level identifications and >50%
# of the total abundance within that taxon and lower
if (abund.at.n / sum(abund.at.n, abund.below.n) >= Criteria5a.percent) {
if (!length(at.and.below.n[-c(1)]) > Criteria5b.numb) {
dat.out[row.names(dat.out) %in% at.and.below.n[1], ] <-
colSums(dat.out[rownames(dat.out) %in% at.and.below.n, ])
dat.out <-
dat.out[!rownames(dat.out) %in% at.and.below.n[-c(1)], ]
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]],paste(
"5B - Dataset contains only ",Criteria5b.numb," taxa below ", n," and abundance of ",n," is equal to or greater than ",
Criteria5a.percent*100,
"% of total abundance within the higher taxon and lower. Abundance was rolled up to: ",paste(at.and.below.n[1],collapse=", ")
))
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[c(1)]],paste(
"5B - Taxon contains only ",Criteria5b.numb," lower level taxa and abundance of higher taxa is equal to or greater than ",
Criteria5a.percent*100,
"% of total abundance within the higher taxon and lower. Recieved abundance from: ",paste(at.and.below.n[-c(1)],collapse=", ")
))
suppressWarnings(
rm(
over.n,
other.at.n,
other.below.n,
at.and.below.n,
abund.at.n,
abund.below.n,
prop.below,
which.max.below.n
)
)
next
} else {
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n],paste(
"5C - Taxon contains more than ",Criteria5b.numb," lower level taxa and abundance of higher taxa is equal to or greater than ",
Criteria5a.percent*100,
"% of total abundance within the higher taxon and lower. Kept higher and lower level taxa. Applied rule from: ",n
))
next
}
} else {
if (roll.down) {
for (x in 1:ncol(dat.out)) {
prop.below <-
dat.out[at.and.below.n[-c(1)], x] / sum(dat.out[at.and.below.n[-c(1)], x])
if (all(is.nan(prop.below))) {
if (!assign.undet) {
next
} else {
dat.out[rownames(dat.out) %in% which.max.below.n, x]<-dat.out[rownames(dat.out) %in% at.and.below.n[1], x]
next
#assign undetermined taxa to most abundant taxon
}
}
dat.out[rownames(dat.out) %in% at.and.below.n[-c(1)], x] <-
rowSums(cbind(prop.below * dat.out[at.and.below.n[1], x], dat.out[rownames(dat.out) %in%
at.and.below.n[-c(1)], x]))
}
}
dat.out <- dat.out[!row.names(dat.out) %in% at.and.below.n[1], ]
if (roll.down) {
if (assign.undet){
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"5A - Taxon contains ",Criteria5b.numb," or more lower level taxa and abundance of higher taxa is less than ",
Criteria5a.percent*100,
"% of total abundance within taxon and lower. Proportionally assigned abundance to: ",
paste(at.and.below.n[-c(1)],collapse=", "),
". If no lower level taxa identified at the site, abundances assigned to: ", paste0(which.max.below.n)
))
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]],paste(
"5A - Recieved abundance from: ",n
))
} else {
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"5A - Taxon contains ",Criteria5b.numb," or more lower level taxa and abundance of higher taxa is less than ",
Criteria5a.percent*100,
"% of total abundance within taxon and lower. Proportionally assigned abundance to: ", paste(at.and.below.n[-c(1)],collapse=", ")
))
decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa %in% at.and.below.n[-c(1)]],paste(
"5A - Recieved abundance from: ",n
))
}
} else {
decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]] <-
paste.rep(decisions.out$Decision[decisions.out$Matched.Taxa == at.and.below.n[1]],paste(
"5A - Taxon contains ",Criteria5b.numb," or more lower level taxa and abundance of higher taxa is less than ",
Criteria5a.percent*100,
"% of total abundance within taxon and lower. Taxon deleted."
))
}
suppressWarnings(
rm(
over.n,
other.at.n,
other.below.n,
at.and.below.n,
abund.at.n,
abund.below.n,
prop.below,
which.max.below.n
)
)
next
}
}
}
d.temp<-do.call(plyr::rbind.fill,lapply(strsplit(decisions.out$Decision,";"),function(x) data.frame(t(data.frame(x)),stringsAsFactors = F)))
d.temp[is.na(d.temp)]<-""
colnames(d.temp)<-paste0("Decision ",1:ncol(d.temp))
decisions.out<-decisions.out[,!colnames(decisions.out)%in%"Decision"]
decisions.out<-cbind(decisions.out,d.temp)
#browser()
fams<-unique(unlist(lapply(taxa.names, function(x) x[x[2]=="family", 1])))
fams<-fams[!is.na(fams)]
tax.full.ranks.name1<-tax.full.ranks.name[!is.na(names(tax.full.ranks.name))]
tax.full.ranks1<-tax.full.ranks[!is.na(names(tax.full.ranks.name))]
dat.fam <- data.frame(matrix(nrow=length(fams),ncol=ncol(dat.orig)))
rownames(dat.fam)<-fams
colnames(dat.fam)<-colnames(dat.orig)
for (i in fams) {
dat.fam[row.names(dat.fam) == i, ] <-
colSums(dat.orig[grepl(i,tax.full.ranks.name1), ])
}
dat.orig.mod<-dat.orig
higher.tax<-tax.lowest.rank[!tax.lowest.rank%in%tax.full1[which(tax.full1=="family"):length(tax.full1)]]
for (i in names(higher.tax)){
n.higher<-length(which(lapply(lapply(tax.full.ranks.name1,function(x) which(x==i)),sum)>0))
if (n.higher<3){
dat.fam<-rbind(dat.orig.mod[rownames(dat.orig.mod)%in%i,],dat.fam)
} else if (fam.roll.down) {
#browser()
at.i<-tax.full.ranks.name1[grep(i,tax.full.ranks.name1)]
at.i<-at.i[!names(at.i)%in%i]
for (x in 1:ncol(dat.fam)){
if (dat.orig.mod[i,x]==0) next
if (fam.assign.undet & sum(dat.orig.mod[names(at.i),x])==0){
#browser()
highest.prop<-names(at.i)[which.max(rowSums(dat.orig.mod[names(at.i),]))]
dat.orig.mod[highest.prop,x]<-dat.orig.mod[i,x]
} else {
lower.prop<-dat.orig.mod[names(at.i),x]/sum(dat.orig.mod[names(at.i),x])
dat.orig.mod[names(at.i),x]<-rowSums(cbind(dat.orig.mod[i,x]*lower.prop,dat.orig.mod[names(at.i),x]))
}
}
fams.new<-unique(unlist(lapply(at.i, function(x) x[grepl("idae",x)])))
for (u in fams.new) {
dat.fam[row.names(dat.fam) == u, ] <-
colSums(dat.orig.mod[grepl(u,tax.full.ranks.name1), ])
}
}
}
lpl.temp<-merge(dat.out,decisions.out[!apply(decisions.out[,colnames(decisions.out)%in%colnames(mat.out)],1,function(x)all(is.na(x))),],all.x=T,all.y=F,by.x = 0,by.y="Matched.Taxa")
lpl.temp<-lpl.temp[match(rownames(dat.out),lpl.temp$Row.names),]
rownames(lpl.temp)<-lpl.temp$Row.names
lpl.temp<-lpl.temp[,match(c(colnames(mat.out),colnames(dat.out)),colnames(lpl.temp))]
lpl.tax<-cbind(lpl.temp[,colnames(mat.out)])#,rownames(lpl.temp))
lpl.tax<-apply(lpl.tax,2,as.character)
lpl.tax.out<-apply(lpl.tax,1,paste0,collapse=";")
lpl.tax.out<-gsub("NA","",lpl.tax.out)
fam.tax<-resolve(rownames(dat.fam),with_canonical_ranks=T,with_context = T,best_match_only=T,preferred_data_sources=3,fields="all")$gnr
l.out<-list()
for (i in 1:nrow(fam.tax)){
tax<-as.character(strsplit(fam.tax$classification_path[i],"|",fixed=T)[[1]])
rank<-tolower(as.character(strsplit(fam.tax$classification_path_ranks[i],"|",fixed=T)[[1]]))
template<-data.frame(name=tax,
rank=rank,
stringsAsFactors=F
)
l.out[[i]]<-template
names(l.out)[i]<-fam.tax$matched_name2[i]
}
fam.mat.out <- matrix(nrow = nrow(dat.fam), ncol = length(tax.full1[-c((which(tax.full1=="family")+1):length(tax.full1))]))
colnames(fam.mat.out) <- tax.full1[-c((which(tax.full1=="family")+1):length(tax.full1))]
fam.mat.out<-fam.mat.out[,!colnames(fam.mat.out)%in%c("kingdom","subkingdom","infrakingdom","superphylum","infraclass","superorder","infraorder","superfamily")]
rownames(fam.mat.out) <- rownames(dat.fam)
for (i in l.out) {
if (any(rownames(fam.mat.out) %in% i$name[nrow(i)])){
fam.mat.out[rownames(fam.mat.out) %in% i$name[nrow(i)], colnames(fam.mat.out) %in% i$rank] <-
as.character(i$name[i$rank %in% colnames(fam.mat.out)])
} else {
t1<-which(attr(taxa.names,"Name_match")$matched_name2==i$name[nrow(i)])
for (n in 1:length(t1)){
fam.mat.out[rownames(fam.mat.out) %in% rownames(attr(taxa.names,"Name_match"))[t1[n]], colnames(fam.mat.out) %in% i$rank] <-
as.character(i$name[i$rank %in% colnames(fam.mat.out)])
}
}
}
fam.tax<-apply(fam.mat.out,2,as.character)
fam.tax.out<-apply(fam.tax,1,paste0,collapse=";")
fam.tax.out<-gsub("NA","",fam.tax.out)
if (CABIN.taxa){
lpl.omits<-grepl(paste0(CABIN.omit.taxa,collapse="|"),lpl.tax.out)
fam.omits<-grepl(paste0(CABIN.omit.taxa,collapse="|"),fam.tax.out)
lpl.tax.out<-lpl.tax.out[!lpl.omits]
fam.tax.out<-fam.tax.out[!fam.omits]
dat.out<-dat.out[!lpl.omits,]
dat.fam<-dat.fam[!fam.omits,]
decision.omits<-sapply(attr(taxa.names,"Name_match")$classification_path,function(x) any(grepl(paste0(CABIN.omit.taxa,collapse="|"),x)))
decisions.out[decision.omits,grepl("Decision",colnames(decisions.out))]<-""
decisions.out[decision.omits,grepl("Decision",colnames(decisions.out))[1]]<-"CABIN excluded taxa"
}
out<-list()
out$lpl.matrix<-dat.out
out$fam.matrix<-dat.fam
out$decisions<-decisions.out
out$taxa.names<-taxa.names
if (output.taxonomy){
out$lpl.taxonomy<-lpl.tax.out
out$fam.taxonomy<-fam.tax.out
}
class(out)<-"benth.taxroll"
return(out)
}
########################################################
#Function to calculate benthic endpoints
#
#
########################################################
benth.endpoint<-function(x){
if (class(x)!="benth.taxroll") stop("Input dataset must be an output from benth.taxroll()")
if (!require(vegan,quietly = T)) install.packages('vegan')
require(vegan,quietly = T)
if (!require(BenthicAnalysistesting,quietly = T)) {
install.packages('devtools')
devtools::install_github('p-schaefer/BenthicAnalysistesting')
}
require(BenthicAnalysistesting,quietly = T)
fam.mat<-x$fam.matrix
lpl.mat<-x$lpl.matrix
endpoint.out <- matrix(nrow = ncol(lpl.mat), ncol = 42)
rownames(endpoint.out) <- colnames(lpl.mat)
colnames(endpoint.out) <-
c(
"Richness",
"Rarefied_Richness",
"Abundance",
"Perc_Dominance",
"Simpsons_Diversity",
"InvSimpsons_Diversity",
"Shannons_Diversity",
"Pielous_Evenness",
"Simpsons_Dominance",
"Simpsons_Eveness",
"EPT_Rich",
"EPT_Perc",
"Eph_Rich",
"Eph_Perc",
"Tric_Rich",
"Tric_Perc",
"Plec_Rich",
"Plec_Perc",
"Chiron_Perc",
"Oligo_Perc",
"Dipt_Rich",
"Dipt_Perc",
"HBI",
"CEFI",
"Shredder_Perc",
"Shredder_Rich",
"Filterer_Perc",
"Filterer_Rich",
"Predator_Perc",
"Predator_Rich",
"CollGath_Perc",
"CollGath_Rich",
"ScrapGraz_Perc",
"ScrapGraz_Rich",
"Swimmer_Perc",
"Swimmer_Rich",
"Clinger_Perc",
"Clinger_Rich",
"Sprawler_Perc",
"Sprawler_Rich",
"Climber_Perc",
"Climber_Rich"
)
endpoint.out.fam<-data.frame(endpoint.out)
endpoint.out.lpl<-data.frame(endpoint.out)
fam.out<-t(fam.mat)
colnames(fam.out)<-x$fam.taxonomy
lpl.out<-t(lpl.mat)
colnames(lpl.out)<-x$lpl.taxonomy
lpl.BA.out<-BenthicAnalysistesting::benth.metUI(x=lpl.out)
fam.BA.out<-BenthicAnalysistesting::benth.metUI(x=fam.out)
endpoint.out.lpl[,1:10]<-
data.frame(cbind(vegan::specnumber(lpl.out),
vegan::rarefy(ceiling(lpl.out),min(rowSums(lpl.out))),
rowSums(lpl.out),
apply(lpl.out,1,max)/rowSums(lpl.out),
vegan::diversity(lpl.out,index="simpson"),
vegan::diversity(lpl.out,index="invsimpson"),
vegan::diversity(lpl.out,index="shannon"),
vegan::diversity(lpl.out,index="shannon")/log(vegan::specnumber(lpl.out)),
1/vegan::diversity(lpl.out,index="simpson"),
(1/vegan::diversity(lpl.out,index="simpson"))/vegan::specnumber(lpl.out)
))
endpoint.out.fam[,1:10]<-
data.frame(cbind(vegan::specnumber(fam.out),
vegan::rarefy(ceiling(fam.out),min(rowSums(fam.out))),
rowSums(fam.out),
apply(fam.out,1,max)/rowSums(fam.out),
vegan::diversity(fam.out,index="simpson"),
vegan::diversity(fam.out,index="invsimpson"),
vegan::diversity(fam.out,index="shannon"),
vegan::diversity(fam.out,index="shannon")/log(vegan::specnumber(fam.out)),
1/vegan::diversity(fam.out,index="simpson"),
(1/vegan::diversity(fam.out,index="simpson"))/vegan::specnumber(fam.out)
))
endpoint.out.lpl$EPT_Rich <-lpl.BA.out$Summary.Metrics$EPT.Richness
endpoint.out.lpl$EPT_Perc <-fam.BA.out$Summary.Metrics$Percent.EPT
endpoint.out.lpl$Eph_Rich <-lpl.BA.out$Summary.Metrics$Ephem.Richness
endpoint.out.lpl$Eph_Perc <-fam.BA.out$Summary.Metrics$Percent.Ephem
endpoint.out.lpl$Tric_Rich <-lpl.BA.out$Summary.Metrics$Trich.Richness
endpoint.out.lpl$Tric_Perc <-fam.BA.out$Summary.Metrics$Percent.Trich
endpoint.out.lpl$Plec_Rich <-lpl.BA.out$Summary.Metrics$Plec.Richness
endpoint.out.lpl$Plec_Perc <-fam.BA.out$Summary.Metrics$Percent.Plec
endpoint.out.lpl$Dipt_Rich <- lpl.BA.out$Summary.Metrics$Dipt.Richness
endpoint.out.lpl$Dipt_Perc <- fam.BA.out$Summary.Metrics$Percent.Dipt
endpoint.out.lpl$Chiron_Perc <-lpl.BA.out$Summary.Metrics$Percent.Chironomidae
endpoint.out.lpl$Oligo_Perc <-lpl.BA.out$Summary.Metrics$Percent.Oligochaeta
endpoint.out.lpl$HBI <-lpl.BA.out$Summary.Metrics$HBI
endpoint.out.lpl$CEFI <-lpl.BA.out$Summary.Metrics$CEFI
endpoint.out.lpl$Shredder_Perc<-lpl.BA.out$Summary.Metrics$Shredder.Percent
endpoint.out.lpl$Shredder_Rich<-lpl.BA.out$Summary.Metrics$Shredder.Richness
endpoint.out.lpl$Filterer_Perc<-lpl.BA.out$Summary.Metrics$Filterer.Percent
endpoint.out.lpl$Filterer_Rich<-lpl.BA.out$Summary.Metrics$Filterer.Richness
endpoint.out.lpl$Predator_Perc<-lpl.BA.out$Summary.Metrics$Predator.Percent
endpoint.out.lpl$Predator_Rich<-lpl.BA.out$Summary.Metrics$Predator.Richness
endpoint.out.lpl$CollGath_Perc<-lpl.BA.out$Summary.Metrics$Gatherer.Percent
endpoint.out.lpl$CollGath_Rich<-lpl.BA.out$Summary.Metrics$Gatherer.Richness
endpoint.out.lpl$ScrapGraz_Perc<-lpl.BA.out$Summary.Metrics$ScraperGrazer.Percent
endpoint.out.lpl$ScrapGraz_Rich<-lpl.BA.out$Summary.Metrics$ScraperGrazer.Richness
endpoint.out.lpl$Swimmer_Perc<-lpl.BA.out$Summary.Metrics$Swimmer.Percent
endpoint.out.lpl$Swimmer_Rich<-lpl.BA.out$Summary.Metrics$Swimmer.Richness
endpoint.out.lpl$Clinger_Perc<-lpl.BA.out$Summary.Metrics$Clinger.Percent
endpoint.out.lpl$Clinger_Rich<-lpl.BA.out$Summary.Metrics$Clinger.Richness
endpoint.out.lpl$Sprawler_Perc<-lpl.BA.out$Summary.Metrics$Sprawler.Percent
endpoint.out.lpl$Sprawler_Rich<-lpl.BA.out$Summary.Metrics$Sprawler.Richness
endpoint.out.lpl$Climber_Perc<-lpl.BA.out$Summary.Metrics$Clinger.Percent
endpoint.out.lpl$Climber_Rich<-lpl.BA.out$Summary.Metrics$Clinger.Richness
endpoint.out.fam$EPT_Rich <-fam.BA.out$Summary.Metrics$EPT.Richness
endpoint.out.fam$EPT_Perc <-fam.BA.out$Summary.Metrics$Percent.EPT
endpoint.out.fam$Eph_Rich <-fam.BA.out$Summary.Metrics$Ephem.Richness
endpoint.out.fam$Eph_Perc <-fam.BA.out$Summary.Metrics$Percent.Ephem
endpoint.out.fam$Tric_Rich <-fam.BA.out$Summary.Metrics$Trich.Richness
endpoint.out.fam$Tric_Perc <-fam.BA.out$Summary.Metrics$Percent.Trich
endpoint.out.fam$Plec_Rich <-fam.BA.out$Summary.Metrics$Plec.Richness
endpoint.out.fam$Plec_Perc <-fam.BA.out$Summary.Metrics$Percent.Plec
endpoint.out.fam$Dipt_Rich <- fam.BA.out$Summary.Metrics$Dipt.Richness
endpoint.out.fam$Dipt_Perc <- fam.BA.out$Summary.Metrics$Percent.Dipt
endpoint.out.fam$Chiron_Perc <-fam.BA.out$Summary.Metrics$Percent.Chironomidae
endpoint.out.fam$Oligo_Perc <-fam.BA.out$Summary.Metrics$Percent.Oligochaeta
endpoint.out.fam$HBI <-fam.BA.out$Summary.Metrics$HBI
endpoint.out.fam$CEFI <-fam.BA.out$Summary.Metrics$CEFI
endpoint.out.fam$Shredder_Perc<-fam.BA.out$Summary.Metrics$Shredder.Percent
endpoint.out.fam$Shredder_Rich<-fam.BA.out$Summary.Metrics$Shredder.Richness
endpoint.out.fam$Filterer_Perc<-fam.BA.out$Summary.Metrics$Filterer.Percent
endpoint.out.fam$Filterer_Rich<-fam.BA.out$Summary.Metrics$Filterer.Richness
endpoint.out.fam$Predator_Perc<-fam.BA.out$Summary.Metrics$Predator.Percent
endpoint.out.fam$Predator_Rich<-fam.BA.out$Summary.Metrics$Predator.Richness
endpoint.out.fam$CollGath_Perc<-fam.BA.out$Summary.Metrics$Gatherer.Percent
endpoint.out.fam$CollGath_Rich<-fam.BA.out$Summary.Metrics$Gatherer.Richness
endpoint.out.fam$ScrapGraz_Perc<-fam.BA.out$Summary.Metrics$ScraperGrazer.Percent
endpoint.out.fam$ScrapGraz_Rich<-fam.BA.out$Summary.Metrics$ScraperGrazer.Richness
endpoint.out.fam$Swimmer_Perc<-fam.BA.out$Summary.Metrics$Swimmer.Percent
endpoint.out.fam$Swimmer_Rich<-fam.BA.out$Summary.Metrics$Swimmer.Percent
endpoint.out.fam$Clinger_Perc<-fam.BA.out$Summary.Metrics$Clinger.Percent
endpoint.out.fam$Clinger_Rich<-fam.BA.out$Summary.Metrics$Clinger.Richness
endpoint.out.fam$Sprawler_Perc<-fam.BA.out$Summary.Metrics$Sprawler.Percent
endpoint.out.fam$Sprawler_Rich<-fam.BA.out$Summary.Metrics$Sprawler.Richness
endpoint.out.fam$Climber_Perc<-fam.BA.out$Summary.Metrics$Clinger.Percent
endpoint.out.fam$Climber_Rich<-fam.BA.out$Summary.Metrics$Clinger.Richness
out<-list()
#browser()
out$lpl.endpoints<-endpoint.out.lpl
out$fam.endpoints<-endpoint.out.fam
out$lpl.attributes<-lpl.BA.out$Attributes
out$fam.attributes<-fam.BA.out$Attributes
out$fam.mat<-fam.mat
out$lpl.mat<-lpl.mat
return(out)
}
########################################################
#Function to calculate Bray-Curtis distances
#
#
########################################################
benth.bray<-function(ref,test,data){
#browser()
#if(any(!test%in%colnames(data)))stop("one or more test sites is not in column names of data")
#if(any(!ref%in%colnames(data)))stop("one or more reference sites is not in column names of data")
ref<-colnames(data)[grepl(paste0(ref,collapse = "|"),colnames(data))]
test<-colnames(data)[grepl(paste0(test,collapse = "|"),colnames(data))]
if (!require(vegan,quietly = T)) install.packages('vegan')
require(vegan,quietly = T)
site.class<-data.frame(cbind(c(ref,test),c(rep("ref",length(ref)),rep("test",length(test)))),stringsAsFactors = F)
ref.taxa<-apply(data[,colnames(data)%in%ref],1,median)
bc.out<-cbind(ref.taxa,data[,colnames(data)%in%ref],data[,colnames(data)%in%test])
bc.out<-as.matrix(vegdist(t(bc.out),method="bray"))[-c(1),1]
site.class<-cbind(site.class,bc.out)
colnames(site.class)<-c("Site","Class","Bray-Curtis")
return(site.class)
}
########################################################
#Function to calculate data summaries
#
#
########################################################
benth.summaries<-function(benth.endpoint,stations,taxa.summary,abund.thresh=NA) {
if (!require(reshape2,quietly = T)) install.packages('reshape2')
require(reshape2,quietly = T)
if (!require(plyr,quietly = T)) install.packages('plyr')
require(plyr,quietly = T)
for (i in stations){
benth.endpoint$lpl.endpoints$stations[grep(i,rownames(benth.endpoint$lpl.endpoints))]<-i
benth.endpoint$fam.endpoints$stations[grep(i,rownames(benth.endpoint$fam.endpoints))]<-i
}
melted.lpl <- melt(benth.endpoint$lpl.endpoints, id.vars=c("stations"))
melted.fam <- melt(benth.endpoint$fam.endpoints, id.vars=c("stations"))
lpl.summ<-ddply(melted.lpl, c("variable","stations"), summarise,
n=length(value[!is.na(value)]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
fam.summ<-ddply(melted.fam, c( "variable","stations"), summarise,
n=length(value[!is.na(value)]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
lpl.summ[,-c(1:3)]<-round(lpl.summ[,-c(1:3)],digits=3)
fam.summ[,-c(1:3)]<-round(fam.summ[,-c(1:3)],digits=3)
out<-list()
out$lpl.summary<-lpl.summ
out$fam.summary<-fam.summ
if (taxa.summary==T) {
fam.mat<-benth.endpoint$fam.mat
lpl.mat<-benth.endpoint$lpl.mat
for (i in 1:ncol(fam.mat)){
s<-sum(fam.mat[,i])
fam.mat[,i]<-fam.mat[,i]/s
}
for (i in 1:ncol(lpl.mat)){
s<-sum(lpl.mat[,i])
lpl.mat[,i]<-lpl.mat[,i]/s
}
fam.mat<-fam.mat[order(rowSums(fam.mat),decreasing = T),]
lpl.mat<-lpl.mat[order(rowSums(lpl.mat),decreasing = T),]
fam.mat<-round(fam.mat,digits=3)
lpl.mat<-round(lpl.mat,digits=3)
if (!is.na(abund.thresh)){
lpl.mat<-lpl.mat[apply(lpl.mat,1,function(x)any(x>abund.thresh)),]
fam.mat<-fam.mat[apply(fam.mat,1,function(x)any(x>abund.thresh)),]
}
out$lpl.prop<-lpl.mat
out$fam.prop<-fam.mat
lpl.mat<-data.frame(t(lpl.mat))
lpl.mat$stations<-benth.endpoint$lpl.endpoints$stations
fam.mat<-data.frame(t(fam.mat))
fam.mat$stations<-benth.endpoint$fam.endpoints$stations
melted.lpl <- melt(lpl.mat, id.vars=c("stations"))
melted.fam <- melt(fam.mat, id.vars=c("stations"))
lpl.summ<-ddply(melted.lpl, c("stations", "variable"), summarise,
n=length(value[!is.na(value)&value>0]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
fam.summ<-ddply(melted.fam, c("stations", "variable"), summarise,
n=length(value[!is.na(value)&value>0]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
lpl.summ[,-c(1:3)]<-round(lpl.summ[,-c(1:3)],digits=3)
fam.summ[,-c(1:3)]<-round(fam.summ[,-c(1:3)],digits=3)
out$lpl.prop.summary<-lpl.summ
out$fam.prop.summary<-fam.summ
fam.mat.c<-benth.endpoint$fam.mat
lpl.mat.c<-benth.endpoint$lpl.mat
if (!is.na(abund.thresh)){
lpl.mat.c<-lpl.mat.c[rownames(lpl.mat.c)%in%colnames(lpl.mat),]
fam.mat.c<-fam.mat.c[rownames(fam.mat.c)%in%colnames(fam.mat),]
}
fam.mat.c<-fam.mat.c[order(rowSums(fam.mat.c),decreasing = T),]
lpl.mat.c<-lpl.mat.c[order(rowSums(lpl.mat.c),decreasing = T),]
fam.mat.c<-round(fam.mat.c,digits=3)
lpl.mat.c<-round(lpl.mat.c,digits=3)
out$lpl.dens<-lpl.mat.c
out$fam.dens<-fam.mat.c
lpl.mat.c<-data.frame(t(lpl.mat.c))
lpl.mat.c$stations<-benth.endpoint$lpl.endpoints$stations
fam.mat.c<-data.frame(t(fam.mat.c))
fam.mat.c$stations<-benth.endpoint$fam.endpoints$stations
melted.lpl <- melt(lpl.mat.c, id.vars=c("stations"))
melted.fam <- melt(fam.mat.c, id.vars=c("stations"))
lpl.summ<-ddply(melted.lpl, c("stations", "variable"), summarise,
n=length(value[!is.na(value)&value>0]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
fam.summ<-ddply(melted.fam, c("stations", "variable"), summarise,
n=length(value[!is.na(value)&value>0]),
min=min(value,na.rm=T),
mean = mean(value,na.rm=T),
median=median(value,na.rm=T),
max=max(value,na.rm=T),
sd = sd(value,na.rm=T),
se = sd(value,na.rm=T)/sqrt(length(value[!is.na(value)])))
lpl.summ[,-c(1:3)]<-round(lpl.summ[,-c(1:3)],digits=3)
fam.summ[,-c(1:3)]<-round(fam.summ[,-c(1:3)],digits=3)
out$lpl.dens.summary<-lpl.summ
out$fam.dens.summary<-fam.summ
}
return(out)
}
########################################################
#Function to perform two group comparisions
#
#
########################################################
twogroup_comparision<-function(ref,test,data,alpha=0.1){
if (!require(car,quietly = T)) install.packages('car')
require(car,quietly = T)
ref<-rownames(data)[grepl(ref,rownames(data))]
test<-rownames(data)[grepl(test,rownames(data))]
site.class<-data.frame(cbind(c(ref,test),c(rep("ref",length(ref)),rep("exp",length(test)))),stringsAsFactors = F)
colnames(site.class)<-c("Station","Area")
site.class<-cbind(site.class,data[match(site.class$Station,rownames(data)),])
metrics<-colnames(data)
out<-data.frame(matrix(nrow=length(metrics),ncol=10))
colnames(out)<-c("Endpoint","Transformation","Test",
"P-value","Reference_Mean", "Exposure_Mean","Observed_ES","Observed_percent","Measurable_ES","Power")
out$Endpoint<-metrics
for (i in metrics) {
dat1<-data.frame(matrix(nrow=7,ncol=3))
colnames(dat1)<-c("Trans","norm","var")
dat1$Trans<-c("none","log","squared","square root","forth root",
"logit","arcsine")
for (n in dat1$Trans) {
if(class(try(transform(site.class[,i],n),silent=T))!="try-error"){
if (all(!is.na(transform(site.class[,i],n))) & all(!is.nan(transform(site.class[,i],n)))){
dat1$norm[dat1$Trans==n]<-shapiro.test(residuals(lm(transform(site.class[,i],n)~as.factor(site.class$Area))))$p.value
dat1$var[dat1$Trans==n]<-leveneTest(y=transform(site.class[,i],n),group=as.factor(site.class$Area))$`Pr(>F)`[1]
}
}
}
if (any(dat1$norm>0.05,na.rm = T)){
out$Transformation[out$Endpoint==i]<-dat1$Trans[which.max(dat1$norm)]
if (dat1$var[which.max(dat1$norm)]>0.05) {
out$Test[out$Endpoint==i]<-"T-equal"
out$'P-value'[out$Endpoint==i]<-t.test(transform(site.class[,i],out$Transformation[out$Endpoint==i])~as.factor(site.class$Area),
var.equal=T)$p.value
} else {
out$Test[out$Endpoint==i]<-"T-unequal"
out$'P-value'[out$Endpoint==i]<-t.test(transform(site.class[,i],out$Transformation[out$Endpoint==i])~as.factor(site.class$Area),
var.equal=F)$p.value
}
#browser()
n<-length(which(site.class$Area=="ref"))
ta<-qt(1-alpha/2,length(which(site.class$Area=="ref"))-1)
tb<-qt(1-alpha,length(which(site.class$Area=="ref"))-1)
sd<-sd(transform(site.class[site.class$Area=="ref",i],out$Transformation[out$Endpoint==i]),na.rm=T)
mse<-mean(residuals(aov(transform(site.class[,i],out$Transformation[out$Endpoint==i])~as.factor(site.class$Area)))^2)
out$Reference_Mean[out$Endpoint==i]<-mean(site.class[site.class$Area=="ref",i],na.rm=T)
out$Exposure_Mean[out$Endpoint==i]<-mean(site.class[site.class$Area=="exp",i],na.rm=T)
out$Observed_ES[out$Endpoint==i]<-((transform(out$Exposure_Mean[out$Endpoint==i],out$Transformation[out$Endpoint==i])-
transform(out$Reference_Mean[out$Endpoint==i],out$Transformation[out$Endpoint==i]))/
sd)
out$Observed_percent[out$Endpoint==i]<-((transform(out$Exposure_Mean[out$Endpoint==i],out$Transformation[out$Endpoint==i])-
transform(out$Reference_Mean[out$Endpoint==i],out$Transformation[out$Endpoint==i]))/
transform(out$Reference_Mean[out$Endpoint==i],out$Transformation[out$Endpoint==i]))
#browser()
#out$Power[out$Endpoint==i]<-pt(sqrt(n/(2*(sqrt(mse)/(2*sd))^2))-ta,n-1)
#out$Measurable_ES[out$Endpoint==i]<-((ta+tb)*(sqrt(mse)*(sqrt(2/n))))/sd
} else {
out$Transformation[out$Endpoint==i]<-"rank"
out$Test[out$Endpoint==i]<-"MW"
out$'P-value'[out$Endpoint==i]<-wilcox.test(site.class[,i]~as.factor(site.class$Area),exact=T)$p.value
out$Reference_Mean[out$Endpoint==i]<-median(site.class[site.class$Area=="ref",i],na.rm=T)
out$Exposure_Mean[out$Endpoint==i]<-median(site.class[site.class$Area=="exp",i],na.rm=T)
out$Observed_ES[out$Endpoint==i]<-(median(site.class[site.class$Area=="exp",i],na.rm=T)-
median(site.class[site.class$Area=="ref",i],na.rm=T))/
mad(site.class[,i],na.rm=T)
out$Observed_percent[out$Endpoint==i]<-(median(site.class[site.class$Area=="exp",i],na.rm=T)-
median(site.class[site.class$Area=="ref",i],na.rm=T))/
median(site.class[site.class$Area=="ref",i],na.rm=T)
}
}
#browser()
out[,-c(1:3)]<-round(out[,-c(1:3)],digits=3)
out$Observed_percent<-paste0(out$Observed_percent*100,"%")
#out[,-c(1:3)]<-scale::comma(out[,-c(1:3,8)])
out1<-list()
out1$table<-out
out1$data<-data
class(out1)<-"twogroup_comparision"
return(out1)
}
########################################################
#Function to plot two group comparisions
#
#
########################################################
plot.twogroup_comparision<-function(x,...) {
results<-x$table
data<-x$data
browser()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.