#' Estimating biotic uniqueness of biological samples
#' @description This algorithm \code{uniquity} estimates the uniqueness of a set of communities
#' @param sstable a data.frame with with a species/site matrix (or an OTU table)
#' that has sites/samples as columns and species/OTUs as rows, and observations as
#' presence/absence data or abundance data.
#' @param classes a list (dataframe) assigning samples to a set of unique classes (e.g. habitat-types). Each class
#' represents an exclusive proportion af the investigated total area). (data.frame should contain these column names: site, class).
#' If no classes are supplied, each samples is assigned to a unique class, and these will receive equal weights.
#' @param weights a list (dataframe) with the weight (proportion of total) of each
#' class. (data.frame should contain these column names: class, weight).
#' If no weights are supplied, classes are assigned equal weights.
#' @param presabs TRUE/FALSE. reduce species/site matrix to presence absence (0/1). Default is TRUE
#' @param background_sites any positive number. The number of background sites to construct
#' for counting the number of unique species/OTUs of the sample/site in focus (i.e the uniquity of the site/sample).
#' each each compare. If \code{background_sites} < 1, the algorithm uses that proportion
#' of the sites investigated as the number of background sites. If \code{background_sites} =>1, that absolute number
#' of background sites wll be constructed. Default is 0.66 (=66\% of samples/sites)
#' @param rep any positive integer. The number of replicates pr site evaluation.
#' (i.e. the number of times to construct a pool of background sites for evaluating each site/sample). Default is 100.
#' @param non_correspondence TRUE/FALSE. If set to TRUE non-matching entries (classes, weights, sites) will be removed from the dataet.
#' If set to FALSE, non non-matching entries will throw an error. Can be usefull for subsetting data,
#' as subsetting will only be necessary in one part of the data (species-site table)
#' @param total_coverage any number from 0 to 1. The fraction of total area investigated. If set to 0 the supplied
#' weights will be used, unless the total weight exceeds 1, in wich case weights are scaled for a total of 1. If set to any
#' number above 0 (including 1), weights are scaled for a total weight of \code{total_coverage}.
#' @return Function \code{uniquity} returns a list of results based on the input
#' table and parameters.
#' \enumerate{
#' \item \code{simple_site_score} the sum of inverse species weights, as a fast approximation of the estimated uniquity.
#' \item \code{uniquity_score} the simulated uniquity contribution of each site, relative to species richness.
#' \item \code{adjusted_uniquity} - number of curated (parent) OTUs
#' \item \code{uniquity_table} the final table with number of unique observations pr species per site
#' \item \code{uniquity_species_scores} sum of contribution per species.
#' \item \code{Unique_species_avg} average of contribution per species for sites where it occurs.
#' \item \code{Summed_species_weights} sum of weight pr species (used for drawing species for background sites).
#' \item \code{Average_species_weight_pr_site} average species weight pr site (with a weight).
#' \item \code{Top20_unique_sites} top 20 unique sites.
#' \item \code{Top20_unique_species} top 20 species with highest uniquity impact.
#' \item \code{Accumulated_uniqueness} uniquity uniquity metric per replicate.
#' \item \code{site_richness} species richness per site.
#' \item \code{replicates} The number of replicates used for each site uniquity estimation.
#' \item \code{background_sites} The number of background sites sampled.
#' \item \code{Removed_classes} classes removed due to non correspondence.
#' \item \code{Removed_sites} sites removed due to non correspondence.
#' \item \code{total_coverage} sum of site weights used in weighting of species.}
#' @examples
#' uniquity(my_table)
#' uniquity(my_table, my_classes, my_weights, presabs = FALSE, rep = 50, size = 100, non_correspondence = TRUE, total_coverage = 0.5)
#' @author Tobias Guldberg Frøslev
#' @export
#Variables
# ft: final table
# sn: site number
# lr: loop rounds
# rep: repetitions. Number of resampling events pr plot evaluation
# css: current site species. IDs of the species present in the currently evaluated plot
# rfp: number of background plots used to evaluate each site
# crsp : current background site pool. Pooled species IDs of full background site pool.
# csu : current site unique. Unique species IDs of current site in current repetition
# csut : current site unique total. vector of all unique species IDs pr current site.
# nsi : number of sites in study
# nsp : number of species in study
# spr : species richness pr site
# siw : site weights. percentage of area in the study represented by each site.
# ssw : species-site weights. calculated weight pr species pr site (site). siw applied to relevant species in each site
# spw : species weights. summed weight of each species across all sites
# claw : class weight. he figure needed to multiply each weight with. (equal to 1/(number of sites referred to class))
# classw : temporary datafra to calculate adjusted site weights
# rep: number of replicates pr site evaluation
# sv: vector of species IDs (numbers)
# refspecies: input number of species to draw for each background site. (0: use random)
# ssrsp: the actual number of species being drawn for each background site.
uniquity2 <- function(sstable, classes = NULL, weights = NULL, presabs = TRUE, background_sites = 50, rep = 50, non_correspondence = TRUE, total_coverage = 0){
#Exclude singletons if selected
require(vegan)
#Check if a species/site table is present
if (missing(sstable)) stop("Need to specify a valid species/site matrix as a data.frame with sites as rownames and species/OTUs as column names")
sstable_names <- row.names(sstable)
sstable <- as.matrix(sstable)
removed_classes <- NULL
removed_sites <- NULL
#Check if valid combination of classes and weights are present.
if (is.null(classes)) {
if (is.null(weights)) {
message("no classes supplied. Assigning separate class to each site!")
classes <- data.frame(site=as.character(row.names(sstable)),class=rep(1,length(sstable_names)))
} else {
stop("Need to specify a valid classes, if supplying weights")
}
}
#Apply equal weights when no weights supplied
if (is.null(weights)) {
message("no weights supplied. Assigning equal weights to all classes!")
weights <- data.frame(class=names(table(classes$class)),weight=1)
}
sorted_sstable_names <- as.character(sort(sstable_names))
sorted_class_site_names <- as.character(sort(classes$site))
sstable_uniq <- paste(setdiff(sorted_sstable_names,sorted_class_site_names),collapse=" ")
class_uniq <- paste(setdiff(sorted_class_site_names,sorted_sstable_names),collapse=" ")
if(!identical(sorted_sstable_names, sorted_class_site_names)){
if(non_correspondence){
common_site_names <- intersect(sorted_sstable_names,sorted_class_site_names)
rem_s_uniq <- paste(setdiff(sorted_sstable_names,common_site_names),collapse=" ")
rem_c_uniq <- paste(setdiff(sorted_class_site_names,common_site_names),collapse=" ")
message("Non-identical classes between classes and weights")
message(paste("removing the following from the species_site_table:",rem_s_uniq))
message(paste("removing the following from the class file:",rem_c_uniq))
removed_sites <- paste(rem_s_uniq,rem_c_uniq,collapse=" ")
sstable <- sstable[row.names(sstable) %in% common_site_names,]
classes <- classes[classes$site %in% common_site_names,]
} else {
message("Site/sample names are not identical between site/species matrix and class-data")
message(paste("unique to site/species table:",sstable_uniq))
message(paste("unique to classes:",class_uniq))
stop("stopped!")
}
}
sorted_class_site_classes <- sort(names(table(classes$class)))
sorted_weight_classes <- sort(weights$class)
class_uniq <- paste(setdiff(sorted_class_site_classes,sorted_weight_classes),collapse=" ")
weight_uniq <- paste(setdiff(sorted_weight_classes,sorted_class_site_classes),collapse=" ")
if(!identical(sorted_class_site_classes, sorted_weight_classes)){
if(non_correspondence){
common_class_names <- intersect(sorted_class_site_classes, sorted_weight_classes)
rem_c_uniq <- paste(setdiff(sorted_class_site_classes,common_class_names),collapse=" ")
rem_w_uniq <- paste(setdiff(sorted_weight_classes,common_class_names),collapse=" ")
message("Non-identical classes between classes and weights")
message(paste("removing the following from the class file:",rem_c_uniq))
message(paste("removing the following from the weight file:",rem_w_uniq))
classes <- classes[classes$class %in% common_class_names,]
weights <- weights[weights$class %in% common_class_names,]
removed_classes <- paste(rem_w_uniq,rem_c_uniq,collapse=" ")
} else {
message("Names of classes are not identical between class data and weight data")
message(paste("class names unique to classes:",class_uniq))
message(paste("class names unique to weights:",weight_uniq))
stop("stopped!")
}
}
sorted_sstable_names <- as.character(sort(sstable_names))
sorted_class_site_names <- as.character(sort(classes$site))
sstable_uniq <- paste(setdiff(sorted_sstable_names,sorted_class_site_names),collapse=" ")
class_uniq <- paste(setdiff(sorted_class_site_names,sorted_sstable_names),collapse=" ")
if(!identical(sorted_sstable_names, sorted_class_site_names)){
if(non_correspondence){
common_site_names <- intersect(sorted_sstable_names,sorted_class_site_names)
rem_s_uniq <- paste(setdiff(sorted_sstable_names,common_site_names),collapse=" ")
rem_c_uniq <- paste(setdiff(sorted_class_site_names,common_site_names),collapse=" ")
message("Non-identical classes between classes and weights")
message(paste("removing the following from the species_site_table:",rem_s_uniq))
message(paste("removing the following from the class file:",rem_c_uniq))
removed_sites <- paste(removed_sites,rem_s_uniq,rem_c_uniq,collapse=" ")
sstable <- sstable[row.names(sstable) %in% common_site_names,]
classes <- classes[classes$site %in% common_site_names,]
}
}
sstable_names <- row.names(sstable)
tab <- sstable
#distribute class weights equally among sites assigned to each class
claw <- as.data.frame(1/table(classes$class)) # proportion of weight to assign to each site within each class
classw <- merge(weights,claw, by.x = "class", by.y="Var1")
classw$adj <- classw$weight * classw$Freq #calculate adjusted weight pr site within each class
siw <- merge(classes,classw, by="class")[,c("site","adj")]
row.names(siw) <- siw$site
siw <- siw[sstable_names,]$adj #sort site-weights vector
siw_sum <- sum(siw)
if (siw_sum > 1 & total_coverage == 0) {
message("total site-weights are above 1, scaling weight for a total of 1")
siw <- siw/siw_sum
}
if (siw_sum <= 1 & total_coverage == 0) {
message(paste0("Using weights as supplied, for total site weights of ",siw_sum))
}
if (total_coverage == 1){
siw <- siw/siw_sum
message(paste0("Scaling site weight for a total of :",sum(siw)))
}
if (siw_sum < 1 & total_coverage > 0 & total_coverage < 1) {
siw <- (siw/siw_sum)*total_coverage
message(paste0("Scaling site weight for a total of ", sum(siw)))
}
if (presabs){ tab[tab>0] <- 1 }# Convert to pres/abs table
nsi <- nrow(tab) # number of sites
nsp <- ncol(tab) # number of species/otus
spr <- rowSums(tab>0) # Get number of species
#Setting resampling parameters
lr <- nsi*rep*size # calculate loop rounds (to be able to make "progress bar")
sv <- seq(1:nsp) # make a vector of species-numbers
#Normalized_table1C <- decostand(tab,"total") # Normalize Table1 to get relative share pr species (this choice has been deactivated)
ssw <- apply(tab,2,function(x)x*siw) # multiply relative share by siteprobs to get species_weight relative to plot
#ssw <- apply(Normalized_table1C,2,function(x)x*siw) # multiply relative share by siteprobs to get species_weight relative to plot
spw <- colSums(ssw) ## Add all species probs pr site from all sites to get total weight
spw2 <- spw
names(spw2) <- 1:length(spw2)
sspl <- length(spw2)
norm_score <- max(spw) - spw
score_tab <- apply(tab,1,function(x)x*norm_score)
site_cores <- colSums(score_tab)
csut <- vector(mode="list", length=nsi) # prepare list for total unique species for each site
ft <- tab # Prepare a table for results
ft[ft>0] = 0 # resetting the results table
total_table <- list()
site_table <- list()
site_rep_uniq <- ft#[,0]
for (sn in 1:nsi){ # looping through the sites one by one
print(paste0("evaluating sample ", sn ," of ", nsi)) # make a progressline
for (rn in 1:rep){ # looping through the replicates one by one
css <- which(tab[sn,]>0) # which species (ID) are present in the current site?
crsp = NULL #resetting the current background set
for (jj in 1:size){
crsp <- unique(c(crsp,which(runif(sspl, 0, 100) < spw2)))
}
csu <- css[!(css %in% crsp)] # find species IDs unique to currently investigated site compared to total background pool
site_table[[rn]] <- csu
site_rep_uniq[sn,rn] <- length(csu)
csut[[sn]] <- c(csut[[sn]],csu) # add unique IDs of current replicate to growing vector of unique IDs for current plot ()
}
total_table[[sn]] <- site_table
}
# process list of vectors of unique species
for (sn in 1:nsi){
ft[sn,as.numeric(names(table(csut[[sn]])))] <- table(csut[[sn]])
}
Uniqueness <- rowSums(ft)/rep #Calculating average uniqueness pr plot
Adj_uniquity <- Uniqueness/spr
Uniquenss_table <- ft
Unique_species <- colSums(ft)
av_un_sp <- (Unique_species/colSums(ft != 0))/rep
nzmean <- function(x) {
zvals <- x==0
if (all(zvals)) 0 else mean(x[!zvals])
}
average_ssw <- apply(ssw,1,nzmean)
top20 <- sort(Uniqueness,decreasing = TRUE)[1:20]
top20sp <- sort(av_un_sp,decreasing = TRUE)[1:20]
result <- list(simple_site_score=site_cores,
uniquity_score=Uniqueness,
adjusted_uniquity=Adj_uniquity,
uniquity_table=Uniquenss_table,
uniquity_species_scores=Unique_species,
Unique_species_avg=av_un_sp,
Summed_species_weights=spw,
Average_species_weight_pr_site=average_ssw,
Top20_unique_sites=top20,
Top20_unique_species=top20sp,
Accumulated_uniqueness=total_table,
Site_richness=spr,
replicates=rep,
background_sites=background_sites,
Removed_classes = removed_classes,
Removed_sites = removed_sites,
ft, total_coverage=sum(siw)
)
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.