#' @title Check taxonomy uniqueness.
#' @description This function checks for duplicated taxonomy names that can
#'   occur in different groups (e.g., the same genus name in different
#'   families). For every value of the selected rank it counts the number of
#'   distinct combinations of that rank with all higher ranks (i.e., all
#'   columns to the left of \code{col}).
#' @param x Data frame with taxonomy (columns = taxonomy ranks ordered from the
#'   highest to the lowest rank, rows = species); missing values should be
#'   coded as \code{NA}
#' @param col Column name (taxonomy rank) that should be checked for
#'   uniqueness; must match exactly one column of \code{x}
#' @param return_all Logical; default, FALSE - only non-unique values will be
#'   returned
#' @param dropNA Logical; default, TRUE - rows with missing values within the
#'   selected column ('col') will be removed
#' @return Data frame with two columns: the selected rank (named as
#'   \code{col}) and \code{UniqueCombs}, the number of distinct higher-rank
#'   lineages in which the taxon occurs (by default, only non-unique values,
#'   i.e. \code{UniqueCombs > 1}, are shown). If no non-unique values are
#'   found, a note is printed and an empty data frame is returned.
#' @export
#'
#' @examples
#' datt <- data.frame(
#'   Kingdom = rep(LETTERS[1:2], each = 6),
#'   Phylum = rep(LETTERS[3:6], each = 3),
#'   Class = rep(letters[1:6], times = 2),
#'   stringsAsFactors = FALSE
#' )
#'
#' check_tax_uniqueness(datt, col = "Phylum") # All ranks of Phylum are unique
#' check_tax_uniqueness(datt, col = "Class") # Classes are duplicated
#'
check_tax_uniqueness <- function(x, col = "k", return_all = FALSE, dropNA = TRUE) {
  # NB! Columns should be ordered (highest rank first),
  # missing values are coded with NA
  x <- as.data.frame(x)

  ## Where is the selected rank?
  if (!is.character(col) || length(col) != 1L || is.na(col)) {
    stop("'col' must be a single column name.", call. = FALSE)
  }
  col_id <- which(colnames(x) == col)
  if (length(col_id) == 0L) {
    stop("Column '", col, "' was not found in 'x'.", call. = FALSE)
  }
  if (length(col_id) > 1L) {
    stop("Column name '", col, "' matches several columns of 'x'.",
         call. = FALSE)
  }

  ## Remove NAs (drop = FALSE keeps a one-column input as a data frame)
  if (dropNA) {
    x <- x[!is.na(x[[col_id]]), , drop = FALSE]
  }

  ## Remove lower ranks and keep each distinct lineage only once
  ## (drop = FALSE is required when the selected rank is the first column)
  lineages <- x[, seq_len(col_id), drop = FALSE]
  lineages <- lineages[!duplicated(lineages), , drop = FALSE]

  ## Count the number of distinct lineages per taxon of the selected rank.
  ## Taxa are reported in sorted order, with NA (if kept) as the last group.
  keys <- lineages[[col_id]]
  taxa <- sort(unique(keys), na.last = TRUE)
  res <- data.frame(
    taxa,
    tabulate(match(keys, taxa), nbins = length(taxa)),
    stringsAsFactors = FALSE
  )
  names(res) <- c(col, "UniqueCombs")

  ## Remove unique
  if (!return_all) {
    res <- res[res$UniqueCombs > 1, , drop = FALSE]
    if (nrow(res) == 0) {
      cat("All ranks of ", col, " are unique.\n", sep = "")
    }
  }

  res
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.