Nothing
#' Get accepted canonical names and taxonomy for a given species name
#'
#' The function maps user provided names to accepted species names.
#'
#' @param x a character string or vector of species names.
#' @param subspecies logical. If TRUE (default), the given name is resolved to
#' subspecies epithet, otherwise it will be mapped to species level.
#' @param higherrank logical. If FALSE (default), it will not allow remapping of
#' unknown species names to higher taxon ranks (e.g. genus).
#' @param verbose logical. If FALSE (default), warnings and messages are
#' suppressed.
#' @param fuzzy logical. Defaults to TRUE to deal with misspelled names. May
#' produce wrong assignments in case of very similar taxon names. If FALSE
#' (default), names are only resolved to exactly matching taxa on GBIF
#' taxonomy service.
#' @param conf_threshold numerical, ranging from 0 to 100 (default value = 90).
#' Defines the confidence level of the request to be accepted. To cover for
#' misspellings and errors, could go as low as 50.
#' @param resolve_synonyms logical. If TRUE (default), user provided synonyms
#' are mapped to the accepted names on GBIF taxonomy service.
#'
#' @return a data.frame mapping the user supplied names to the accepted taxon
#' names and higher taxonomic information (kingdom, phylum, class, order,
#' family, genus).
#'
#' @details The function relies on package 'taxize' by Scott Chamberlain. It
#' uses the spell-checking and fuzzy matching algorithms provided by Global
#' Names Resolver (`taxize::gnr_resolve()`) and forwards synonyms to the
#' accepted names as provided by GBIF Backbone Taxonomy
#' (`taxize::get_gbif_id_()`).
#'
#' If 'synonym' is returned as TRUE, the user provided name has been
#' identified as a synonym and was mapped to an accepted name.
#'
#' The field confidence reports the confidence of the matching procedure
#' performed by the function `get_gbifid_()` of the package 'taxize'. The
#' taxonID is a globally valid URI that links to the taxon description of the
#' GBIF backbone taxonomy.
#'
#' @import taxize
#' @import curl
#' @importFrom data.table rbindlist
#' @export
#'
#' @examples
#'
#' get_gbif_taxonomy(c("Chorthippus albomarginatus", "Chorthippus apricarius",
#' "Chorthippus biguttulus", "Chorthippus dorsatus", "Chorthippus montanus",
#' "Chorthippus parallelus", "Chrysochraon dispar", "Conocephalus dorsalis",
#' "Conocephalus fuscus", "Decticus verrucivorus", "Euthystira brachyptera",
#' "Gomphocerippus rufus", "Gryllus campestris", "Metrioptera roeselii",
#' "Omocestus viridulus", "Phaneroptera falcata", "Platycleis albopunctata",
#' "Spec", "Stenobothrus lineatus", "Stenobothrus stigmaticus",
#' "Stethophyma grossum", "Tetrix kraussi", "Tetrix subulata",
#' "Tetrix tenuicornis", "Tetrix undulata", "Tettigonia cantans",
#' "Tettigonia viridissima")
#' )
#'
#' get_gbif_taxonomy("Vicia")
get_gbif_taxonomy <- function(x,
subspecies = TRUE,
higherrank = FALSE,
verbose = FALSE,
fuzzy = TRUE,
conf_threshold = 90,
resolve_synonyms = TRUE
) {
matchtype = status = confidence = NULL
# test for internet connectivity
if( !curl::has_internet() ) {
message("Connection to Gbif Taxonomy API failed. Please check internet connectivity!")
temp <- lapply(x, function(i) {data.frame()} )
names(temp) <- x
} else {
# get gbif mappings
temp <- taxize::get_gbifid_(x, messages = verbose)
}
# loop over all species returns
for(i in 1:length(temp)) {
warning_i = ""
synonym_i = FALSE
# add warning in offline mode
if( !curl::has_internet() ) {
warning_i = "Gbif Taxonomy Service unavailable! Internet connection required."
temp[[i]] <- data.frame(verbatimScientificName = x[i], matchtype = "NONE", status = "NA", rank = "species", stringsAsFactors = FALSE)
} else {
# buildup empty returns
if(nrow(temp[[i]]) == 0) {
warning_i <- paste("No matching species concept!")
temp[[i]] <- data.frame(verbatimScientificName = x[i], matchtype = "NONE", status = "NA", rank = "species", stringsAsFactors = FALSE)
}
# clean out fuzzy matches, if not allowed
if(!fuzzy & nrow(temp[[i]]) > 0) {
temp[[i]] <- subset(temp[[i]], matchtype != "FUZZY")
if(nrow(temp[[i]]) == 0) {
warning_i <- paste(warning_i, "Fuzzy matching might yield results.")
}
}
# check for confidence threshold
if(!is.null(conf_threshold) & nrow(temp[[i]]) > 0) {
temp[[i]] <- subset(temp[[i]], confidence >= conf_threshold)
if(nrow(temp[[i]]) == 0) {
temp[[i]] <- data.frame(verbatimScientificName = x[i], matchtype = "NONE", status = "NA", rank = "species", stringsAsFactors = FALSE)
warning_i <- paste(warning_i, "Check spelling or lower confidence threshold!")
}
}
# remove all synonyms, if accepted exact match is found
if(any(temp[[i]]$status == "ACCEPTED")) {
temp[[i]] <- subset(temp[[i]], status == "ACCEPTED")
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
#warning_i <- paste(warning_i, "Automatically mapped to accepted species name!", sep = " ")
if(nrow(temp[[i]]) > 1) {
temp[[i]] <- temp[[i]][1,]
warning_i <- paste(warning_i, "Selected first of multiple equally ranked concepts!")
}
}
# resolve all synonyms, if allowed
if(!any(temp[[i]]$status == "ACCEPTED") & any(temp[[i]]$status == "SYNONYM")) {
if(resolve_synonyms) {
keep <- temp[i]
temp[i] <- taxize::get_gbifid_(temp[[i]]$species[which.max(temp[[i]]$confidence)], messages = verbose)
if(temp[[i]][1,]$status == "ACCEPTED") {
temp[[i]] <- subset(temp[[i]], matchtype == "EXACT" & status == "ACCEPTED")
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
if(nrow(temp[[i]]) > 1) {
temp[[i]] <- temp[[i]][1,]
warning_i <- paste(warning_i, "Selected first of multiple equally ranked concepts!")
}
warning_i <- paste(warning_i, "A synonym was mapped to the accepted species concept!", sep = " ")
synonym_i = TRUE
} else {
status <- temp[[i]][1,]$status
temp[i] <- keep
if(nrow(temp[[i]]) > 1) {
temp[[i]] <- temp[[i]][1,]
warning_i <- paste(warning_i, "Selected first of multiple equally ranked concepts!")
}
warning_i <- paste0(warning_i, " Resolved synonym '", temp[[i]]$species,"' is labelled '", status, "'. Clarification required!" )
}
} else {
temp[[i]] <- subset(temp[[i]], status == "SYNONYM")
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
warning_i <- paste(warning_i, "The provided taxon seems to be a synonym of '", temp[[i]]$species,"'!", sep = "")
}
}
# check for doubtful status
if(all(temp[[i]]$status == "DOUBTFUL")) {
temp[[i]] <- subset(temp[[i]], status == "DOUBTFUL")
warning_i <- paste(warning_i, "Mapped concept is labelled 'DOUBTFUL'!")
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
#warning_i <- paste(warning_i, "Automatically mapped to accepted species name!", sep = " ")
if(nrow(temp[[i]]) > 1) {
temp[[i]] <- temp[[i]][1,]
warning_i <- paste(warning_i, "Selected first of multiple equally ranked concepts!")
}
}
# 3. check rankorder of result
rankorder <- c("kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies")
if(match(temp[[i]]$rank, rankorder) > 7 & !subspecies) {
if(length(strsplit(as.character(temp[[i]]$canonicalname), " ")[[1]]) > 2) {
temp[i] <- taxize::get_gbifid_(paste(strsplit(names(temp[i]), " ")[[1]][1:2], collapse = " "), messages = verbose)
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
warning_i<- paste(warning_i, "Subspecies has been remapped to species concept!", sep = " ")
} else {
temp[[i]] <- data.frame(verbatimScientificName = x[i], matchtype = "NONE", rank = "subspecies", stringsAsFactors = FALSE)
warning_i <- paste(warning_i, "No mapping of subspecies name to species was possible!", sep = " ")
}
}
if(temp[[i]]$matchtype == "HIGHERRANK") {
if(higherrank) {
temp[[i]] <- subset(temp[[i]], temp[[i]]$confidence == max(temp[[i]]$confidence))
warning_i <- paste(warning_i, "No matching species concept! Entry has been mapped to higher taxonomic level.")
} else {
temp[[i]] <- data.frame(verbatimScientificName = x[i], matchtype = "NONE", rank = "highertaxon", stringsAsFactors = FALSE)
warning_i <- paste("No matching species concept!", warning_i)
}
}
}
# 4. create structured output
temp[[i]] <- data.frame(
verbatimScientificName = x[i],
synonym = synonym_i,
scientificName = if(is.null(temp[[i]]$canonicalname)) NA else temp[[i]]$canonicalname,
author = if(is.null(temp[[i]]$canonicalname)) NA else sub(paste0(temp[[i]]$canonicalname," "), "", temp[[i]]$scientificname),
taxonRank = if(is.null(temp[[i]]$rank)) NA else temp[[i]]$rank,
confidence = if(is.null(temp[[i]]$confidence)) NA else temp[[i]]$confidence,
kingdom = if(is.null(temp[[i]]$kingdom)) NA else temp[[i]]$kingdom,
phylum = if(is.null(temp[[i]]$phylum)) NA else temp[[i]]$phylum,
class = if(is.null(temp[[i]]$class)) NA else temp[[i]]$class,
order = if(is.null(temp[[i]]$order)) NA else temp[[i]]$order,
family = if(is.null(temp[[i]]$family)) NA else temp[[i]]$family,
genus = if(is.null(temp[[i]]$genus)) NA else temp[[i]]$genus,
taxonomy = "GBIF Backbone Taxonomy",
taxonID = if(is.null(temp[[i]]$usagekey)) NA else paste0("http://www.gbif.org/species/", temp[[i]]$usagekey, ""),
warnings = NA,
stringsAsFactors = FALSE
)
temp[[i]]$warnings <- warning_i
if(verbose & nchar(warning_i) >= 1) warning(warning_i)
}
#compile output data.frame
out <- data.table::rbindlist(temp, fill = TRUE)
class(out) <- c("data.frame", "taxonomy")
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.