Nothing
##' Match taxonomic names to the Open Tree Taxonomy.
##'
##' Accepts one or more taxonomic names and returns information about
##' potential matches for these names to known taxa in the Open Tree
##' Taxonomy.
##'
##' This service uses taxonomic contexts to disambiguate homonyms and misspelled
##' names; a context may be specified using the \code{context_name} argument.
##' The default value for \code{context_name} is "All life". If no context is
##' specified (i.e., \code{context_name} is set to \code{NULL}), then the
##' context will be inferred (i.e., the shallowest taxonomic context that
##' contains all unambiguous names in the input). Taxonomic contexts are
##' uncontested higher taxa that have been selected to allow limits to be
##' applied to the scope of TNRS searches (e.g. 'match names only within
##' flowering plants'). Once a context has been identified (either
##' user-specified or inferred), all taxon name matches will performed only
##' against taxa within that context. For a list of available taxonomic
##' contexts, see \code{\link{tnrs_contexts}}.
##'
##' A name is considered unambiguous if it is not a synonym and has
##' only one exact match to any taxon name in the entire taxonomy.
##'
##' When the name search returns multiple matches, the taxon with the highest
##' match score is returned. If the name returned is not the one you intended,
##' you can use the \code{inspect} function to check the other taxa returned by
##' your search. The
##' \href{https://docs.ropensci.org/rotl/articles/rotl.html#how-to-change-the-ott-ids-assigned-to-my-taxa}{Getting
##' Started vignette} has more information on how to do this.
##'
##' Several functions listed in the \sQuote{See also} section can be
##' used to inspect and manipulate the object generated by this
##' function.
##'
##'
##' @title Match names to the Open Tree Taxonomy
##' @param names taxon names to be queried. Currently limited to 10,000 names
##' for exact matches and 2,500 names for approximate matches (character
##' vector)
##' @param context_name name of the taxonomic context to be searched (length-one
##' character vector or \code{NULL}). Must match (case sensitive) one of the
##' values returned by \code{\link{tnrs_contexts}}. Default to "All life".
##' @param do_approximate_matching A logical indicating whether or not to
##' perform approximate string (a.k.a. \dQuote{fuzzy}) matching. Using
##' \code{FALSE} will greatly improve speed. Default, however, is \code{TRUE}.
##' @param ids A vector of ids to use for identifying names. These will be
##' assigned to each name in the names array. If ids is provided, then ids and
##' names must be identical in length.
##' @param include_suppressed Ordinarily, some quasi-taxa, such as incertae
##' sedis buckets and other non-OTUs, are suppressed from TNRS results. If
##' this parameter is true, these quasi-taxa are allowed as possible TNRS
##' results.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A data frame summarizing the results of the query. The original
##' query output is appended as an attribute to the returned object (and can
##' be obtained using \code{attr(object, "original_response")}).
##' @seealso \code{\link{inspect.match_names}},
##' \code{\link{update.match_names}}, \code{\link{synonyms.match_names}}.
##' @examples \dontrun{
##' deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
##' "chordata", "hemichordata"))
##' }
##' @importFrom stats setNames
##' @export
tnrs_match_names <- function(names = NULL, context_name = "All life",
do_approximate_matching = TRUE, ids = NULL,
include_suppressed = FALSE, ...) {
if (!is.null(context_name) &&
!context_name %in% unlist(tnrs_contexts(...))) {
stop(
"The ", sQuote("context_name"),
" is not valid. Check possible values using tnrs_contexts()"
)
}
## take care of duplicated names
if (any(duplicated(tolower(names)))) {
names <- tolower(names)
warning("Some names were duplicated: ",
paste(sQuote(names[duplicated(names)]), collapse = ", "), ".",
call. = FALSE
)
names <- unique(names)
}
res <- .tnrs_match_names(
names = names, context_name = context_name,
do_approximate_matching = do_approximate_matching,
ids = ids, include_suppressed = include_suppressed,
...
)
check_tnrs(res)
match_ids <- highest_match_score(res)
if (!identical(length(res[["results"]]), length(match_ids))) {
stop(
"The number of matches and the number of 'results' elements should",
" be the same."
)
}
summary_match <- mapply(
function(rid, mid) {
build_summary_match(
res,
res_id = rid,
match_id = mid,
initial_creation = TRUE
)
},
seq_along(res[["results"]]),
match_ids,
SIMPLIFY = FALSE
)
## add taxon names with no maches
summary_match <- do.call("rbind", summary_match)
summary_match <- as.data.frame(summary_match, stringsAsFactors = FALSE)
summary_match$search_string <- gsub("\\\\", "", summary_match$search_string)
## reorder to match original query
ordr <- match(tolower(names), tolower(summary_match$search_string))
stopifnot(identical(length(match_ids), length(ordr)))
summary_match <- summary_match[ordr, ]
match_ids <- match_ids[ordr]
summary_match[["approximate_match"]] <-
convert_to_logical(summary_match[["approximate_match"]])
summary_match[["is_synonym"]] <-
convert_to_logical(summary_match[["is_synonym"]])
summary_match[["flags"]] <- convert_to_logical(summary_match[["flags"]])
has_original_match <- !is.na(summary_match[["number_matches"]])
json_coords <- data.frame(
search_string = names,
original_order = as.numeric(rownames(summary_match)),
match_id = match_ids,
has_original_match = has_original_match,
row.names = seq_along(names),
stringsAsFactors = FALSE
)
attr(summary_match, "original_order") <- as.numeric(rownames(summary_match))
attr(summary_match, "original_response") <- res
attr(summary_match, "match_id") <- match_ids
attr(summary_match, "has_original_match") <- has_original_match
attr(summary_match, "json_coords") <- json_coords
class(summary_match) <- c("match_names", "data.frame")
rownames(summary_match) <- NULL
summary_match
}
##' @importFrom stats na.omit
convert_to_logical <- function(x) {
if (all(stats::na.omit(x) %in% c("TRUE", "FALSE"))) {
x <- as.logical(x)
} else {
x
}
}
check_tnrs <- function(req) {
no_match <- req[["unmatched_names"]]
if (any(vapply(no_match, length, integer(1)) > 0)) {
warning(
paste(unlist(no_match), collapse = ", "), " are not matched",
call. = FALSE
)
}
}
tnrs_columns <- list(
"search_string" = function(x) x[["search_string"]],
"unique_name" = function(x) .tax_unique_name(x[["taxon"]]),
"approximate_match" = function(x) x[["is_approximate_match"]],
"score" = function(x) x[["score"]],
"ott_id" = function(x) .tax_ott_id(x[["taxon"]]),
"is_synonym" = function(x) x[["is_synonym"]],
"flags" = function(x) paste(.tax_flags(x[["taxon"]]), collapse = ", ")
)
summary_row_factory <- function(res, res_id, match_id, columns = tnrs_columns) {
res_address <- res[["results"]][[res_id]][["matches"]][[match_id]]
ret <- sapply(columns, function(f) f(res_address))
n_match <- length(res[["results"]][[res_id]][["matches"]])
c(ret, number_matches = n_match)
}
build_summary_match <- function(res, res_id, match_id = NULL, initial_creation) {
if (length(res_id) > 1 &&
(!is.null(match_id) && length(match_id) > 1)) {
stop("Something is wrong. Please contact us.")
}
build_summary_row <- function(rid) {
if (is.null(match_id)) {
match_id <- seq_len(length(res[["results"]][[rid]][["matches"]]))
}
if (identical(length(match_id), 0L) ||
is.null(res[["results"]][[rid]][["matches"]][match_id][[1]])) {
return(build_empty_row(tolower(res[["results"]][[rid]][["name"]])))
}
res <- lapply(match_id, function(mid) {
summary_row_factory(res, rid, mid)
})
if (identical(length(match_id), 1L)) {
unlist(res)
} else {
res
}
}
summary_row <- lapply(res_id, build_summary_row)
if (identical(length(res_id), 1L)) {
summary_row <- unlist(summary_row, recursive = FALSE)
}
## Needed if only 1 row returned
if (!inherits(summary_row, "list")) {
summary_row <- list(summary_row)
}
summary_match <- do.call("rbind", summary_row)
summary_match <- data.frame(summary_match, stringsAsFactors = FALSE)
names(summary_match) <- c(names(tnrs_columns), "number_matches")
clean_tnrs_summary(summary_match)
}
##' @importFrom stats setNames
build_empty_row <- function(x) {
no_match_row <- stats::setNames(
rep(NA, length(tnrs_columns) + 1),
c(names(tnrs_columns), "number_matches")
)
no_match_row[1] <- x
no_match_row
}
lowest_ott_id <- function(rsp) {
vapply(seq_along(rsp[["results"]]), function(x) {
.r <- build_summary_match(
res = rsp, res_id = x, match_id = NULL,
initial_creation = TRUE
)
.r <- .r[(!as.logical(.r[["is_synonym"]])) &
!is.na(.r[["flags"]]) &
.r[["flags"]] == "", ]
if (nrow(.r) > 0) {
which.min(.r[["ott_id"]])
} else {
1L
}
}, integer(1))
}
highest_match_score <- function(rsp) {
vapply(seq_along(rsp[["results"]]), function(x) {
.r <- build_summary_match(
res = rsp, res_id = x, match_id = NULL,
initial_creation = TRUE
)
.r <- .r[(!as.logical(.r[["is_synonym"]])) &
!is.na(.r[["flags"]]) &
.r[["flags"]] == "", ]
if (nrow(.r) > 0) {
which.max(.r[["score"]])
} else {
1L
}
}, integer(1))
}
clean_tnrs_summary <- function(summary_match) {
summary_match[["approximate_match"]] <-
convert_to_logical(summary_match[["approximate_match"]])
summary_match[["score"]] <- as.numeric(summary_match[["score"]])
summary_match[["is_synonym"]] <-
convert_to_logical(summary_match[["is_synonym"]])
summary_match[["flags"]] <- convert_to_logical(summary_match[["flags"]])
summary_match[["ott_id"]] <- as.integer(summary_match[["ott_id"]])
summary_match[["number_matches"]] <-
as.integer(summary_match[["number_matches"]])
summary_match
}
##' This function returns a list of pre-defined taxonomic contexts
##' (i.e. clades) which can be used to limit the scope of tnrs
##' queries.
##'
##' Taxonomic contexts are available to limit the scope of TNRS
##' searches. These contexts correspond to uncontested higher taxa
##' such as 'Animals' or 'Land plants'. This service returns a list
##' containing all available taxonomic context names, which may be
##' used as input (via the \code{context_name} argument in other
##' functions) to limit the search scope of other services including
##' \code{\link{tnrs_match_names}}.
##' @title TNRS contexts
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return Returns invisibly a list for each major clades (e.g.,
##' animals, microbes, plants, fungi, life) whose elements
##' contains the possible contexts.
##' @export
tnrs_contexts <- function(...) {
res <- .tnrs_contexts(...)
class(res) <- "tnrs_contexts"
res
}
##' @export
print.tnrs_contexts <- function(x, ...) {
cat("Possible contexts:\n")
lapply(x, function(t) {
res <- unlist(t)
cat(" ", res[1], "\n")
if (length(res) > 1) {
lapply(seq(2, length(res), by = 5), function(l) {
m <- ifelse(l + 5 <= length(res), l + 4, length(res))
cat(" ", paste(res[l:m], collapse = ", "), "\n")
})
}
})
}
##' Return a taxonomic context given a list of taxonomic names
##'
##' Find the least inclusive taxonomic context that includes all the
##' unambiguous names in the input set. Unambiguous names are names
##' with exact matches to non-homonym taxa. Ambiguous names (those
##' without exact matches to non-homonym taxa) are indicated in
##' results.
##'
##' @title Infer the taxonomic context from a list of names
##' @param names Vector of taxon names.
##' @param ... additional arguments to customize the API request (see
##' \code{\link{rotl}} package documentation).
##' @return A list including the context name, the context ott id and
##' possibly the names in the query that have an ambiguous
##' taxonomic meaning in the query.
##' @examples
##' \dontrun{
##' res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
##' }
##' @export
tnrs_infer_context <- function(names = NULL, ...) {
res <- .tnrs_infer_context(names = names, ...)
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.