Nothing
#' Synchronize Species Names Using The Reptile Database
#'
#' @description
#' Queries a list of species names through \code{reptSearch()} and returns a data frame with the currently valid names and taxonomic status for each input.
#'
#' @param x A character vector of taxon names to be matched (e.g., species lists, phylogenetic tip labels, or trait table entries).
#' @param solveAmbiguity Logical. If \code{TRUE}, attempts to resolve ambiguous names by retrieving all possible valid species to which the query may refer. Default is \code{TRUE}.
#' @param cores Integer. Number of CPU cores to use for parallel processing. Default is \code{cores = 1}.
#' @param showProgress Logical. If \code{TRUE}, displays progress updates during processing. Default is \code{TRUE}.
#' @param getLink Logical. If \code{TRUE}, retrieves searched species URLs. Defaults if \code{FALSE}.
#'
#' @return A data frame with the following columns:
#' \itemize{
#' \item \code{query}: the original input names.
#' \item \code{RDB}: the best-matching valid names according to The Reptile Database.
#' \item \code{status}: a status label indicating the result of the match (\code{"up_to_date"}, \code{"updated"}, \code{"updated_typo"}, \code{"ambiguous"}, \code{"merge"}, or \code{"not_found"}).
#' \item \code{url}: Optional, if getLink = TRUE returns the URL of the species page retrieved for each match, or a list of possible matches if ambiguous.
#' }
#'
#' @note
#' \code{reptSync()} does not make authoritative taxonomic decisions. It matches input names against currently accepted names in The Reptile Database (RDB).
#' A name marked as \code{"up_to_date"} may still refer to a taxon that has been split, and thus may not reflect the most recent population-level taxonomy.
#'
#' For ambiguous names, the \code{url} field may contain multiple links corresponding to all valid species to which the queried name is considered a synonym.
#'
#' See package vignettes for more details.
#'
#' @references
#' Liedtke, H. C. (2018). AmphiNom: an amphibian systematics tool. *Systematics and Biodiversity*, 17(1), 1–6. https://doi.org/10.1080/14772000.2018.1518935
#'
#' @examples
#' query <- c("Vieira-Alencar authoristicus", "Boa atlantica", "Boa diviniloqua", "Boa imperator")
#'
#' \donttest{
#' reptSync(x = query, cores = 2)
#' }
#'
#' @export
reptSync <- function(x,
solveAmbiguity = TRUE,
cores = 1,
showProgress = TRUE,
getLink = FALSE) {
# Worker function: performs search + classifies result
worker <- function(species_name) {
result <- letsRept::reptSearch(species_name, verbose = FALSE)
if (is.list(result)) {
RDB <- result$species
status <- if (species_name == RDB) "up_to_date" else "updated"
url <- result$url
} else if (is.character(result) && grepl("^https:", result)) {
search <-safeRequest(result)
title_node <- rvest::html_element(search, "h1")
title_text <- rvest::html_text(title_node, trim = TRUE)
if (grepl("^Search results", title_text)) {
ul_element <- rvest::html_elements(search, "#content > ul:nth-child(6)")
if (length(ul_element) == 0) return(NULL)
li_nodes <- xml2::xml_children(ul_element[[1]])
species <- sapply(li_nodes, function(node) {
rvest::html_text(rvest::html_element(xml2::xml_child(node, 1), "em"), trim = TRUE)
})
}
RDB <- paste(species, collapse = "; ")
status <- "ambiguous"
url <- result
} else {
fuzzy <- agrep(species_name, letsRept::allReptiles$species, max.distance = 0.1, value = TRUE)
if(length(fuzzy) == 0){
RDB <- result
status <- "not_found"
url <- result
}else if(length(fuzzy)==1){
RDB <- fuzzy
status <- "updated_typo"
url <- result
}else{
RDB <- paste(fuzzy, collapse = "; ")
status <- "fuzzy_ambiguous"
url <- result
}
}
data.frame(query = species_name, RDB = RDB, status = status, url = url, stringsAsFactors = FALSE)
}
# Run in parallel using your safeParallel() function
results <- safeParallel(x, FUN = worker, cores = cores, showProgress = showProgress)
# Combine all individual data frames into one
df <- do.call(rbind, results)
if(solveAmbiguity){
synSample <- df[df$status == "ambiguous", c("query", "url")]
if(showProgress && nrow(synSample) >=1 ) message(sprintf("Sampling synonyms to approach ambiguity for %d species", nrow(synSample)))
if (nrow(synSample) > 0) {
ambiguity_results <- safeParallel(seq_len(nrow(synSample)), FUN = function(i) {
# For each species, resolve ambiguity using reptSynonyms
spp_syn <- reptSynonyms(reptSpecies(synSample$url[i], getLink = TRUE, showProgress = FALSE, cores = cores), cores = cores, showProgress = FALSE)
synonyms <- spp_syn$species[synSample$query[i] == spp_syn$synonyms]
if (length(synonyms) == 1) {
RDB_new <- synonyms
status_new <- "updated"
} else if (length(synonyms) > 1) {
RDB_new <- paste(synonyms, collapse = "; ")
status_new <- "ambiguous"
} else {
fuzzy <- agrep(synSample$query[i], reptSpecies(synSample$url[i], getLink = FALSE, showProgress = FALSE, cores = cores), max.distance = 0.1, value = TRUE)
if(length(fuzzy) == 0){
RDB_new <- paste(reptSpecies(synSample$url[i], getLink = FALSE, showProgress = FALSE, cores = cores), collapse = "; ")
status_new <- "ambiguous"
}else if(length(fuzzy)==1){
RDB_new <- fuzzy
status_new <- "updated_typo"
}else{
RDB_new <- paste(fuzzy, collapse = "; ")
status_new<- "fuzzy_ambiguous"
}
}
list(query = synSample$query[i], RDB_new = RDB_new, status_new = status_new)
}, cores = cores, showProgress = showProgress)
# Combine ambiguity results into a data frame
ambiguity_df <- do.call(rbind, lapply(ambiguity_results, function(res) {
data.frame(query = res$query, RDB = res$RDB_new, status = res$status_new, stringsAsFactors = FALSE)
}))
# Update the main dataframe with the resolved results
for (i in 1:nrow(ambiguity_df)) {
df$RDB[df$query == ambiguity_df$query[i]] <- ambiguity_df$RDB[i]
df$status[df$query == ambiguity_df$query[i]] <- ambiguity_df$status[i]
}
}
df$status[df$RDB %in% names(which(table(df$RDB)[!names(table(df$RDB)) %in% c("ambiguous", "not_found")] >=2))] <- "merge"
if(getLink){
return(df)
}else{
df <- df[,-4]
return(df)
}
}else{
df$status[df$RDB %in% names(which(table(df$RDB)[!names(table(df$RDB)) %in% c("ambiguous", "not_found")] >=2))] <- "merge"
if(getLink){
return(df)
}else{
df <- df[,-4]
return(df)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.