#' Get taxa information from GBIF
#'
#' This function retrieves taxa information from GBIF. It is a higher level function built
#' on rgbif functions `name_usage()` and `name_lookup()`.
#' @param taxon_keys (single numeric or character or a vector) a single key or a
#' vector of keys. Not to use together with `checklist_keys`.
#' @param checklist_keys (single character or a vector) a datasetKey (character)
#' or a vector of datasetkeys. Not to use together with `taxon_keys`.
#' @param origin (single character or a vector) filter by origin.
#' It can take multiple values, which are treated as OR (e.g., a or b or c).
#' To be used only in combination with `checklist_keys`. Ignored otherwise.
#' @param limit With taxon_keys: limit number of taxa.
#' With checklist_keys: limit number of taxa per each dataset.
#' A warning is given if limit is higher than the length of taxon_keys or number of records
#' in the checklist_keys (if string) or any of the checklist_keys (if vector)
#' @return A data.frame with all returned attributes for any taxa
#' @export
#' @importFrom dplyr %>% .data
#' @examples
#' \dontrun{
#' # A single numeric taxon_keys
#' gbif_get_taxa(taxon_keys = 1)
#' # A single character taxon_keys
#' gbif_get_taxa(taxon_keys = "1")
#' # Multiple numeric taxon_keys (vector)
#' gbif_get_taxa(taxon_keys = c(1, 2, 3, 4, 5, 6))
#' # Multiple character taxon_keys (vector)
#' gbif_get_taxa(taxon_keys = c("1", "2", "3", "4", "5", "6"))
#' # Limit number of taxa (coupled with taxon_keys)
#' gbif_get_taxa(taxon_keys = c(1, 2, 3, 4, 5, 6), limit = 3)
#' # A single checklist_keys (character)
#' gbif_get_taxa(checklist_keys = "b3fa7329-a002-4243-a7a7-cd066092c9a6")
#' # Multiple checklist_keys (vector)
#' gbif_get_taxa(checklist_keys = c(
#' "e4746398-f7c4-47a1-a474-ae80a4f18e92",
#' "b3fa7329-a002-4243-a7a7-cd066092c9a6"
#' ))
#' # Limit number of taxa (coupled with checklist_keys)
#' gbif_get_taxa(
#' checklist_keys = c(
#' "e4746398-f7c4-47a1-a474-ae80a4f18e92",
#' "b3fa7329-a002-4243-a7a7-cd066092c9a6"
#' ),
#' limit = 30
#' )
#' # Filter by origin
#' gbif_get_taxa(
#' checklist_keys = "9ff7d317-609b-4c08-bd86-3bc404b77c42",
#' origin = "source", limit = 3000
#' )
#' gbif_get_taxa(
#' checklist_keys = "9ff7d317-609b-4c08-bd86-3bc404b77c42",
#' origin = c("source", "denormed_classification"), limit = 3000
#' )
#' }
gbif_get_taxa <- function(taxon_keys = NULL,
                          checklist_keys = NULL,
                          origin = NULL,
                          limit = NULL) {
  # Upper bound on the number of records, as quoted in the messages below.
  api_max <- 99999

  # Exactly one of taxon_keys / checklist_keys has to be supplied. Without
  # this guard a call with neither argument passed all checks and only failed
  # at the very end with an unrelated "object not found" error.
  if (is.null(taxon_keys) && is.null(checklist_keys)) {
    stop(
      "Both taxon_keys and checklist_keys are NULL. ",
      "You should provide one of the two!",
      call. = FALSE
    )
  }
  assertthat::assert_that(
    is.null(taxon_keys) || is.null(checklist_keys),
    msg = paste(
      "Both taxon_keys and checklist_keys not NULL.",
      "You should choose one of the two!"
    )
  )

  # test argument taxon_keys
  if (!is.null(taxon_keys)) {
    assertthat::assert_that(
      is.numeric(taxon_keys) || is.character(taxon_keys),
      msg = "taxon_keys should be a numeric, character or a vector."
    )
    # An empty vector would otherwise be turned into a single NA key.
    assertthat::assert_that(
      length(taxon_keys) > 0,
      msg = "taxon_keys should contain at least one key."
    )
  }

  # test argument checklist_keys
  if (!is.null(checklist_keys)) {
    assertthat::assert_that(
      is.character(checklist_keys),
      msg = "checklist_keys should be a character or a vector."
    )
  }

  # test limit
  if (!is.null(limit)) {
    assertthat::assert_that(
      is.numeric(limit),
      msg = "Limit has to be numeric."
    )
    assertthat::assert_that(
      limit > 0,
      msg = "Limit has to be a positive number."
    )
  }

  # test number of taxa requested from checklists
  if (!is.null(checklist_keys) && !is.null(limit)) {
    assertthat::assert_that(
      limit <= api_max,
      msg = "Too many keys. API maximum is 99999."
    )
    # limit applies per dataset, so the total can exceed the bound even when
    # each single request stays below it.
    if (limit * length(checklist_keys) > api_max) {
      warning(paste(
        "Attention: if all datasets contain at least as many taxa",
        "as limit, you are querying too many taxa.",
        "API maximum is 99999."
      ))
    }
  }

  # test origin and normalise it to lower case; stays NULL when not supplied
  origins <- NULL
  if (!is.null(origin)) {
    assertthat::assert_that(
      is.character(origin),
      msg = "origin must be a character or a vector."
    )
    origins <- tolower(origin)
    if (!is.null(taxon_keys)) {
      warning("origin argument ignored if used in combination with taxon_keys.")
    }
  }

  if (!is.null(taxon_keys)) {
    # working with taxon_keys: one name_usage() request per key
    n_keys <- length(taxon_keys)
    if (!is.null(limit)) {
      if (limit > n_keys) {
        warning("Limit is higher than number of taxon keys.")
      } else {
        n_keys <- limit
      }
    }
    # floor()/max() keep the result of the former `1:limit` indexing for
    # fractional limits; seq_len() avoids the `1:0` pitfall.
    keys <- as.integer(taxon_keys[seq_len(max(1, floor(n_keys)))])
    taxa <- purrr::map_dfr(keys, ~ rgbif::name_usage(key = .)$data)
    taxa <- taxa %>%
      dplyr::ungroup() %>%
      dplyr::mutate(origin = tolower(.data$origin))
    # NOTE(review): despite the warning above and the roxygen text, origin is
    # still applied as a filter here. Kept as is to preserve existing
    # behaviour - confirm which of the two is intended.
    if (!is.null(origins)) {
      taxa <- taxa %>% dplyr::filter(.data$origin %in% origins)
    }
  } else {
    # working with checklist_keys: one name_lookup() request per dataset
    maxlimit <- if (is.null(limit)) api_max else limit
    # origin = NULL is the name_lookup() default, so a single call covers
    # both the filtered and the unfiltered case.
    taxa <- purrr::map_dfr(
      checklist_keys,
      ~ rgbif::name_lookup(
        datasetKey = .,
        origin = origins,
        limit = maxlimit
      )$data
    )
    taxa <- taxa %>%
      dplyr::ungroup() %>%
      dplyr::mutate(origin = tolower(.data$origin))
    # Fewer rows than limit * number of datasets means at least one dataset
    # returned fewer records than asked for.
    if (!is.null(limit) &&
      nrow(taxa) < maxlimit * length(checklist_keys)) {
      if (length(checklist_keys) > 1) {
        warning("One of the datasets contains less records than limit.")
      } else {
        warning("Dataset contains less records than limit.")
      }
    }
  }

  # GBIF Backbone matching: taxa without nubKey are counted as unmatched
  number_key <- nrow(taxa)
  number_no_nubkey <- taxa %>%
    dplyr::filter(is.na(.data$nubKey)) %>%
    nrow()

  # print GBIF Backbone matching on screen
  if (number_no_nubkey == 0) {
    print(paste("All", number_key, "taxa match GBIF Backbone."))
  } else {
    print(paste(
      number_key, "taxa found of which", number_no_nubkey,
      "could not be matched to GBIF Backbone."
    ))
  }

  taxa
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.