#' Gets occurrence data from speciesLink
#'
#' This function access version beta 0.1 of the
#' [speciesLink API]{https://api.splink.org.br/} and returns occurrence data of
#' species.
#'
#' @param dir Path to directory where the file will be saved. Default is to
#' create a "results/" directory
#' @param filename Name of the output file
#' @param save Logical. Save output to filename? Defaults to FALSE
#' @param basisOfRecord Character. Any in 'PreservedSpecimen', 'LivingSpecimen',
#' 'FossilSpecimen', 'HumanObservation', 'MachineObservation' or
#' 'MaterialSample'. Default is 'PreservedSpecimen' for museum and herbarium
#' search
#' @param family Family name. More than one name should be concatenated in a
#' vector
#' @param species Genus or genus and epithet separated by space. More than one
#' species can be concatenated into a vector. The request cannot be done
#' with more than 50 species at a time. Use lapply, or any sort of loop when
#' dealing with multiple species.
#' @param collectionCode Any collection available at speciesLink. Example: ALCB,
#' E, INPA, MOBOT_BR. Accepts a vector of names
#' @param country Any country name. No ASCII characters allowed. Accepts a
#' vector of names
#' @param county Any municipality name. No ASCII characters allowed. Accepts a
#' vector of names
#' @param stateProvince Any state or province. No ASCII characters allowed.
#' Accepts a vector of names
#' @param Coordinates Specify if records should have coordinates. Default is
#' "no check" but it also accepts "Yes", No", "Original", "Automatic", "Blocked"
#' or "no check"
#' @param CoordinatesQuality Any character in "Good" or "Bad" to select specific
#' type of coordinates
#' @param Scope Group to be required. If NULL searches all groups. Any in
#' "plants", "animals", "microrganisms" or "fossils"
#' @param Synonyms If species names should be checked for synonyms in a specific
#' dictionary. Set to "species2000" for search in Catálogo da Vida species2000,
#' "flora2020" for Flora do Brasil 2020, "MycoBank" for MycoBank, "AlgaeBase"
#' for AlgaeBase, "DSMZ" for DSMZ Prokaryotic Nomenclature Up-to-Date,
#' "Moure" for Catálogo de Abelhas Moure or "no synonyms".
#' @param Typus Logic. If TRUE select only typus
#' @param Images If select only records with images. Default is NULL.
#' It accepts: "Yes", "Live", "Polen", "Wood"
#' @param RedList Logic. If TRUE only species in the IUCN Red List are returned
#' @param MaxRecords Numeric. Maximum number of records to be required
#' @param file.format Character. The file extension to be used for saving ('csv'
#' or 'rds'). Default to 'csv'.
#' @param compress Logical. Should the file be compressed? Default to FALSE.
#'
#' @return A data.frame with the search result, which can also be saved on disk
#'
#' @details The time necessary to download records from the speciesLink API will
#' depend on the number of records and species in the query and from the speed
#' of the internet connection.
#'
#' The speciesLink API does not allow the download of ~50 or more taxa at a
#' time. So to download records from larger lists of species, you will probably
#' need to make the queries in a loop (see Examples).
#'
#' @author Sara Mortara
#'
#' @examples
#'\dontrun{
#' # Example for a single species, saving into a file called "ex01"
#' ex01 <-
#' rspeciesLink(filename = "ex01",
#' species = c("Eugenia platyphylla"),
#' Scope = "plants")
#' # Use lapply or similar for more than 50 species
#' # Making a request for multiple species
#'sp_list <- lapply(sp, function(x) rspeciesLink(species = x,
#' Scope = "plants",
#' basisOfRecord = "PreservedSpecimen",
#' Synonyms = "flora2020"))
#'# Adding species names to each element of the list
#'names(sp_list) = sp
#'# Binding all searchs and keeping a column w/ the named used in the search
#'sp_df <- bind_rows(sp_list, .id = "original_search")
#'}
#'
#' @import data.table
#' @importFrom jsonlite fromJSON
#' @export
rspeciesLink <- function(dir = "results/",
filename = "output",
save = FALSE,
basisOfRecord = NULL,
family = NULL,
species = NULL,
collectionCode = NULL,
country = NULL,
stateProvince = NULL,
county = NULL,
Coordinates = NULL, # Yes | No | Original | Automatic | Blocked
CoordinatesQuality = NULL, #Good | Bad
Scope = NULL, # plants, animals, microrganisms,fossils
Synonyms = "no synomyms", #species2000 | flora2020 | MycoBank | AlgaeBase | DSMZ | Moure no synonyms
Typus = FALSE,
Images = NULL,
RedList = FALSE,
MaxRecords = NULL, # n > 0 all records
file.format = "csv",
compress = FALSE
) { # Yes | No | Live | Polen | Wood
# speciesLink url
my_url <- "https://api.splink.org.br/records/"
# helper function
url_query <- function(vector, name) {
char <- paste(paste0(vector, "/"), collapse = "")
url <- paste0(name, "/", char)
return(url)
}
# basis of record
if (is.null(basisOfRecord)) {
my_url
} else {
if (basisOfRecord %in% c(
'PreservedSpecimen',
'LivingSpecimen',
'FossilSpecimen',
'HumanObservation',
'MachineObservation',
'MaterialSample'
)) {
br <- url_query(basisOfRecord, "basisOfRecord")
my_url <- paste0(my_url, br)
}
}
# Species name
if (is.null(species)) {
my_url
} else {
if (is.character(species)) {
if (length(species) > 50)
stop("Please make request of no more than 50 species at a time!")
species <- gsub(" ", "%20", species)
sp <- url_query(species, "scientificName")
my_url <- paste0(my_url, sp)
} else {
stop("species must be a character")
}
}
# Family name
if (is.null(family)) {
my_url
} else {
if (is.character(family)) {
fam <- url_query(family, "family")
my_url <- paste0(my_url, fam)
}
else {
stop("family name must be a character")
}
}
# Collection code
if (is.null(collectionCode)) {
my_url
} else {
if (is.character(collectionCode)) {
cc <- url_query(collectionCode, "collectionCode")
my_url <- paste0(my_url, cc)
}
}
# country
if (is.null(country)) {
my_url
} else {
if (is.character(country)) {
country <- gsub(" ", "%20", country)
ct <- url_query(country, "country")
my_url <- paste0(my_url, ct)
}
}
# stateProvince
if (is.null(stateProvince)) {
my_url
} else {
if (is.character(stateProvince)) {
stateProvince <- gsub(" ", "%20", stateProvince)
st <- url_query(stateProvince, "stateProvince")
my_url <- paste0(my_url, st)
}
}
# county
if (is.null(county)) {
my_url
} else {
if (is.character(county)) {
county <- gsub(" ", "%20", county)
co <- url_query(county, "county")
my_url <- paste0(my_url, co)
}
}
# Coordinates
if (is.null(Coordinates)) {
my_url
} else {
if (Coordinates %in% c("Yes", "No", "Original", "Automatic", "Blocked")) {
xy <- url_query(Coordinates, "Coordinates")
my_url <- paste0(my_url, xy)
}
}
# Coordinates quality
if (is.null(CoordinatesQuality)) {
my_url
} else {
if (CoordinatesQuality %in% c("Good", "Bad")) {
cq <- url_query(CoordinatesQuality, "CoordinatesQuality")
my_url <- paste0(my_url, cq)
}
}
# Scope
if (is.null(Scope)) {
my_url
} else {
if (Scope %in% c("plants", "animals", "microrganisms", "fossils")) {
sc <- url_query(Scope, "Scope")
my_url <- paste0(my_url, sc)
}
}
# Synonyms
# if (length(species) > 9) {
# stop("Function does not support synonym check of more than nine species")
# } else {
if (is.null(Synonyms)) {
my_url
} else {
if (Synonyms %in% c("species2000", "flora2020",
"MycoBank", "AlgaeBase", "DSMZ")) {
sy <- url_query(Synonyms, "Synonyms")
my_url <- paste0(my_url, sy)
}
}
# }
# Typus
if (Typus == FALSE) {
my_url
} else {
my_url <- paste0(my_url, "Typus/Yes/")
}
# Images # "Yes", "Live", "Polen", "Wood"
if (is.null(Images)) {
my_url
} else {
if (Images %in% c("Yes", "Live", "Polen", "Wood")) {
im <- url_query(Images, "Images")
my_url <- paste0(my_url, im)
}
}
# RedList
if (RedList == FALSE) {
my_url
} else {
my_url <- paste0(my_url, "RedList/Yes/")
}
# MaxRecords
if (is.null(MaxRecords)) {
my_url
} else {
if (is.numeric(MaxRecords)) {
mr <- url_query(MaxRecords, "MaxRecords")
my_url <- paste0(my_url, mr)
}
}
# making request
my_url <- paste0(my_url, "Format/JSON/") #Model/DwC is already default
message("Making request to speciesLink...")
#r <- httr::GET(my_url)
#message("Extracting content ...")
#rr <- httr::content(r, as="parse") # text content
# requesting JSON format
df <- jsonlite::fromJSON(my_url)$result
#rrr <- readr::read_tsv(rr, locale = readr::locale(encoding = "UTF-8"))
if (save) {
# creating dir
if (!dir.exists(dir)) { dir.create(dir) }
# fullname <- paste0(dir, filename, ".csv")
# message(paste0("Writing ", fullname, " on disk."))
# write.csv(df,
# fullname,
# row.names = FALSE)
if (file.format == "csv") {
if (compress) {
fullname <- paste0(dir, filename, ".csv.zip")
message(paste0("Writing ", fullname, " on disk."))
data.table::fwrite(df, file = fullname, compress = "gzip")
} else {
fullname <- paste0(dir, filename, ".csv")
message(paste0("Writing ", fullname, " on disk."))
data.table::fwrite(df, file = fullname)
}
}
if (file.format == "rds") {
fullname <- paste0(dir, filename, ".rds")
message(paste0("Writing ", fullname, " on disk."))
if (compress) {
saveRDS(df, file = fullname, compress = "gzip")
} else {
saveRDS(df, file = fullname, compress = FALSE)
}
}
}
# if output is empty, return message
if (is.null(dim(df))) {
warning("Output is empty. Check your request.")
}
warning("Please make sure that the restrictions and citation indicated by
each speciesLink/CRIA data provider are observed and respected.")
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.