#' Legacy alternative to occ_search
#'
#' @param taxonKey (numeric) A taxon key from the GBIF backbone. All included
#' and synonym taxa are included in the search, so a search for aves with
#' taxononKey=212 will match all birds, no matter which species. You can pass
#' many keys to \code{occ_search(taxonKey=c(1,212))}.
#' @param scientificName A scientific name from the GBIF backbone. All included
#' and synonym taxa are included in the search.
#' @param country (character) The 2-letter country code (ISO-3166-1)
#' in which the occurrence was recorded. \code{enumeration_country()}.
#' @param datasetKey (character) The occurrence dataset uuid key. That can be
#' found in the dataset page url. For example, "7e380070-f762-11e1-a439-00145
#' eb45e9a" is the key for [Natural History Museum (London) Collection Specimens](https://www.gbif.org/dataset/7e380070-f762-11e1-a439-00145eb45e9a).
#' @param eventDate (character) Occurrence date in ISO 8601 format: yyyy,
#' yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger'
#' ('1990,1991', whereas '1991,1990' wouldn't work).
#' @param catalogNumber (character) An identifier of any form assigned by the
#' source within a physical collection or digital dataset for the record which
#' may not unique, but should be fairly unique in combination with the
#' institution and collection code.
#' @param recordedBy (character) The person who recorded the occurrence.
#' @param recordedByID (character) Identifier (e.g. ORCID) for the person who
#' recorded the occurrence
#' @param identifiedByID (character) Identifier (e.g. ORCID) for the person who
#' provided the taxonomic identification of the occurrence.
#' @param collectionCode (character) An identifier of any form assigned by the
#' source to identify the physical collection or digital dataset uniquely within
#' the text of an institution.
#' @param institutionCode An identifier of any form assigned by the source to
#' identify the institution the record belongs to.
#' @param basisOfRecord (character) The specific nature of the data record. See
#' [here](https://gbif.github.io/parsers/apidocs/org/gbif/api/vocabulary/BasisOfRecord.html).
#'
#' \itemize{
#' \item "FOSSIL_SPECIMEN"
#' \item "HUMAN_OBSERVATION"
#' \item "MATERIAL_CITATION"
#' \item "MATERIAL_SAMPLE"
#' \item "LIVING_SPECIMEN"
#' \item "MACHINE_OBSERVATION"
#' \item "OBSERVATION"
#' \item "PRESERVED_SPECIMEN"
#' \item "OCCURRENCE"
#' }
#' @param year The 4 digit year. A year of 98 will be interpreted as AD 98.
#' Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas 1991,
#' 1990' wouldn't work).
#' @param month The month of the year, starting with 1 for January. Supports
#' range queries, 'smaller,larger' (e.g., '1,2', whereas '2,1' wouldn't work).
#' @param search (character) Query terms. The value for this parameter can be a
#' simple word or a phrase. For example, [search="puma"](https://www.gbif.org/occurrence/search?q=puma)
#' @param decimalLatitude Latitude in decimals between -90 and 90 based on
#' WGS84. Supports range queries, 'smaller,larger' (e.g., '25,30', whereas
#' '30,25' wouldn't work).
#' @param decimalLongitude Longitude in decimals between -180 and 180 based on
#' WGS84. Supports range queries (e.g., '-0.4,-0.2', whereas '-0.2,-0.4'
#' wouldn't work).
#' @param publishingCountry The 2-letter country code (as per ISO-3166-1) of
#' the country in which the occurrence was recorded. See
#' \code{enumeration_country()}.
#' @param elevation Elevation in meters above sea level. Supports range
#' queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work).
#' @param depth Depth in meters relative to elevation. For example 10 meters
#' below a lake surface with given elevation. Supports range queries,
#' 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work).
#' @param geometry (character) Searches for occurrences inside a polygon in
#' Well Known Text (WKT) format. A WKT shape written as either
#'
#' \itemize{
#' \item "POINT"
#' \item "LINESTRING"
#' \item "LINEARRING"
#' \item "POLYGON"
#' \item "MULTIPOLYGON"
#' }
#'
#' For Example, "POLYGON((37.08 46.86,38.06 46.86,38.06 47.28,37.08 47.28,
#' 37.0 46.8))". See also the section **WKT** below.
#' @param geom_big (character) One"bbox" or "asis" (default).
#' @param geom_size (integer) An integer indicating size of the cell. Default:
#' 40.
#' @param geom_n (integer) An integer indicating number of cells in each
#' dimension. Default: 10.
#' @param hasGeospatialIssue (logical) Includes/excludes occurrence records
#' which contain spatial issues (as determined in our record interpretation),
#' i.e. \code{hasGeospatialIssue=TRUE} returns only those records with spatial
#' issues while \code{hasGeospatialIssue=FALSE} includes only records without
#' spatial issues. The absence of this parameter returns any record with or
#' without spatial issues.
#' @param issue (character) One or more of many possible issues with each
#' occurrence record. Issues passed to this parameter filter results by
#' the issue. One of many [options](https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/OccurrenceIssue.html).
#' See [here](https://data-blog.gbif.org/post/issues-and-flags/) for definitions.
#' @param hasCoordinate (logical) Return only occurrence records with lat/long
#' data (\code{TRUE}) or all records (\code{FALSE}, default).
#' @param typeStatus Type status of the specimen. One of many
#' [options](https://www.gbif.org/occurrence/search?type_status=PARATYPE).
#' @param recordNumber Number recorded by collector of the data, different from
#' GBIF record number.
#' @param lastInterpreted Date the record was last modified in GBIF, in ISO
#' 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries,
#' 'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work).
#' @param continent The source supplied continent.
#'
#' \itemize{
#' \item "africa"
#' \item "antarctica"
#' \item "asia"
#' \item "europe"
#' \item "north_america"
#' \item "oceania"
#' \item "south_america"
#' }
#'
#' Continent is not inferred but only populated if provided by the
#' dataset publisher. Applying this filter may exclude many relevant records.
#' @param mediaType (character) Media type of "MovingImage", "Sound", or
#' "StillImage".
#' @param repatriated (character) Searches for records whose publishing country
#' is different to the country where the record was recorded in.
#' @param kingdomKey (numeric) Kingdom classification key.
#' @param phylumKey (numeric) Phylum classification key.
#' @param classKey (numeric) Class classification key.
#' @param orderKey (numeric) Order classification key.
#' @param familyKey (numeric) Family classification key.
#' @param genusKey (numeric) Genus classification key.
#' @param speciesKey (numeric) Species classification key.
#' @param subgenusKey (numeric) Subgenus classification key.
#' @param establishmentMeans (character) provides information about whether an
#' organism or organisms have been introduced to a given place and time through
#' the direct or indirect activity of modern humans.
#'
#' \itemize{
#' \item "Introduced"
#' \item "Native"
#' \item "NativeReintroduced"
#' \item "Vagrant"
#' \item "Uncertain"
#' \item "IntroducedAssistedColonisation"
#' }
#'
#' @param degreeOfEstablishment (character) Provides information about degree to
#' which an Organism survives, reproduces, and expands its range at the given
#' place and time. One of many [options](https://www.gbif.org/occurrence/search?advanced=1°ree_of_establishment=Managed).
#' @param protocol (character) Protocol or mechanism used to provide the
#' occurrence record. One of many [options](https://www.gbif.org/occurrence/search?protocol=DWC_ARCHIVE&advanced=1).
#' @param license (character) The type license applied to the dataset or record.
#'
#' \itemize{
#' \item "CC0_1_0"
#' \item "CC_BY_4_0"
#' \item "CC_BY_NC_4_0"
#' }
#'
#' @param organismId (numeric) An identifier for the Organism instance (as
#' opposed to a particular digital record of the Organism). May be a globally
#' unique identifier or an identifier specific to the data set.
#' @param publishingOrg (character) The publishing organization key (a UUID).
#' @param stateProvince (character) The name of the next smaller administrative
#' region than country (state, province, canton, department, region, etc.) in
#' which the Location occurs.
#' @param waterBody (character) The name of the water body in which the
#' locations occur
#' @param locality (character) The specific description of the place.
#' @param occurrenceStatus (character) Default is "PRESENT". Specify whether
#' search should return "PRESENT" or "ABSENT" data.
#' @param gadmGid (character) The gadm id of the area occurrences are desired
#' from. https://gadm.org/.
#' @param coordinateUncertaintyInMeters A number or range between 0-1,000,000
#' which specifies the desired coordinate uncertainty. A coordinateUncertainty
#' InMeters=1000 will be interpreted all records with exactly 1000m. Supports
#' range queries, 'smaller,larger' (e.g., '1000,10000', whereas '10000,1000'
#' wouldn't work).
#' @param verbatimScientificName (character) Scientific name as provided by the
#' source.
#' @param verbatimTaxonId (character) The taxon identifier provided to GBIF by
#' the data publisher.
#' @param eventId (character) identifier(s) for a sampling event.
#' @param identifiedBy (character) names of people, groups, or organizations.
#' @param networkKey (character) The occurrence network key (a uuid)
#' who assigned the Taxon to the subject.
#' @param occurrenceId (character) occurrence id from source.
#' @param organismQuantity A number or range which
#' specifies the desired organism quantity. An organismQuantity=5
#' will be interpreted all records with exactly 5. Supports range queries,
#' smaller,larger (e.g., '5,20', whereas '20,5' wouldn't work).
#' @param organismQuantityType (character) The type of quantification system
#' used for the quantity of organisms. For example, "individuals" or "biomass".
#' @param relativeOrganismQuantity (numeric) A relativeOrganismQuantity=0.1 will
#' be interpreted all records with exactly 0.1 The relative measurement of the
#' quantity of the organism (a number between 0-1). Supports range queries,
#' "smaller,larger" (e.g., '0.1,0.5', whereas '0.5,0.1' wouldn't work).
#' @param iucnRedListCategory (character) The IUCN threat status category.
#'
#' \itemize{
#' \item "NE" (Not Evaluated)
#' \item "DD" (Data Deficient)
#' \item "LC" (Least Concern)
#' \item "NT" (Near Threatened)
#' \item "VU" (Vulnerable)
#' \item "EN" (Endangered)
#' \item "CR" (Critically Endangered)
#' \item "EX" (Extinct)
#' \item "EW" (Extinct in the Wild)
#' }
#' @param lifeStage (character) the life stage of the occurrence. One of many
#' [options](https://www.gbif.org/occurrence/search?advanced=1&life_stage=Tadpole).
#' @param isInCluster (logical) identify potentially related records on GBIF.
#' @param distanceFromCentroidInMeters A number or range. A value of "2000,*"
#' means at least 2km from known centroids. A value of "0" would mean occurrences
#' exactly on known centroids. A value of "0,2000" would mean within 2km of
#' centroids. Max value is 5000.
#' @param skip_validate (logical) whether to skip wellknown::validate_wkt call
#' or not. passed down to check_wkt(). Default: TRUE
#' @param limit Number of records to return. Default: 500. Note that the per
#' request maximum is 300, but since we set it at 500 for the function, we
#' do two requests to get you the 500 records (if there are that many).
#' Note that there is a hard maximum of 100,000, which is calculated as the
#' \code{limit+start}, so \code{start=99,000} and \code{limit=2000} won't work
#' @param start Record number to start at. Use in combination with limit to
#' page through results. Note that we do the paging internally for you, but
#' you can manually set the \code{start} parameter
#' @param curlopts (list)
#'
#' @details
#' This function is a legacy alternative to `occ_search()`. It is not
#' recommended to use `occ_data()` as it is not as flexible as `occ_search()`.
#' New search terms will not be added to this function and it is only supported
#' for legacy reasons.
#'
#' @return An object of class `gbif_data`, which is a S3 class list, with
#' slots for metadata (`meta`) and the occurrence data itself
#' (`data`), and with attributes listing the user supplied arguments
#' and whether it was a "single" or "many" search; that is, if you supply
#' two values of the `datasetKey` parameter to searches are done, and
#' it's a "many". `meta` is a list of length four with offset, limit,
#' endOfRecords and count fields. `data` is a tibble (aka data.frame)
#' @export
#'
occ_data <- function(taxonKey=NULL,
scientificName=NULL,
country=NULL,
publishingCountry=NULL,
hasCoordinate=NULL,
typeStatus=NULL,
recordNumber=NULL,
lastInterpreted=NULL,
continent=NULL,
geometry=NULL,
geom_big="asis",
geom_size=40,
geom_n=10,
recordedBy=NULL,
recordedByID=NULL,
identifiedByID=NULL,
basisOfRecord=NULL,
datasetKey=NULL,
eventDate=NULL,
catalogNumber=NULL,
year=NULL,
month=NULL,
decimalLatitude=NULL,
decimalLongitude=NULL,
elevation=NULL,
depth=NULL,
institutionCode=NULL,
collectionCode=NULL,
hasGeospatialIssue=NULL,
issue=NULL,
search=NULL,
mediaType=NULL,
subgenusKey = NULL,
repatriated = NULL,
phylumKey = NULL,
kingdomKey = NULL,
classKey = NULL,
orderKey = NULL,
familyKey = NULL,
genusKey = NULL,
speciesKey = NULL,
establishmentMeans = NULL,
degreeOfEstablishment = NULL,
protocol = NULL,
license = NULL,
organismId = NULL,
publishingOrg = NULL,
stateProvince = NULL,
waterBody = NULL,
locality = NULL,
occurrenceStatus = 'PRESENT',
gadmGid = NULL,
coordinateUncertaintyInMeters = NULL,
verbatimScientificName = NULL,
eventId = NULL,
identifiedBy = NULL,
networkKey = NULL,
verbatimTaxonId = NULL,
occurrenceId = NULL,
organismQuantity = NULL,
organismQuantityType = NULL,
relativeOrganismQuantity = NULL,
iucnRedListCategory = NULL,
lifeStage = NULL,
isInCluster = NULL,
distanceFromCentroidInMeters = NULL,
skip_validate = TRUE,
limit=500,
start=0,
curlopts = list(http_version=2)) {
geometry <- geometry_handler(geometry, geom_big, geom_size, geom_n)
url <- paste0(gbif_base(), '/occurrence/search')
argscoll <- NULL
.get_occ_data <- function(x=NULL, itervar=NULL, curlopts = list()) {
if (!is.null(x)) {
assign(itervar, x)
}
# check that wkt is proper format and of 1 of 4 allowed types
geometry <- check_wkt(geometry, skip_validate = skip_validate)
# check limit and start params
check_vals(limit, "limit")
check_vals(start, "start")
# Make arg list
args <- rgbif_compact(
list(
hasCoordinate = hasCoordinate,
hasGeospatialIssue = hasGeospatialIssue,
occurrenceStatus = occurrenceStatus,
q = search,
repatriated = repatriated,
limit = check_limit(as.integer(limit)),
isInCluster = isInCluster,
offset = check_limit(as.integer(start))
)
)
args <- c(
args,
parse_issues(issue),
convmany(lastInterpreted),
convmany(decimalLatitude),
convmany(decimalLongitude),
convmany(elevation),
convmany(depth),
convmany(eventDate),
convmany(month),
convmany(year),
convmany(coordinateUncertaintyInMeters),
convmany(organismQuantity),
convmany(organismQuantityType),
convmany(relativeOrganismQuantity),
convmany(taxonKey),
convmany(scientificName),
convmany(country),
convmany(publishingCountry),
convmany(datasetKey),
convmany(typeStatus),
convmany(recordNumber),
convmany(continent),
convmany(recordedBy),
convmany(recordedByID),
convmany(identifiedByID),
convmany(catalogNumber),
convmany(institutionCode),
convmany(collectionCode),
convmany(geometry),
convmany(mediaType),
convmany(subgenusKey),
convmany(phylumKey),
convmany(kingdomKey),
convmany(classKey),
convmany(orderKey),
convmany(familyKey),
convmany(genusKey),
convmany(speciesKey),
convmany(establishmentMeans),
convmany(degreeOfEstablishment),
convmany(protocol),
convmany(license),
convmany(organismId),
convmany(publishingOrg),
convmany(stateProvince),
convmany(waterBody),
convmany(locality),
convmany(basisOfRecord),
convmany(gadmGid),
convmany(verbatimScientificName),
convmany(eventId),
convmany(identifiedBy),
convmany(networkKey),
convmany(occurrenceId),
convmany(iucnRedListCategory),
convmany(lifeStage),
convmany(distanceFromCentroidInMeters)
)
argscoll <<- args
if (limit >= 300) {
### loop route for limit>0
iter <- 0
sumreturned <- 0
outout <- list()
while (sumreturned < limit) {
iter <- iter + 1
tt <- gbif_GET(url, args, TRUE, curlopts)
# if no results, assign count var with 0
if (identical(tt$results, list())) tt$count <- 0
numreturned <- NROW(tt$results)
sumreturned <- sumreturned + numreturned
if (tt$count < limit) {
limit <- tt$count
}
if (sumreturned < limit) {
args$limit <- limit - sumreturned
args$offset <- sumreturned + start
}
outout[[iter]] <- tt
}
} else {
### loop route for limit<300
outout <- list(gbif_GET(url, args, TRUE, curlopts))
}
meta <- outout[[length(outout)]][c('offset', 'limit', 'endOfRecords',
'count')]
data <- lapply(outout, "[[", "results")
if (identical(data[[1]], list())) {
data <- NULL
} else {
data <- lapply(data, clean_data)
data <- data.table::setDF(data.table::rbindlist(data, use.names = TRUE,
fill = TRUE))
data <- tibble::as_tibble(prune_result(data))
}
list(meta = meta, data = data)
}
params <- list(
taxonKey=taxonKey,
scientificName=scientificName,
datasetKey=datasetKey,
catalogNumber=catalogNumber,
recordedBy=recordedBy,
recordedByID=recordedByID,
identifiedByID=identifiedByID,
geometry=geometry,
country=country,
publishingCountry=publishingCountry,
recordNumber=recordNumber,
q=search,
institutionCode=institutionCode,
collectionCode=collectionCode,
continent=continent,
decimalLatitude=decimalLatitude,
decimalLongitude=decimalLongitude,
depth=depth,
year=year,
typeStatus=typeStatus,
lastInterpreted=lastInterpreted,
mediaType=mediaType,
subgenusKey=subgenusKey,
repatriated=repatriated,
phylumKey=phylumKey,
kingdomKey=kingdomKey,
classKey=classKey,
orderKey=orderKey,
familyKey=familyKey,
genusKey=genusKey,
speciesKey=speciesKey,
establishmentMeans=establishmentMeans,
protocol=protocol,
license=license,
organismId=organismId,
publishingOrg=publishingOrg,
stateProvince=stateProvince,
waterBody=waterBody,
locality=locality,
limit=limit,
basisOfRecord=basisOfRecord,
occurrenceStatus=occurrenceStatus,
gadmGid=gadmGid,
verbatimScientificName=verbatimScientificName,
eventId=eventId,
identifiedBy=identifiedBy,
networkKey=networkKey,
occurrenceId=occurrenceId,
organismQuantity=organismQuantity,
organismQuantityType=organismQuantityType,
relativeOrganismQuantity=relativeOrganismQuantity,
iucnRedListCategory=iucnRedListCategory,
lifeStage=lifeStage,
coordinateUncertaintyInMeters=coordinateUncertaintyInMeters,
distanceFromCentroidInMeters=distanceFromCentroidInMeters
)
if (!any(sapply(params, length) > 0)) {
stop(sprintf("At least one of the parmaters must have a value:\n%s",
possparams()),
call. = FALSE)
}
iter <- params[which(sapply(params, length) > 1)]
if (length(names(iter)) > 1) {
stop(sprintf("You can have multiple values for only one of:\n%s",
possparams()),
call. = FALSE)
}
if (length(iter) == 0) {
out <- .get_occ_data(curlopts = curlopts)
} else {
out <- lapply(iter[[1]], .get_occ_data, itervar = names(iter),
curlopts = curlopts)
names(out) <- transform_names(iter[[1]])
}
if (any(names(argscoll) %in% names(iter))) {
argscoll[[names(iter)]] <- iter[[names(iter)]]
}
structure(out, args = argscoll, class = "gbif_data",
type = if (length(iter) == 0) "single" else "many")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.