Nothing
#' Download iNaturalist data
#'
#' @description Primary function to retrieve observations from iNaturalist, allows users to search
#' for data, or just filter results by a subset of what is offered by the API.
#'
#' @param query Query string for a general search.
#' @param quality The quality grade to be used. Must be either "casual" or "research". If left
#' blank both will be returned.
#' @param taxon_name Filter by iNat taxon name. Note that this will also select observations of
#' descendant taxa. Note that names are not unique, so if the name matches multiple taxa, no
#' observations may be returned.
#' @param taxon_id Filter by iNat taxon ID. Note that this will also select observations of descendant taxa.
#' @param place_id Filter by iNat place ID.
#' @param geo Flag for returning only results that are georeferenced. TRUE will exclude
#' non-georeferenced results; FALSE or NULL applies no filter, so georeferenced results cannot be excluded.
#' @param annotation Filter by annotation. Vector of length 2, the first
#' element being the term ID (e.g. "1" for life stage) and the second
#' element being the value ID (e.g. "2" for adult). Refer to the
#' \href{https://forum.inaturalist.org/t/how-to-use-inaturalists-search-urls-wiki-part-2-of-2/18792#heading--annotations}{annotation documentation} to know which values to use.
#' @param year Return observations only in that year (can only be one year, not a range of years).
#' @param month Return observations only by month, must be numeric, 1...12
#' @param day Return observations only on a given day of the month, 1...31
#' @param bounds A bounding box of longitude (-180 to 180) and latitude (-90 to 90) to search
#' within. It is a vector in the form of southern latitude, western longitude, northern latitude,
#' and eastern longitude. Alternatively supply an sf or sp object from which the bounding box will
#' be derived.
#' @param photo_license Filter by photo license. Values can be CC variations ("CC0", "CC-BY", "CC-BY-NC", "CC-BY-SA", "CC-BY-ND", "CC-BY-NC-SA", "CC-BY-NC-ND"), "any" (any CC licence), or "none" (all rights reserved). Note that an observation and its photos don't necessarily have the same licence.
#' @param maxresults The maximum number of results to return. Should not be
#' a number higher than 10000.
#' @param meta (logical) If TRUE, the output of this function is a list with metadata on the output
#' and a data.frame of the data. If FALSE (default), just the data.frame.
#' @note Filtering doesn't always work with the query parameter for some reason (a problem on
#' the API end). If you want to filter by time, it's best to use the scientific name and put it
#' in the \code{taxon_name} argument, and not in the \code{query} argument. Another issue is that
#' the query parameter will search the entire entry, so it is possible to get unintended results.
#' Depending on your use case it may be advisable to use \code{taxon_name} instead of \code{query}.
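#' @return A data.frame of observations with at most \code{maxresults} rows. If \code{meta = TRUE}, a list with elements \code{meta} (the number of results found and returned) and \code{data} (the data.frame).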
#' @examples \dontrun{
#' ## Make a standard query
#' get_inat_obs(query = "Monarch Butterfly")
#'
#' ## Restrict to juveniles thanks to annotations
#' get_inat_obs(query = "possum", annotation = c(1, 8))
#'
#' ## Filter by a bounding box of Northern California
#' bounds <- c(38.44047, -125, 40.86652, -121.837)
#' get_inat_obs(query = "Mule Deer", bounds = bounds)
#'
#' ## Filter by taxon, allows higher order filtering,
#' ## Here we can search for just stone flies (order Plecoptera)
#' get_inat_obs(taxon_name = "Plecoptera")
#'
#' ## Get metadata (the number of results found on the server)
#' out <- get_inat_obs(query = "Monarch Butterfly", meta = TRUE)
#' out$meta
#' }
#' @import httr
#' @importFrom utils read.csv
#' @importFrom curl has_internet
#' @export
get_inat_obs <- function(query = NULL, taxon_name = NULL, taxon_id = NULL,
                         place_id = NULL, quality = NULL, geo = NULL,
                         annotation = NULL, year = NULL, month = NULL,
                         day = NULL, bounds = NULL,
                         photo_license = NULL,
                         maxresults = 100, meta = FALSE) {
  # Overview: build an iNaturalist search string from the supplied filters,
  # ask the server how many observations match, then download the matches as
  # CSV in pages of 200 rows and combine them into one data.frame.

  # Validate a single month/day value and return it as a number.
  # The length check comes first so that a multi-value input produces the
  # intended message instead of an `if` condition-length error.
  check_date_part <- function(value, label, upper) {
    if (length(value) != 1L) {
      stop(sprintf(
        "You can only filter results by one %s, please enter only one value for %s.",
        label, label
      ))
    }
    value <- suppressWarnings(as.numeric(value))
    if (is.na(value)) {
      stop(sprintf(
        "Please enter a %s as a number between 1 and %d, not as a word.",
        label, upper
      ))
    }
    if (value < 1 || value > upper) {
      stop(sprintf("Please enter a valid %s between 1 and %d", label, upper))
    }
    value
  }

  # check Internet connection
  if (!curl::has_internet()) {
    message("No Internet connection.")
    return(invisible(NULL))
  }

  base_url <- "http://www.inaturalist.org/"
  # check that iNat can be reached
  if (httr::http_error(base_url)) { # TRUE: 400 or above
    message("iNaturalist API is unavailable.")
    return(invisible(NULL))
  }

  ## Parsing and error-handling of input strings
  # Note: annotation and photo_license on their own do not count as a search
  # parameter in this check.
  arg_list <- list(
    query, taxon_name, taxon_id, place_id, quality, geo,
    year, month, day, bounds
  )
  if (all(vapply(arg_list, is.null, logical(1)))) {
    stop("All search parameters NULL. Please provide at least one.")
  }

  search <- ""
  if (!is.null(query)) {
    search <- paste0(search, "&q=", gsub(" ", "+", query))
  }
  if (!is.null(quality)) {
    # exact match; a regex match would let fragments such as "c" through
    if (length(quality) != 1L || !quality %in% c("casual", "research")) {
      stop("Please enter a valid quality flag, 'casual' or 'research'.")
    }
    search <- paste0(search, "&quality_grade=", quality)
  }
  if (!is.null(taxon_name)) {
    search <- paste0(search, "&taxon_name=", gsub(" ", "+", taxon_name))
  }
  if (!is.null(taxon_id)) {
    search <- paste0(search, "&taxon_id=", gsub(" ", "+", taxon_id))
  }
  if (!is.null(place_id)) {
    search <- paste0(search, "&place_id=", gsub(" ", "+", place_id))
  }
  if (!is.null(photo_license)) {
    search <- paste0(search, "&photo_license=", gsub(" ", "+", photo_license))
  }
  # only geo = TRUE adds a filter; FALSE / NULL leave the search unchanged
  if (isTRUE(geo)) {
    search <- paste0(search, "&has[]=geo")
  }
  if (!is.null(annotation)) {
    if (length(annotation) != 2) {
      stop("annotation needs to be a vector of length 2.")
    }
    # convert to char as could be a vector of two integers
    annotation <- as.character(annotation)
    if (grepl("\\D", annotation[1])) {
      stop("The annotation's term ID can only contain digits.")
    }
    if (grepl("\\D", annotation[2])) {
      stop("The annotation's value ID can only contain digits.")
    }
    search <- paste0(
      search, "&term_id=", annotation[1],
      "&term_value_id=", annotation[2]
    )
  }
  if (!is.null(year)) {
    if (length(year) > 1) {
      stop("You can only filter results by one year, please enter only one value for year.")
    }
    search <- paste0(search, "&year=", year)
  }
  if (!is.null(month)) {
    month <- check_date_part(month, "month", 12L)
    search <- paste0(search, "&month=", month)
  }
  if (!is.null(day)) {
    day <- check_date_part(day, "day", 31L)
    search <- paste0(search, "&day=", day)
  }
  if (!is.null(bounds)) {
    if (inherits(bounds, c("sf", "sfc"))) {
      # st_bbox() is ordered xmin, ymin, xmax, ymax; the API wants
      # south, west, north, east
      bounds <- as.numeric(sf::st_bbox(bounds))[c(2, 1, 4, 3)]
    } else if (inherits(bounds, "Spatial")) {
      # sp::bbox() is a 2 x 2 matrix: rows x / y, columns min / max
      bounds_prep <- sp::bbox(bounds)
      bounds <- c(
        bounds_prep[2, 1], bounds_prep[1, 1],
        bounds_prep[2, 2], bounds_prep[1, 2]
      )
    }
    if (length(bounds) != 4) {
      stop("Bounding box specifications must have 4 coordinates.")
    }
    search <- paste0(
      search, "&swlat=", bounds[1], "&swlng=", bounds[2],
      "&nelat=", bounds[3], "&nelng=", bounds[4]
    )
  }
  if (maxresults > 10000) {
    stop("Please provide a maxresults value <= 10000.")
  }

  q_path <- "observations.csv"
  ping_path <- "observations.json"
  page_size <- 200

  ### Make the first ping to the server to get the number of results
  ### easier to pull down if you make the query in json, but easier to arrange
  ### results that come down in CSV format
  ping <- httr::GET(
    base_url,
    path = ping_path,
    query = paste0(search, "&per_page=1&page=1")
  )
  total_res <- suppressWarnings(as.numeric(ping$headers$`x-total-entries`))
  if (length(total_res) != 1L || is.na(total_res)) {
    # header missing or unparsable: fail with a readable message
    stop("Could not determine the number of results from the iNaturalist server response.")
  }
  if (total_res == 0) {
    stop("Your search returned zero results. Either your species of interest has no records or you entered an invalid search.")
  }
  if (total_res >= 200000 || (!is.null(bounds) && total_res >= 100000)) {
    stop("Your search returned too many results, please consider breaking it up into smaller chunks by year or month.")
  }

  if (total_res < maxresults) maxresults <- total_res

  # Always fetch at least the first page; collect pages in a list and bind
  # once at the end rather than growing a data.frame inside the loop.
  n_pages <- max(1, ceiling(maxresults / page_size))
  pages <- vector("list", n_pages)
  for (i in seq_len(n_pages)) {
    page_query <- paste0(search, "&per_page=", page_size, "&page=", i)
    page_text <- inat_handle(httr::GET(base_url, path = q_path, query = page_query))
    # inat_handle() warns and yields a non-character value on failure;
    # such pages are skipped
    if (is.character(page_text) && length(page_text) == 1L && !is.na(page_text)) {
      # `text =` lets read.csv() open and close its own connection
      pages[[i]] <- read.csv(text = page_text, stringsAsFactors = FALSE)
    }
  }
  pages <- Filter(Negate(is.null), pages)

  # NA signals that no page could be downloaded
  data_out <- if (length(pages) > 0) do.call(rbind, pages) else NA

  if (is.data.frame(data_out) && maxresults < nrow(data_out)) {
    data_out <- data_out[seq_len(maxresults), , drop = FALSE]
  }

  if (meta) {
    n_returned <- if (is.data.frame(data_out)) nrow(data_out) else 0L
    return(list(
      meta = list(found = total_res, returned = n_returned),
      data = data_out
    ))
  }
  data_out
}
# Extract the CSV text from an iNaturalist HTTP response.
#
# x: a response object providing `headers`, `status_code` and a body that
#    `content()` can read as text.
#
# Returns the body as a single character string. If the content type is not
# 'text/csv; charset=utf-8', the HTTP status is above 202, or the body is
# empty, one warning is raised per detected problem and NA is returned.
inat_handle <- function(x) {
  expected_type <- "text/csv; charset=utf-8"
  res <- content(x, as = "text")

  # identical() also copes with a missing content-type header (NULL)
  wrong_type <- !identical(x$headers$`content-type`, expected_type)
  bad_status <- isTRUE(x$status_code > 202)
  no_data <- length(res) != 1L || is.na(res) || !nzchar(res)

  if (wrong_type) {
    warning("Content type incorrect, should be 'text/csv; charset=utf-8'")
  }
  if (bad_status) {
    warning(sprintf("Error: HTTP Status %s", x$status_code))
  }
  if (no_data) {
    warning("No data found")
  }

  # Return NA explicitly on every failure path; relying on the value of the
  # last `if` statement would yield NULL for some failures.
  if (wrong_type || bad_status || no_data) {
    return(NA)
  }
  res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.