R/capture_occurrences.R

Defines functions capture_occurrences_glimpse capture_occurrences_doi capture_occurrences_la capture_occurrences_gbif capture_occurrences

#' Internal function to convert `data_request` with `type = "occurrences"` to a `query`
#'
#' Aborts when none of `filter`, `identify` or `geolocate` is set on the
#' request; otherwise dispatches on the configured atlas region.
#' @param .query A request object; its `filter`, `identify` and `geolocate`
#'   entries are inspected here.
#' @param ... Passed on to `capture_occurrences_gbif()` when the region is
#'   `"Global"`, or to `capture_occurrences_la()` for any other region.
#' @param error_call Environment reported as the origin of the error.
#' @return Whatever the selected helper function returns.
#' @noRd
#' @keywords Internal
capture_occurrences <- function(.query,
                                 ...,
                                 error_call = rlang::caller_env()){
  # `&&` rather than `&`: each test is a length-one logical, so the scalar,
  # short-circuiting operator is the appropriate one inside `if()`
  if(is.null(.query$filter) &&
     is.null(.query$identify) &&
     is.null(.query$geolocate)){
    cli::cli_abort("No filters supplied to `capture()` with `type = \"occurrences\"`",
                   call = error_call)
  }
  switch(potions::pour("atlas", "region"),
         "Global" = capture_occurrences_gbif(.query, ...),
         capture_occurrences_la(.query, ...))
}

#' calculate the query to be returned for GBIF
#'
#' Builds a list describing a `"data/occurrences"` request (url, headers,
#' authentication options and body) and passes it to `as_prequery()`.
#' @param .query A request object; its `filter`, `identify` and `geolocate`
#'   entries are copied into the request body.
#' @param format Download format placed in the request body. Defaults to
#'   `"SIMPLE_CSV"`.
#' @param ... Currently unused; accepted so that callers can forward extra
#'   arguments without error.
#' @return The result of `as_prequery()` applied to the request list.
#' @noRd
#' @keywords Internal
capture_occurrences_gbif <- function(.query, 
                                      format = "SIMPLE_CSV", 
                                      ...){
  # get user string
  username <- potions::pour("user", "username", .pkg = "galah")
  password <- potions::pour("user", "password", .pkg = "galah")
  user_string <- glue::glue("{username}:{password}")
  
  # build object
  list(type = "data/occurrences",
       url = url_lookup("data/occurrences"),
       headers =  list(
         `User-Agent` = galah_version_string(), 
         `X-USER-AGENT` = galah_version_string(),
         `Content-Type` = "application/json",
         Accept = "application/json"),
       # NOTE(review): `httpauth = 1` presumably selects HTTP basic
       # authentication for the `username:password` string - confirm
       options = list(
         httpauth = 1,
         userpwd = user_string),
       body = list(filter = .query$filter, 
                   identify = .query$identify,
                   geolocate = .query$geolocate,
                   # honour the `format` argument (previously a hard-coded
                   # "SIMPLE_CSV" made the argument a no-op)
                   format = format)) |>
    as_prequery()
}

#' Build the occurrence-download prequery for a living atlas
#'
#' Assembles the url query parameters, attaches them to the
#' `"data/occurrences"` url, and wraps the result with `as_prequery()`.
#' @param .query An object of class `data_request()`
#' @param mint_doi Forwarded to `add_doi_request()`; defaults to `FALSE`
#' @noRd
#' @keywords Internal
capture_occurrences_la <- function(.query,
                                    mint_doi = FALSE){
  # assemble the query parameters
  base_params <- build_query(identify = .query$identify,
                             filter = .query$filter,
                             location = .query$geolocate,
                             apply_profile = .query$apply_profile)
  source_type <- source_type_id_lookup(potions::pour("atlas", "region"))
  download_reason <- potions::pour("user", "download_reason_id")
  params <- c(base_params,
              fields = "`SELECT_PLACEHOLDER`",
              qa = "`ASSERTIONS_PLACEHOLDER`",
              facet = "false",
              sourceTypeId = source_type,
              reasonTypeId = download_reason,
              dwcHeaders = "true")

  # add email and DOI arguments
  params <- add_email_notify(params)
  params <- add_email_address(params, query = .query)
  params <- add_doi_request(params, mint_doi = mint_doi)

  # attach the parameters to the download url
  target <- httr2::url_parse(url_lookup("data/occurrences"))
  target$query <- params

  # wrap up as a prequery
  as_prequery(list(type = "data/occurrences",
                   url = httr2::url_build(target),
                   headers = build_headers()))
}

#' Internal function to convert `data_request` with `type = "doi"` to a `query`
#'
#' Validates that a DOI filter has been supplied and that the selected atlas
#' is ALA or GBIF, strips any `http(s)://doi.org/` prefix from the DOI, and
#' builds a `"data/occurrences-doi"` query.
#' @param .query A request object. `.query$filter` must name `doi` as its
#'   field (`key` for objects of class `predicates_filter`, `variable`
#'   otherwise; compared case-insensitively), with the DOI itself as the first
#'   entry of `.query$filter$value`.
#' @param error_call Environment reported as the origin of any error.
#' @return The result of `as_query()` applied to the request list.
#' @noRd
#' @keywords Internal
capture_occurrences_doi <- function(.query, 
                                    error_call = rlang::caller_env()){
  doi_hint <- "A DOI must be specified using `filter(doi == \"my-doi-here\")`."

  # check for a doi filter
  if(is.null(.query$filter)){
    cli::cli_abort(doi_hint, call = error_call)
  }
  
  # check that doi is supplied 'correctly'
  # the field name lives in `key` for predicate filters, `variable` otherwise
  filter_field <- if(inherits(.query$filter, "predicates_filter")){
    .query$filter$key
  }else{
    .query$filter$variable
  }
  # `%in%` never returns NA and copes with a NULL (absent) field name
  if(!("doi" %in% tolower(filter_field))){
    cli::cli_abort(doi_hint, call = error_call)
  }
  
  # check atlas is ok
  atlas <- potions::pour("atlas", "acronym")
  if(!(atlas %in% c("ALA", "GBIF"))){
    c(
      "DOI downloads not supported by selected atlas.",
      i = "`request_data(type = \"occurrences-doi\")` has only been implemented for ALA & GBIF") |>
    cli::cli_abort(call = error_call)    
  }
  
  # get doi
  doi <- .query$filter$value[[1]]
  # a usable DOI is a single, non-missing value
  if(length(doi) != 1L || is.na(doi)){
    cli::cli_abort(doi_hint, call = error_call)
  }
  
  # remove "https://" if present (dots escaped so only `doi.org` matches)
  doi <- stringr::str_remove(doi, "^https?://doi\\.org/")
  
  # extract useful part of DOI
  if(atlas == "ALA"){
    # split on the literal text "ala." and keep everything after its first
    # occurrence; `n = 2` prevents a later "ala." from truncating the suffix
    doi <- stringr::str_split(doi, stringr::fixed("ala."), n = 2)[[1]][2]
    if(is.na(doi) || !nzchar(doi)){
      c("DOI has not been generated by the ALA.",
        i = "DOIs created by the ALA have a prefix of 10.26197/ala.") |>
      cli::cli_abort(call = error_call)
    }
  }
  
  # build an object
  list(type = "data/occurrences-doi",
       url = url_lookup("data/occurrences-doi", 
                        doi_string = doi),
       headers = build_headers(),
       download = TRUE) |>
  as_query()
}

#' Internal function to convert `data_request` with `type = "occurrences-glimpse"` to a `query`
#'
#' For GBIF, starts from `capture_occurrences_count()` and sets a body `limit`
#' of 3. Otherwise starts from `capture_occurrences_la()`, swaps the url path
#' for that of the count API and adds a `pageSize` of 3.
#' @param .query Request object handed on to the helper functions.
#' @noRd
#' @keywords Internal
capture_occurrences_glimpse <- function(.query){
  n_records <- 3
  if(is_gbif()){
    prequery <- capture_occurrences_count(.query)
    prequery$body$limit <- n_records
  }else{
    prequery <- capture_occurrences_la(.query)
    target <- httr2::url_parse(prequery$url)

    # replace path with count API
    count_url <- httr2::url_parse(url_lookup("data/occurrences-count"))
    target$path <- purrr::pluck(count_url, "path")

    # add a pageSize arg
    target$query$pageSize <- n_records

    # rebuild the url
    prequery$url <- httr2::url_build(target)
  }

  # relabel and ship
  prequery$type <- "data/occurrences-glimpse"
  as_prequery(prequery)
}

Try the galah package in your browser

Any scripts or data that you put into this service are public.

galah documentation built on Feb. 11, 2026, 9:11 a.m.