# R/utilities_internal.R
# Function index (generated by the hosting service):
# Defines functions: media_supported, reasons_supported, profiles_supported, species_facets, image_filters, image_fields, default_columns, preset_groups, is_ala, is_gbif, add_doi_request, add_email_address, add_email_notify, source_type_id_lookup, galah_version_string, snake_to_camel_case, gbif_upper_case, camel_to_snake_case, lookup_rename_columns, lookup_select_columns_taxa, lookup_select_columns, parse_arrange, parse_rename, parse_filter, parse_select

##---------------------------------------------------------------
##                 Output formatting functions                 --
##---------------------------------------------------------------

#' Internal function to run `eval_tidy()` on captured `select()` requests.
#' This is the *enactment* phase and is usually called by `collect()`.
#' Critically, this function is *NOT* called by `select()`. This matters because
#' we have to eval `unnest()` before `select()` for it to work, and this can
#' only happen at the end of a pipe.
#' @noRd
#' @keywords Internal
parse_select <- function(df, .query){
  # retrieve the quosures stored earlier by `select()`
  captured_quosures <- purrr::pluck(.query, "request", "select", "quosure")
  # resolve each quosure against `df` separately, then combine the resulting
  # column positions into one named integer vector
  # (splicing the whole list at once via `!!quo_list` does not work here)
  selected_positions <- lapply(captured_quosures, function(q){
    tidyselect::eval_select(expr = q, data = df)
  }) |>
    unlist()
  # subset and (re)name columns, as per the `tidyselect` documentation;
  # arguably `df[selected_positions]` alone would suffice
  rlang::set_names(df[selected_positions], names(selected_positions))
}

#' equivalent to `parse_select()` but for filter
#' mainly called for delayed filter arguments on APIs that don't support `q`
#' @param df a `tibble` (or `data.frame`) to filter
#' @param query a query object whose `$request$filter` may hold a delayed
#'   filter entry (with a `$value` element)
#' @return `df`, filtered when a filter entry is present and `df` has columns
#' @noRd
#' @keywords Internal
parse_filter <- function(df, query){
  filter_entry <- query$request$filter
  # `&&` (not `&`): both conditions are scalars, and short-circuiting means
  # `ncol()` is only checked when a filter is actually present
  if(!is.null(filter_entry) && ncol(df) > 0){
    # choose which column to match against, depending on query type;
    # fall back to the first column for other types
    search_col <- switch(query$type, 
                         "metadata/fields" = "id",
                         "metadata/profiles" = "short_name", 
                         colnames(df)[1])
    value <- filter_entry$value
    # `.env$value` forces lookup in the calling environment, so a column
    # named `value` in `df` cannot shadow the filter value
    df |> dplyr::filter(.data[[search_col]] == .env$value)
  }else{
    df
  }
}

#' Internal function to rename specific columns. Note this is safer than calling
#' `dplyr::rename()` directly, because it only seeks to rename columns that 
#' are actually present, and so won't fail.
#' @noRd
#' @keywords Internal
parse_rename <- function(df, .query){
  # derive the lookup key from the query type, stripping the `metadata/`
  # prefix and `-single`/`-multiple` suffixes so `taxa` args are matched
  lookup_key <- .query$type |>
    stringr::str_remove("^metadata/") |>
    stringr::str_remove("-single$|-multiple$")
  rename_vec <- lookup_rename_columns(lookup_key)
  # no renaming information for this type: return the source `tibble`
  if(is.null(rename_vec)){
    return(df)
  }
  # keep only rename entries whose source columns exist in `df`
  present <- rename_vec %in% colnames(df)
  if(!any(present)){
    return(df)
  }
  dplyr::rename(df, !!!as.list(rename_vec[present]))
}

#' Simple internal function to `arrange()` by the values of the first column
#' @noRd
#' @keywords Internal
parse_arrange <- function(df){
  df |> dplyr::arrange(dplyr::pull(df, 1))
}

#' Choose column names to pass to `select()`. 
#' NOTE: this isn't especially subtle wrt different atlases
#' NOTE: this assumes `dplyr::rename_with(camel_to_snake_case)` has been run
#' @noRd
#' @keywords Internal
lookup_select_columns <- function(type) {
  switch(
    type,
    "assertions" = c("id", "description", "category", "type"),
    "fields" = c("id", "description", "type"),
    "identifiers" = lookup_select_columns_taxa(),
    "licences" = c("id", "name", "acronym", "url"),
    "lists" = c("species_list_uid", "list_name", "description",
                "list_type", "item_count"),
    "lists-unnest" = c("scientific_name", "vernacular_name",
                       "taxon_concept_id"),
    "media" = c("media_id", "occurrence_id", "creator", "license",
                "data_resource_uid", "date_taken", "date_uploaded",
                "mime_type", "size_in_bytes", "success"),
    "profiles" = c("id", "short_name", "name", "description"),
    "profiles-unnest" = c("id", "description", "filter", "enabled"),
    "reasons" = c("id", "name"),
    "taxa" = lookup_select_columns_taxa(),
    "taxa-unnest" = c("name", "taxon_concept_id",
                      "parent_taxon_concept_id", "rank"),
    # When no defaults are set, sending NULL tells the code to call
    # `everything()`
    NULL
  )
}

#' `lookup_select_columns()` but for taxa and identifier queries
#' @noRd
#' @keywords Internal
lookup_select_columns_taxa <- function(){
  # identifier-style columns; naming varies by atlas, so all candidates
  # are listed and only those present are retained downstream
  id_cols <- c("search_term",
               "scientific_name",
               "scientific_name_authorship",
               "taxon_concept_id",   # ALA
               "taxon_concept_lsid", # Austria, Guatemala
               "authority",          # OpenObs
               "key",                # GBIF
               "usage_key",          # GBIF
               "guid")               # species search
  # columns describing the taxonomic match itself
  match_cols <- c("canonical_name",
                  "status",
                  "rank",
                  "match_type",
                  "confidence",
                  "time_taken",
                  "vernacular_name",
                  "issues")
  # taxonomic ranks (basic only); if all are needed, use this instead:
  # {show_all_ranks() |> dplyr::pull("name")}
  rank_cols <- c("kingdom",
                 "phylum",
                 "class",
                 "order",
                 "family",
                 "genus",
                 "species")
  c(id_cols, match_cols, rank_cols)
}

#' Choose which columns to rename
#'
#' Returns a named character vector in `dplyr::rename()` style, i.e.
#' `c("new_name" = "old_name")`, or `NULL` when `type` needs no renaming.
#' Note that under "taxa" the new name `taxon_concept_id` appears several
#' times: different atlases supply different source columns, and
#' `parse_rename()` keeps only entries whose old names are actually present,
#' so the duplicates are deliberate and their order matters.
#' @param type string: query type with `metadata/` prefix and
#'   `-single`/`-multiple` suffix already stripped (see `parse_rename()`)
#' @noRd
#' @keywords Internal
lookup_rename_columns <- function(type){
  switch(type, 
         "assertions" = c("id" = "name"),
         "identifiers" = c("taxonConceptID" = "key"),
         "lists" = c("species_list_uid" = "data_resource_uid"),
         "lists-unnest" = c("taxon_concept_id" = "lsid"),
         "media" = c("media_id" = "image_identifier"),
         # "classs" below is GBIF's deliberate misspelling, not a typo here
         "taxa" = c("class" = "classs",
                    "taxon_concept_id" = "usage_key",
                    "taxon_concept_id" = "guid",
                    "taxon_concept_id" = "reference_id",
                    "taxon_concept_id" = "key",
                    "genus" = "genus_name",
                    "family" = "family_name",
                    "order" = "order_name",
                    "phylum" = "phylum_name",
                    "kingdom" = "kingdom_name",
                    "rank" = "rank_name",
                    "vernacular_name" = "french_vernacular_name"),
         "taxa-unnest" = c("taxon_concept_id" = "guid",
                           "parent_taxon_concept_id" = "parent_guid"),
         # unmatched types need no renaming
         NULL
  )
}

##---------------------------------------------------------------
##                          Cases                              --
##---------------------------------------------------------------

#' Internal function to convert text to snake case
#' @noRd
#' @keywords Internal
camel_to_snake_case <- function(string){
  # insert an underscore at every lower-to-upper case boundary
  underscored <- gsub("([a-z])([A-Z])", "\\1_\\L\\2", x = string, perl = TRUE)
  # remove leading/trailing whitespace
  trimmed <- trimws(underscored, which = "both")
  # replace internal dots or spaces with underscores, then lower-case all
  tolower(gsub("\\.+|\\s+", "_", x = trimmed))
}

#' Internal function to handle conversion from camelCase to upper snake case
#' @noRd
#' @keywords internal
gbif_upper_case <- function(string){
  # insert an underscore immediately before each upper-case letter
  # (zero-width lookahead), then upper-case the whole string
  with_underscores <- gsub("(?=[[:upper:]])", "_", string, perl = TRUE)
  toupper(with_underscores)
}

#' Internal function to handle conversion from upper snake case to camelCase
#' Primarily for reversing the action of `gbif_upper_case()` (above)
#' vectorized (kinda) 2026-02-05
#' @noRd
#' @keywords internal
snake_to_camel_case <- function(string){
  # split each input into lower-case words on underscores
  words <- string |>
    tolower() |>
    strsplit("_")
  # for multi-word entries: keep the first word lower case, title-case the
  # remainder, and concatenate (i.e. camelCase)
  multi <- lengths(words) > 1
  if(any(multi)){
    rejoined <- purrr::map(words[multi], \(w){
      paste0(c(w[[1]], stringr::str_to_title(w[-1])), collapse = "")
    })
    words[multi] <- rejoined
  }
  unlist(words)
}

##---------------------------------------------------------------
##                   Set API header arguments                  --
##---------------------------------------------------------------

# Construct the user agent string, consisting of the galah version
# This is added on to all requests to enable usage monitoring 
galah_version_string <- function() {
  # look up the installed galah version, falling back to a placeholder if
  # the package metadata cannot be read for any reason
  version_string <- tryCatch(
    suppressWarnings(utils::packageDescription("galah")[["Version"]]),
    error = function(e) "version unknown"
  )
  glue::glue("galah-R {version_string}")
}

#' Map an atlas region name to its source-type id
#' @param region string: atlas region name (e.g. "Austria")
#' @return the region's id, or `"2004"` (the ALA default for galah) for any
#'   unmatched region.
#' NOTE(review): matched regions return numerics (1, 2001) while the default
#' is the *string* "2004" — presumably coerced to character downstream, but
#' worth confirming before harmonising the types.
#' @noRd
#' @keywords Internal
source_type_id_lookup <- function(region){
  switch(region,
         "Austria" = 1,
         "United Kingdom" = 2001,
         "2004") # ALA default for galah
}

##----------------------------------------------------------------
##  Functions to add information to occurrence queries          --
##----------------------------------------------------------------
## Note these now follow `tidyverse` convention of accepting and
## returning same object type

#' Add a logical flag re: whether user should receive an email
#' @param x a list of arguments being assembled for an occurrence query
#' @return `x` with an `emailNotify` entry set to `"true"` or `"false"`
#' @noRd
#' @keywords Internal
add_email_notify <- function(x) {
  notify <- as.logical(potions::pour("package", "send_email"))
  # an unset or unparseable option comes back as NA; default to no email
  if(is.na(notify)) {
    notify <- FALSE
  }
  # scalar condition, so plain `if`/`else` rather than `ifelse()`
  x$emailNotify <- if(notify) "true" else "false"
  x
}

#' Add an email address, but *only* when JWT tokens are not given
#' @noRd
#' @keywords Internal
add_email_address <- function(x, query){
  # when `authenticate` is set, JWT tokens are in use and no email is added
  if(!is.null(query$authenticate)){
    return(x)
  }
  x$email <- potions::pour("user", "email")
  x
}

#' Add a DOI request
#' @param x a list of arguments being assembled for an occurrence query
#' @param mint_doi logical: should a DOI be minted? Only honoured for the
#'   Australian atlas.
#' @return `x`, with `mintDoi = TRUE` added when applicable
#' @noRd
#' @keywords Internal
add_doi_request <- function(x, mint_doi = FALSE){
  # `&&` (not `&`) so the atlas option is only queried when `mint_doi`
  # is actually TRUE (both operands are scalars)
  if(isTRUE(mint_doi) && 
     potions::pour("atlas", "region") == "Australia"){
    x$mintDoi <- TRUE 
  }
  x
}

##----------------------------------------------------------------
##  Functions to change behaviour depending on selected `atlas` --
##----------------------------------------------------------------

#' Internal function for determining if we should call GBIF or not
#' @noRd
#' @keywords Internal
is_gbif <- function(){
  current_region <- potions::pour("atlas", "region")
  current_region == "Global"
}

#' Internal function for determining if we should call ALA or not
#' @noRd
#' @keywords Internal
is_ala <- function(){
  current_region <- potions::pour("atlas", "region")
  current_region == "Australia"
}

#' Internal function to populate `groups` arg in `select()`
#' @noRd
#' @keywords Internal
preset_groups <- function(group_name) {
  # note: assertions handled elsewhere
  switch(group_name,
         "basic" = default_columns(),
         "event" = c("eventRemarks",
                     "eventTime",
                     "eventID",
                     "eventDate",
                     "samplingEffort",
                     "samplingProtocol"),
         "media" = image_fields(),
         "taxonomy" = c("kingdom",
                        "phylum",
                        "class",
                        "order",
                        "family",
                        "genus",
                        "species",
                        "subspecies"))
}

#' Internal function to specify 'basic' columns in `select()`
#' @noRd
#' @keywords Internal
default_columns <- function() {
  region <- potions::pour("atlas", "region")
  # empty switch arms fall through to the next supplied value
  switch(region,
         # these atlases use snake_case field names
         "Austria" = ,
         "Brazil" = ,
         "Guatemala" = ,
         "Portugal" = c("id",
                        "taxon_name",
                        "taxon_concept_lsid",
                        "latitude",
                        "longitude",
                        "occurrence_date",
                        "basis_of_record",
                        "occurrence_status",
                        "data_resource_uid"),
         # France differs from ALA-style atlases only in the `id` field
         "France" = c("id",
                      "scientificName",
                      "taxonConceptID",
                      "decimalLatitude",
                      "decimalLongitude",
                      "eventDate",
                      "basisOfRecord",
                      "occurrenceStatus",
                      "dataResourceName"),
         # ALA-style atlases; note `recordID` requires that the ALA name
         # (`id`) be corrected
         "Australia" = ,
         "Flanders" = ,
         "Kew" = ,
         "Spain" = ,
         "Sweden" = ,
         "United Kingdom" = c("recordID",
                              "scientificName",
                              "taxonConceptID",
                              "decimalLatitude",
                              "decimalLongitude",
                              "eventDate",
                              "basisOfRecord",
                              "occurrenceStatus",
                              "dataResourceName"),
         cli::cli_abort("Unknown `atlas`"))
}

#' Fields holding media information, per atlas
#' @noRd
#' @keywords Internal
image_fields <- function() {
  region <- potions::pour("atlas", "region")
  # empty switch arms fall through to the next supplied value
  switch(region,
         "Austria" = ,
         "Brazil" = ,
         "Guatemala" = ,
         "Portugal" = "all_image_url",
         "Australia" = ,
         "Flanders" = ,
         "Spain" = ,
         "Sweden" = ,
         "United Kingdom" = c("multimedia", "images", "sounds", "videos"),
         "Kew" = c("multimedia", "images"),
         cli::cli_abort("Unknown `atlas`"))
}

#' Set filters that work for media in each atlas
#' @param present_fields character vector of media field names present for
#'   the current query (see `image_fields()`)
#' @param error_call calling environment, forwarded to `cli_abort()` so the
#'   error is reported at the user-facing call site
#' @return a filter string (or `glue` vector) for the selected atlas
#' @noRd
#' @keywords Internal
image_filters <- function(present_fields,
                          error_call = rlang::caller_env()){
  
  atlas <- potions::pour("atlas", "region")
  switch(atlas,
         "Austria" = "(all_image_url:*)",
         # ALA: one filter per media field actually present
         "Australia" = glue::glue("({present_fields}:*)"),
         "Brazil" = "(all_image_url:*)",
         "Flanders" = "(images:*)",
         "Guatemala" = "(all_image_url:*)",
         "Kew" =  "(images:*)",
         "Portugal" = "(all_image_url:*)",
         "Spain" = "(multimedia:*)",
         # Sweden: build range filters on count fields derived by
         # singularising each field name, e.g. "images" -> "imageIDsCount"
         "Sweden" = {filter_fields <- present_fields |>
                       stringr::str_remove("s$") |>
                       paste0("IDsCount")
                     glue::glue("{filter_fields}:[1 TO *]")},
         "United Kingdom" = "(all_image_url:*)",
         cli::cli_abort("`atlas_media` is not supported for atlas = {atlas}",
                        call = error_call)
  )
}

#' Facet name used to count/list species, per atlas
#' @noRd
#' @keywords Internal
species_facets <- function(){
  species_id_atlases <- c("Australia",
                          "Flanders",
                          "France",
                          "Spain",
                          "Sweden",
                          "United Kingdom")
  if(potions::pour("atlas", "region") %in% species_id_atlases){
    "speciesID"
  }else{
    "species_guid"
  }
}

#' Internal function for determining whether the selected atlas supports
#' data-quality profiles
#' @return logical: `TRUE` when the configured atlas supports profiles
#' @noRd
#' @keywords Internal
profiles_supported <- function(){
  atlas <- potions::pour("atlas", "region")
  # `%in%` already returns a logical, so no `if`/`else` is needed
  atlas %in% c("Australia",
               "Flanders",
               "Sweden",
               "Spain")
}

#' Internal function for determining whether a Living Atlas supports reasons API.
#' This affects whether a reason is appended to a query in `collapse()` (and 
#' checked in `compute()`)
#' @noRd
#' @keywords Internal
reasons_supported <- function(){
  current_atlas <- potions::pour("atlas", "region")
  # look up which atlases list a `metadata/reasons` API endpoint
  reasons_atlases <- request_metadata(type = "apis") |>
    collect() |>
    dplyr::filter(.data$type == "metadata/reasons") |>
    dplyr::pull("atlas")
  current_atlas %in% reasons_atlases
}

#' Abort when the configured atlas has no media support
#' Called for its side effect only; returns nothing useful
#' @noRd
#' @keywords Internal
media_supported <- function(){
  atlas <- potions::pour("atlas", "region",
                         .pkg = "galah")
  if(atlas %in% c("France", "Global")){
    cli::cli_abort("`atlas_media` is not supported for atlas = {atlas}")
  }
}

# Try the galah package in your browser
#
# Any scripts or data that you put into this service are public.
#
# galah documentation built on Feb. 11, 2026, 9:11 a.m.