R/atlas_taxonomy.R

Defines functions rank_index constrain_id check_down_to check_identify drill_down_taxonomy check_constraints atlas_taxonomy

Documented in atlas_taxonomy

#' @rdname atlas_
#' @order 5
#' @param constrain_ids `string`: Optional string to limit which `taxon_concept_id`'s
#' are returned. This is useful for restricting taxonomy to particular 
#' authoritative sources. Default is `"biodiversity.org.au"` for Australia, 
#' which is the infix common to National Species List IDs; use
#' `NULL` to suppress source filtering. Regular expressions are supported.
#' @keywords internal
#' @export
atlas_taxonomy <- function(request = NULL,
                           identify = NULL, 
                           filter = NULL,
                           constrain_ids = NULL
                           ) {
  # capture supplied arguments
  args <- as.list(environment())

  # convert to a valid `data_request` object
  .query <- check_atlas_inputs(args)
  .query$type <- "taxonomy" # default, but in case supplied otherwise
  check_identify(.query)
  check_down_to(.query)
  constrain_ids <- check_constraints(args = args, 
                                     call = as.list(sys.call()))

  # extract required information from `identify` 
  taxa_info <- request_metadata(type = "taxa") |>
    identify(.query$identify$search_term) |>
    collect()
  start_row <- taxa_info |>
    dplyr::mutate(name = stringr::str_to_title(taxa_info$scientific_name),
                  parent_taxon_concept_id = NA) |>
    dplyr::select("name", 
                  "rank", 
                  "taxon_concept_id", 
                  "parent_taxon_concept_id") 

  # build then flatten a tree
  taxonomy_tree <- drill_down_taxonomy(start_row, 
                                       down_to = .query$filter$value,
                                       constrain_ids = constrain_ids)
  for(i in seq_len(purrr::pluck_depth(taxonomy_tree))){
    taxonomy_tree <- purrr::list_flatten(taxonomy_tree)
  }
  result <- dplyr::bind_rows(taxonomy_tree)
  
  # remove rows with ranks that are too low
  index <- rank_index(result$rank)
  down_to_index <- rank_index(.query$filter$value)
  result |>
    dplyr::filter({{index}} <= {{down_to_index}} | is.na({{index}})) |>
    dplyr::select("name", 
                  "rank", 
                  "parent_taxon_concept_id", 
                  "taxon_concept_id")
}

#' Internal function to check whether constraints have been passed
#' @noRd
#' @keywords Internal
check_constraints <- function(args, call){
  if(any(names(call) == "constrain_ids")){ # i.e. if user specifies an argument
    call$constrain_ids
  }else{ # if NULL only occurs because no argument is set
    if(potions::pour("atlas", "region") == "Australia"){
      "biodiversity.org.au"
    }
  }
} 

#' Internal recursive function to get child taxa
#' @noRd
#' @keywords Internal
drill_down_taxonomy <- function(df, 
                                down_to,
                                constrain_ids = NULL){
  if(!(df$rank %in% c("unranked", "informal"))){
    if (rank_index(df$rank) >= rank_index(down_to)) {
      return(df)
    }
  }
  children <- request_metadata() |>
    filter("taxa" == df$taxon_concept_id) |>
    unnest() |>
    collect()
  if(nrow(children) < 1){
    return(df)
  }else{
    if(is.null(constrain_ids)){
      result <- children
    }else{
      result <- children |>
        constrain_id(constrain_to = constrain_ids) 
    }
    if(nrow(result) < 1){
      return(df)
    }else{
      result_list <- purrr::map(
        split(result, seq_len(nrow(result))), 
        function(a){drill_down_taxonomy(a, 
                                        down_to, 
                                        constrain_ids = constrain_ids)})
      return( 
        append(list(df), list(result_list)))
    }
  }
}

#' Internal function to check `identify` term is specified correctly
#' @noRd
#' @keywords Internal
check_identify <- function(.query,
                           error_call = rlang::caller_env()){
  if(is.null(.query$identify)){
    c("Argument `identify` is missing, with no default.",
      i = "Did you forget to specify a taxon?") |>
    cli::cli_abort(call = error_call)
  }
  
  if(nrow(.query$identify) > 1){
    number_of_taxa <- nrow(.query$identify)
    c("Can't provide tree more than one taxon to start with.",
      i = "atlas_taxonomy` only accepts a single taxon at a time.",
      x = "`identify` has length of {number_of_taxa}.") |>
    cli::cli_abort(call = error_call)
  }
}

#' Internal function to check `identify` term is specified correctly
#' @noRd
#' @keywords Internal
check_down_to <- function(.query,
                          error_call = rlang::caller_env()){
  if (is.null(.query$filter$value)) {
    c("Argument `rank` is missing, with no default.",
      i = "Use `show_all(ranks)` to display valid ranks",
      i = "Use `filter(rank == chosen_rank)` to specify a rank"
    )
    cli::cli_abort(call = error_call)
  }
  
  down_to <- tolower(.query$filter$value) 
  if(!any(show_all_ranks()$name == down_to)){
    c("Invalid taxonomic rank provided.",
      i = "The rank provided to `rank` must be a valid taxonomic rank.",
      x = "{down_to} is not a valid rank.") |>
    cli::cli_abort(call = error_call)
  }  
}

#' Internal function to only return GUIDs that match particular criteria
#' @noRd
#' @keywords Internal
constrain_id <- function(df, constrain_to){
  check_list <- purrr::map(constrain_to, 
                           function(a){grepl(a, df$taxon_concept_id)})
  check_result <- purrr::map(purrr::list_transpose(check_list), any) |>
    unlist()
  df |> 
    dplyr::filter(check_result)
}

# Return the index of a taxonomic rank- 
# lower index corresponds to higher up the tree
#' @noRd
#' @keywords Internal
rank_index <- function(x) {
  all_ranks <- show_all_ranks()
  purrr::map(x, function(a){
    if (a %in% all_ranks$name) {
      return(all_ranks$id[all_ranks$name == a])  
    }else{
      NA
    }    
  }) |>
    unlist()
}

Try the galah package in your browser

Any scripts or data that you put into this service are public.

galah documentation built on Feb. 11, 2026, 9:11 a.m.