Nothing
# Chunk defaults for the rendered vignette: merge source and output into one
# block and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
The `avilistr` package provides access to the AviList Global Avian Checklist, the first unified global bird taxonomy. This vignette demonstrates how to work with the data for common ornithological and biodiversity analyses.
# Packages used throughout this vignette. Each call sits on its own line:
# several library() calls on one line without separators do not parse.
library(avilistr)
library(dplyr)
library(ggplot2)
# tidyverse also attaches stringr, which supplies str_detect()/str_extract()
library(tidyverse)
The package provides three main datasets:
# Load the three datasets shipped with the package:
#   avilist_2025       - complete dataset (26 fields)
#   avilist_2025_short - essential fields (~12 fields)
#   avilist_metadata   - field descriptions
data(list = c("avilist_2025", "avilist_2025_short", "avilist_metadata"))

# Report the dimensions of the full and short datasets
cat(
  "Full dataset:", nrow(avilist_2025), "records,",
  ncol(avilist_2025), "fields\n"
)
cat(
  "Short dataset:", nrow(avilist_2025_short), "records,",
  ncol(avilist_2025_short), "fields\n"
)
# Number of records at each taxonomic rank, most frequent rank first
count(avilist_2025_short, Taxon_rank, sort = TRUE)
# Ten orders with the most species-rank records
species_by_order <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  count(Order, sort = TRUE) %>%
  slice_head(n = 10)

print(species_by_order)
# Horizontal bar chart of the most diverse orders; bars are ordered by
# species count via reorder() so the richest order ends up at the top.
ggplot(
  data = species_by_order,
  mapping = aes(x = reorder(Order, n), y = n)
) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Most Species-Rich Bird Orders",
    subtitle = "Top 10 orders by number of species",
    x = "Order",
    y = "Number of Species",
    caption = "Data: AviList Global Avian Checklist v2025"
  )
# Fifteen families with the most species-rank records; the English family
# name is carried along so it can be used as a plot label.
family_richness <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  count(Family, Family_English_name, sort = TRUE) %>%
  slice_head(n = 15)

print(family_richness)
# Horizontal bar chart of the most diverse families, labelled by English name
ggplot(
  data = family_richness,
  mapping = aes(x = reorder(Family_English_name, n), y = n)
) +
  geom_col(fill = "darkgreen", alpha = 0.8) +
  coord_flip() +
  labs(
    title = "Most Species-Rich Bird Families",
    subtitle = "Top 15 families by number of species",
    x = "Family",
    y = "Number of Species",
    caption = "Data: AviList Global Avian Checklist v2025"
  ) +
  # theme() must follow theme_minimal(), which would otherwise reset it
  theme_minimal() +
  theme(axis.text.y = element_text(size = 10))
# All species-rank records in the thrush family (Turdidae), keeping only the
# scientific and English names
thrushes <- avilist_2025_short %>%
  filter(Family == "Turdidae") %>%
  filter(Taxon_rank == "species") %>%
  select(Scientific_name, English_name_AviList)

cat("Number of thrush species:", nrow(thrushes), "\n")
head(thrushes)
# Species counts for a hand-picked set of bird-of-prey families
raptor_families <- c(
  "Accipitridae",
  "Falconidae",
  "Strigidae",
  "Tytonidae"
)

raptors <- avilist_2025_short %>%
  filter(Taxon_rank == "species", Family %in% raptor_families) %>%
  count(Family, Family_English_name, sort = TRUE)

print(raptors)
# Species whose AviList English name contains "Robin" (case-sensitive),
# listed family by family
robins <- avilist_2025_short %>%
  filter(
    Taxon_rank == "species",
    str_detect(English_name_AviList, "Robin")
  ) %>%
  select(Scientific_name, English_name_AviList, Family) %>%
  arrange(Family)

print(robins)
# All species in the genus Turdus. The trailing space in the pattern keeps
# the match to the whole genus word at the start of the name.
turdus_species <- avilist_2025_short %>%
  filter(
    Taxon_rank == "species",
    str_detect(Scientific_name, "^Turdus ")
  ) %>%
  select(Scientific_name, English_name_AviList) %>%
  arrange(Scientific_name)

cat("Number of Turdus species:", nrow(turdus_species), "\n")
head(turdus_species, 10)
# One-row summary: total record count plus the number of missing values in
# four key columns of the full dataset
data_completeness <- summarise(
  avilist_2025,
  total_records = n(),
  missing_scientific_names = sum(is.na(Scientific_name)),
  missing_families = sum(is.na(Family)),
  missing_orders = sum(is.na(Order)),
  missing_avilist_names = sum(is.na(English_name_AviList))
)

print(data_completeness)
# How AviList and Clements (v2024) English names compare across species
name_comparison <- avilist_2025 %>%
  filter(Taxon_rank == "species") %>%
  summarise(
    total_species = n(),
    has_avilist_name = sum(!is.na(English_name_AviList)),
    has_clements_name = sum(!is.na(English_name_Clements_v2024)),
    # "both present" written as "neither one missing"
    has_both_names = sum(
      !(is.na(English_name_AviList) | is.na(English_name_Clements_v2024))
    ),
    # comparisons involving a missing name yield NA; na.rm drops them
    names_differ = sum(
      English_name_AviList != English_name_Clements_v2024,
      na.rm = TRUE
    )
  )

print(name_comparison)
# First ten species for which both sources give an English name and the two
# names are not identical
name_differences <- avilist_2025 %>%
  filter(Taxon_rank == "species") %>%
  filter(
    !is.na(English_name_AviList),
    !is.na(English_name_Clements_v2024)
  ) %>%
  filter(English_name_AviList != English_name_Clements_v2024) %>%
  select(Scientific_name, English_name_AviList, English_name_Clements_v2024) %>%
  slice_head(n = 10)

print(name_differences)
# Performance tips: prefer the short dataset, filter early, and keep only the
# columns an analysis needs.

# For large analyses, use the short dataset when possible
system.time({
  short_analysis <- avilist_2025_short %>%
    filter(Taxon_rank == "species") %>%
    count(Order)
})

# Filter early to reduce data size
songbirds <- avilist_2025_short %>%
  filter(Order == "Passeriformes", Taxon_rank == "species")

cat("Songbird species:", nrow(songbirds), "\n")

# Select only needed columns to reduce memory usage
essential_fields <- avilist_2025 %>%
  select(Scientific_name, English_name_AviList, Family, Order, Taxon_rank)

# Report column counts and memory footprints separately; the earlier message
# labelled the column counts as "memory usage".
cat(
  "Columns reduced from", ncol(avilist_2025),
  "to", ncol(essential_fields), "\n"
)
cat(
  "Memory usage reduced from",
  format(object.size(avilist_2025), units = "MB"),
  "to",
  format(object.size(essential_fields), units = "MB"),
  "\n"
)
taxize
# taxize is an optional companion package and nothing below calls it unless
# the final line is uncommented, so attach it only when it is installed
# instead of failing the whole vignette.
if (requireNamespace("taxize", quietly = TRUE)) {
  library(taxize)
}

# Get a sample of species for validation
sample_species <- avilist_2025_short %>%
  filter(Family == "Turdidae", Taxon_rank == "species") %>%
  pull(Scientific_name) %>%
  head(5)

# Validate names with GBIF (commented out to avoid API calls in vignette)
# gbif_validation <- get_gbifid(sample_species)
rebird
for eBird Integration

# rebird is an optional companion package and nothing below calls it unless
# the final line is uncommented, so attach it only when it is installed.
if (requireNamespace("rebird", quietly = TRUE)) {
  library(rebird)
}

# Get Cornell Lab species codes from full dataset
thrush_codes <- avilist_2025 %>%
  filter(Family == "Turdidae", Taxon_rank == "species") %>%
  select(Scientific_name, Species_code_Cornell_Lab) %>%
  filter(!is.na(Species_code_Cornell_Lab))

# Example: Get recent observations (commented out to avoid API calls)
# recent_thrushes <- ebirdregion("US-NY", species = thrush_codes$Species_code_Cornell_Lab[1])
# Monotypic genera: genera represented by exactly one species-rank record.
# The genus is taken as the leading capitalised word of the scientific name.
monotypic_genera <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  mutate(genus = str_extract(Scientific_name, "^[A-Z][a-z]+")) %>%
  count(genus, Family) %>%
  filter(n == 1) %>%
  arrange(Family)

cat("Number of monotypic genera:", nrow(monotypic_genera), "\n")

# Ten families holding the most monotypic genera
monotypic_summary <- monotypic_genera %>%
  count(Family, name = "monotypic_genera") %>%
  arrange(desc(monotypic_genera)) %>%
  slice_head(n = 10)

print(monotypic_summary)
# Analyze type localities (where species were first described): classify each
# locality into a broad region by keyword and count species per region.
#
# Keywords are wrapped in \\b word boundaries because matching is
# case-insensitive. Unanchored, "USA" matches "Jerusalem", "India" matches
# "Indiana", "Peru" matches "Perugia" and "Asia" matches "Australasia".
# Adjectival forms that the unanchored keywords used to catch ("Asian",
# "Brazilian", ...) are listed explicitly. case_when() uses the first branch
# that matches, so branch order matters; localities matching no keyword are
# labelled "Other".
type_localities <- avilist_2025 %>%
  filter(Taxon_rank == "species", !is.na(Type_locality)) %>%
  mutate(
    continent = case_when(
      str_detect(
        Type_locality,
        regex("\\b(Australasian?|Australian?|New Zealand)\\b", ignore_case = TRUE)
      ) ~ "Australasia",
      str_detect(
        Type_locality,
        regex("\\bEuropean?\\b", ignore_case = TRUE)
      ) ~ "Europe",
      str_detect(
        Type_locality,
        regex("\\bAfrican?\\b", ignore_case = TRUE)
      ) ~ "Africa",
      str_detect(
        Type_locality,
        regex("\\b(Asian?|China|Japan|Indian?)\\b", ignore_case = TRUE)
      ) ~ "Asia",
      str_detect(
        Type_locality,
        regex(
          "\\b(America(s|n)?|Brazil(ian)?|Peru(vian)?|Mexico|Canada|USA)\\b",
          ignore_case = TRUE
        )
      ) ~ "Americas",
      TRUE ~ "Other"
    )
  ) %>%
  count(continent, sort = TRUE)

print(type_localities)
# Show the field-level metadata table
print(avilist_metadata)

# Fields present in the short dataset
cat("Fields in short dataset:\n")
short_fields <- avilist_metadata %>%
  filter(in_short_version) %>%
  pull(field_name)
cat(paste(short_fields, collapse = ", "), "\n\n")

# Fields that exist only in the full dataset
cat("Additional fields in full dataset:\n")
full_only_fields <- avilist_metadata %>%
  filter(in_full_version, !in_short_version) %>%
  pull(field_name)
cat(paste(full_only_fields, collapse = ", "), "\n")
The `avilistr` package provides comprehensive access to the unified AviList Global Avian Checklist. Key takeaways:
`taxize` and `rebird` for enhanced functionality.

For more advanced functionality, future versions of the package may include dedicated search and validation functions.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.