getting-started.R
In avilistr: Access and Work with the 'AviList' Global Avian Checklist

## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered vignette: merge source and output into one
# block and prefix every output line with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----setup--------------------------------------------------------------------
library(avilistr)
library(dplyr)
library(ggplot2)
library(tidyverse)

## ----load-data----------------------------------------------------------------
# Bring the three packaged objects into the workspace in a single call:
#   avilist_2025        complete dataset (26 fields)
#   avilist_2025_short  essential fields (~12 fields)
#   avilist_metadata    field descriptions
data(avilist_2025, avilist_2025_short, avilist_metadata)

# Report rows (records) and columns (fields) of both checklist tables
cat(
  "Full dataset:", dim(avilist_2025)[1L], "records,",
  dim(avilist_2025)[2L], "fields\n"
)
cat(
  "Short dataset:", dim(avilist_2025_short)[1L], "records,",
  dim(avilist_2025_short)[2L], "fields\n"
)

## ----explore-basic------------------------------------------------------------
# How many records exist at each taxonomic rank, most frequent rank first
count(avilist_2025_short, Taxon_rank, sort = TRUE)

## ----explore-taxonomy---------------------------------------------------------
# Tally species-rank records per order and keep the ten largest orders.
# `species_by_order` is reused by the plotting chunk that follows.
species_by_order <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  count(Order, sort = TRUE) %>%
  head(n = 10)

species_by_order %>%
  print()

## ----plot-orders, fig.width=8, fig.height=5-----------------------------------
# Horizontal bar chart of the ten most species-rich orders.
# reorder() sorts the bars by species count; coord_flip() turns them sideways
# so the order names sit on the vertical axis.
species_by_order %>%
  ggplot(mapping = aes(x = reorder(Order, n), y = n)) +
  geom_col(alpha = 0.8, fill = "steelblue") +
  labs(
    x = "Order",
    y = "Number of Species",
    title = "Most Species-Rich Bird Orders",
    subtitle = "Top 10 orders by number of species",
    caption = "Data: AviList Global Avian Checklist v2025"
  ) +
  theme_minimal() +
  coord_flip()

## ----explore-families---------------------------------------------------------
# Species count per family (scientific and English family name kept together),
# limited to the fifteen largest families.
# `family_richness` is reused by the plotting chunk that follows.
family_richness <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  count(Family, Family_English_name, sort = TRUE) %>%
  head(n = 15)

family_richness %>%
  print()

## ----plot-families, fig.width=10, fig.height=6--------------------------------
# Horizontal bar chart of the fifteen most species-rich families, labelled by
# their English family names and sorted by species count.
family_richness %>%
  ggplot(mapping = aes(x = reorder(Family_English_name, n), y = n)) +
  geom_col(alpha = 0.8, fill = "darkgreen") +
  labs(
    x = "Family",
    y = "Number of Species",
    title = "Most Species-Rich Bird Families",
    subtitle = "Top 15 families by number of species",
    caption = "Data: AviList Global Avian Checklist v2025"
  ) +
  coord_flip() +
  # theme_minimal() is a complete theme, so the axis-text tweak must come after
  theme_minimal() +
  theme(axis.text.y = element_text(size = 10))

## ----filter-examples----------------------------------------------------------
# Species-rank records of the thrush family (Turdidae), reduced to the
# scientific and English names
thrushes <- avilist_2025_short %>%
  filter(Family == "Turdidae") %>%
  filter(Taxon_rank == "species") %>%
  select(Scientific_name, English_name_AviList)

cat("Number of thrush species:", nrow(thrushes), "\n")

# Preview the first six rows
head(thrushes, n = 6)

## ----filter-raptors-----------------------------------------------------------
# Families treated as raptors (birds of prey) in this example
raptor_families <- c("Accipitridae", "Falconidae", "Strigidae", "Tytonidae")

# Species count for each of those families, largest family first
raptors <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  filter(Family %in% raptor_families) %>%
  count(Family, Family_English_name, sort = TRUE)

raptors %>%
  print()

## ----pattern-matching---------------------------------------------------------
# Species whose English name contains "Robin" (case-sensitive), listed
# alphabetically by family
robins <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  filter(str_detect(English_name_AviList, "Robin")) %>%
  select(Scientific_name, English_name_AviList, Family) %>%
  arrange(Family)

robins %>%
  print()

## ----genus-search-------------------------------------------------------------
# Species of one genus (Turdus): the scientific name must begin with the genus
# followed by a space, so genera that merely start with "Turdus" do not match
turdus_species <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  filter(str_detect(Scientific_name, "^Turdus ")) %>%
  select(Scientific_name, English_name_AviList) %>%
  arrange(Scientific_name)

cat("Number of Turdus species:", nrow(turdus_species), "\n")

# Preview the first ten species in alphabetical order
head(turdus_species, n = 10)

## ----data-quality-------------------------------------------------------------
# One-row completeness report for the full dataset: total record count plus
# the number of missing (NA) values in four key columns
data_completeness <- summarise(
  avilist_2025,
  total_records = n(),
  missing_scientific_names = sum(is.na(Scientific_name)),
  missing_families = sum(is.na(Family)),
  missing_orders = sum(is.na(Order)),
  missing_avilist_names = sum(is.na(English_name_AviList))
)

data_completeness %>%
  print()

## ----name-comparison----------------------------------------------------------
# English-name coverage for species-rank records: how many have an AviList
# name, a Clements v2024 name, both, and how many of those pairs disagree
name_comparison <- avilist_2025 %>%
  filter(Taxon_rank == "species") %>%
  summarise(
    total_species = n(),
    has_avilist_name = sum(!is.na(English_name_AviList)),
    has_clements_name = sum(!is.na(English_name_Clements_v2024)),
    # "both present" written as "neither is missing"
    has_both_names = sum(
      !(is.na(English_name_AviList) | is.na(English_name_Clements_v2024))
    ),
    # comparisons involving NA yield NA and are dropped by na.rm = TRUE
    names_differ = sum(
      English_name_AviList != English_name_Clements_v2024,
      na.rm = TRUE
    )
  )

name_comparison %>%
  print()

## ----name-differences---------------------------------------------------------
# First ten species for which both English names are present but not equal
name_differences <- avilist_2025 %>%
  filter(Taxon_rank == "species") %>%
  filter(!(is.na(English_name_AviList) | is.na(English_name_Clements_v2024))) %>%
  filter(English_name_AviList != English_name_Clements_v2024) %>%
  select(Scientific_name, English_name_AviList, English_name_Clements_v2024) %>%
  head(n = 10)

name_differences %>%
  print()

## ----performance-tips---------------------------------------------------------
# Tip 1: prefer the short dataset for large analyses.
# system.time() reports how long the species-per-order tally takes on it.
system.time({
  short_analysis <- count(
    filter(avilist_2025_short, Taxon_rank == "species"),
    Order
  )
})

# Tip 2: filter as early as possible so later steps handle fewer rows
songbirds <- avilist_2025_short %>%
  filter(Order == "Passeriformes") %>%
  filter(Taxon_rank == "species")

cat("Songbird species:", nrow(songbirds), "\n")

# Tip 3: keep only the columns an analysis actually needs
essential_fields <- select(
  avilist_2025,
  Scientific_name, English_name_AviList, Family, Order, Taxon_rank
)

cat(
  "Memory usage reduced from", ncol(avilist_2025),
  "to", ncol(essential_fields), "columns\n"
)

## ----taxize-example, eval=FALSE-----------------------------------------------
# library(taxize)
# 
# # Get a sample of species for validation
# sample_species <- avilist_2025_short %>%
#   filter(Family == "Turdidae", Taxon_rank == "species") %>%
#   pull(Scientific_name) %>%
#   head(5)
# 
# # Validate names with GBIF (commented out to avoid API calls in vignette)
# # gbif_validation <- get_gbifid(sample_species)

## ----rebird-example, eval=FALSE-----------------------------------------------
# library(rebird)
# 
# # Get Cornell Lab species codes from full dataset
# thrush_codes <- avilist_2025 %>%
#   filter(Family == "Turdidae", Taxon_rank == "species") %>%
#   select(Scientific_name, Species_code_Cornell_Lab) %>%
#   filter(!is.na(Species_code_Cornell_Lab))
# 
# # Example: Get recent observations (commented out to avoid API calls)
# # recent_thrushes <- ebirdregion("US-NY", species = thrush_codes$Species_code_Cornell_Lab[1])

## ----taxonomic-patterns-------------------------------------------------------
# Monotypic genera: genera represented by exactly one species.
# The genus is taken as the leading capitalised word of the scientific name
# and tallied together with its family.
monotypic_genera <- avilist_2025_short %>%
  filter(Taxon_rank == "species") %>%
  count(genus = str_extract(Scientific_name, "^[A-Z][a-z]+"), Family) %>%
  filter(n == 1) %>%
  arrange(Family)

cat("Number of monotypic genera:", nrow(monotypic_genera), "\n")

# Number of monotypic genera per family, ten highest counts first
monotypic_summary <- monotypic_genera %>%
  count(Family, name = "monotypic_genera", sort = TRUE) %>%
  head(n = 10)

monotypic_summary %>%
  print()

## ----geographic-analysis------------------------------------------------------
# Analyze type localities (where species were first described).
#
# Each species with a non-missing Type_locality is assigned to a coarse region
# by case-insensitive keyword search. case_when() uses the first branch that
# matches, so the branch order matters; localities matching no keyword are
# labelled "Other". This is a rough keyword heuristic for illustration only.
# The result is the number of species per region, largest first.
type_localities <- avilist_2025 %>%
  filter(Taxon_rank == "species", !is.na(Type_locality)) %>%
  mutate(
    continent = case_when(
      str_detect(Type_locality, regex("Australia|New Zealand", ignore_case = TRUE)) ~ "Australasia",
      str_detect(Type_locality, regex("Europe|European", ignore_case = TRUE)) ~ "Europe",
      str_detect(Type_locality, regex("Africa|African", ignore_case = TRUE)) ~ "Africa",
      str_detect(Type_locality, regex("Asia|Asian|China|Japan|India", ignore_case = TRUE)) ~ "Asia",
      # "USA" is anchored with word boundaries: as a bare case-insensitive
      # substring it would also match inside unrelated words such as
      # "Jerusalem" and wrongly file those localities under "Americas".
      str_detect(Type_locality, regex("America|Brazil|Peru|Mexico|Canada|\\bUSA\\b", ignore_case = TRUE)) ~ "Americas",
      TRUE ~ "Other"
    )
  ) %>%
  count(continent, sort = TRUE)

print(type_localities)

## ----metadata-exploration-----------------------------------------------------
# Show the field-level metadata table
print(avilist_metadata)

# Names of the fields flagged as part of the short dataset
cat("Fields in short dataset:\n")
short_fields <- pull(filter(avilist_metadata, in_short_version), field_name)
cat(toString(short_fields), "\n\n")

# Names of the fields flagged for the full dataset but not for the short one
cat("Additional fields in full dataset:\n")
full_only_fields <- pull(
  filter(avilist_metadata, in_full_version, !in_short_version),
  field_name
)
cat(toString(full_only_fields), "\n")

Any scripts or data that you put into this service are public.

avilistr documentation built on June 17, 2025, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

avilistr
Access and Work with the 'AviList' Global Avian Checklist

inst/doc/getting-started.R
In avilistr: Access and Work with the 'AviList' Global Avian Checklist

Try the avilistr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

avilistr Access and Work with the 'AviList' Global Avian Checklist

inst/doc/getting-started.R In avilistr: Access and Work with the 'AviList' Global Avian Checklist

Try the avilistr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

avilistr
Access and Work with the 'AviList' Global Avian Checklist

inst/doc/getting-started.R
In avilistr: Access and Work with the 'AviList' Global Avian Checklist