inst/doc/auk.R

## ---- echo = FALSE------------------------------------------------------------
# Global knitr chunk options for the whole document, followed by attaching the
# two packages used throughout (auk first, then dplyr) without their startup
# messages.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  error = FALSE,
  message = FALSE
)
suppressPackageStartupMessages({
  library(auk)
  library(dplyr)
})

## ----quickstart, eval = FALSE-------------------------------------------------
#  library(auk)
#  # path to the ebird data file, here a sample included in the package
#  # in practice, provide path to ebd, e.g. input_file <- "data/ebd_relFeb-2018.txt"
#  input_file <- system.file("extdata/ebd-sample.txt", package = "auk")
#  # output text file
#  output_file <- "ebd_filtered_grja.txt"
#  ebird_data <- input_file %>%
#    # 1. reference file
#    auk_ebd() %>%
#    # 2. define filters
#    auk_species(species = "Canada Jay") %>%
#    auk_country(country = "Canada") %>%
#    # 3. run filtering
#    auk_filter(file = output_file) %>%
#    # 4. read text file into r data frame
#    read_ebd()

## ----quickstart-nopipes, eval = FALSE-----------------------------------------
#  input_file <- system.file("extdata/ebd-sample.txt", package = "auk")
#  output_file <- "ebd_filtered_grja.txt"
#  ebd <- auk_ebd(input_file)
#  ebd_filters <- auk_species(ebd, species = "Canada Jay")
#  ebd_filters <- auk_country(ebd_filters, country = "Canada")
#  ebd_filtered <- auk_filter(ebd_filters, file = output_file)
#  ebd_df <- read_ebd(ebd_filtered)

## ----example-data-1, eval = FALSE---------------------------------------------
#  library(auk)
#  library(dplyr)
#  system.file("extdata/ebd-sample.txt", package = "auk")

## ----example-data-2, eval = FALSE---------------------------------------------
#  # ebd
#  system.file("extdata/zerofill-ex_ebd.txt", package = "auk")
#  # sampling event data
#  system.file("extdata/zerofill-ex_sampling.txt", package = "auk")

## ----auk-ebd------------------------------------------------------------------
# reference the sample ebd file shipped with the package
ebd <- auk_ebd(system.file("extdata/ebd-sample.txt", package = "auk"))
ebd

## ----auk-filter---------------------------------------------------------------
# define the filters one step at a time, each call building on the last
# species: common and scientific names can be mixed
ebd_filters <- auk_species(
  ebd,
  species = c("Canada Jay", "Cyanocitta cristata")
)
# country: codes and names can be mixed; case insensitive
ebd_filters <- auk_country(
  ebd_filters,
  country = c("US", "Canada", "mexico")
)
# bbox: long and lat in decimal degrees
# formatted as `c(lng_min, lat_min, lng_max, lat_max)`
ebd_filters <- auk_bbox(ebd_filters, bbox = c(-100, 37, -80, 52))
# date: use standard ISO date format `"YYYY-MM-DD"`
ebd_filters <- auk_date(ebd_filters, date = c("2012-01-01", "2012-12-31"))
# time: 24h format
ebd_filters <- auk_time(ebd_filters, start_time = c("06:00", "09:00"))
# duration: length in minutes of checklists
ebd_filters <- auk_duration(ebd_filters, duration = c(0, 60))
# complete: all species seen or heard are recorded
ebd_filters <- auk_complete(ebd_filters)
ebd_filters

## ----auk-complete, eval = FALSE-----------------------------------------------
#  output_file <- "ebd_filtered_blja-grja.txt"
#  ebd_jays <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
#    auk_ebd() %>%
#    auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
#    auk_country(country = "Canada") %>%
#    auk_filter(file = output_file)

## ----read---------------------------------------------------------------------
# read the bundled sample file and show a compact overview of its columns
glimpse(
  read_ebd(
    system.file("extdata/ebd-sample.txt", package = "auk")
  )
)

## ----read-auk-ebd, eval = FALSE-----------------------------------------------
#  output_file <- "ebd_filtered_blja-grja.txt"
#  ebd_df <- system.file("extdata/ebd-sample.txt", package = "auk") %>%
#    auk_ebd() %>%
#    auk_species(species = c("Canada Jay", "Cyanocitta cristata")) %>%
#    auk_country(country = "Canada") %>%
#    auk_filter(file = output_file) %>%
#    read_ebd()

## ----awk-script---------------------------------------------------------------
# set up species and country filters on the sample file, then have auk_filter()
# write the AWK script to "awk-script.txt" with execute = FALSE
awk_script <- auk_filter(
  auk_country(
    auk_species(
      auk_ebd(system.file("extdata/ebd-sample.txt", package = "auk")),
      species = c("Canada Jay", "Cyanocitta cristata")
    ),
    country = "Canada"
  ),
  awk_file = "awk-script.txt",
  execute = FALSE
)
# read back in and prepare for printing
awk_file <- readLines(awk_script)
# the script file is no longer needed once its lines are in memory
unlink("awk-script.txt")
# drop empty / whitespace-only lines and print the rest, one per line
cat(
  paste0(
    awk_file[!grepl("^[[:space:]]*$", awk_file)],
    collapse = "\n"
  )
)

## ----auk-unique---------------------------------------------------------------
# read in an ebd file, keeping duplicates (unique = FALSE)
ebd_dupes <- read_ebd(
  system.file("extdata/ebd-sample.txt", package = "auk"),
  unique = FALSE
)
# remove the duplicates as a separate, explicit step
ebd_unique <- auk_unique(ebd_dupes)
# row counts before and after de-duplication
nrow(ebd_dupes)
nrow(ebd_unique)

## ----auk-rollup---------------------------------------------------------------
# read in sample data with the automatic roll up switched off
ebd <- read_ebd(
  system.file("extdata/ebd-rollup-ex.txt", package = "auk"),
  rollup = FALSE
)
# apply roll up as an explicit step
ebd_ru <- auk_rollup(ebd)

# all taxa not identifiable to species are dropped
# taxa below species have been rolled up to species
unique(ebd[["category"]])
unique(ebd_ru[["category"]])

# yellow-rump warbler subspecies rollup
# without rollup, there are three observations
select(
  filter(ebd, common_name == "Yellow-rumped Warbler"),
  checklist_id, category, common_name, subspecies_common_name,
  observation_count
)
# with rollup, they have been combined
select(
  filter(ebd_ru, common_name == "Yellow-rumped Warbler"),
  checklist_id, category, common_name, observation_count
)

## ----ebd-zf-------------------------------------------------------------------
# to produce zero-filled data, provide an EBD and sampling event data file
f_ebd <- system.file("extdata/zerofill-ex_ebd.txt", package = "auk")
f_smp <- system.file("extdata/zerofill-ex_sampling.txt", package = "auk")
# reference both files, then add the filters one call at a time
filters <- auk_ebd(f_ebd, file_sampling = f_smp)
filters <- auk_species(filters, species = "Collared Kingfisher")
filters <- auk_time(filters, start_time = c("06:00", "10:00"))
filters <- auk_complete(filters)
filters

## ----zf-filter-fake, echo = FALSE---------------------------------------------
# needed to allow building vignette on machines without awk
# Hidden chunk (echo = FALSE): copy the filter definition and fill in the two
# output paths by hand, using the same file names that are passed to
# auk_filter() in the next chunk.
ebd_sed_filtered <- filters
ebd_sed_filtered$output <- "ebd-filtered.txt"
ebd_sed_filtered$output_sampling <- "sampling-filtered.txt"

## ----zf-filter, eval = -1-----------------------------------------------------
# Filter the EBD and the sampling event data together, writing each to its own
# output file.
# NOTE(review): `eval = -1` presumably makes knitr skip the first expression
# (the auk_filter() call) when rendering, so only the print below runs there;
# in this extracted script the call is not commented out and will execute.
ebd_sed_filtered <- auk_filter(filters, 
                               file = "ebd-filtered.txt",
                               file_sampling = "sampling-filtered.txt")
ebd_sed_filtered

## ----auk-zf-fake, echo = FALSE------------------------------------------------
# needed to allow building vignette on machines without awk
# filter in R to fake AWK call: keep complete checklists that started inside
# the time window stored in `filters`, and (for the EBD only) rows whose
# scientific name is among the filtered species
fake_ebd <- subset(
  read_ebd(f_ebd),
  time_observations_started >= filters$filters$time[1] &
    time_observations_started <= filters$filters$time[2] &
    scientific_name %in% filters$filters$species &
    all_species_reported
)
fake_smp <- subset(
  read_sampling(f_smp),
  time_observations_started >= filters$filters$time[1] &
    time_observations_started <= filters$filters$time[2] &
    all_species_reported
)
# zero-fill directly from the two in-memory data frames
ebd_zf <- auk_zerofill(fake_ebd, fake_smp)

## ----auk-zf, eval = -1--------------------------------------------------------
# zero-fill starting from the filtered auk_ebd object, then print the result
ebd_zf <- auk_zerofill(ebd_sed_filtered)
ebd_zf

## ----zf-components------------------------------------------------------------
# inspect the two components of the zero-filled object: the first rows of the
# observations and an overview of the sampling events
head(ebd_zf[["observations"]])
glimpse(ebd_zf[["sampling_events"]])

## ----zf-collapse, eval = -1---------------------------------------------------
# Two ways of getting the zero-filled data as a single object:
# 1. ask auk_zerofill() to collapse directly; the input is the filtered
#    auk_ebd object created earlier in this script (`ebd_sed_filtered`)
ebd_zf_df <- auk_zerofill(ebd_sed_filtered, collapse = TRUE)
# 2. collapse the already zero-filled object `ebd_zf`; this assignment
#    overwrites the result of the line above
ebd_zf_df <- collapse_zerofill(ebd_zf)
# show the class of the collapsed result, then the result itself
class(ebd_zf_df)
ebd_zf_df

Try the auk package in your browser

Any scripts or data that you put into this service are public.

auk documentation built on Nov. 14, 2023, 5:10 p.m.