#' Reference to eBird sampling event file
#'
#' Create a reference to an eBird sampling event file in preparation for
#' filtering using AWK. For working with the sightings data use `auk_ebd()`,
#' only use `auk_sampling()` if you intend to only work with checklist-level
#' data.
#'
#' @param file character; input sampling event data file, which contains
#' checklist data from eBird.
#' @param sep character; the input field separator, the eBird data are tab
#' separated so this should generally not be modified. Must only be a single
#' character and space delimited is not allowed since spaces appear in many of
#' the fields.
#'
#' @details eBird data can be downloaded as a tab-separated text file from the
#' [eBird website](http://ebird.org/ebird/data/download) after submitting a
#' request for access. In the eBird Basic Dataset (EBD) each row corresponds
#' to a observation of a single bird species on a single checklist, while the
#' sampling event data file contains a single row for every checklist. This
#' function creates an R object to reference only the sampling data.
#'
#' @return An `auk_sampling` object storing the file reference and the desired
#' filters once created with other package functions.
#' @export
#' @family objects
#' @examples
#' # get the path to the example data included in the package
#' # in practice, provide path to the sampling event data
#' # e.g. f <- "data/ebd_sampling_relFeb-2018.txt"
#' f <- system.file("extdata/zerofill-ex_sampling.txt", package = "auk")
#' auk_sampling(f)
auk_sampling <- function(file, sep = "\t") {
# checks
assertthat::assert_that(
assertthat::is.string(sep), nchar(sep) == 1, sep != " "
)
file <- ebd_file(file)
# read header rows
header <- tolower(get_header(file, sep))
header <- stringr::str_replace_all(header, "_", " ")
col_idx <- data.frame(id = NA_character_,
name = header,
index = seq_along(header),
stringsAsFactors = FALSE)
# check column name for protocol column
protocol_col_name <- "protocol name"
if (!protocol_col_name %in% header) {
protocol_col_name <- "protocol type"
}
# check column name for project column
project_col_name <- "project names"
if (!project_col_name %in% header) {
project_col_name <- "project code"
}
# ensure key columns are present
mandatory <- c("country code", "state code",
"latitude", "longitude",
"observation date", "time observations started",
protocol_col_name,
"duration minutes", "effort distance km",
"all species reported",
"sampling event identifier", "group identifier")
col_miss <- mandatory[!(mandatory %in% header)]
if (length(col_miss) > 0) {
m <- sprintf("Required columns missing from the sampling file:\n\t%s",
paste(col_miss, collapse = "\n\t"))
stop(m)
}
# identify columns required for filtering
filter_cols <- data.frame(
id = c("country", "state", "county", "bcr",
"lat", "lng",
"date", "time", "last_edited",
"protocol", "project",
"duration", "distance",
"complete",
"observer"),
name = c("country code", "state code", "county code", "bcr code",
"latitude", "longitude",
"observation date", "time observations started",
"last edited date",
protocol_col_name, project_col_name,
"duration minutes", "effort distance km",
"all species reported",
"observer id"),
stringsAsFactors = FALSE)
filter_cols <- filter_cols[filter_cols$name %in% col_idx$name, ]
col_idx$id[match(filter_cols$name, col_idx$name)] <- filter_cols$id
# output
structure(
list(
file = normalizePath(file),
output = NULL,
col_idx = col_idx,
filters = list(
country = character(),
state = character(),
county = character(),
bbox = numeric(),
year = integer(),
date = character(),
time = character(),
last_edited = character(),
protocol = character(),
project = character(),
duration = numeric(),
distance = numeric(),
complete = FALSE,
observer = character()
)
),
class = "auk_sampling"
)
}
#' @export
print.auk_sampling <- function(x, ...) {
cat("Input \n")
cat(paste(" Sampling events:", x$file, "\n"))
cat("\n")
cat("Output \n")
if (is.null(x$output)) {
cat(" Filters not executed\n")
} else {
cat(paste(" Sampling events:", x$output, "\n"))
}
cat("\n")
cat("Filters \n")
# country filter
cat(" Countries: ")
if (length(x$filters$country) == 0) {
cat("all")
} else if (length(x$filters$country) <= 10) {
cat(paste(x$filters$country, collapse = ", "))
} else {
cat(paste0(length(x$filters$country), " countries"))
}
cat("\n")
# state filter
cat(" States: ")
if (length(x$filters$state) == 0) {
cat("all")
} else if (length(x$filters$state) <= 10) {
cat(paste(x$filters$state, collapse = ", "))
} else {
cat(paste0(length(x$filters$state), " states"))
}
cat("\n")
# state filter
cat(" Counties: ")
if (length(x$filters$county) == 0) {
cat("all")
} else if (length(x$filters$county) <= 10) {
cat(paste(x$filters$county, collapse = ", "))
} else {
cat(paste0(length(x$filters$county), " counties"))
}
cat("\n")
# bbox filter
cat(" Bounding box: ")
e <- round(x$filters$bbox, 1)
if (length(e) == 0) {
cat("full extent")
} else {
cat(paste0("Lon ", e[1], " - ", e[3], "; "))
cat(paste0("Lat ", e[2], " - ", e[4]))
}
cat("\n")
# year filter
cat(" Years: ")
if (length(x$filters$year) == 0) {
cat("all")
} else if (length(x$filters$year) <= 10) {
cat(paste(x$filters$year, collapse = ", "))
} else {
cat(paste0(length(x$filters$year), " years"))
}
cat("\n")
# date filter
cat(" Date: ")
if (length(x$filters$date) == 0) {
cat("all")
} else {
cat(paste0(x$filters$date[1], " - ", x$filters$date[2]))
}
cat("\n")
# time filter
cat(" Start time: ")
if (length(x$filters$time) == 0) {
cat("all")
} else {
cat(paste0(x$filters$time[1], "-", x$filters$time[2]))
}
cat("\n")
# last edited date filter
cat(" Last edited date: ")
if (length(x$filters$last_edited) == 0) {
cat("all")
} else {
cat(paste0(x$filters$last_edited[1], " - ", x$filters$last_edited[2]))
}
cat("\n")
# protocol filter
cat(" Protocol: ")
if (length(x$filters$protocol) == 0) {
cat("all")
} else {
cat(paste(x$filters$protocol, collapse = ", "))
}
cat("\n")
# project filter
cat(" Project code: ")
if (length(x$filters$project) == 0) {
cat("all")
} else {
cat(paste(x$filters$project, collapse = ", "))
}
cat("\n")
# duration filter
cat(" Duration: ")
if (length(x$filters$duration) == 0) {
cat("all")
} else {
cat(paste0(x$filters$duration[1], "-", x$filters$duration[2], " minutes"))
}
cat("\n")
# distance filter
cat(" Distance travelled: ")
if (length(x$filters$distance) == 0) {
cat("all")
} else {
cat(paste0(x$filters$distance[1], "-", x$filters$distance[2], " km"))
}
cat("\n")
# complete checklists only
cat(" Complete checklists only: ")
if (x$filters$complete) {
cat("yes")
} else {
cat("no")
}
cat("\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.