#' Check for errors in daily and season data
#'
#' The \code{proofHS} function checks for overbag and overdays values in the Harvest Survey season data. In addition to overbag and overdays, daily data are checked for early and late hunts.
#'
#' @importFrom dplyr rename_all
#' @importFrom dplyr filter
#' @importFrom stringr str_detect
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr summarize
#' @importFrom dplyr ungroup
#' @importFrom dplyr left_join
#' @importFrom dplyr tibble
#' @importFrom dplyr rename
#' @importFrom dplyr bind_rows
#' @importFrom dplyr distinct
#' @importFrom stringr str_remove
#' @importFrom lubridate ymd
#'
#' @param data Daily or season data table
#' @param ref_data The reference data table that corresponds to the year of the daily data
#'
#' @export
#' @author Abby Walter, \email{abby_walter@@fws.gov}
#' @references \url{https://github.com/USFWS/migbirdHS}
proofHS <-
function(data, ref_data){
# {Part 1}
# Format over bags ref table
suppressWarnings(
ref_table <-
wrangle_ref(ref_data) |>
filter(!is.na(spp) & !is.na(bag)) |>
select(seasonyear, state = st, speciesgroup, spp, bag) |>
group_by(seasonyear, state, spp) |>
summarize(max_bag = max(bag, na.rm = T)) |>
ungroup() |>
left_join(
tibble(
state = datasets::state.abb,
sampled_state = datasets::state.name),
by = "state") |>
select(-c("state", "seasonyear")) |>
rename(sp_group_estimated = spp)
)
# Duplicate the "Doves" lines so they apply to MODO and WWDO
# Duplicate the "CootsGallinules" lines so they apply to Coots and
# Gallinules
special_table <-
ref_table |>
filter(sp_group_estimated == "MODO-WWDO") |>
mutate(sp_group_estimated = "Mourning Dove") |>
bind_rows(
ref_table |>
filter(sp_group_estimated == "MODO-WWDO") |>
mutate(sp_group_estimated = "White-Winged Dove")) |>
bind_rows(
ref_table |>
filter(sp_group_estimated == "CootsGallinules") |>
mutate(sp_group_estimated = "Coots")) |>
bind_rows(
ref_table |>
filter(sp_group_estimated == "CootsGallinules") |>
mutate(sp_group_estimated = "Gallinules"))
# Remove specialdates spp from the original dates df
ref_table <-
ref_table |>
filter(
!sp_group_estimated %in%
c("MODO-WWDO", "CootsGallinules")) |>
bind_rows(special_table) |>
distinct()
# {Part 2}
# Format over days ref table
suppressMessages(
dates <-
wrangle_ref(ref_data) |>
select(seasonyear, state = st, speciesgroup, open, close, spp) |>
filter(!is.na(spp) & !is.na(open) & !is.na(close)) |>
group_by(seasonyear, state, spp) |>
summarize(
open = min(ymd(open), na.rm = T),
close = max(ymd(close), na.rm = T)) |>
ungroup() |>
left_join(
tibble(
state = datasets::state.abb,
sampled_state = datasets::state.name),
by = "state") |>
select(-c("state", "seasonyear")) |>
rename(sp_group_estimated = spp) |>
# Calculate season length in days
mutate(season_length = as.numeric(close - open))
)
# Duplicate the "Doves" lines so they apply to MODO and WWDO
# Duplicate the "CootsGallinules" lines so they apply to Coots and
# Gallinules
specialdates <-
dates |>
filter(sp_group_estimated == "MODO-WWDO") |>
mutate(sp_group_estimated = "Mourning Dove") |>
bind_rows(
dates |>
filter(sp_group_estimated == "MODO-WWDO") |>
mutate(sp_group_estimated = "White-Winged Dove")) |>
bind_rows(
dates |>
filter(sp_group_estimated == "CootsGallinules") |>
mutate(sp_group_estimated = "Coots")) |>
bind_rows(
dates |>
filter(sp_group_estimated == "CootsGallinules") |>
mutate(sp_group_estimated = "Gallinules"))
# Remove specialdates spp from the original dates df
dates <-
dates |>
filter(
!sp_group_estimated %in%
c("MODO-WWDO", "CootsGallinules")) |>
bind_rows(specialdates) |>
distinct()
if(
nrow(
dates |>
group_by(sp_group_estimated, sampled_state) |>
filter(n() > 1)) > 0){
message(
paste0(
"Warning: More than one season length detected for a given species a",
"nd state."))
print(
dates |>
group_by(sp_group_estimated, sampled_state) |>
filter(n() > 1) |>
ungroup())
}
# {Part 3}
# Error flagging
if(str_detect(deparse(substitute(data)), "season") == TRUE |
str_detect(deparse(substitute(data)), "tibblelist\\[3\\]") == TRUE){
# Season data error flagging: overbags and overdays
season_errors <-
data |>
left_join(
ref_table,
by = c("sp_group_estimated", "sampled_state")) |>
left_join(
dates,
by = c("sp_group_estimated", "sampled_state")) |>
mutate(
days_hunted =
ifelse(
str_detect(days_hunted, "NULL"),
NA,
as.numeric(days_hunted)),
retrieved =
ifelse(
str_detect(retrieved, "NULL"),
NA,
as.numeric(retrieved)),
bag_per_day =
ifelse(
days_hunted > 0,
round(as.numeric(retrieved)/as.numeric(days_hunted), 1),
0),
errors = "x") |>
group_by(surveyID, selected_hunterID, sp_group_estimated) |>
mutate(
sum_retrieved = sum(retrieved, na.rm = T),
avg_retrieved_over_season = sum_retrieved/season_length) |>
ungroup() |>
# Flag overbags
mutate(
errors =
case_when(
avg_retrieved_over_season > max_bag ~
paste(errors, "overbag", sep = "-"),
is.na(max_bag) ~ errors,
TRUE ~ errors),
overbag =
ifelse(
avg_retrieved_over_season > max_bag,
round(avg_retrieved_over_season - max_bag, 2),
NA)) |>
# Flag overdays
mutate(
errors =
case_when(
days_hunted > season_length ~ paste(errors, "overdays", sep = "-"),
is.na(season_length) ~ errors,
TRUE ~ errors),
overday =
ifelse(
days_hunted > season_length,
as.numeric(days_hunted) - as.numeric(season_length),
NA)) |>
# Remove the x from errors
mutate(
errors =
ifelse(
errors == "x" | (days_hunted == 0 & retrieved == 0) |
(is.na(days_hunted) & is.na (retrieved)),
"none",
str_remove(errors, "^x\\-"))) |>
select(
-c("max_bag", "season_length", "bag_per_day",
"avg_retrieved_over_season"))
if(nrow(filter(season_errors, is.na(errors))) > 0){
message("Warning: Not all species matched.")
print(
season_errors |>
filter(is.na(errors)) |>
select(surveyID, sampled_state, sp_group_estimated, retrieved)
)
}
return(season_errors)
}else if(str_detect(deparse(substitute(data)), "daily|party") == TRUE){
# Daily data error flagging: overbags, overdays, early hunts, late hunts
daily_errors <-
data |>
left_join(
ref_table,
by = c("sp_group_estimated", "sampled_state")) |>
left_join(
dates,
by = c("sp_group_estimated", "sampled_state")) |>
group_by(selected_hunterID, sampled_state, sp_group_estimated) |>
mutate(n_days = n()) |>
ungroup() |>
mutate(
harvested_date = ymd(harvested_date),
errors = "x",
overbag = NA,
overday = NA,
early = NA,
late = NA) |>
# Flag overbags
mutate(
errors =
case_when(
# Overbag error if max_bag is exceeded
retrieved > max_bag ~ paste(errors, "overbag", sep = "-"),
is.na(max_bag) ~ errors,
TRUE ~ errors),
overbag =
ifelse(
retrieved > max_bag,
retrieved - max_bag,
NA)) |>
# Flag overdays
mutate(
errors =
case_when(
n_days > season_length ~ paste(errors, "overdays", sep = "-"),
is.na(season_length) ~ errors,
TRUE ~ errors),
overday =
ifelse(
n_days > season_length,
n_days - season_length,
NA)) |>
# Flag early hunts
mutate(
errors =
case_when(
harvested_date < open ~ paste(errors, "early_hunt", sep = "-"),
is.na(open) ~ errors,
TRUE ~ errors),
early =
ifelse(
harvested_date < open,
open - harvested_date,
NA)) |>
# Flag late hunts
mutate(
errors =
case_when(
harvested_date > close ~ paste(errors, "late_hunt", sep = "-"),
is.na(close) ~ errors,
TRUE ~ errors),
late =
ifelse(
harvested_date > close,
harvested_date - close,
NA)) |>
# Join in seaduck/brant reference table
left_join(
seaduck_counties |>
select(sampled_state = STATE, county = COUNTY, brant, seaduck) |>
filter(county != "Unknown"),
by = c("sampled_state", "county")
) |>
# Flag non-seaduck counties
mutate(
errors =
case_when(
is_SeaDuck == "Y" & seaduck == "N" ~ paste(errors, "badseaduck", sep = "-"),
is.na(seaduck) ~ errors,
TRUE ~ errors)) |>
# Flag non-brant counties
mutate(
errors =
case_when(
is_Brant == "Y" & brant == "N" ~ paste(errors, "badbrant", sep = "-"),
is.na(brant) ~ errors,
TRUE ~ errors)) |>
# Remove the x from errors
mutate(
errors =
ifelse(
errors == "x" | (n_days == 0 & retrieved == 0),
"none",
str_remove(errors, "^x\\-"))) |>
# Per hunter, state, and species group, calculate a col for each of:
# - total overbag
# - total number of days w/ overbag
# - 2 or more days with 2 or more overbag
group_by(selected_hunterID, sampled_state, county, sp_group_estimated) |>
mutate(
total_overbag = sum(overbag, na.rm = T),
total_days_with_overbag = length(overbag[!is.na(overbag)]),
twoplus_overbag = ifelse(overbag >= 2, "Y", NA),
twoplus_days_with_twoplus_overbag =
ifelse(
total_days_with_overbag >= 2 &
length(twoplus_overbag[!is.na(twoplus_overbag)]) >= 2,
"Y",
NA
)
) |>
ungroup() |>
select(
-c("max_bag", "season_length", "n_days", "open", "close", "brant",
"seaduck", "twoplus_overbag"))
if(nrow(filter(daily_errors, is.na(errors))) > 0){
message("Warning: Not all species matched.")
print(
daily_errors |>
filter(is.na(errors)) |>
select(surveyID, sampled_state, sp_group_estimated, retrieved)
)
}
return(daily_errors)
}else{
message(
"Error: `data` must be season_totals or daily_records data frame.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.