library(tidyverse)
library(ggnewscale)
library(MetBrewer)
library(DT)

# Write tibble names to list for function compatibility
tibblelist <-
  list(ref_data, daily, season) |> 
  set_names(c("all_seasons", "daily_records", "season_totals"))

Introduction

This is the r params$year season report for Harvest Survey online data.

Check for missing data

For the season data (including daily records), what species are missing per state?

datatable(
  nodata(
    data = get(names(tibblelist[3])), 
    ref_data = get(names(tibblelist[1])), 
    species = "all",
    report = TRUE) |>
    filter(sampled_state != "PR"), 
  options = list(pageLength = 5))

Hunters who submitted

Season submitted

How many hunters submitted in the season data? Daily records excluded and data are only shown if days_hunted > 0.

Total

knitr::kable(
  submitted(get(names(tibblelist[3]))) |> 
    rename(`Has submitted?` = has_submitted) |> 
    mutate(prop = round(n/sum(n), 2)))

By species

submitted(get(names(tibblelist[3])), type = "species", output = "plot") +
  scale_fill_manual(values = met.brewer("Hokusai3", 2)) + 
  theme(legend.position = "bottom")

Daily submitted

How many hunters submitted in the daily data? Records must have answered "yes" to has hunted.

Total

knitr::kable(
  submitted(get(names(tibblelist[2]))) |> 
    rename(`Has submitted?` = has_submitted) |> 
    mutate(prop = round(n/sum(n), 2)))

By species

submitted(get(names(tibblelist[2])), type = "species", output = "plot") +
  scale_fill_manual(values = met.brewer("Hokusai3", 2)) + 
  theme(legend.position = "bottom")

Has hunted?

Season

How many hunters hunted in the season data? Daily records have been excluded.

Total

knitr::kable(
  hunted(get(names(tibblelist[3]))) |> 
    mutate(
    days_hunted = 
      ifelse(
        str_detect(days_hunted, "NULL"), 
        NA, 
        days_hunted),
    has_hunted = ifelse(days_hunted > 0, "Y", "N")) |> 
    group_by(has_hunted) |> 
    summarize(sum_n = sum(n)) |> 
    ungroup() |> 
    rename(
      `Has hunted?` = has_hunted,
      n = sum_n) |> 
    mutate(prop = round(n/sum(n), 2)))

By species

hunted(get(names(tibblelist[3])), type = "species", output = "plot") +
  scale_fill_manual(values = met.brewer("Hokusai3", 2)) + 
  theme(legend.position = "bottom")

Party hunts and erroneous take

Identify party hunts

Here are the manual corrections done to party hunt retrieved values with the partyproof function:

proofed_parties <- read_csv(params$partypath)

datatable(
  proofed_parties |> 
    select(-new_retrieved) |> 
    rename(new_retrieved = retrieved), 
  options = list(pageLength = 5))

Check for overdays

Are there any hunts submitted with an impossible number of days hunted?

Season overdays

datatable(
  overdays(get(names(tibblelist[3])), get(names(tibblelist[1])))
)

Daily overdays

datatable(
  overdays(get(names(tibblelist[2])), get(names(tibblelist[1]))) 
)

Check for overbags

Season overbags

What values are over the limit per species and state in the season totals data? Total retrieved values are divided by number of days hunted. Values over the daily limit per state and species are shown below.

datatable(
  overbags(get(names(tibblelist[3])), get(names(tibblelist[1]))) 
)

Daily overbags

What values in the retrieved field of daily data were over the limit per species and state?

overbags_tbl <- 
  migbirdHS::overbags(daily_records, all_seasons) |> 
  rename(
    state = sampled_state,
    sp = sp_group_estimated) |> 
  mutate(
    sp = ifelse(str_detect(sp, "Sea"), "Sea Ducks", sp)) |> 
  left_join(
    tibble(
      state = state.name,
      state_abbr = state.abb),
    by = "state") |> 
  mutate(bag_lim_label = "Bag limit") 

overbags_tbl |> 
  filter(!str_detect(sp, "Dove|Pigeon")) |> 
  ggplot() + 
  geom_point(aes(x = state_abbr, y = bag_limit, shape = bag_lim_label), 
             color = "red", size = 6) + 
  geom_boxplot(aes(x = state_abbr, y = retrieved), show.legend = F) +
  theme_classic() + 
  labs(y = "Number of birds retrieved", x = "State", shape = "") +
  scale_shape_manual(values = "-", labels = "Bag limit") +
  theme(legend.position = "bottom") +
  facet_grid(
    ~sp, 
    scales = "free", 
    space = "free", 
    labeller = label_wrap_gen(width = 5))

overbags_tbl |> 
  filter(str_detect(sp, "Dove|Pigeon")) |> 
  ggplot() + 
  geom_point(aes(x = state_abbr, y = bag_limit, shape = bag_lim_label), 
             color = "red", size = 6) + 
  geom_boxplot(aes(x = state_abbr, y = retrieved), show.legend = F) +
  theme_classic() + 
  labs(y = "Number of birds retrieved", x = "State", shape = "") +
  scale_shape_manual(values = "-", labels = "Bag limit") +
  theme(legend.position = "bottom") +
  facet_grid(
    ~sp, 
    scales = "free", 
    space = "free", 
    labeller = label_wrap_gen(width = 5))
datatable(
  overbags(get(names(tibblelist[2])), get(names(tibblelist[1])), summary = T) |> 
    rename(
      state = sampled_state,
      sp = sp_group_estimated) |> 
    mutate(
      sp = ifelse(str_detect(sp, "Sea"), "Sea Ducks", sp))
)

Overbags lookup table

datatable(
  overbags(get(names(tibblelist[2])), get(names(tibblelist[1])), summary = F) |> 
    rename(
      state = sampled_state,
      sp = sp_group_estimated) |> 
    mutate(
      sp = 
        case_when(
          str_detect(sp, "Sea") ~ "SeaDucks", 
          str_detect(sp, "Mourning") ~ "MODO",
          str_detect(sp, "White") ~ "WWDO",
          str_detect(sp, "Crane") ~ "Cranes",
          TRUE ~ sp) %>% 
        str_remove_all(., " ")) |> 
    pivot_wider(
      id_cols = 
        c("selected_hunterID", "surveyID", "state", "county", "harvested_date"),
      values_from = retrieved,
      names_from = sp) |> 
    mutate(over = "Y") |> 
    bind_rows(
      overbags(get(names(tibblelist[2])), get(names(tibblelist[1])), over = F) |> 
        rename(
          sp = sp_group_estimated,
          state = sampled_state) |> 
        mutate(
          sp = 
            case_when(
              str_detect(sp, "Sea") ~ "SeaDucks", 
              str_detect(sp, "Mourning") ~ "MODO",
              str_detect(sp, "White") ~ "WWDO",
              str_detect(sp, "Crane") ~ "Cranes",
              TRUE ~ sp) %>%
            str_remove_all(., " ")) |> 
        pivot_wider(
          id_cols = 
            c("selected_hunterID", "surveyID", "state", "county", "harvested_date"),
          values_from = retrieved,
          names_from = sp) |> 
        mutate(over = "N")) |> 
    relocate(over, .after = "harvested_date") |> 
    group_by(selected_hunterID) |> 
    mutate(n = n()) |> 
    ungroup() |> 
    filter(n >= 2) |> 
    group_by(selected_hunterID) |> 
    filter("Y" %in% over) |> 
    ungroup() |> 
    arrange(selected_hunterID) |> 
    rename(
      hunterID = selected_hunterID,
      harvest_date = harvested_date) |> 
    left_join(
      tibble(
        st = state.abb,
        state = state.name),
      by = "state") |> 
    relocate(st, .before = "county") |> 
    select(-c("state", "n")) |> 
    arrange(hunterID, harvest_date) |> 
    group_by(hunterID) |> 
    mutate(
      color = 
        ifelse(
          cur_group_id() %% 2 == 0,
          "odd",
          "even"),
      ID = 
        row_number()) |> 
    ungroup() |> 
    relocate(ID, .before = "hunterID"), 
  extensions = "Buttons",
  options = 
    list(
      dom = "Bfrti",
      buttons = c("csv", "excel", "pdf"),
      ordering = F, 
      pageLength = -1,
      columnDefs = list(list(visible = F, targets = c(13))),
      scrollX = "1100px",
      scrollY = "500px"),
  width = "1100px",
  height = "500px") |> 
  formatStyle(
    "color",
    target = "row",
    backgroundColor = styleEqual(c("odd", "even"), c("white", "lightgray"))) |> 
  formatStyle(
    "over",
    backgroundColor = styleEqual("Y", "pink")) 

\
\
\
\
\
\
\
\
\
\
\
\
\
\
\ \ \ \ \

Identify out-of-season take

What harvest dates in the daily data fell outside of the legal hunting season per species and state?

openclose(get(names(tibblelist[2])), get(names(tibblelist[1])), summary = T) |> 
  rename(
    sp = sp_group_estimated,
    state = sampled_state) |> 
  left_join(
    tibble(
      state = state.name,
      state_abbr = state.abb),
    by = "state") |> 
  mutate(prop = n/sum(n)) |> 
  group_by(state) |> 
  mutate(
    sum_n = sum(n),
    sum_prop = sum(prop)) |> 
  ungroup() |> 
  ggplot() + 
  geom_bar(
    aes(x = reorder(state_abbr, -sum_prop), y = prop, fill = error), 
    stat = "identity") + 
  geom_text(
    aes(x = reorder(state_abbr, -sum_prop), 
        y = sum_prop, 
        label = sum_n, 
        angle = 90),
    vjust = 0.2, hjust = -0.2) +
  scale_y_continuous(expand = expansion(mult = c(-0, 0.4))) +
  theme_classic() + 
  labs(
    y = "Proportion", 
    x = "State", 
    fill = "Error type") +
  theme(legend.position = "bottom") +
  scale_fill_manual(values = met.brewer("Hokusai3", 3))
datatable(openclose(get(names(tibblelist[2])), get(names(tibblelist[1])), summary = T))

Error summary

Season data can include errors such as overbags (bag value over what is the legal limit), overdays (days hunted more than what is possible during the regular season), or both. Errors detected in daily data may include overbags, overdays, early hunts (hunting entry recorded before the season opens), and/or late hunts (hunting entry recorded after the season ended).

party_proofed <-
  get(names(tibblelist[2])) |> 
  left_join(
    proofed_parties |> 
      select(record_id, new_retrieved)) |> 
  mutate(
    retrieved = 
      ifelse(
        !is.na(new_retrieved),
        new_retrieved,
        retrieved
      )
  ) |> 
  select(-new_retrieved)

# Proof the daily data
daily_proofed <- proofHS(party_proofed, get(names(tibblelist[1])))
rm(proofed_parties)

# Proof the season data
season_proofed <- proofHS(get(names(tibblelist[3])), get(names(tibblelist[1])))

Season

errorsHS <-
  function(data){
    data |> 
      select(errors) |> 
      filter(errors != "none") |> 
      mutate(errors = ifelse(str_detect(errors, "-"), "2+ errors", errors)) |> 
      group_by(errors) |> 
      summarize(n = n()) |> 
      ungroup() |> 
      mutate(prop = n/nrow(data |> filter(errors != "none"))) |> 
      ggplot() +
      geom_bar(aes(x = errors, y = prop), stat = "identity") + 
      geom_text(aes(x = errors, y = prop, label = n), nudge_y = .05) +
      labs(y = "Proportion", x = "Errors") +
      theme_classic()
  }

errorsHS(season_proofed)

Daily

errorsHS(daily_proofed)

Retrieved

Retrieved map

The plots below show average retrieved values per state and species using the proofed Harvest Survey daily data.

retrievedmap(daily_proofed)

Birds retrieved

For the daily data, how many birds were retrieved on average per hunter? The figure and table below use the proofed data.

retrieved(daily_proofed, output = "plot", average = T) +
  theme(legend.position = "bottom") +
  scale_fill_manual(
    values = 
      met.brewer("Hokusai3", 
                 length(unique(daily_proofed$sp_group_estimated))))

datatable(retrieved(daily_proofed, average = T))

Popular species

What is the proportion of birds that were successfully hunted? Each hunter's pursued species were grouped for this visualization. The proofed daily data are used.

bagspp(daily_proofed, output = "success")

Days hunted

In the daily data, how many days were spent hunting per species on average for each hunter? The proofed data are used.

huntdays(daily_proofed, type = "species", output = "plot", average = T)

How about by state?

huntdays(daily_proofed, output = "plot", average = T) +
  theme(legend.position = "bottom") + 
  scale_fill_manual(
    values = met.brewer("Hokusai3",
                        length(unique(daily_proofed$sp_group_estimated))))
datatable(huntdays(daily_proofed, average = T))

Comparing submitted vs. non-submit

What is the relationship between number of birds retrieved and number of days spent hunting when data are divided into 4 submission groups? The proofed daily and season data are used in the figures below. Daily retrieved values and days spent hunting were summed for direct comparison to season data.

compare(daily_proofed, season_proofed, type = "line") +
  # Added for line clarity
  new_scale_color() +
  stat_smooth(
    aes(color = type), method = "lm", size = 1, alpha = 0.3) +
  scale_color_manual(
    name = "Type",
    labels = c("Daily non-submit", "Daily submit", "Season non-submit", 
               "Season submit"),
    values = c("orange", "green", "blue", "purple"))

What is the distribution of number of days hunted per submission group? Bubbles represent number of birds retrieved and red lines indicate mean values. Each bubble is a hunter's record per state and species.

compare(daily_proofed, season_proofed, type = "days")

What is the distribution of number of birds retrieved per submission group? Bubbles represent number of days hunted and red lines indicate mean values. Each bubble is a hunter's record per state and species.

compare(daily_proofed, season_proofed, type = "retrieved")


USFWS/migbirdMBHS documentation built on Feb. 20, 2024, 4:49 a.m.