#' Tidy commercial PBS CPUE data
#'
#' This function determines the qualifying "fleet" for CPUE analyses. It is
#' meant to be used with modern data available at the fishing event level.
#'
#' @param dat An input data frame from [gfdata::get_cpue_index_hl()].
#' @param species_common The species common name.
#' @param year_range The range of years to include.
#' @param alt_year_start_date Alternative year starting date specified as a
#' month-day combination. E.g. "03-01" for March 1st. Can be used to create
#' 'fishing years'.
#' @param use_alt_year Should the alternate year (e.g. fishing year) column
#' `alt_year` be used? If `FALSE` then the calendar year will be used. If this
#' is set to `TRUE` then the `year` column will be replaced with the
#' `alt_year` column.
#' @param lat_range The range of latitudes to include.
#' @param min_positive_trips The minimum number of annual positive trips.
#' @param min_yrs_with_trips The number of years in which the
#' `min_positive_trips` criteria needs to be met.
#' @param major_areas The fishery area(s) (see lookup table [get_management_areas()])
#' to include in data extraction (eg. '5A'; c('3C', '3D', '5A', '5B')).
#' @param lat_band_width The latitude bandwidths in degrees.
#' @param depth_band_width The depth band widths in m.
#' @param clean_bins Logical. Should the depth and latitude bands be rounded to
#' the nearest clean value as defined by `lat_band_width` or
#' `depth_band_width`? Internally, these use, for example:
#' `gfplot:::round_down_even(lat_range[1], lat_band_width)`.
#' @param depth_bin_quantiles Quantiles for the depth bands. If the cumulative
#' proportion of positive fishing events within a given depth bin is less then
#' the lower amount or greater than the upper amount then that depth bin will
#' be dropped.
#' @param min_bin_prop If the proportion of fishing events for any given factor
#' level is less than this value then that factor level will be dropped.
#' @param lat_bin_quantiles Quantiles for the latitude bands. Values above and
#' below these quantiles will be discarded.
#' @param gear One or more gear types as a character vector.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' d <- get_cpue_index(gear = "bottom trawl")
#' walleye <- tidy_cpue_index(d, "walleye pollock",
#' area_grep_pattern = "5[CDE]+")
#' }
tidy_cpue_index_hl <- function(dat, species_common,
year_range = c(1980, as.numeric(format(Sys.Date(), "%Y")) - 1),
alt_year_start_date = "04-01",
use_alt_year = FALSE,
min_positive_annual_trips = 5,
min_yrs_with_trips = 5,
area_grep_pattern = "^3C|^3D|^5A|^5B|^5C|^5D|^5E",
# lat_band_width = 0.1,
# depth_band_width = 25,
clean_bins = TRUE,
# depth_bin_quantiles = c(0.001, 0.999),
min_bin_prop = 0.001,
# lat_bin_quantiles = c(0, 1)
method = "by_trip"
) {
pbs_areas <- gfplot::pbs_areas[grep(
area_grep_pattern,
gfplot::pbs_areas$major_stat_area_description
), ]
names(dat) <- tolower(names(dat))
dat <- inner_join(dat, gfplot::pbs_species, by = "species_code")
if (use_alt_year) {
# create possibly alternate starting date:
if (alt_year_start_date != "01-01") {
dat <- dplyr::mutate(dat, .year_start_date =
lubridate::ymd_hms(paste0(year, "-", alt_year_start_date, " 00:00:00")))
dat <- dplyr::mutate(dat, .time_diff = best_date - .year_start_date)
dat <- dplyr::mutate(dat, alt_year = ifelse(.time_diff > 0, year, year - 1L))
dat <- dplyr::select(dat, -.time_diff, -.year_start_date)
}
dat$year <- NULL
dat$year <- dat$alt_year
dat$alt_year <- NULL
}
#
# dat$locality_code <- paste(dat$major_stat_area_code,
# dat$minor_stat_area_code, dat$locality_code, sep = "-")
#
# basic filtering:
catch <- dat %>%
inner_join(pbs_areas, by = "major_stat_area_code") %>%
mutate(catch = landed_kg + discarded_kg) %>%
filter(catch >0) %>% # why are there sometimes rows with a species name but 0 catch? Remove these to avoid erroneous pos_catch below.
filter(!is.na(vessel_registration_number))
# filtering if using effort based on trip level data (vessel, area)
if (method == "by_trip"){
catch <- catch %>%
group_by(.data$fishery_sector,
trip_id,
vessel_registration_number) %>%
mutate(year = min(year),
#alt_year = min(alt_year),
month = min(month)) %>%
group_by(fishery_sector,
trip_id,
year,
month,
vessel_registration_number,
species_code,
.data$species_scientific_name,
species_common_name) %>%
dplyr::summarize(
catch = sum(catch),
landed_kg = sum(landed_kg),
discarded_kg = sum(discarded_kg)) %>%
ungroup()
}
# # filtering if using effort based on fishing event id level instead
# # of trip level data
#
# filter(year >= year_range[[1]] & year <= year_range[[2]]) %>%
# filter(!is.na(fe_start_date), !is.na(fe_end_date)) %>%
# filter(fe_start_date < fe_end_date)
# filter(!is.na(latitude), !is.na(longitude), !is.na(best_depth)) %>%
# filter(latitude >= lat_range[[1]] & latitude <= lat_range[[2]]) %>%
# group_by(fishing_event_id) %>%
# mutate(n_date = length(unique(best_date))) %>%
# filter(n_date < Inf) %>%
# select(-n_date) %>% # remove FE_IDs with multiple dates
# ungroup()
# catch <- dat %>%
#
# trip-level catch for target spp:
if (method == "by_trip"){
catch <- group_by(
catch, fishery_sector, year, month, vessel_registration_number, trip_id
) %>%
mutate(
spp_in_row = species_common_name == toupper(species_common)
) %>%
summarise(
# hours_fished = mean(hours_fished, na.rm = TRUE),
spp_catch = sum(ifelse(spp_in_row, catch, 0), na.rm = TRUE),
pos_catch = ifelse(spp_catch > 0, 1, 0))
ungroup()
}
# # fe-level catch for target spp:
# catch <- group_by(
# catch, fishing_event_id, best_date, month, locality_code,
# vessel_name, year, trip_id, hours_fished
# ) %>%
# mutate(
# spp_in_fe = toupper(species_common) %in% species_common_name,
# spp_in_row = species_common_name == toupper(species_common)
# ) %>%
# summarise(
# pos_catch = ifelse(spp_in_fe[[1]], 1, 0),
# # hours_fished = mean(hours_fished, na.rm = TRUE),
# spp_catch = sum(ifelse(spp_in_row, catch, 0), na.rm = TRUE),
# best_depth = mean(best_depth, na.rm = TRUE),
# latitude = mean(latitude, na.rm = TRUE)
# ) %>%
# ungroup()
#
# trip-level: figure out which vessels should be considered part of the spp. fleet
fleet <- catch %>%
group_by(vessel_registration_number, year) %>%
mutate(annual_positive_trips = sum(pos_catch)) %>%
filter(.data$annual_positive_trips >= min_positive_annual_trips) %>%
## threshold
#filter(spp_catch > 0) %>% # not necessary - 0 catch removed in basic filtering
# group_by(year, vessel_registration_number, trip_id) %>%
# summarise(sum_catch = sum(spp_catch, na.rm = TRUE)) %>%
# filter(sum_catch > 0) %>%
group_by(vessel_registration_number) %>%
mutate(n_years = length(unique(year))) %>%
# for speed (filter early known cases):
filter(n_years >= min_yrs_with_trips) %>%
ungroup()
fleet_catch <- semi_join(catch, select(fleet, vessel_registration_number)) %>%
summarise(catch = sum(spp_catch))
all_sr_catch <- catch %>%
summarise(catch = sum(spp_catch))
#
# # fe-level: figure out which vessels should be considered part of the spp. fleet
# fleet <- catch %>%
# group_by(vessel_name) %>%
# mutate(total_positive_tows = sum(pos_catch)) %>%
# filter(total_positive_tows >= min_positive_fe) %>%
# filter(spp_catch > 0) %>%
# group_by(year, vessel_name, trip_id) %>%
# summarise(sum_catch = sum(spp_catch, na.rm = TRUE)) %>%
# filter(sum_catch > 0) %>%
# group_by(vessel_name) %>%
# mutate(n_years = length(unique(year))) %>%
# # for speed (filter early known cases):
# filter(n_years >= min_yrs_with_trips) %>%
# group_by(year, vessel_name) %>%
# summarise(n_trips_per_year = length(unique(trip_id))) %>%
# mutate(
# trips_over_treshold_this_year =
# n_trips_per_year >= min_positive_trips
# ) %>%
# group_by(vessel_name) %>%
# summarise(trips_over_thresh = sum(trips_over_treshold_this_year)) %>%
# filter(trips_over_thresh >= min_yrs_with_trips) %>%
# ungroup()
#
#
# # retain the data from our "fleet"
# d_retained <- semi_join(catch, fleet, by = "vessel_name")
#
# if (nrow(d_retained) == 0) {
# warning("No vessels passed the fleet conditions.")
# return(NA)
# }
#
# lat_range <- stats::quantile(d_retained$latitude,
# probs = c(min(lat_bin_quantiles), max(lat_bin_quantiles))
# )
# # depth_range <- stats::quantile(d_retained$best_depth,
# # probs = c(min(depth_bin_quantiles), max(depth_bin_quantiles))
# # )
# depth_range <- vector(mode = "numeric", length = 2L)
#
# if (clean_bins) {
# lat_range[1] <- round_down_even(lat_range[1], lat_band_width)
# lat_range[2] <- round_up_even(lat_range[2], lat_band_width)
# depth_range[1] <- 0
# depth_range[2] <- 10000
# }
#
# d_retained <- filter(
# d_retained,
# latitude > lat_range[[1]],
# latitude < lat_range[[2]],
# best_depth > depth_range[[1]],
# best_depth < depth_range[[2]]
# )
# lat_bands <- seq(lat_range[[1]], lat_range[[2]], lat_band_width)
# depth_bands <- seq(depth_range[[1]], depth_range[[2]], depth_band_width)
#
# d_retained <- d_retained %>%
# filter(best_depth >= min(depth_bands) & best_depth <= max(depth_bands)) %>%
# mutate(
# depth = factor_bin_clean(best_depth, depth_bands),
# vessel = as.factor(vessel_name),
# latitude = factor_bin_clean(latitude, lat_bands),
# locality = as.factor(locality_code),
# year_factor = factor_clean(year),
# month = factor_clean(month)
# ) %>%
# mutate(pos_catch = ifelse(spp_catch > 0, 1, 0))
#
# # anonymize the vessels
# vessel_df <- dplyr::tibble(
# vessel = unique(d_retained$vessel),
# scrambled_vessel = factor_clean(seq_along(vessel))
# )
# d_retained <- left_join(d_retained, vessel_df, by = "vessel") %>%
# select(-vessel) %>%
# rename(vessel = scrambled_vessel)
#
# pos_catch_fleet <- filter(d_retained, .data$pos_catch == 1)
#
# # retain depth bins with enough data:
# tb <- table(pos_catch_fleet$depth)
# too_deep <- names(tb)[cumsum(tb)/sum(tb) > depth_bin_quantiles[2]]
# too_shallow <- names(tb)[cumsum(tb)/sum(tb) < depth_bin_quantiles[1]]
# d_retained <- filter(d_retained,
# !.data$depth %in% union(too_deep, too_shallow))
#
# too_few <- function(x) {
# tb <- table(pos_catch_fleet[[x]])
# names(tb)[tb/sum(tb) < min_bin_prop]
# }
#
# # not enough pos. data in bins?
# d_retained <- filter(d_retained, !.data$latitude %in% too_few("latitude"))
# d_retained <- filter(d_retained, !.data$month %in% too_few("month"))
# d_retained <- filter(d_retained, !.data$depth %in% too_few("depth"))
# d_retained <- filter(d_retained, !.data$locality %in% too_few("locality"))
# # vessel already known
#
# base_month <- get_most_common_level(pos_catch_fleet$month)
# base_depth <- get_most_common_level(pos_catch_fleet$depth)
# base_lat <- get_most_common_level(pos_catch_fleet$latitude)
# base_vessel <- get_most_common_level(pos_catch_fleet$vessel)
# base_locality <- get_most_common_level(pos_catch_fleet$locality)
#
# d_retained$locality <- stats::relevel(as.factor(d_retained$locality),
# ref = base_locality)
# d_retained$depth <- stats::relevel(as.factor(d_retained$depth),
# ref = base_depth)
# d_retained$latitude <- stats::relevel(as.factor(d_retained$latitude),
# ref = base_lat)
# d_retained$vessel <- stats::relevel(as.factor(d_retained$vessel),
# ref = base_vessel)
# d_retained$month <- stats::relevel(as.factor(d_retained$month),
# ref = base_month)
#
# d_retained <- droplevels(d_retained)
#
# d_retained <- mutate(d_retained, cpue = .data$spp_catch / .data$hours_fished)
#
# # make sure unique:
# d_retained <- mutate(d_retained,
# fishing_event_id = paste(year, trip_id, fishing_event_id, sep = "-"))
#
# arrange(d_retained, .data$year, .data$vessel) %>%
# dplyr::as_tibble()
}
get_most_common_level <- function(x) {
rev(names(sort(table(x))))[[1]]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.