#' Get all data (includes some IPHC data that will get over-written
#' in Survey biomass indices section of make-pages.R)
#' @param type A or B
#' @param path Path
#' @param compress TRUE or FALSE to compress .rds
#' @param force Should data be downloaded even if already cached data exists?
#' @param sleep System sleep in seconds between each species
#' to be nice to the server.
#' @export
get_data <- function(
type = c("A", "B"), path = ".",
compress = FALSE, force = FALSE, sleep = 20) {
dir.create(path, showWarnings = FALSE)
.d <- get_spp_names()
.d <- .d[.d$type %in% type, , drop = FALSE]
already_exists <- gsub("\\.rds", "", list.files(path))
if (!force) {
.d <- filter(.d, !spp_w_hyphens %in% already_exists)
}
if (nrow(.d) > 0L) {
for (i in seq_along(.d$species_code)) {
tryCatch(
{
gfdata::cache_pbs_data(
species = .d$species_code[i],
file_name = .d$spp_w_hyphens[i],
path = path, unsorted_only = FALSE, historical_cpue = FALSE,
survey_sets = TRUE, verbose = FALSE, compress = compress
)
},
error = function(e) e
)
Sys.sleep(sleep)
}
}
if (force || !file.exists(file.path(path, "cpue-index-dat.rds"))) {
.dat <- gfdata::get_cpue_index(gear = "bottom trawl", min_cpue_year = 1996)
saveRDS(.dat, file = file.path(path, "cpue-index-dat.rds"), compress = compress)
}
get_data_iphc(
type = type, path = file.path(path, "iphc"),
compress = compress, force = force
)
get_data_iphc_hook_with_bait(
path = file.path(path, "iphc"),
compress = compress, force = force
)
get_iphc_hook_data(path = file.path(path, "iphc"))
}
#' Get the IPHC data for all years, should get merged into get_data at some point
#' but before gfsynopsis meeting we don't want to re-get all the non-IPHC data
#' @param type A or B
#' @param path Path
#' @param compress TRUE or FALSE to compress .rds
#' @param force Should data be downloaded even if already cached data exists?
#' @export
#'
get_data_iphc <- function(
type = c("A", "B"), path = ".",
compress = FALSE, force = FALSE) {
dir.create(path, showWarnings = FALSE)
.d <- get_spp_names()
.d <- .d[.d$type %in% type, , drop = FALSE]
already_exists <- gsub("\\.rds", "", list.files(path))
if (!force) {
.d <- filter(.d, !spp_w_hyphens %in% already_exists)
}
if (nrow(.d) > 0L) {
gfiphc::cache_pbs_data_iphc(
species = .d$species_common_name,
file_name = .d$spp_w_hyphens,
path = path, compress = compress
)
}
}
#' Get the IPHC data for all years for hooks with bait (for hook competition
#' calculations.
#' @param path Path
#' @param compress TRUE or FALSE to compress .rds
#' @param force Should data be downloaded even if already cached data exists?
#' @export
#'
get_data_iphc_hook_with_bait <- function(
path = ".",
compress = FALSE, force = FALSE) {
dir.create(path, showWarnings = FALSE)
if (!force & file.exists(file.path(path, "hook-with-bait.rds"))) {
return()
} else {
gfiphc::cache_pbs_data_iphc(
species = "hook with bait",
file_name = "hook-with-bait",
path = path, compress = compress
)
}
}
#' Download longline baited hook counts
#'
#' @description
#' Get the hook data for the HBLL outside and inside surveys from GFBIO.
#'
#' @param path Path where the 'bait-counts.rds' file is saved to
#' @param species Species code or common name to query
#' @param ssid Survey series id defaulting to the longline surveys
#'
#' @returns RDS object with a stored dataframe ssid, year, fishing_event_id, and baited hook count
#' @export
#'
get_ll_bait_counts <- function(path = ".", species = 442, ssid = c(22, 36, 39, 40)) {
ll_hook_data <- gfdata::get_ll_hook_data(species = species, ssid = ssid)
# Use bait counts only because other columns have questionable data quality
bait_counts <- ll_hook_data |>
dplyr::select(ssid, year, fishing_event_id, count_bait_only)
saveRDS(bait_counts, file.path(path, "bait-counts.rds"))
}
#' Get IPHC hook counts for all survey years
#'
#' @param path Path where the 'iphc-hook-counts.rds' file is saved to
#' @param species Species common name to query, defaults to 'pacific halibut'
#' because to date this species does not affect the output.
#'
#' @returns An RDS object containing a dataframe with the following columns:
#' - year: The year of the data.
#' - station: The station identifier.
#' - lat: The latitude coordinates.
#' - lon: The longitude coordinates.
#' - baited_hooks: The number of baited hooks.
#' - setID: The set identifier (used by the IPHC, may not match GFBio output)
#' - obsHooksPerSet: The observed hooks per set.
#' - effSkateIPHC: The effective skate IPHC (International Pacific Halibut Commission).
#' - iphcUsabilityCode: The IPHC usability code.
#' - iphcUsabilityDesc: The description of the IPHC usability code.
#'
#' @export
#'
get_iphc_hook_data <- function(path = ".", species = "pacific halibut") {
sp_file <- paste0(clean_name(species), ".rds")
sp_dat <- readRDS(file.path(path, sp_file))$set_counts |>
dplyr::mutate(species = species)
# Get hook_bait counts matching GFBio species counts: 1995:2022
hook_bait <- readRDS(file.path(path, "hook-with-bait.rds"))$set_counts |>
dplyr::mutate(baited_hooks = ifelse(!is.na(N_it), N_it, N_it20)) |>
dplyr::select(year, station, lat, lon, baited_hooks)
sp_dat <- dplyr::left_join(sp_dat, hook_bait)
# Need total observed hook counts to calculate prop_removed
# ---------------------------------------------------------
# Get set information for years 2003:2012; 2014:2019; 2022
if (!file.exists(file.path(path, "iphc-set-info.rds"))) {
message("File: <iphc_sets_info.rds> not found, querying GFBio using
gfiphc::get_iphc_sets_info() - requires VPN connection\n")
iphc_set_info <- get_iphc_sets_info() # requires VPN connection
saveRDS(iphc_set_info, file.path(path, "iphc-set-info.rds"))
}
iphc_set_info <- readRDS(file.path(path, "iphc-set-info.rds")) |>
dplyr::rename(lon = "long") |>
dplyr::filter(year != 2022) # these hook counts are wrong from GFBio
# Hook counts for 1996 - 2002
set_1996_2002 <-
gfiphc::data1996to2002 |>
dplyr::mutate(species = tolower(spNameIPHC), station = as.character(station)) |>
dplyr::rename(N_it = "catchCount", obsHooksPerSet = hooksObserved) |>
dplyr::select(year, station, lat, lon, obsHooksPerSet, usable)
# Hook counts for 1995 and 2013
# Need to sum observations of all 'species' observed to get hook counts
set_1995 <- dplyr::left_join(
gfiphc::setData1995,
gfiphc::countData1995 |> group_by(station) |> summarise(obsHooksPerSet = sum(specCount))
) |>
dplyr::mutate(year = 1995)
set_2013 <- dplyr::left_join(
gfiphc::setData2013,
gfiphc::countData2013 |> group_by(station) |> summarise(obsHooksPerSet = sum(specCount))
)
# Hook counts for 2020, 2021, 2022
set_2020_2021_2022 <- dplyr::bind_rows(gfiphc::setData2020, gfiphc::setData2021, gfiphc::setData2022) |>
dplyr::rename(obsHooksPerSet = "hooksObs") |>
dplyr::select(year, station, lat, lon, obsHooksPerSet, usable, standard)
set_info <-
dplyr::bind_rows(iphc_set_info, set_1995, set_1996_2002, set_2013, set_2020_2021_2022) %>%
dplyr::select(year, setID, station, obsHooksPerSet, effSkateIPHC, iphcUsabilityCode, iphcUsabilityDesc) %>%
dplyr::distinct(year, setID, station, .keep_all = TRUE)
# unique(sp_dat$year)[!unique(sp_dat$year) %in% unique(c(set_info$year))]
# Combine set information with species count data
sp_with_hooks <- dplyr::left_join(sp_dat, set_info) |>
dplyr::select(-species, -(E_it:C_it20))
# Resolve the many-to-many by using lat/lon as additional key values
year2019_stations <-
dplyr::left_join(
dplyr::filter(sp_dat, (year == 2019 & station %in% c("2099", "2107"))),
dplyr::filter(set_info, (year == 2019 & station %in% c("2099", "2107")))
) |>
dplyr::select(tidyr::all_of(colnames(sp_with_hooks)))
iphc_hook_out <-
sp_with_hooks |>
# simplest to remove unresolved many-to-many and add proper values in
dplyr::filter(!(year == 2019 & station %in% c("2099", "2107"))) |>
dplyr::bind_rows(year2019_stations) |>
dplyr::arrange(year, station) |>
dplyr::select(-usable, -standard)
saveRDS(iphc_hook_out, file.path(path, "iphc-hook-counts.rds"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.