R/data.R

Defines functions get_ll_bait_counts get_data

Documented in get_data get_ll_bait_counts

#' Get all data (includes some IPHC data that will get over-written
#'  in Survey biomass indices section of make-pages.R)
#' @param type A or B
#' @param path Path
#' @param compress TRUE or FALSE to compress .rds
#' @param force Should data be downloaded even if already cached data exists?
#' @param sleep System sleep in seconds between each species
#'   to be nice to the server.
#' @export
get_data <- function(
    type = c("A", "B"), path = ".",
    compress = FALSE, force = FALSE, sleep = 20) {
  dir.create(path, showWarnings = FALSE)
  .d <- get_spp_names()
  .d <- .d[.d$type %in% type, , drop = FALSE]
  already_exists <- gsub("\\.rds", "", list.files(path))
  if (!force) {
    .d <- filter(.d, !spp_w_hyphens %in% already_exists)
  }
  if (nrow(.d) > 0L) {
    for (i in seq_along(.d$species_code)) {
      tryCatch(
        {
          gfdata::cache_pbs_data(
            species = .d$species_code[i],
            file_name = .d$spp_w_hyphens[i],
            path = path, unsorted_only = FALSE, historical_cpue = FALSE,
            survey_sets = TRUE, verbose = FALSE, compress = compress
          )
        },
        error = function(e) e
      )

      Sys.sleep(sleep)
    }
  }
  if (force || !file.exists(file.path(path, "cpue-index-dat.rds"))) {
    .dat <- gfdata::get_cpue_index(gear = "bottom trawl", min_cpue_year = 1996)
    saveRDS(.dat, file = file.path(path, "cpue-index-dat.rds"), compress = compress)
  }
  # get_data_iphc(
  #   type = type, path = file.path(path, "iphc"),
  #   compress = compress, force = force
  # )
  # get_data_iphc_hook_with_bait(
  #   path = file.path(path, "iphc"),
  #   compress = compress, force = force
  # )
  # get_iphc_hook_data(path = file.path(path, "iphc"))
  get_ll_bait_counts(path = path)
}

# #' Get the IPHC data for all years, should get merged into get_data at some point
# #'  but before gfsynopsis meeting we don't want to re-get all the non-IPHC data
# #' @param type A or B
# #' @param path Path
# #' @param compress TRUE or FALSE to compress .rds
# #' @param force Should data be downloaded even if already cached data exists?
# #' @export
# #'
# get_data_iphc <- function(
#     type = c("A", "B"), path = ".",
#     compress = FALSE, force = FALSE) {
#   dir.create(path, showWarnings = FALSE)
#   .d <- get_spp_names()
#   .d <- .d[.d$type %in% type, , drop = FALSE]
#   already_exists <- gsub("\\.rds", "", list.files(path))
#   if (!force) {
#     .d <- filter(.d, !spp_w_hyphens %in% already_exists)
#   }
#   if (nrow(.d) > 0L) {
#     gfiphc::cache_pbs_data_iphc(
#       species = .d$species_common_name,
#       file_name = .d$spp_w_hyphens,
#       path = path, compress = compress
#     )
#   }
# }

# #' Get the IPHC data for all years for hooks with bait (for hook competition
# #'  calculations.
# #' @param path Path
# #' @param compress TRUE or FALSE to compress .rds
# #' @param force Should data be downloaded even if already cached data exists?
# #' @export
# #'
# get_data_iphc_hook_with_bait <- function(
#     path = ".",
#     compress = FALSE, force = FALSE) {
#   dir.create(path, showWarnings = FALSE)
#   if (!force & file.exists(file.path(path, "hook-with-bait.rds"))) {
#     return()
#   } else {
#     gfiphc::cache_pbs_data_iphc(
#       species = "hook with bait",
#       file_name = "hook-with-bait",
#       path = path, compress = compress
#     )
#   }
# }

#' Download longline baited hook counts
#'
#' @description
#' Get the hook data for the HBLL outside and inside surveys from GFBIO.
#'
#' @param path Path where the 'bait-counts.rds' file is saved to
#' @param species Species code or common name to query
#' @param ssid Survey series id defaulting to the longline surveys
#'
#' @returns RDS object with a stored dataframe ssid, year, fishing_event_id, and baited hook count
#' @export
#'
get_ll_bait_counts <- function(path = ".", species = 442, ssid = c(22, 36, 39, 40)) {
  if (!file.exists(file.path(path, "bait-counts.rds"))) {
    ll_hook_data <- gfdata::get_ll_hook_data(species = species, ssid = ssid)
    # Use bait counts only because other columns have questionable data quality
    bait_counts <- ll_hook_data |>
      dplyr::select(ssid, year, fishing_event_id, count_bait_only)
    saveRDS(bait_counts, file.path(path, "bait-counts.rds"))
  }
}


# #' Get IPHC hook counts for all survey years
# #'
# #' @param path Path where the 'iphc-hook-counts.rds' file is saved to
# #' @param species Species common name to query, defaults to 'pacific halibut'
# #'   because to date this species does not affect the output.
# #'
# #' @returns An RDS object containing a dataframe with the following columns:
# #'   - year: The year of the data.
# #'   - station: The station identifier.
# #'   - lat: The latitude coordinates.
# #'   - lon: The longitude coordinates.
# #'   - baited_hooks: The number of baited hooks.
# #'   - setID: The set identifier (used by the IPHC, may not match GFBio output)
# #'   - obsHooksPerSet: The observed hooks per set.
# #'   - effSkateIPHC: The effective skate IPHC (International Pacific Halibut Commission).
# #'   - iphcUsabilityCode: The IPHC usability code.
# #'   - iphcUsabilityDesc: The description of the IPHC usability code.
# #'
# #' @export
# #'
# get_iphc_hook_data <- function(path = ".", species = "pacific halibut") {
#   sp_file <- paste0(clean_name(species), ".rds")
#   sp_dat <- readRDS(file.path(path, sp_file))$set_counts |>
#     dplyr::mutate(species = species)
#   # Get hook_bait counts matching GFBio species counts: 1995:2022

#   hook_bait <- readRDS(file.path(path, "hook-with-bait.rds"))$set_counts |>
#     dplyr::mutate(baited_hooks = ifelse(!is.na(N_it), N_it, N_it20)) |>
#     dplyr::select(year, station, lat, lon, baited_hooks)
#   sp_dat <- dplyr::left_join(sp_dat, hook_bait)

#   # Need total observed hook counts to calculate prop_removed
#   # ---------------------------------------------------------
#   # Get set information for years 2003:2012; 2014:2019; 2022
#   if (!file.exists(file.path(path, "iphc-set-info.rds"))) {
#     message("File: <iphc_sets_info.rds> not found, querying GFBio using
#         gfiphc::get_iphc_sets_info() - requires VPN connection\n")
#     iphc_set_info <- get_iphc_sets_info() # requires VPN connection
#     saveRDS(iphc_set_info, file.path(path, "iphc-set-info.rds"))
#   }

#   iphc_set_info <- readRDS(file.path(path, "iphc-set-info.rds")) |>
#     dplyr::rename(lon = "long") |>
#     dplyr::filter(year != 2022) # these hook counts are wrong from GFBio

#   # Hook counts for 1996 - 2002
#   set_1996_2002 <-
#     gfiphc::data1996to2002 |>
#     dplyr::mutate(species = tolower(spNameIPHC), station = as.character(station)) |>
#     dplyr::rename(N_it = "catchCount", obsHooksPerSet = hooksObserved) |>
#     dplyr::select(year, station, lat, lon, obsHooksPerSet, usable)

#   # Hook counts for 1995 and 2013
#   # Need to sum observations of all 'species' observed to get hook counts
#   set_1995 <- dplyr::left_join(
#     gfiphc::setData1995,
#     gfiphc::countData1995 |> group_by(station) |> summarise(obsHooksPerSet = sum(specCount))
#   ) |>
#     dplyr::mutate(year = 1995)

#   set_2013 <- dplyr::left_join(
#     gfiphc::setData2013,
#     gfiphc::countData2013 |> group_by(station) |> summarise(obsHooksPerSet = sum(specCount))
#   )

#   # Hook counts for 2020, 2021, 2022
#   set_2020_2021_2022 <- dplyr::bind_rows(gfiphc::setData2020, gfiphc::setData2021, gfiphc::setData2022) |>
#     dplyr::rename(obsHooksPerSet = "hooksObs") |>
#     dplyr::select(year, station, lat, lon, obsHooksPerSet, usable, standard)

#   set_info <-
#     dplyr::bind_rows(iphc_set_info, set_1995, set_1996_2002, set_2013, set_2020_2021_2022) %>%
#     dplyr::select(year, setID, station, obsHooksPerSet, effSkateIPHC, iphcUsabilityCode, iphcUsabilityDesc) %>%
#     dplyr::distinct(year, setID, station, .keep_all = TRUE)

#   # unique(sp_dat$year)[!unique(sp_dat$year) %in% unique(c(set_info$year))]

#   # Combine set information with species count data
#   sp_with_hooks <- dplyr::left_join(sp_dat, set_info) |>
#     dplyr::select(-species, -(E_it:C_it20))

#   # Resolve the many-to-many by using lat/lon as additional key values
#   year2019_stations <-
#     dplyr::left_join(
#       dplyr::filter(sp_dat, (year == 2019 & station %in% c("2099", "2107"))),
#       dplyr::filter(set_info, (year == 2019 & station %in% c("2099", "2107")))
#     ) |>
#     dplyr::select(tidyr::all_of(colnames(sp_with_hooks)))

#   iphc_hook_out <-
#     sp_with_hooks |>
#     # simplest to remove unresolved many-to-many and add proper values in
#     dplyr::filter(!(year == 2019 & station %in% c("2099", "2107"))) |>
#     dplyr::bind_rows(year2019_stations) |>
#     dplyr::arrange(year, station) |>
#     dplyr::select(-usable, -standard)

#   saveRDS(iphc_hook_out, file.path(path, "iphc-hook-counts.rds"))
# }
pbs-assess/gfsynopsis documentation built on June 10, 2025, 3:58 p.m.