fit_sdmTMB_cpue <- function(
cpue_data_file, # e.g. here::here("report/data-cache-2024-05/cpue-index-dat.rds"),
species,
survey_grids = c("SYN QCS", "SYN HS", "SYN WCVI", "SYN WCHG"),
final_year = as.numeric(format(Sys.Date(), "%Y")) - 1L,
min_positive_tows = 100L,
min_positive_trips = 5L,
min_yrs_with_trips = 5L,
raw_cpue_caching_file = NULL,
plots = FALSE, silent = TRUE, return_raw_cpue = FALSE) {
# library(dplyr)
library(sdmTMB)
library(sf)
if (plots) library(ggplot2)
if (require("RhpcBLASctl")) {
RhpcBLASctl::blas_set_num_threads(1) # default currently is all/80!
RhpcBLASctl::omp_set_num_threads(1) # default currently is all/80!
}
NA_return <-
data.frame(
year = NA, est = NA, lwr = NA, upr = NA, log_est = NA, se = NA,
region = paste(survey_grids, collapse = "; "), species = species,
stringsAsFactors = FALSE
)
params <- list()
params$area <- c("^5A|^5B|^5C|^5D|^5E|^3C|^3D")
params$area_name <- c("Coastwide")
params$min_positive_tows <- min_positive_tows
params$min_positive_trips <- min_positive_trips
params$min_yrs_with_trips <- min_yrs_with_trips
params$final_year <- final_year
params$depth_bin_quantiles <- c(0.001, 0.999)
params$lat_range <- c(48, Inf)
params$depth_range <- c(-Inf, Inf)
params$species <- species
spp <- gsub(" ", "-", gsub("\\/", "-", tolower(params$species)))
d1996 <- readr::read_rds(cpue_data_file)
d1996$fishing_event_id_unique <- paste0(
d1996$vessel_registration_number, "-",
d1996$trip_id, "-", d1996$fishing_event_id
)
if (plots) {
gfdata::survey_blocks |>
filter(active_block) |>
dplyr::filter(grepl("^SYN", survey_abbrev)) |>
ggplot(aes(colour = survey_abbrev)) +
geom_sf() +
theme_minimal() +
scale_colour_brewer(palette = "Dark2")
}
grid <- gfdata::survey_blocks |>
filter(active_block) |>
dplyr::filter(grepl("^SYN", survey_abbrev))
dat <- gfplot::tidy_cpue_index(
d1996,
species_common = tolower(params$species),
gear = "bottom trawl",
use_alt_year = FALSE,
year_range = c(1996, params$final_year),
lat_range = params$lat_range,
min_positive_tows = params$min_positive_tows,
min_positive_trips = params$min_positive_trips,
min_yrs_with_trips = params$min_yrs_with_trips,
depth_band_width = 1,
area_grep_pattern = params$area,
depth_bin_quantiles = params$depth_bin_quantiles,
min_bin_prop = 0.001,
lat_band_width = 0.02,
return_raw_data = TRUE #<
)
if (nrow(dat) < 200) {
if (!return_raw_cpue) {
saveRDS(NA, file = raw_cpue_caching_file, compress = FALSE)
return(NA_return)
} else {
return(NA)
}
}
bathy <- marmap::getNOAA.bathy(
lon1 = -138, lon2 = -120, lat1 = 47, lat2 = 57,
resolution = 1, keep = TRUE
)
grid_ll <- sf::st_transform(grid, crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
suppressWarnings({
grid_ll_coord <- grid_ll |>
sf::st_centroid() |>
sf::st_coordinates()
})
x <- marmap::get.depth(bathy, grid_ll_coord[, 1:2], locator = FALSE) |>
dplyr::mutate(depth_m = (depth * -1))
grid$depth_marmap <- x$depth_m
dat <- sdmTMB::add_utm_columns(dat, c("longitude", "latitude"), utm_crs = 32609, units = "km")
x <- marmap::get.depth(bathy, dat[, c("longitude", "latitude")], locator = FALSE) |>
dplyr::mutate(depth_m = (depth * -1))
dat$depth_marmap <- x$depth_m
grid <- dplyr::filter(grid, depth_marmap > 0)
dat <- dplyr::filter(dat, depth_marmap > 0)
dat_sf <- sf::st_as_sf(dat,
coords = c("longitude", "latitude"),
crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
)
dat_sf <- sf::st_transform(dat_sf, crs = sf::st_crs(grid))
grid_region <- dplyr::filter(grid, survey_abbrev %in% survey_grids)
if (plots) {
g <- grid_region |>
ggplot(aes(colour = survey_abbrev)) +
geom_sf() +
theme_minimal() +
scale_colour_brewer(palette = "Dark2")
print(g)
}
intersected <- sf::st_intersects(dat_sf, grid_region)
intersected_i <- which(lengths(intersected) > 0)
dat_sf_reduced <- dat_sf[intersected_i, ]
if (plots) {
g <- dat_sf_reduced |>
ggplot() +
geom_sf(data = grid_region) +
geom_sf(alpha = 0.6, pch = ".", colour = "red") +
theme_light()
print(g)
}
intersected_grid <- sf::st_intersects(grid_region, dat_sf_reduced)
intersected_grid_i <- which(lengths(intersected_grid) > 0)
grid_region_reduced <- grid_region[intersected_grid_i, ]
grid_region_reduced <- grid_region # !!
if (plots) {
g <- grid_region_reduced |>
ggplot() +
geom_sf() +
geom_sf(data = dat_sf_reduced, alpha = 0.6, pch = ".", colour = "red") +
theme_light()
print(g)
}
suppressWarnings(
co <- sf::st_centroid(grid_region_reduced)
)
co <- sf::st_coordinates(co)
gg <- data.frame(X = co[, 1] / 1000, Y = co[, 2] / 1000)
gg$depth_m <- grid_region_reduced$depth_m
gg$depth_marmap <- grid_region_reduced$depth_marmap
gg$log_depth <- log(gg$depth_m)
gg$month_num <- 1
gg$vessel <- factor(NA)
dat_reduced <- dat[intersected_i, ]
if (plots) {
g <- ggplot(gg, aes(X, Y)) +
geom_tile(fill = "grey60", width = 2, height = 2) +
geom_point(data = dat_reduced, colour = "red", size = .5, alpha = 0.1) +
coord_fixed() +
theme_light()
print(g)
}
if (plots) {
plot(dat_reduced$best_depth, dat_reduced$depth_marmap, pch = ".")
abline(0, 1, col = "red")
hist(dat_reduced$depth_marmap)
}
dat <- dat_reduced # !!
dat$area <- params$area_name
dat$log_depth <- log(dat$depth_marmap)
gg$log_depth <- log(gg$depth_marmap)
dat$vessel <- as.factor(dat$vessel_registration_number)
dat$month <- factor(dat$month)
# dplyr::filter(dat, hours_fished > 2000) # 8762.05!
dat <- dplyr::filter(dat, hours_fished < 2000)
dat$depth_scaled <- (dat$log_depth - mean(dat$log_depth)) / sd(dat$log_depth)
ret <- dat |>
filter(!is.na(spp_catch), !is.na(hours_fished)) |>
group_by(year) |>
summarise(est_unstandardized = sum(spp_catch) / sum(hours_fished)) |>
mutate(est_unstandardized = est_unstandardized /
exp(mean(log(est_unstandardized))))
ret$region <- paste(survey_grids, collapse = "; ")
ret$species <- params$species
if (return_raw_cpue) {
return(ret)
}
saveRDS(ret, file = raw_cpue_caching_file, compress = FALSE)
if (plots) {
g <- ggplot(dat, aes(as.factor(year), (spp_catch + 1) / hours_fished)) +
geom_boxplot() +
scale_y_log10()
print(g)
}
.cutoff <- if (length(survey_grids) > 2) 25 else 10
mesh <- make_mesh(dat, c("X", "Y"), cutoff = .cutoff)
plot(mesh$mesh)
mesh$mesh$n
gg <- sdmTMB::replicate_df(gg, "year", time_values = sort(unique(dat$year)))
gg$depth_scaled <- (gg$log_depth - mean(dat$log_depth)) / sd(dat$log_depth)
dat$vessel <- as.factor(dat$vessel)
get_most_common_level <- function(x) {
rev(names(sort(table(x))))[[1]]
}
base_month <- get_most_common_level(dat$month)
dat$month <- stats::relevel(factor(dat$month), ref = base_month)
gg$month <- factor(base_month, levels = levels(dat$month))
dat$month_num <- as.numeric(dat$month)
if (length(unique(dat$month)) >= 9) { # use month smoother
f <- spp_catch ~ 0 + as.factor(year) +
depth_scaled + I(depth_scaled^2) +
(1 | vessel) + s(month_num, bs = "cc")
mm <- stats::model.matrix(spp_catch ~ 0 + as.factor(year) +
depth_scaled + I(depth_scaled^2), data = dat)
} else {
f <- spp_catch ~ 0 + as.factor(year) +
depth_scaled + I(depth_scaled^2) +
(1 | vessel) + as.factor(month)
mm <- stats::model.matrix(spp_catch ~ 0 + as.factor(year) +
depth_scaled + I(depth_scaled^2) + as.factor(month), data = dat)
}
if (nrow(dat) < 200) {
return(NA_return)
}
tictoc::tic()
fit <- tryCatch(sdmTMB::sdmTMB(
f,
knots = list(month_num = c(0.5, 12.5)),
family = sdmTMB::delta_lognormal(),
# family = sdmTMB::delta_lognormal(type = "poisson-link"),
# family = sdmTMB::delta_gamma(),
control = sdmTMBcontrol(profile = c("b_j", "b_j2"), multiphase = FALSE),
# control = sdmTMBcontrol(multiphase = FALSE),
priors = sdmTMBpriors(b = normal(rep(0, ncol(mm)), rep(20, ncol(mm)))),
mesh = mesh,
offset = log(dat$hours_fished),
spatial = "on",
spatiotemporal = "iid",
data = dat,
time = "year",
anisotropy = TRUE,
predict_args = list(newdata = gg, re_form_iid = NA),
index_args = list(area = rep(4, nrow(gg))),
do_index = TRUE,
silent = silent
), error = function(e) NA)
tictoc::toc()
if (length(fit) == 1L) {
if (is.na(fit)) {
return(NA_return)
}
}
s <- sanity(fit)
if (!all(unlist(s))) {
fit <- tryCatch(update(fit, anisotropy = FALSE), error = function(e) NA)
s <- sanity(fit)
}
if (length(fit) == 1L) {
if (is.na(fit)) {
return(NA_return)
}
}
if (!all(unlist(s))) {
fit <- tryCatch(update(fit, spatiotemporal = "off"), error = function(e) NA)
s <- sanity(fit)
}
if (length(fit) == 1L) {
if (is.na(fit)) {
return(NA_return)
}
}
if (!all(unlist(s))) {
return(NA_return)
}
fit
do_expanion <- function(model) {
ind <- get_index(model, bias_correct = TRUE, area = 4)
ind$region <- paste(survey_grids, collapse = "; ")
ind$species <- params$species
ind
}
ind <- do_expanion(fit)
if (plots) {
g <- ind |> ggplot(aes(year, est, ymin = lwr, ymax = upr)) +
geom_ribbon() +
facet_wrap(~region, scales = "free_y") +
ylim(0, NA)
print(g)
}
ind
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.