# assemble data frame of design-based indices
# assemble data frame of geostat-based indices
# scale geostat to max 1
# scale design to have some geo mean
# add any shading
# make base design-plot dots/lines
# overlay geostat ribbons
# library(purrr)
# library(dplyr)
# library(ggplot2)
# library(here)
# devtools::load_all(".")
make_index_panel <- function(spp_w_hyphens, final_year_surv = 2023, french = FALSE, all_survey_years = NULL) {
cat(crayon::green(clisymbols::symbol$tick), spp_w_hyphens, "\n")
# setup -------------------------------------------------
# spp_w_hyphens <- "pacific-cod"
# dc <- here("report", "data-cache-nov-2023")
dat <- readRDS(paste0(file.path(dc, spp_w_hyphens), ".rds"))
report_folder <- "report"
iphc_index_cache <- file.path(report_folder, "iphc-cache")
dir.create(iphc_index_cache, showWarnings = FALSE, recursive = TRUE)
iphc_index_cache_spp <- paste0(file.path(iphc_index_cache, spp_w_hyphens), ".rds")
survey_cols <- c(
"SYN WCHG" = "#E41A1C",
"SYN WCVI" = "#984EA3",
"SYN HS" = "#377EB8",
"SYN QCS" = "#4DAF4A",
"HBLL OUT N" = "#FF7F00",
"HBLL OUT S" = "#FDBF6F",
"HBLL INS N/S" = "#A65628",
"IPHC FISS" = "#F781BF",
"MSSM WCVI" = "#6c6c6c",
"MSSM Geostat" = "#6c6c6c",
"MSA HS" = "#6c6c6c",
"SYN HS/QCS/WCHG/WCVI" = "#6c6c6c",
"SYN HS/QCS/WCVI" = "#6c6c6c",
"HBLL OUT N/S" = "#6c6c6c",
"Commercial" = "#303030"
)
lvls <- c(
"SYN WCHG",
"SYN HS",
"SYN QCS",
"SYN WCVI",
"HBLL OUT N",
"HBLL OUT S",
"SYN WCHG/HS/QCS/WCVI",
"SYN HS/QCS/WCVI",
"HBLL OUT N/S",
"HBLL INS N/S",
"IPHC FISS",
"OTHER HS MSA", # changes!
"MSSM WCVI"
)
# get design indices ---------------------------------------
dat_design <- tidy_survey_index(
dat$survey_index,
min_years = 1,
survey = c(
lvls
)
)
# pad zeros:
if (!is.null(all_survey_years)) {
all_survey_years <- filter(all_survey_years, survey_abbrev %in% unique(dat_design$survey_abbrev))
dat_design <- full_join(all_survey_years, dat_design,
by = c("survey_abbrev", "year")
)
if (!all(is.na(dat_design$biomass))) {
tofill <- is.na(dat_design$biomass) & !is.na(dat_design$year)
dat_design$lowerci[tofill] <- 0
dat_design$upperci[tofill] <- 0
dat_design$biomass[tofill] <- 0
}
}
lvls[lvls == "OTHER HS MSA"] <- "MSA HS"
dat_design$survey_abbrev <- gsub(
"OTHER HS MSA", "MSA HS",
dat_design$survey_abbrev
)
dat_design$survey_abbrev <- factor(dat_design$survey_abbrev, levels = lvls)
# sub iphc -------------------------------------------------
# replace IPHC design with NA so that they still show up as empty panels but
# we aren't going to show the design index this year
# dat_design <- dat_design |>
# filter(survey_abbrev != "IPHC FISS") |>
# bind_rows(tibble(survey_abbrev = "IPHC FISS"))
dat_design <- gfsynopsis:::sub_iphc_design(
dc,
iphc_index_cache_spp, spp_w_hyphens, dat_design
)
# read geostat ---------------------------------------------
get_index <- function(folder, spp, .family = "", model_tag = "st-iid") {
paths <- list.files(folder, pattern = ".rds", full.names = TRUE)
path <- paths[grepl(spp, paths)]
if (length(path) > 1) {
path <- path[grepl(model_tag, path)]
}
if (length(path)) {
file_names <- list.files(folder, pattern = ".rds")
sp <- gsub("-", " ", spp)
if (file.exists(path)) {
# if (grepl("QCS", path)) browser()
d <- readRDS(path)
if (length(d) > 1L) {
return(dplyr::mutate(d, species = sp, family = .family))
}
}
}
}
families <- c(
"delta-gamma",
"delta-poisson-link-gamma",
"tweedie",
"delta-poisson-link-lognormal",
"delta-lognormal"
)
geo <- list()
take_min_aic <- function(x) {
if (nrow(x)) {
# x <- filter(x, !grepl("lognormal", family))
filter(x, aic == min(aic))
}
}
geo$syn_coast <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/synoptic-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$syn_hs <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/synoptic-SYN HS-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$syn_qcs <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/synoptic-SYN QCS-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$syn_wchg <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/synoptic-SYN WCHG-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$syn_wcvi <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/synoptic-SYN WCVI-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$hbll_out_n <- geo$hbll_ins <- get_index("report/stitch-cache/hbll_outside_n/", spp_w_hyphens)
geo$hbll_out_s <- geo$hbll_ins <- get_index("report/stitch-cache/hbll_outside_s/", spp_w_hyphens)
geo$mssm <- purrr::map_dfr(families, \(f) {
get_index(paste0("report/stitch-cache/mssm-", f, "/"), spp_w_hyphens, .family = f)
}) |> take_min_aic()
geo$hbll_ins <- get_index("report/stitch-cache/hbll_inside/", spp_w_hyphens)
geo$hbll_out <- get_index("report/stitch-cache/hbll_outside/", spp_w_hyphens)
geo$iphc <- get_index(paste0("report/stitch-cache/iphc"), spp_w_hyphens)
if ("iphc" %in% names(geo)) {
geo$iphc$stitch_regions <- "IPHC FISS"
}
if (length(geo)) {
dat_geo <- bind_rows(geo)
dat_geo$survey_abbrev <- dat_geo$stitch_regions
dat_geo$survey_abbrev <- gsub("HBLL INS N, HBLL INS S", "HBLL INS N/S", dat_geo$survey_abbrev)
dat_geo$survey_abbrev <- gsub("HBLL OUT N, HBLL OUT S", "HBLL OUT N/S", dat_geo$survey_abbrev)
dat_geo$survey_abbrev <- gsub("SYN HS, SYN QCS, SYN WCHG, SYN WCVI", "SYN WCHG/HS/QCS/WCVI", dat_geo$survey_abbrev)
dat_geo$survey_abbrev <- gsub("SYN HS, SYN QCS, SYN WCVI", "SYN HS/QCS/WCVI", dat_geo$survey_abbrev)
dat_geo <- select(dat_geo, survey_abbrev, year, biomass, lowerci, upperci, mean_cv, num_sets, num_pos_sets) |> as_tibble()
dat_geo <- filter(dat_geo, mean_cv < 1)
# combine both types -------------------------------------
dat_design$method <- "design"
dat_geo$method <- "geostat"
dat_design$survey_abbrev <- as.character(dat_design$survey_abbrev)
both <- bind_rows(dat_geo, dat_design)
has_geo <- TRUE
} else {
has_geo <- FALSE
both <- dat_design
both$method <- "design"
}
# both <- filter(both, !(survey_abbrev == "SYN WCHG" & year %in% c(2013, 2014, 2015)))
# scale as needed ----------------------------------------
# prior_levels <- levels(both$survey_abbrev)
both_scaled <- group_by(both, survey_abbrev) |>
group_split() |>
purrr::map_dfr(\(x) {
both_present <- length(unique(x$method[!is.na(x$biomass)])) > 1L
if (both_present) {
# if (x$survey_abbrev[1] == "SYN WCHG/HS/QCS/WCVI") browser()
# if (x$survey_abbrev[1] == "IPHC FISS") browser()
x_geo <- filter(x, method == "geostat")
x_des <- filter(x, method == "design")
xx <- inner_join(
select(x_geo, year),
select(x_des, year, biomass),
by = join_by(year)
) |>
filter(biomass > 0) # can't take geometric mean of these!
overlapping_non_zero_years <- xx$year
x_geo_mean <- exp(mean(log(x_geo$biomass[x_geo$year %in% overlapping_non_zero_years])))
x_des_mean <- exp(mean(log(x_des$biomass[x_des$year %in% overlapping_non_zero_years])))
x_geo <- mutate(x_geo,
biomass_scaled = biomass / x_geo_mean,
lowerci_scaled = lowerci / x_geo_mean,
upperci_scaled = upperci / x_geo_mean
)
x_des <- mutate(x_des,
biomass_scaled = biomass / x_des_mean,
lowerci_scaled = lowerci / x_des_mean,
upperci_scaled = upperci / x_des_mean
)
max_geo <- max(x_geo$upperci_scaled, na.rm = TRUE)
xx <- bind_rows(x_geo, x_des)
mutate(xx,
biomass_scaled = biomass_scaled / max_geo,
lowerci_scaled = lowerci_scaled / max_geo,
upperci_scaled = upperci_scaled / max_geo
)
} else {
if (sum(!is.na(x$biomass))) {
mutate(x,
biomass_scaled = biomass / max(upperci, na.rm = TRUE),
lowerci_scaled = lowerci / max(upperci, na.rm = TRUE),
upperci_scaled = upperci / max(upperci, na.rm = TRUE)
)
} else {
mutate(x,
biomass_scaled = NA_real_,
lowerci_scaled = NA_real_,
upperci_scaled = NA_real_
)
}
}
}) |>
mutate(biomass = biomass_scaled, lowerci = lowerci_scaled, upperci = upperci_scaled) |>
mutate(survey_abbrev = factor(survey_abbrev, levels = lvls))
# some cleanup --------------------------------------------------
# nothing SYN COASTWIDE present:
if (all(is.na(filter(both_scaled, survey_abbrev == "SYN HS/QCS/WCVI")$biomass)) &&
all(is.na(filter(both_scaled, survey_abbrev == "SYN WCHG/HS/QCS/WCVI")$biomass))) {
both_scaled <- filter(both_scaled, survey_abbrev != "SYN HS/QCS/WCVI")
both_scaled$survey_abbrev <- forcats::fct_drop(both_scaled$survey_abbrev)
}
# "SYN HS/QCS/WCVI present:
if (any(!is.na(filter(both_scaled, survey_abbrev == "SYN HS/QCS/WCVI")$biomass))) {
both_scaled <- filter(both_scaled, survey_abbrev != "SYN WCHG/HS/QCS/WCVI")
both_scaled$survey_abbrev <- forcats::fct_drop(both_scaled$survey_abbrev)
}
# "SYN WCHG/HS/QCS/WCVI present:
if (any(!is.na(filter(both_scaled, survey_abbrev == "SYN WCHG/HS/QCS/WCVI")$biomass))) {
both_scaled <- filter(both_scaled, survey_abbrev != "SYN HS/QCS/WCVI")
both_scaled$survey_abbrev <- forcats::fct_drop(both_scaled$survey_abbrev)
}
# COVID, weird in some cases:
both_scaled <- filter(both_scaled, !(survey_abbrev == "MSSM WCVI" & year == 2020))
geo_scaled <- filter(both_scaled, method == "geostat")
des_scaled <- filter(both_scaled, method == "design")
labs <- distinct(select(both_scaled, survey_abbrev))
yrs <- c(1984, final_year_surv)
# get stats ----------------------------------------------
stats_df <- select(both_scaled, survey_abbrev, mean_cv, num_sets, num_pos_sets, method) %>%
distinct() |>
group_by(survey_abbrev) %>%
group_split() |>
purrr::map_dfr(\(x) {
if (length(unique(x$method)) > 1) {
filter(x, method == "geostat")
} else {
x
}
}) |>
group_by(survey_abbrev) |>
summarise(
mean_cv = sprintf("%.2f", round(mean(mean_cv, na.rm = TRUE), 2)),
mean_num_pos_sets = sprintf("%.0f", round(mean(num_pos_sets, na.rm = TRUE), 0)),
mean_num_sets = sprintf("%.0f", round(mean(num_sets, na.rm = TRUE), 0))
) %>%
mutate(sets = paste0(en2fr("Mean +ve sets", french), ": ", mean_num_pos_sets, "/", mean_num_sets)) %>%
mutate(cv = paste0(en2fr("Mean", french), " CV: ", mean_cv)) %>%
mutate(cv = ifelse(mean_cv == "NaN", "", cv)) %>%
mutate(sets = ifelse(mean_num_pos_sets == "NaN", "", sets)) %>%
mutate(mean_cv = as.numeric(mean_cv)) %>%
mutate(
mean_num_pos_sets = as.numeric(mean_num_pos_sets),
mean_num_sets = as.numeric(mean_num_sets)
)
# plot ----------------------------------------
suppressMessages(suppressWarnings({
if (nrow(geo_scaled) == 0L & all(is.na(des_scaled$biomass))) {
# Blank plot area when no data
g <- ggplot() +
theme_pbs() +
ggplot2::ggtitle(en2fr("Survey relative biomass indices", french))
} else {
g <- plot_survey_index(
dat = des_scaled,
scale = FALSE,
col = c("grey60", "grey20"),
max_cv = 1,
survey_cols = survey_cols,
xlim = c(1984 - 0.2, final_year_surv + 0.2),
french = french,
scale_type = "max-CI",
pjs_mode = TRUE
) +
coord_cartesian(
ylim = c(-0.004, 1.03),
xlim = c(yrs[1], final_year_surv) + c(-0.75, 0.75), expand = FALSE
)
if (has_geo) {
g <- g + geom_line(data = geo_scaled, mapping = aes(colour = survey_abbrev)) +
geom_ribbon(
data = geo_scaled,
mapping = aes(ymin = lowerci_scaled, ymax = upperci_scaled, fill = survey_abbrev), alpha = 0.2
)
}
g <- g + scale_x_continuous(guide = ggplot2::guide_axis(check.overlap = TRUE)) +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
) + ggplot2::ggtitle(en2fr("Survey relative biomass indices", french)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank()
) +
geom_text(
data = labs, x = yrs[1] + 0.5, y = 0.89,
aes(label = survey_abbrev),
inherit.aes = FALSE, colour = "grey30", size = 3, hjust = 0
)
if ("MSSM WCVI" %in% both_scaled$survey_abbrev && !all(is.na(filter(both_scaled, survey_abbrev == "MSSM WCVI")$biomass_scaled))) {
g <- g +
geom_rect(
data = filter(both_scaled, survey_abbrev == "MSSM WCVI")[1, , drop = FALSE],
mapping = ggplot2::aes(xmin = yrs[1] - 2, xmax = 2003, ymin = -Inf, ymax = Inf),
alpha = 0.15
)
}
g <- g + geom_text(
data = stats_df, aes(label = cv),
x = yrs[1] + 0.5, y = 0.67,
colour = "grey35", size = 2.65, hjust = 0
) +
geom_text(
data = stats_df,
aes(label = sets),
x = yrs[1] + 0.5, y = 0.49,
colour = "grey35", size = 2.65, hjust = 0
)
}
}))
g
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.