knitr::opts_chunk$set(warning=FALSE, message=FALSE)

library(ggplot2)
library(ggpubr)
library(kableExtra)
arrow_length = unit(0.15, 'inches')
arrow_height = 1.05
my_xlim = c(-4, 7.5)
my_ylim = c(0.08, 1.2)

# choose appropriate color scale for color vs. black & white
p1 = ggplot(data.frame(x = c(-5, 8)),
            aes(x)) +
  stat_function(fun = dnorm,
                args = list(mean = 4, sd = 0.4),
                aes(color = 'Daytime Fish'),
                lwd = 2) +
  stat_function(fun = dnorm,
                args = list(mean = 6, sd = 0.55),
                aes(color = 'Total Fish'),
                geom = 'blank') +
  stat_function(fun = dnorm,
                args = list(mean = 5, sd = 0.6),
                aes(color = 'Unique Fish'),
                geom = 'blank') +
  stat_function(fun = dnorm,
                args = list(mean = -1, sd = 0.75),
                aes(color = 'Unique Wild Fish'),
                geom = 'blank') +
  stat_function(fun = dnorm,
                args = list(mean = -1, sd = 0.75),
                aes(color = 'Unique Hatchery Fish'),
                geom = 'blank') +
  stat_function(fun = dnorm,
                args = list(mean = -1, sd = 0.75),
                aes(color = 'Unique HNC Fish'),
                geom = 'blank') +
  scale_color_brewer(palette = "Spectral") +
  geom_vline(xintercept = 4,
             linetype = 2) +
  theme_pubr(legend = "bottom") +
  guides(color = guide_legend(nrow = 3,
                              title.position = 'top')) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()) +
  labs(x = 'Escapement',
       color = 'Fish Type',
       title = '   Daytime Fish') +
  coord_cartesian(xlim = my_xlim,
                  ylim = my_ylim)
# coord_fixed(ratio = 4,
#             xlim = my_xlim,
#             ylim = my_ylim)

p2 = p1 +
  stat_function(fun = dnorm,
                args = list(mean = 6, sd = 0.55),
                aes(color = 'Total Fish'),
                lwd = 2) +
  labs(title = '   Nighttime Passage')

p2a = p2 +
  geom_segment(x = 4,
               y = arrow_height,
               xend = 6,
               yend = arrow_height,
               arrow = arrow(length = arrow_length))

p3 = p2 +
  stat_function(fun = dnorm,
                args = list(mean = 5, sd = 0.6),
                aes(color = 'Unique Fish'),
                lwd = 2) +
  labs(title = '   Re-ascension')

p3a = p3 +
  geom_segment(x = 6,
               y = arrow_height,
               xend = 5,
               yend = arrow_height,
               arrow = arrow(length = arrow_length))

p4 = p3 +
  stat_function(fun = dnorm,
                args = list(mean = -1, sd = 0.85),
                aes(color = 'Unique Wild Fish'),
                lwd = 2) +
  stat_function(fun = dnorm,
                args = list(mean = 1, sd = 0.75),
                aes(color = 'Unique Hatchery Fish'),
                lwd = 2) +
  stat_function(fun = dnorm,
                args = list(mean = -2, sd = 1.25),
                aes(color = 'Unique HNC Fish'),
                lwd = 2) +
  geom_segment(x = 5,
               y = arrow_height,
               xend = -1,
               yend = arrow_height,
               arrow = arrow(length = arrow_length)) +
  labs(title = '   Origin Proportion')


examp_p = ggarrange(p1, p2a, p3a, p4,
                      labels = 'AUTO',
                      ncol = 2,
                      nrow = 2,
                      align = 'v',
                      common.legend = T,
                      legend = 'bottom')

Introduction

The STADEM package was developed with the goal of estimating total adult escapement of spring/summer Chinook salmon and steelhead that cross Lower Granite dam (LGD). In addition, to meet desired management and research objectives, total escapement has to include estimates of uncertainty and be parsed into weekly strata by three origin groups; wild, hatchery and hatchery no-clip. To reach this goal, we have developed the STate space Adult Dam Escapement Model (STADEM) model that incorporates fish ladder window counts, data from sampled fish at the LGD adult trap, and observations of previously PIT tagged fish at LGD adult detection sites.

Some of the data needed for STADEM is available at other dams, and the package developers are currently working to develop the ability to query all of the necessary data at other locations. Currently however, the focus remains on Lower Granite dam, the furthest upstream dam returning salmonids encounter on the journey up the Snake River. The following example will show how to run STADEM at Lower Granite for one species and one year, and what some of the output looks like.

System requirements

STADEM relies on the following R packages which can be downloaded via CRAN or by using the function install.packages():

In addition, STADEM requires the JAGS software (Just Another Gibbs Sampler). Please download version >= 4.0.0 via the former link.

Data sources

STADEM relies on several pieces of data, which must be compiled from multiple sources. Many of them are accessed through the Columbia Basin Research Data Access in Real Time (DART) website.

High-level overview

STADEM estimates the total number of fish crossing the dam each week, based on two major data sources: the window counts and the total fish in the trap, while also accounting for two known biological processes: night-time passage and fallback-and-reascension. Using a state-space approach, STADEM assumes that the window counts and the estimates from the trap (fish in the trap divided by trap rate that week) are generated by processes with observation error. In the case of the window counts, there is some inherent error in the counting process, and it fails to account for fish that cross the dam while the window is closed. In the case of the trap, there is sampling variation and uncertainty in what the realized trap rate is. In addition, STADEM accounts for potential double-counting of fish that have fallen back and re-ascended the dam. It then partitions the estimate of total fish over the dam by origin, to provide the needed data for management goals (Figure \@ref(fig:stadem-model-figure)).

plot(examp_p)

\newpage

Compiling data

The STADEM package relies on many functions from two other packages, tidyverse for many data manipulation functions and lubridate for dates and intervals. It also depends upon the jagsUI package to run the JAGS program from within R.

library(tidyverse)
library(lubridate)
library(STADEM)
library(jagsUI)

The STADEM package makes it easy to compile all the necessary data in one convenient function, compileGRAdata. The user provides the spawn year and species (either "Chinook" or "Steelhead") they are interested in. STADEM operates on a weekly time-step, and the user has the option to determine the day of the week that most strata will begin on, using the strata_beg argument. There are periodic messages describing what is being done within compileGRAdata. Within compileGRAdata, several internal functions are being called, which may be utilized with the STADEM package for other purposes. They include:

# what spawning year? (2010 - 2016)
yr = 2014

# what species?
spp = 'Chinook'

# pull together all data
stadem_list = compileGRAdata(spp = spp,
                             start_date = paste0(yr, '0301'),
                             end_date = paste0(yr, '0817'),
                             strata_beg = 'Mon',
                             trap_dbase = readLGRtrapDB('../inst/extdata/Chnk2014_TrapDatabase.csv'))

The compileGRAdata() function returns several pieces of information, consolidated into a named list we have called stadem_list:

To run STADEM, only weeklyData is needed. STADEM also includes a function to transform all relevant data from the weekly summary to a list ready to be passed to JAGS.

# compile everything into a list to pass to JAGS
jags_data_list = prepJAGS(stadem_list[['weeklyData']])

Run STADEM

Part of the function runJAGSmodel writes the JAGS model as a text file. This requires a filename, and the type of statistical distribution the user would like to use to model the window counts. The options are Poisson (pois), negative binomial (neg_bin), a more flexible type of negative binomial, described in @Linden2011 (neg_bin2), quasi-Poisson (quasi_pois), or as a normal distribution in log-space (log_space). Once those have been set, use the runSTADEMmodel function to run the model in JAGS. Some of the inputs to this function are:

Suggestions

Recommended MCMC parameters are:

This provides a sample of 4,000 draws from the posterior distribution. Through trial and error, we have also determined the appropriate burn-in length and thinning interval to meet MCMC posterior checks.

We also recommend using the negative binomial distribution (win_model = neg_bin) to model the window counts. In our experience, all options other than the Poisson distribution provide similar estimates, with similar estimates of uncertainty. The Poisson distribution does not allow for the possibility of overdispersion in the window count data, leading to smaller uncertainty estimates which may not be appropriate. However, we encourage investigation of how different distribution choices may affect estimates for particular datasets, and for users to critically consider the appropriate modeling choice.

# name of JAGS model to write
model_file_nm = 'STADEM_JAGS_model.txt'

# what distribution to use for window counts?
win_model = c('pois', 'neg_bin', 'neg_bin2', 'quasi_pois', 'log_space')[2]

#-----------------------------------------------------------------
# run STADEM model
#-----------------------------------------------------------------
mod = runSTADEMmodel(file_name = model_file_nm,
                     mcmc_chainLength = 40000,
                     mcmc_burn = 10000,
                     mcmc_thin = 30,
                     mcmc_chains = 4,
                     jags_data = jags_data_list,
                     seed = 5,
                     weekly_params = T,
                     win_model = win_model,
                     trap_est = T)

STADEM output

The JAGS object returned by runSTADEMmodel contains many parameter estimates. Some of the most important are the total escapement of various fish (Table \@ref(tab:summary-total-escapement)).

mod$summary[grep('X.tot', rownames(mod$summary)),] %>%
  kbl(booktabs = T,
      linesep = "",
      caption = "Estimates of escapement.",
      format.args = list(big.mark = ",")) %>%
  kable_styling()

Another table that summarizes some of the output by week can be found by using the code below (Table \@ref(tab:week-est-tab)).

week_df = mod$summary[grepl('^X.all\\[', rownames(mod$summary)) |
                         grepl('^X.day\\[', rownames(mod$summary)) |
                         grepl('^X.night\\[', rownames(mod$summary)) |
                         grepl('^X.reasc\\[', rownames(mod$summary)) |
                         grepl('X.new.tot\\[', rownames(mod$summary)),] %>%
  as.data.frame() %>%
  mutate(var = rownames(.),
         week = as.integer(str_extract(var, "[0-9]+")),
         param = str_extract_all(var, "[:alpha:]+", simplify = T)[,2]) %>%
  tbl_df() %>%
  mutate(param = recode(param,
                        'all' = 'Total',
                        'day' = 'Day',
                        'night' = 'Night',
                        'reasc' = 'Reascension',
                        'new' = 'Unique')) %>%
  select(param, week, mean, sd)

# table with point estimates only
pt_est_tab = week_df %>%
  select(-sd) %>%
  mutate(mean = round(mean)) %>%
  spread(param, mean) %>%
  left_join(stadem_list[['weeklyData']] %>%
              mutate(week = 1:n())) %>%
  select(Week = week,
         Win.Cnt = win_cnt,
         Total,
         Day,
         Night,
         Reascension,
         Unique)

# table with point estimates and standard errors
est_tab = week_df %>%
  mutate(prnt_val = paste0(prettyNum(round(mean), big.mark = ","), ' (', prettyNum(round(sd, 1), big.mark = ","), ')')) %>%
  select(-sd, -mean) %>%
  spread(param, prnt_val) %>%
  left_join(stadem_list[['weeklyData']] %>%
              mutate(week = 1:n())) %>%
  select(Week = week,
         Win.Cnt = win_cnt,
         Total,
         Day,
         Night,
         Reascension,
         Unique)

est_tab %>%
  kbl(booktabs = T,
      linesep = "",
      caption = "Estimates by week.",
      format.args = list(big.mark = ',')) %>%
  kable_styling()

A user might also like to make time-series plots of estimates, to compare with window counts and/or trap estimates (Figure \@ref(fig:time-series-plot)).

week_est = mod$summary[grep('^X.all', rownames(mod$summary)),] %>%
  as.data.frame() %>%
  mutate(var = rownames(.),
         week = as.integer(str_extract(var, "[0-9]+")),
         param = str_extract_all(var, "[:alpha:]+", simplify = T)[,3],
         param = ifelse(param == '', 'all', param)) %>%
  tbl_df() %>%
  select(var, param, week, everything()) %>%
  left_join(stadem_list[['weeklyData']],
            by = c('week' = 'week_num'))
# plot time-series of model estimates, window counts and trap estimates
week_est %>%
  filter(param == 'all') %>%
  ggplot(aes(x = Start_Date,
             y = `50%`)) +
  geom_ribbon(aes(ymin = `2.5%`,
                  ymax = `97.5%`),
              alpha = 0.2) +
  geom_line(aes(y = win_cnt / (day_tags / tot_tags),
                color = 'Window (adj)')) +
  geom_point(aes(y = win_cnt / (day_tags / tot_tags),
                 color = 'Window (adj)')) +
  geom_line(aes(y = win_cnt,
                color = 'Window (raw)')) +
  geom_point(aes(y = win_cnt,
                 color = 'Window (raw)')) +
  geom_line(aes(y = trap_est,
                color = 'Trap')) +
  geom_point(aes(y = trap_est,
                 color = 'Trap')) +
  geom_line(aes(color = 'Model')) +
  geom_point(aes(color = 'Model')) +
  scale_color_manual(values = c('Model' = 'black',
                                'Window (raw)' = 'lightblue',
                                'Window (adj)' = 'blue',
                                'Trap' = 'red')) +
  theme_bw() +
  theme(legend.position = 'bottom') +
  labs(x = 'Date',
       y = 'Estimate',
       color = 'Source',
       title = paste('All', spp, 'in', yr))

It's also possible to examine estimates of night-time passage or re-ascension rates. Figure \@ref(fig:rate-figure) shows the observed values as points, the estimated rates as lines, with 95% credible intervals around them.

rate_est = mod$summary[grepl('^day.true', rownames(mod$summary)) | 
                         grepl('^reasc.true', rownames(mod$summary)),] %>%
  as.data.frame() %>%
  mutate(var = rownames(.),
         week = as.integer(str_extract(var, "[0-9]+")),
         param = str_extract_all(var, "[:alpha:]+", simplify = T)[,1]) %>%
  tbl_df() %>%
  select(var, param, week, everything()) %>%
  left_join(stadem_list[['weeklyData']],
            by = c('week' = 'week_num'))

rate_est %>%
  ggplot(aes(x = Start_Date,
             y = `50%`)) +
  geom_ribbon(aes(ymin = `2.5%`,
                  ymax = `97.5%`,
                  fill = param),
              alpha = 0.2) +
  geom_line(aes(color = param)) +
  geom_point(aes(y = day_tags / tot_tags,
                 color = 'day')) +
  geom_point(aes(y = reascent_tags / tot_tags,
                 color = 'reasc')) +
  theme_bw() +
  scale_color_brewer(palette = 'Set1',
                     name = 'Rate',
                     labels = c('day' = 'Daytime Passage',
                                'reasc' = 'Reascension')) +
  scale_fill_brewer(palette = 'Set1',
                    name = 'Rate',
                    labels = c('day' = 'Daytime Passage',
                               'reasc' = 'Reascension')) +
  labs(y = 'Estimate',
       x = 'Date',
       title = 'Estimated Rates')

Additional options

The user does have the option to not use fish caught in the trap as a secondary estimate of escapement. To turn this feature off, set the argument trap_est equal to FALSE in the runSTADEMmodel function. This will constrain the statistical distribution used to model the window counts as Poisson, because there is no other way to estimate the variance in those counts.

If the user would like to summarize STADEM estimates of total unique escapement, by week, (e.g. as an input to the SCOBI package), there is a function to do that, STADEMtoSCOBI.

scobi_input = STADEMtoSCBOI(stadem_mod = mod,
                            lgr_weekly = stadem_list[['weeklyData']])

head(scobi_input)

References



KevinSee/STADEM documentation built on Sept. 22, 2021, 12:40 a.m.