knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
This is a description of the 'PacifichakeMSE' package to run management strategy evaluation of Pacific hake. The package sets up a set of parameters, and runs an operating model in R, and fits an estimation in TMB.
The package runs on a range of dependencies, most notably TMB, which require a developer version of R. For more information on TMB see https://github.com/kaskr/adcomp
To install the 'PacifichakeMSE' package run
devtools::install_github('https://github.com/nissandjac/PacifichakeMSE')
Load the dependencies
library(TMB) library(r4ss) library(devtools) library(PacifichakeMSE) library(dplyr) library(ggplot2) library(reshape2) library(patchwork)
First we can initialize the parameters of the operating model using the function 'load_data_seasons'
df <- load_data_seasons(nseason = 4, # Specifies the number of seasons nspace = 2, # Specifies the number of areas logSDR = 1.4, # Recruitment deviation moveslope = 0.8) # Slope of the movement function
df here contains both the input data to the model, as well as the parameters from the assessment model. For instance the recruitment deviations
df.plot <- data.frame(R = as.numeric(df$parms$Rin), years = df$years) ggplot(df.plot, aes(x = years, y = R))+geom_point()+theme_bw()+scale_y_continuous('recruitment\ndeviations')
We can then use the data frame (df) to run an operating model by the parameters specified previously. The function returns a list with a range of different values
# sim.data <- run.agebased.true.catch(df) names(sim.data)
We can for instance plot the spawning stock biomass (per country) and the total catch
ssb <- as.data.frame(sim.data$SSB) ssb$years <- df$years ssb <- melt(ssb, measure.vars = 1:df$nspace, id.vars = 'years', variable.name = 'space', value.name = 'SSB') ssbplot <- ggplot(ssb, aes(x = years, y = SSB, color = space))+ geom_line(size = 1.3)+theme_light()+ scale_color_manual(values = c('red','blue')) catch <- data.frame(years = df$years, catch = sim.data$Catch) catchplot <- ggplot(catch, aes(x = years, y = catch))+ geom_line(size = 1.3, color = 'black')+geom_point()+theme_light() ssbplot+catchplot
Or look at the dimensions of some of the simulated data (e.g., space, time, age)
dim(sim.data$N.save.age) dimnames(sim.data$N.save.age)
The operating model can be simulated into the future by changing the input to the df
df.future <- load_data_seasons(nseason = 4, nspace = 2, bfuture = 1, # Bias adjustment in the future. Has to be changed based on # of simulations yr_future = 50) # 50 years into the future # Run the OM again # As default load_data_seasons uses average historical catch for the future sim.future <- run.agebased.true.catch(df.future) # Plot the recruitment and spawning biomass ssb <- as.data.frame(sim.future$SSB.all[,,3]) # Look at SSB in season 3 ssb$years <- as.numeric(rownames(ssb)) ssb.plot <- melt(ssb, measure.vars = 1:df$nspace, id.vars = 'years', variable.name = 'space', value.name = 'SSB') ssbplot <- ggplot(ssb.plot, aes(x = years, y = SSB*1e-6, color = space))+ geom_line(size = 1.3)+theme_minimal()+scale_y_continuous('SSB\n(million tonnes)')+ scale_color_manual(values = c('red','blue')) R.df <- data.frame(R = rowSums(sim.future$R.save), years = df.future$years) Rplot <- ggplot(R.df, aes(x = years, y = R*1e-6))+geom_point()+geom_line()+ theme_minimal()+scale_y_continuous('recruits\n(millions)') ssbplot+Rplot
Right now the easiest way to run multiple simulations is through a for loop. Note that the recruitment dev's are calculated in 'load_data_seasons' so that call needs to be a part of the loop to get a stochastic result.
n <- 100# Just run 100 seeds <- round(runif(n, min = 1, max = 1e6)) df.future <- load_data_seasons(nseason = 1, # Use a standard model with 1 season and 1 spatial area nspace = 1, bfuture = 0.5, yr_future = 50) # 50 years into the future sim.tmp <- run.agebased.true.catch(df.future, seeds = seeds[1]) # Control the seed for reproducibility SSB.save <- data.frame(n = rep(1:n, each = length(df.future$years)), SSB = NA, years = rep(df.future$years, n)) SSB.save[SSB.save$n == 1,]$SSB <- sim.tmp$SSB for(i in 2:n){ df.tmp <- load_data_seasons(nseason = 1, # Use a standard model with 1 season and 1 spatial area nspace = 1, bfuture = 0.5, yr_future = 50) # 50 years into the future sim.tmp <- run.agebased.true.catch(df.tmp, seeds = seeds[i]) SSB.save[SSB.save$n == i,]$SSB <- sim.tmp$SSB } # Summarize the data idx <- c(1,40,30,60,99, 2,15) # Plot some random runs SSB.plot <- SSB.save %>% group_by(years) %>% summarise(SSBmean = exp(mean(log(SSB))), SSB90 = quantile(SSB, probs = 0.9), SSB10 = quantile(SSB, probs = 0.1)) %>% ggplot(aes(x = years,y = SSBmean*1e-6))+geom_line(size = 1.5)+theme_minimal()+scale_y_continuous('SSB\n(million tonnes)')+ geom_ribbon(aes(ymin = SSB10*1e-6, ymax = SSB90*1e-6), fill = alpha('gray', alpha = 0.2))+ geom_line(data = SSB.save[SSB.save$n %in% idx,],aes(y= SSB*1e-6, group = as.factor(n)), color =alpha('black',alpha = 0.2))+ theme(legend.position = 'none') SSB.plot # # Plot 10 individual lines # SSBall <- ggplot(SSB.save[], aes(x = years, y = SSB*1e-6, color = as.factor(n)))+geom_line()+theme_minimal()+ # theme(legend.position = 'none') # # SSB.plot+SSBall
The second model in the MSE is the estimation model, which is a copy of the current Pacific hake assessment model, but rewritten in TMB compared to SS3.
# Files that needs to be added to the package source('R/getParameters_ss.R') source('R/load_data_ss.R') source('R/getUncertainty.R') # First load the estimation model (also figure out how to make this baseline in the model) compile("src/runHakeassessment.cpp") dyn.load(dynlib("src/runHakeassessment")) # For comparison load the stock synthesis model (this is the 2018 veriosn) mod <- SS_output('inst/extdata/SS32018/', printstats=FALSE, verbose = FALSE) # Use R4SS to load df.OM <- load_data_seasons(nseason = 1, nspace = 1) # Compare it with a simple OM (1 area, 1 season) sim.data <- run.agebased.true.catch(df.OM) # Run the OM parms.ss <- getParameters_ss(mod) # Get the parameters to be estimated in TMB form df <- load_data_ss(mod) obj <-MakeADFun(df,parms.ss,DLL="runHakeassessment", silent = TRUE) # Look at the data before fitting the model reps <- obj$report() # Create a data frame of spawning biomass SSB.ss3 <- mod$derived_quants$Value[grep('SSB_1966', mod$derived_quants$Label):grep('SSB_2018', mod$derived_quants$Label)] df.plot <- data.frame(SSB = c(reps$SSB, sim.data$SSB, SSB.ss3*0.5), years = rep(df$years, 3), model = rep(c('EM','OM','SS3'))) ggplot(df.plot, aes(x = years, y = SSB*1e-6))+geom_line()+theme_minimal()+facet_wrap(~model, ncol = 3)+ scale_y_continuous('SSB\n(tonnes)')
The plots are virtually identical, when running them with the same parameter inputs. Let's try to fit the TMB model to 1) the real data, and 2) to the operating model
# First specify the parameter bounds lower <- obj$par-Inf upper <- obj$par+Inf # Some specific bounds to help the model converge lower[names(lower) == 'F0'] <- 0.001 upper[names(upper) == 'PSEL'] <- 9 upper[names(upper) == 'logh'] <- log(0.999) upper[names(upper) == 'F0'] <- 2 upper[names(upper) == 'psel_fish'] <- 3 #upper[names(upper) == 'logphi_catch'] <- log(1) lower[names(lower) == 'psel_fish'] <- 0.0001 system.time(opt<-nlminb(obj$par,obj$fn,obj$gr,lower=lower,upper=upper, # First find the MLE of the parameters control = list(iter.max = 1e6, eval.max = 1e6))) # system.time(rep<-sdreport(obj)) # Calculate the hessian sdrep <- summary(rep) # Summary report of parameters and uncertainty rep.values<-rownames(sdrep) df$nyear <- length(df$years) # Add some values here for plotting df$year <- df$years SSB <- getUncertainty('SSB',df, sdrep) # Get the uncertainty estimates SSB$model <- 'TMB' # df.plot <- data.frame(SSB = c(SSB$value, SSB.ss3*0.5), SEmin = c(SSB$min, rep(NA, df$nyear)), SEmax = c(SSB$max, rep(NA, df$nyear)), years = rep(df$years, 2), model = rep(c('TMB','SS3'), each = df$nyear)) # Plot the fitted model vs the SS3 model ggplot(df.plot, aes(x = years, y = SSB, color = model))+geom_line()+theme_minimal()+ geom_ribbon(data = df.plot, aes(ymin = SEmin, ymax = SEmax), fill = alpha('red', alpha = 0.15), linetype = 0, show.legend = FALSE)
Second we can fit the model to the operating model for a simulation analysis. Turning the OM into a TMB object is done using the function 'create_TMB_data' which takes input 'sim.data' (the operating model) and 'df' (the operating model parameters)
df.sim <- create_TMB_data(sim.data, df.OM ) # prepare the data input from TMB based on the operating model parms.sim <- df.OM$parms # Take the parameter input also used in the OM for initial parameters # Add fishing mortality and recruitment to estimated parameters F0 <- rowSums(sim.data$Fout) # Fishing mortality is calculated internally in the OM Rdev <- parms.sim$Rin[1:(length(parms.sim$Rin)-1)] # We can't estimate the recruitment in the last year parms.sim$F0 <- F0#rowSums(sim.data$Fsave, na.rm = TRUE) parms.sim$Rin <- Rdev#parms.new$Rin[1:(length(parms.new$Rin)-1)] obj.OM <- MakeADFun(df.sim,parms.sim,DLL="runHakeassessment", silent = TRUE) # Run the assessment
This leads to the following fit
system.time(opt<-nlminb(obj.OM$par,obj.OM$fn,obj.OM$gr,lower=lower,upper=upper, control = list(iter.max = 1e6, eval.max = 1e6))) # If error one of the random effects is unused system.time(rep<-sdreport(obj.OM)) # Calculate the hessian sdrep <- summary(rep) # Summary report of parameters and uncertainty rep.values<-rownames(sdrep) SSB <- getUncertainty('SSB',df.sim, sdrep) # Get the uncertainty estimates SSB$model <- 'EM' # df.plot <- data.frame(SSB = c(SSB$value, sim.data$SSB), SEmin = c(SSB$min, rep(NA, df$nyear)), SEmax = c(SSB$max, rep(NA, df$nyear)), years = rep(df$years, 2), model = rep(c('TMB','OM'), each = df$nyear)) # Plot the fitted model vs the SS3 model p.ssb <- ggplot(df.plot, aes(x = years, y = SSB, color = model))+geom_line()+theme_minimal()+ geom_ribbon(data = df.plot, aes(ymin = SEmin, ymax = SEmax), fill = alpha('red', alpha = 0.15), linetype = 0, show.legend = FALSE) print(p.ssb)
Pretty good fit - but the models are also identical.
Not sure why SSB is so close, but R0 seems to be so far away?
idx <- 1:3 # Which parameters to plot df.parms <- data.frame(parms = rep(rep.values[idx],2), value = exp(c(sdrep[idx,1], as.numeric(parms.sim[idx]))), SEMin = c(exp(sdrep[idx,1])-2*sdrep[idx,2], rep(NA, length(idx))), SEMax = c(exp(sdrep[idx,1])+2*sdrep[idx,2], rep(NA, length(idx))), model = rep(c('EM','OM'), each = length(idx)) ) ggplot(df.parms, aes(x = parms, y = value, color = model))+geom_point()+ facet_wrap(~parms, scales = 'free')+geom_errorbar(aes(ymin = SEMin, ymax = SEMax), width = 0.1)+theme_minimal()
Now that we know the basic structure of the code, we can prepare a range of MSE simulations. The MSEs run from the function 'run_multiple_MSEs' which takes input 'simyears' which is the number of years to simulate, and 'df' which is the data frame that is used to initialize the operating model. There are furthermore a range of input parameters that can be changed (with defaults), such as the movement parameters, seeds, and type of TAC .To run several MSE's one needs to loop over the function.
simyears <- 4 # run 2 yrs to make it quick df <- load_data_seasons(nseason = 4, nspace = 2, nsurvey = 1) # This one has a survey every year tmp <- run_multiple_MSEs(simyears = simyears, seeds = 123, TAC = 1, # Options are TAC 1) HCR, 2) historical, 3) realized df = df)
Note that the run_multiple_MSEs summarizes the data, so to get different output, one would have to manipulate the function. It is pretty condensed right now to ensure that the save files don't explode in size. Currently, the operating model is being re-run every year in the simulation, which is pretty inefficient (probably adds about 30 secs to each MSE run). Needs to be fixed. The function plots the fit to the spawning biomass in the final year including the uncertainty in estimation.
An example of a full MSE loop is printed below
ls.save <- list() ls.converge <- matrix(0, nruns) for (i in 1:nruns){ tmp <- run_multiple_MSEs(simyears = simyears, seeds = seeds[i], TAC = 1, df = df, cincrease = 0, mincrease = 0) print(i) if(is.list(tmp)){ ls.save[[i]] <-tmp ls.converge[i] <- 1 }else{ ls.save[[i]] <- NA ls.converge[i] <- 0 } }
Alternatively, the function 'fnMSE' exists, which can be used to run bulk MSE's where 'df' is constant
df <- load_data_seasons(nseason = 4, nspace = 2) # Initilialize data ls.save <- fnMSE(df, simyears = 3, nruns = 2) # Run 2 MSE's 3 years into the future
Using a saved MSE run we can plot the data. The function 'summarizeMSE' summarizes all the MSEs in the dataframe, and can be used to internally calculate objectives.
# This data set assumes the fishery follows the control rule and movement is constant load('results/Climate/MSErun_move_JMC_climate_0_0_HYBR_TAC1.Rdata') # Saved as ls.save source('R/summarizeMSE.R') # ls.save contains 100 MSE runs, each which contains the following data names(ls.save[[1]]) # USe this fcuntion to get a summary of the MSE p <- summarizeMSE(ls.save) catchplot <- ggplot(p$catch, aes(x = year, y = catchmean))+geom_line()+ geom_ribbon(aes(ymin = quantsmin, ymax = quantsmax),fill = alpha('red', alpha = 0.2))+ scale_y_continuous('catch')+theme_minimal() SSBplot <- ggplot(p$SSB, aes(x = year, y = SSBmean))+geom_line()+ geom_ribbon(aes(ymin = quantsmin, ymax = quantsmax),fill = alpha('red', alpha = 0.2))+ scale_y_continuous('SSB')+theme_minimal() catchplot+SSBplot
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.