# Width should apply to tidy # digits hopefully affects number of digits when using print options(width=50, width.cutoff=50, digits = 3) install.packages("pander", repos="http://cran.us.r-project.org")
# FROM: https://github.com/yihui/knitr-examples/blob/master/077-wrap-output.Rmd library(knitr) hook_output = knit_hooks$get('output') knit_hooks$set(output = function(x, options) { # this hook is used only when the linewidth option is not NULL if (!is.null(n <- options$linewidth)) { x = knitr:::split_lines(x) # any lines wider than n should be wrapped if (any(nchar(x) > n)) x = strwrap(x, width = n) x = paste(x, collapse = '\n') } hook_output(x, options) }) # TRIGGERED USING `linewidth=60`
This tutorial will walk through a simple example of how to use SpatialDeltaGLMM
for estimating abundance indices, distribution shifts, and range expansion.
To install TMB on a windows machine, we need to first install Rtools. During the installation, please select the option to have Rtools included in your system path. On other operating systems, it is not necessary to install Rtools. We then install SpatialDeltaGLMM
devtools::install_github("pfmc-assessments/geostatistical_delta-GLMM") devtools::install_github("james-thorson/utilities")
Next load libraries.
library(TMB) # Can instead load library(TMBdebug) library(SpatialDeltaGLMM)
If you have further questions after reading this tutorial, please explore the GitHub repo mainpage, wiki, and glossary. Also please explore the R help files, e.g., ?Data_Fn
for explanation of data inputs, or ?Param_Fn
for explanation of parameters.
Related tools for spatio-temporal fisheries analysis are currently housed at www.FishStats.org. These include VAST, a multispecies extension of SpatialDeltaGLMM, and www.FishViz.org, a tool for visualizing results using SpatialDeltaGLMM worldwide.
SpatialDeltaGLMM
has involved many publications for developing individual features. If using SpatialDeltaGLMM, please read and cite:
citation("SpatialDeltaGLMM")
and also browse the GitHub list of packages.
First chose an example data set for this script, as archived with package
Data_Set = c("Chatham_rise_hake", "Iceland_cod", "WCGBTS_canary", "GSL_american_plaice", "BC_pacific_cod", "EBS_pollock", "GOA_Pcod", "GOA_pollock", "GB_spring_haddock", "GB_fall_haddock", "SAWC_jacopever", "Aleutian_islands_POP")[6]
Next use latest version for CPP code
Version = "geo_index_v4b"
The following settings define the spatial resolution for the model, and whether to use a grid or mesh approximation
Method = c("Grid", "Mesh", "Spherical_mesh")[2] grid_size_km = 25 n_x = c(100, 250, 500, 1000, 2000)[1] # Number of stations Kmeans_Config = list( "randomseed"=1, "nstart"=100, "iter.max"=1e3 )
The following settings define whether to include spatial and spatio-temporal variation, whether its autocorrelated, and whether there's overdispersion
FieldConfig = c("Omega1"=1, "Epsilon1"=1, "Omega2"=1, "Epsilon2"=1) RhoConfig = c("Beta1"=0, "Beta2"=0, "Epsilon1"=0, "Epsilon2"=0) VesselConfig = c("Vessel"=0, "VesselYear"=0) ObsModel = 2
We also define any potential stratification of results, and settings specific to any case-study data set
# Default if( Data_Set %in% c("GSL_american_plaice","BC_pacific_cod","EBS_pollock","SAWC_jacopever","Chatham_rise_hake","Aleutian_islands_POP")){ strata.limits <- data.frame('STRATA'="All_areas") } # Specific (useful as examples) if( Data_Set %in% c("WCGBTS_canary","Sim")){ # In this case, it will calculate a coastwide index, and also a separate index for each state (although the state lines are approximate) strata.limits <- data.frame( 'STRATA' = c("Coastwide","CA","OR","WA"), 'north_border' = c(49.0, 42.0, 46.0, 49.0), 'south_border' = c(32.0, 32.0, 42.0, 46.0), 'shallow_border' = c(55, 55, 55, 55), 'deep_border' = c(1280, 1280, 1280, 1280) ) # Override default settings for vessels VesselConfig = c("Vessel"=0, "VesselYear"=1) } if( Data_Set %in% c("GOA_Pcod","GOA_pollock")){ # In this case, will calculating an unrestricted index and a separate index restricted to west of -140W strata.limits <- data.frame( 'STRATA' = c("All_areas", "west_of_140W"), 'west_border' = c(-Inf, -Inf), 'east_border' = c(Inf, -140) ) } if( Data_Set %in% c("GB_spring_haddock","GB_fall_haddock")){ # For NEFSC indices, strata must be specified as a named list of area codes strata.limits = list( 'Georges_Bank'=c(1130, 1140, 1150, 1160, 1170, 1180, 1190, 1200, 1210, 1220, 1230, 1240, 1250, 1290, 1300) ) } if( Data_Set %in% c("Iceland_cod")){ strata.limits = data.frame( 'STRATA'="All_areas" ) # Turn off all spatial, temporal, and spatio-temporal variation in probability of occurrence, because they occur almost everywhere FieldConfig = c("Omega1"=0, "Epsilon1"=0, "Omega2"=1, "Epsilon2"=1) RhoConfig = c("Beta1"=3, "Beta2"=0, "Epsilon1"=0, "Epsilon2"=0) }
Depending on the case study, we define a Region
used when extrapolating or plotting density estimates. If its a different data set, it will define Region="Other"
, and this is a recognized level for all uses of Region
(which attempts to define reasonable settings based on the location of sampling). For example Data_Set="Iceland_cod"
has no associated meta-data for the region, so it uses Region="Other"
by default.
Region = switch( Data_Set, "Chatham_rise_hake"="New_Zealand", "WCGBTS_canary"="California_current", "GSL_american_plaice"="Gulf_of_St_Lawrence", "BC_pacific_cod"="British_Columbia", "EBS_pollock"="Eastern_Bering_Sea", "GOA_Pcod"="Gulf_of_Alaska", "GOA_pollock"="Gulf_of_Alaska", "GB_spring_haddock"="Northwest_Atlantic", "GB_fall_haddock"="Northwest_Atlantic", "SAWC_jacopever"="South_Africa", "Aleutian_islands_POP"="Aleutian_Islands", "Other")
We then set the location for saving files.
DateFile = paste0(getwd(),'/SpatialDeltaGLMM_output/') dir.create(DateFile)
I also like to save all settings for later reference, although this is not necessary.
Record = ThorsonUtilities::bundlelist( c("Data_Set","strata.limits","Region","Version","Method","grid_size_km","n_x","FieldConfig","RhoConfig","VesselConfig","ObsModel","Kmeans_Config") ) save( Record, file=file.path(DateFile,"Record.RData")) capture.output( Record, file=paste0(DateFile,"Record.txt"))
Depending upon the Data_Set
chosen, we load archived data sets that are distributed with the package. Each archived data set is then reformatted to create a data-frame Data_Geostat
with a standardized set of columns. For a new data set, the user is responsible for formatting Data_Geostat
appropriately to match this format. We show the first six rows of Data_Geostat
given that Data_Set = Data_Set
.
if(Data_Set=="WCGBTS_canary"){ data( WCGBTS_Canary_example, package="FishStatsUtils" ) Year = as.numeric(sapply(WCGBTS_Canary_example[,'PROJECT_CYCLE'], FUN=function(Char){strsplit(as.character(Char)," ")[[1]][2]})) Data_Geostat = data.frame( "Catch_KG"=WCGBTS_Canary_example[,'HAUL_WT_KG'], "Year"=Year, "Vessel"=WCGBTS_Canary_example[,"VESSEL"], "AreaSwept_km2"=WCGBTS_Canary_example[,"AREA_SWEPT_HA"]/1e2, "Lat"=WCGBTS_Canary_example[,'BEST_LAT_DD'], "Lon"=WCGBTS_Canary_example[,'BEST_LON_DD'], "Pass"=WCGBTS_Canary_example[,'PASS']-1.5) } if( Data_Set %in% c("BC_pacific_cod")){ data( BC_pacific_cod_example, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=BC_pacific_cod_example[,'PCOD_WEIGHT'], "Year"=BC_pacific_cod_example[,'Year'], "Vessel"="missing", "AreaSwept_km2"=BC_pacific_cod_example[,'TOW.LENGTH..KM.']/100, "Lat"=BC_pacific_cod_example[,'LAT'], "Lon"=BC_pacific_cod_example[,'LON'], "Pass"=0) } if( Data_Set %in% c("GSL_american_plaice")){ data( GSL_american_plaice, package="FishStatsUtils" ) Print_Message( "GSL_american_plaice" ) Data_Geostat = data.frame( "Year"=GSL_american_plaice[,'year'], "Lat"=GSL_american_plaice[,'latitude'], "Lon"=GSL_american_plaice[,'longitude'], "Vessel"="missing", "AreaSwept_km2"=GSL_american_plaice[,'swept'], "Catch_KG"=GSL_american_plaice[,'biomass']*GSL_american_plaice[,'vstd'] ) } if(Data_Set=="EBS_pollock"){ data( EBS_pollock_data, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=EBS_pollock_data[,'catch'], "Year"=EBS_pollock_data[,'year'], "Vessel"="missing", "AreaSwept_km2"=0.01, "Lat"=EBS_pollock_data[,'lat'], "Lon"=EBS_pollock_data[,'long'], "Pass"=0) } if(Data_Set=="GOA_Pcod"){ data( GOA_pacific_cod , package="FishStatsUtils") Data_Geostat = data.frame( "Catch_KG"=GOA_pacific_cod[,'catch'], "Year"=GOA_pacific_cod[,'year'], "Vessel"="missing", "AreaSwept_km2"=0.01, "Lat"=GOA_pacific_cod[,'lat'], "Lon"=GOA_pacific_cod[,'lon'], "Pass"=0) } if(Data_Set=="GOA_pollock"){ data( GOA_walleye_pollock, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=GOA_walleye_pollock[,'catch'], "Year"=GOA_walleye_pollock[,'year'], "Vessel"="missing", "AreaSwept_km2"=0.01, "Lat"=GOA_walleye_pollock[,'lat'], "Lon"=GOA_walleye_pollock[,'lon'], "Pass"=0) } if(Data_Set=="Aleutian_islands_POP"){ data( AI_pacific_ocean_perch, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=AI_pacific_ocean_perch[,'cpue..kg.km.2.'], "Year"=AI_pacific_ocean_perch[,'year'], "Vessel"="missing", "AreaSwept_km2"=1, "Lat"=AI_pacific_ocean_perch[,'start.latitude'], "Lon"=AI_pacific_ocean_perch[,'start.longitude'], "Pass"=0) } if( Data_Set=="GB_spring_haddock"){ data( georges_bank_haddock_spring, package="FishStatsUtils" ) Print_Message( "GB_haddock" ) Data_Geostat = data.frame( "Catch_KG"=georges_bank_haddock_spring[,'CATCH_WT_CAL'], "Year"=georges_bank_haddock_spring[,'YEAR'], "Vessel"="missing", "AreaSwept_km2"=0.0112*1.852^2, "Lat"=georges_bank_haddock_spring[,'LATITUDE'], "Lon"=georges_bank_haddock_spring[,'LONGITUDE']) } if( Data_Set=="GB_fall_haddock"){ data( georges_bank_haddock_fall, package="FishStatsUtils" ) Print_Message( "GB_haddock" ) Data_Geostat = data.frame( "Catch_KG"=georges_bank_haddock_fall[,'CATCH_WT_CAL'], "Year"=georges_bank_haddock_fall[,'YEAR'], "Vessel"="missing", "AreaSwept_km2"=0.0112*1.852^2, "Lat"=georges_bank_haddock_fall[,'LATITUDE'], "Lon"=georges_bank_haddock_fall[,'LONGITUDE']) } if( Data_Set=="SAWC_jacopever"){ data( south_africa_westcoast_jacopever, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=south_africa_westcoast_jacopever[,'HELDAC'], "Year"=south_africa_westcoast_jacopever[,'Year'], "Vessel"="missing", "AreaSwept_km2"=south_africa_westcoast_jacopever[,'area_swept_nm2']*1.852^2, "Lat"=south_africa_westcoast_jacopever[,'cen_lat'], "Lon"=south_africa_westcoast_jacopever[,'cen_long']) } if( Data_Set %in% c("Iceland_cod")){ # WARNING: This data set has not undergone much evaluation for spatio-temporal analysis data( iceland_cod, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=iceland_cod[,'Catch_b'], "Year"=iceland_cod[,'year'], "Vessel"=1, "AreaSwept_km2"=iceland_cod[,'towlength'], "Lat"=iceland_cod[,'lat1'], "Lon"=iceland_cod[,'lon1']) } if( Data_Set %in% c("Chatham_rise_hake")){ data( chatham_rise_hake, package="FishStatsUtils" ) Data_Geostat = data.frame( "Catch_KG"=chatham_rise_hake[,'Hake_kg_per_km2'], "Year"=chatham_rise_hake[,'Year'], "Vessel"=1, "AreaSwept_km2"=1, "Lat"=chatham_rise_hake[,'Lat'], "Lon"=chatham_rise_hake[,'Lon']) } Data_Geostat = na.omit( Data_Geostat )
pander::pandoc.table( Data_Geostat[1:6,], digits=3 )
We also generate the extrapolation grid appropriate for a given region. For new regions, we use Region="Other"
.
if( Region %in% c("California_current","Eastern_Bering_Sea","Gulf_of_Alaska","Aleutian_Islands","Northwest_Atlantic","Gulf_of_St_Lawrence","New_Zealand") ){ Extrapolation_List = Prepare_Extrapolation_Data_Fn( Region=Region, strata.limits=strata.limits ) } if( Region == "British_Columbia" ){ Extrapolation_List = Prepare_Extrapolation_Data_Fn( Region=Region, strata.limits=strata.limits, strata_to_use=c("HS","QCS") ) } if( Region == "South_Africa" ){ Extrapolation_List = Prepare_Extrapolation_Data_Fn( Region=Region, strata.limits=strata.limits, region="west_coast" ) } if( Region == "Other" ){ Extrapolation_List = Prepare_Extrapolation_Data_Fn( Region=Region, strata.limits=strata.limits, observations_LL=Data_Geostat[,c('Lat','Lon')], maximum_distance_from_sample=15 ) }
And we finally generate the information used for conducting spatio-temporal parameter estimation, bundled in list Spatial_List
Spatial_List = Spatial_Information_Fn( grid_size_km=grid_size_km, n_x=n_x, Method=Method, Lon=Data_Geostat[,'Lon'], Lat=Data_Geostat[,'Lat'], Extrapolation_List=Extrapolation_List, randomseed=Kmeans_Config[["randomseed"]], nstart=Kmeans_Config[["nstart"]], iter.max=Kmeans_Config[["iter.max"]], DirPath=DateFile, Save_Results=FALSE ) # Add knots to Data_Geostat Data_Geostat = cbind( Data_Geostat, "knot_i"=Spatial_List$knot_i )
To estimate parameters, we first build a list of data-inputs used for parameter estimation. Data_Fn
has some simple checks for buggy inputs, but also please read the help file ?Data_Fn
.
TmbData = Data_Fn("Version"=Version, "FieldConfig"=FieldConfig, "RhoConfig"=RhoConfig, "ObsModel"=ObsModel, "b_i"=Data_Geostat[,'Catch_KG'], "a_i"=Data_Geostat[,'AreaSwept_km2'], "v_i"=as.numeric(Data_Geostat[,'Vessel'])-1, "s_i"=Data_Geostat[,'knot_i']-1, "t_i"=Data_Geostat[,'Year'], "a_xl"=Spatial_List$a_xl, "MeshList"=Spatial_List$MeshList, "GridList"=Spatial_List$GridList, "Method"=Spatial_List$Method, "Options"=c(SD_site_density=0, SD_site_logdensity=0, Calculate_Range=1, Calculate_evenness=0, Calculate_effective_area=1) )
We then build the TMB object.
TmbList = Build_TMB_Fn("TmbData"=TmbData, "RunDir"=DateFile, "Version"=Version, "RhoConfig"=RhoConfig, "VesselConfig"=VesselConfig, "loc_x"=Spatial_List$loc_x, "Method"=Method) Obj = TmbList[["Obj"]]
Next, we use a gradient-based nonlinear minimizer to identify maximum likelihood estimates for fixed-effects
Opt = TMBhelper::Optimize( obj=Obj, lower=TmbList[["Lower"]], upper=TmbList[["Upper"]], getsd=TRUE, savedir=DateFile, bias.correct=FALSE )
Finally, we bundle and save output
Report = Obj$report() Save = list("Opt"=Opt, "Report"=Report, "ParHat"=Obj$env$parList(Opt$par), "TmbData"=TmbData) save(Save, file=paste0(DateFile,"Save.RData"))
We first apply a set of standard model diagnostics to confirm that the model is reasonable and deserves further attention. If any of these do not look reasonable, the model output should not be interpreted or used.
It is always good practice to conduct exploratory analysis of data. Here, I visualize the spatial distribution of data. Spatio-temporal models involve the assumption that the probability of sampling a given location is statistically independent of the probability distribution for the response at that location. So if sampling "follows" changes in density, then the model is probably not appropriate!
Plot_data_and_knots(Extrapolation_List=Extrapolation_List, Spatial_List=Spatial_List, Data_Geostat=Data_Geostat, PlotDir=DateFile )
Here I print the diagnostics generated during parameter estimation, and I confirm that (1) no parameter is hitting an upper or lower bound and (2) the final gradient for each fixed-effect is close to zero. For explanation of parameters, please see ?Data_Fn
.
pander::pandoc.table( Opt$diagnostics[,c('Param','Lower','MLE','Upper','final_gradient')] )
Next, we check whether observed encounter frequencies for either low or high probability samples are within the 95% predictive interval for predicted encounter probability
Enc_prob = Check_encounter_prob( Report=Report, Data_Geostat=Data_Geostat, DirName=DateFile)
We can visualize fit to residuals of catch-rates given encounters using a Q-Q plot. A good Q-Q plot will have residuals along the one-to-one line.
Q = QQ_Fn( TmbData=TmbData, Report=Report)
Finally, we visualize residuals on a map. To do so, we first define years to plot and generate plotting inputs.
useful plots by first determining which years to plot (Years2Include
), and labels for each plotted year (Year_Set
)
# Get region-specific settings for plots MapDetails_List = MapDetails_Fn( "Region"=Region, "NN_Extrap"=Spatial_List$PolygonList$NN_Extrap, "Extrapolation_List"=Extrapolation_List ) # Decide which years to plot Year_Set = seq(min(Data_Geostat[,'Year']),max(Data_Geostat[,'Year'])) Years2Include = which( Year_Set %in% sort(unique(Data_Geostat[,'Year'])))
We then plot Pearson residuals. If there are visible patterns (areas with consistently positive or negative residuals accross or within years) then this is an indication of the model "overshrinking" results towards the intercept, and model results should then be treated with caution.
SpatialDeltaGLMM:::plot_residuals(Lat_i=Data_Geostat[,'Lat'], Lon_i=Data_Geostat[,'Lon'], TmbData=TmbData, Report=Report, Q=Q, savedir=DateFile, MappingDetails=MapDetails_List[["MappingDetails"]], PlotDF=MapDetails_List[["PlotDF"]], MapSizeRatio=MapDetails_List[["MapSizeRatio"]], Xlim=MapDetails_List[["Xlim"]], Ylim=MapDetails_List[["Ylim"]], FileName=DateFile, Year_Set=Year_Set, Years2Include=Years2Include, Rotate=MapDetails_List[["Rotate"]], Cex=MapDetails_List[["Cex"]], Legend=MapDetails_List[["Legend"]], zone=MapDetails_List[["Zone"]], mar=c(0,0,2,0), oma=c(3.5,3.5,0,0), cex=1.8)
To select among models, we recommend using the Akaike Information Criterion, AIC, via Opt$AIC=
r Opt$AIC
.
Last but not least, we generate pre-defined plots for visualizing results
We can visualize which direction has faster or slower decorrelation (termed "geometric anisotropy")
PlotAniso_Fn( FileName=paste0(DateFile,"Aniso.png"), Report=Report, TmbData=TmbData )
We can visualize many types of output from the model. Here I only show predicted density, but other options are obtained via other integers passed to plot_set
as described in ?PlotResultsOnMap_Fn
PlotResultsOnMap_Fn(plot_set=c(3), MappingDetails=MapDetails_List[["MappingDetails"]], Report=Report, Sdreport=Opt$SD, PlotDF=MapDetails_List[["PlotDF"]], MapSizeRatio=MapDetails_List[["MapSizeRatio"]], Xlim=MapDetails_List[["Xlim"]], Ylim=MapDetails_List[["Ylim"]], FileName=DateFile, Year_Set=Year_Set, Years2Include=Years2Include, Rotate=MapDetails_List[["Rotate"]], Cex=MapDetails_List[["Cex"]], Legend=MapDetails_List[["Legend"]], zone=MapDetails_List[["Zone"]], mar=c(0,0,2,0), oma=c(3.5,3.5,0,0), cex=1.8)
The index of abundance is generally most useful for stock assessment models.
Index = PlotIndex_Fn( DirName=DateFile, TmbData=TmbData, Sdreport=Opt[["SD"]], Year_Set=Year_Set, Years2Include=Years2Include, strata_names=strata.limits[,1], use_biascorr=TRUE ) pander::pandoc.table( Index$Table[,c("Year","Fleet","Estimate_metric_tons","SD_log","SD_mt")] )
We can detect shifts in distribution or range expansion/contraction.
Plot_range_shifts(Report=Report, TmbData=TmbData, Sdreport=Opt[["SD"]], Znames=colnames(TmbData$Z_xm), PlotDir=DateFile, Year_Set=Year_Set)
Most example data-sets don't have vessel effects, so this plot is generally skipped
Return = Vessel_Fn(TmbData=TmbData, Sdreport=Opt[["SD"]], FileName_VYplot=paste0(DateFile,"VY-effect.jpg"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.