# ----------------
# Prep OSD, snow crab and groundfish temperature profiles
# this step has to be done manually .. it is no longer maintained by anyone ..
if (!exists("year.assessment")) {
year.assessment=lubridate::year(Sys.Date())
year.assessment=lubridate::year(Sys.Date()) - 1
}
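# to target a specific assessment year instead, set it before sourcing this
# script and the guard above will leave it untouched, e.g.:
# year.assessment = 2018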
p = aegis::aegis_parameters( DS="temperature", yrs=1950:year.assessment ) # these are default years
# ------------------------------
create.baseline.database=FALSE
if ( create.baseline.database ) {
# 0 data assimilation
historical.data.redo = FALSE # set TRUE to rebuild the full historical archives below
if (historical.data.redo) {
# data dumps from various sources
temperature.db( p=p, DS="USSurvey_NEFSC" ) # US data .. new additions have to be made at the rawdata level manually
temperature.db( p=p, DS="lobster.redo" ) # FSRS data ... new additions have to be made at the rawdata level manually
temperature.db( p=p, DS="misc.redo" ) # various survey data, mostly scallop ... new additions have to be made at the rawdata level manually until a few db approach is finalized
temperature.db( DS="osd.rawdata.1910_2008", p=p ) # redo whole data set (historical) from 1910 to 2010
temperature.db( DS="osd.rawdata.2008_2016", p=p ) # 2008:2016, 2008 is overlapping ... overwrite the older series
# temperature.db( DS="ODF_ARCHIVE", p=p, yr=1969:2015 ) # specify range or specific year .. not used .. here in case the data series gets reactivated .. will need to be brought into profiles.annual.redo
# NOTE:: if groundfish data profiles are maintained by groundfish databases again, then an easy way would be
# to update aegis::groundfish.db( DS="gshyd.georef" ) to assimilate the data .. this would also mean that 00.surveys.r would need to be run first ..
}
# Roger Petipas has been maintaining a database; the following loads these data.
# The archive needs to be unzipped and the Data* files moved into:
# project.directory("aegis", "temperature", "archive", "profiles")
temperature.db( DS="osd.rawdata.annual", p=p, yr=2017:year.assessment ) # specify range or specific year
# Merge depth profiles from all data streams: OSD, groundfish, snowcrab, USSurvey_NEFSC
temperature.db( DS="profiles.annual.redo", p=p, yr=2008:year.assessment )
# Extract bottom data from each profile and discretize space and time to a manageable resolution
temperature.db( DS="bottom.annual.redo", p=p, yr=2008:year.assessment )
print( p$yrs ) # check that the full period 1950:present is selected (bare parentheses do not auto-print when sourced)
temperature.db( DS="bottom.all.redo", p=p )
}
# ------------------------------
# Basic data uptake now complete .. move to interpolations
# ------------------------------
create.interpolated.results.stmv=TRUE
if (create.interpolated.results.stmv ) {
# 1. stmv interpolations assuming some seasonal pattern
# twostep: ~ 160+ hrs
scale_ram_required_main_process = 0.8 # GB twostep / fft
scale_ram_required_per_process = 1.25 # twostep / fft /fields vario .. (mostly 0.5 GB, but up to 5 GB) -- 20 hrs
scale_ncpus = min( parallel::detectCores(), floor( (ram_local()- scale_ram_required_main_process) / scale_ram_required_per_process ) )
interpolate_ram_required_main_process = 24 # GB twostep / fft
interpolate_ram_required_per_process = 1.25 # 1 GB seems enough for twostep / fft /fields vario .. but make 2 in case
interpolate_ncpus = min( parallel::detectCores(), floor( (ram_local()- interpolate_ram_required_main_process) / interpolate_ram_required_per_process ) )
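# quick sanity check of the cluster sizing before launching; ram_local() is
# assumed to return available RAM in GB, as the calculations above imply
message( "scale_ncpus = ", scale_ncpus, "; interpolate_ncpus = ", interpolate_ncpus )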
nyrs = year.assessment-1950
p = aegis::aegis_parameters(
DS="temperature",
data_root = project.datadirectory( "aegis", "temperature" ),
spatial.domain = "canada.east", # default
DATA = 'temperature.db( p=p, DS="stmv.inputs" )',
additional.data=c("groundfish", "snowcrab", "USSurvey_NEFSC", "lobster"),
pres_discretization_temperature = 1 / 100, # 1 == p$pres; controls resolution of data prior to modelling (km .. i.e. 100 linear units finer than the final discretization pres)
yrs = 1950:year.assessment,
stmv_dimensionality="space-year-season",
stmv_global_modelengine = "none",
stmv_global_modelformula = "none", # a global model is only marginally useful here, so it is disabled
stmv_global_family ="none",
stmv_local_modelengine = "twostep" ,
stmv_local_modelformula_time = formula( paste(
't',
'~ s( yr, k=20, bs="ts") + s(cos.w, k=3, bs="ts") + s(sin.w, k=3, bs="ts") ',
'+ s( yr, cos.w, sin.w, k=30, bs="ts") ',
'+ s( log(z), k=3, bs="ts") + s( plon, k=3, bs="ts") + s( plat, k=3, bs="ts") ',
'+ s( log(z), plon, plat, k=30, bs="ts") '
) ),
stmv_twostep_time = "gam",
stmv_twostep_space = "fft", # everything else is too slow ...
stmv_fft_filter="lowpass_matern_tapered", # matern, krige (very slow), lowpass, lowpass_matern
stmv_range_correlation_fft_taper = 0.5, # local smoothing convolutions occur at this correlation scale
stmv_local_model_distanceweighted = TRUE,
stmv_rsquared_threshold = 0, # lower threshold .. not used by the twostep method
stmv_distance_statsgrid = 5, # resolution (km) of data aggregation (i.e. generation of the ** statistics ** )
stmv_distance_scale = c( 20, 25, 30, 35, 40, 45, 50 ), # km ... approx guess of 95% AC range
stmv_distance_prediction_fraction = 4/5, # i.e. 4/5 * 5 = 4 km
stmv_clusters = list( scale=rep("localhost", scale_ncpus), interpolate=rep("localhost", interpolate_ncpus) ), # ncpus for each runmode
stmv_nmin = 16*nyrs, # ~ 1000; minimum number of data points required before attempting to model a timeseries in a localized space .. guards against errors in the local model
stmv_nmax = 25*nyrs # no real upper bound .. limited only by speed / RAM .. memory use can reach 10 GB / core if set too large
)
stmv( p=p, runmode=c("scale", "interpolate") ) #700 MB (main); 500 MB child .. 2 days for scaling and 2 days for interpolation
debugging = FALSE # set TRUE to force a complete solution and inspect intermediate results
if (debugging) {
stmv( p=p, runmode=c("debug", "scale", "interpolate"), force_complete_solution=TRUE )
stmv_db( p=p, DS="stmv.results" ) # save to disk for use outside stmv*, returning to user scale
really.finished = FALSE # set TRUE only when completely done: cleanup removes all intermediate stmv files
if (really.finished) {
stmv_db( p=p, DS="cleanup.all" )
}
}
# 2. collect predictions from stmv and warp/break into sub-areas defined by
# p$spatial.domain.subareas = c( "SSE", "SSE.mpa", "snowcrab" )
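# if not already set by aegis_parameters, the subareas named above can be
# pinned explicitly before warping:
# p$spatial.domain.subareas = c( "SSE", "SSE.mpa", "snowcrab" )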
temperature.db( p=p, DS="predictions.redo" ) # ~1hr
temperature.db( p=p, DS="stmv.stats.redo" ) # warp to sub grids
# 3. extract relevant statistics .. including climatologies all defined by p$yrs
temperature.db( p=p, DS="bottom.statistics.annual.redo" )
# 4. all time slices in array format
temperature.db( p=p, DS="spatial.annual.seasonal.redo" )
# 5. time slice at prediction time of year
temperature.db( p=p, DS="timeslice.redo" )
# 6. complete statistics and warp/regrid database ... ~ 2 min :: currently only for the default grid .. TODO: also run for each subregion/subgrid
temperature.db( p=p, DS="complete.redo")
# 7. maps
# run only on local cores ... file swapping seems to reduce efficiency
# p = aegis::aegis_parameters( DS="temperature", yrs=1950:year.assessment )
temperature.map( p=p, DS="all", yr=p$yrs ) # default is to do all
# just redo a couple maps for ResDoc in the SSE domain
#p$bstats = "tmean"
p = spatial_parameters( p=p, spatial.domain = "SSE.mpa" ) # default grid and resolution
p$corners = data.frame(plon=c(150, 1022), plat=c(4600, 5320) ) # alter corners ..
temperature.map( p=p, DS='climatology' )
temperature.map( p=p, DS='stmv.stats' )
temperature.map( p=p, DS='annual' )
temperature.map( p=p, DS='seasonal' )
}
# finished