# -----------------------------
# analysis and spatial database of normalised size spectrum, average size
if (!exists("year.assessment")) {
year.assessment=lubridate::year(Sys.Date())
year.assessment=lubridate::year(Sys.Date()) - 1
}
p = aegis::aegis_parameters( DS="sizespectrum", yrs=1970:year.assessment )
aegis::sizespectrum.db( DS="sizespectrum.by.set.redo", p=p ) #MG takes 1 minute
aegis::sizespectrum.db( DS="sizespectrum.stats.redo", p=p ) #MG took 20 minutes
aegis::sizespectrum.db( DS="sizespectrum.redo", p=p ) # all point data to be interpolated #MG took 5 minutes
# o = aegis::sizespectrum.db( DS="sizespectrum", p=p )
# -----------------------------
# stmv; vn="nss.b1"
ncpus = parallel::detectCores()
if (0) {
# about 700 MB/process ..
ram_required_per_process = 1 # about 600 MB per process in 2017 GB
ncpus = min( parallel::detectCores(), floor( ram_local() / ram_required_per_process ) )
ncpus.covars = min( parallel::detectCores(), floor( ram_local() / .7 ) ) # 700 GB in 2018 .. for prediction of global covars
}
for ( vn in p$varstomodel) {
print(vn)
p = aegis::aegis_parameters( DS="sizespectrum",
variables=list(Y=vn),
yrs = c(1999:year.assessment), # years for modelling and interpolation
stmv_dimensionality="space-year",
stmv_global_modelengine = "gam",
stmv_global_family = gaussian(link="identity"),
stmv_global_modelformula = formula( paste(
vn, '~ s(t, k=3, bs="ts") + s(tsd.climatology, k=3, bs="ts") + s(tmean.climatology, k=3, bs="ts") ',
' + s(t.range, k=3, bs="ts") + s( b.range, k=3, bs="ts") ',
' + s(log(z), k=3, bs="ts") + s( log(dZ), k=3, bs="ts") + s( log(ddZ), k=3, bs="ts")',
' + s(log(substrate.grainsize), k=3, bs="ts") ' )),
stmv_local_modelengine ="twostep",
stmv_twostep_space = "fft",
stmv_twostep_time = "gam",
stmv_local_modelformula_time = formula( paste(
vn, '~ s(yr, k=10, bs="ts") + s(cos.w, k=3, bs="ts") + s(sin.w, k=3, bs="ts") ',
'+ s( cos.w, sin.w, yr, k=30, bs="ts")') ),
stmv_local_model_distanceweighted = TRUE,
stmv_rsquared_threshold = 0.2, # lower threshold
stmv_distance_statsgrid = 4, # resolution (km) of data aggregation (i.e. generation of the ** statistics ** )
stmv_distance_scale = c(40, 60, 80), # km ... approx guess of 95% AC range .. data tends to be sprse realtive to pure space models
# stmv_distance_prediction_fraction = 1, # stmv_distance_prediction = stmv_distance_statsgrid * XX ..this is a half window km (default is 0.75)
depth.filter = 0, # the depth covariate is input as log(depth) so, choose stats locations with elevation > log(1 m) as being on land
stmv_nmin = 8*(year.assessment-1999),# floor( 7 * p$ny ) # min number of data points req before attempting to model timeseries in a localized space
stmv_nmax = 8*(year.assessment-1999)*11, # max( floor( 7 * p$ny ) * 11, 8000), # no real upper bound
stmv_clusters = list( scale=rep("localhost", ncpus), interpolate=rep("localhost", ncpus) )
)
stmv( p=p, runmode=c("globalmodel", "interpolate" ) ) # no global_model and force a clean restart
if (0) { # model testing
o = aegis_db( p=p, DS="stmv_inputs" ) # create fields for
B = o$input
p$stmv_global_modelformula # = formula( t ~ s(z, bs="ts" + s(s.range, bs="ts") + s(dZ, bs="ts") + s(ddZ, bs="ts") + s(log(substrate.grainsize), bs="ts") ) ), # marginally useful .. consider removing it and use "none",
p$stmv_global_family = gaussian(link="identity")
p = stmv_variablelist(p=p) # decompose into covariates, etc
ii = which( is.finite (rowSums(B[ , c(p$variables$Y,p$variables$COV) ])) )
# wgts = 1/B$b.sdTotal[ii]
# wgts = wgts / mean(wgts) # makes loglik constant
global_model = try( gam( formula=p$stmv_global_modelformula, data=B[ii,],
optimizer= p$stmv_gam_optimizer, family=p$stmv_global_family)) #, weights=wgts ) )
summary( global_model )
}
# if (really.finished) stmv_db( p=p, DS="cleanup.all" )
aegis_db( p=p, DS="predictions.redo" ) # warp predictions to other grids
aegis_db( p=p, DS="stmv.stats.redo" ) # warp stats to other grids
aegis_db( p=p, DS="complete.redo" )
aegis_db( p=p, DS="baseline.redo" )
aegis_db_map( p=p )
if (0) {
global_model = stmv_db( p=p, DS="global_model")
summary( global_model )
plot(global_model)
}
}
Family: gaussian
Link function: identity
Formula:
nss.b1 ~ s(t, k = 3, bs = "ts") + s(tsd.climatology, k = 3, bs = "ts") +
s(tmean.climatology, k = 3, bs = "ts") + s(t.range, k = 3,
bs = "ts") + s(b.range, k = 3, bs = "ts") + s(log(z), k = 3,
bs = "ts") + s(log(dZ), k = 3, bs = "ts") + s(log(ddZ), k = 3,
bs = "ts") + s(log(substrate.grainsize), k = 3, bs = "ts")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.6908 0.0375 -125 <2e-16
Approximate significance of smooth terms:
edf Ref.df F p-value
s(t) 1.91 2 34.2 2.9e-16
s(tsd.climatology) 1.98 2 113.0 < 2e-16
s(tmean.climatology) 1.94 2 43.9 < 2e-16
s(t.range) 1.48 2 12.9 2.4e-07
s(b.range) 1.99 2 70.3 < 2e-16
s(log(z)) 1.81 2 119.3 < 2e-16
s(log(dZ)) 1.97 2 20.6 7.5e-10
s(log(ddZ)) 1.94 2 14.2 4.5e-07
s(log(substrate.grainsize)) 1.91 2 37.1 < 2e-16
R-sq.(adj) = 0.167 Deviance explained = 16.9%
GCV = 10.106 Scale est. = 10.081 n = 7163
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.