inst/scripts/11.sizespectrum.R

  # -----------------------------
  # analysis and spatial database of normalised size spectrum, average size

  if (!exists("year.assessment")) {
    year.assessment=lubridate::year(Sys.Date())
    year.assessment=lubridate::year(Sys.Date()) - 1
  }

  p = aegis::aegis_parameters( DS="sizespectrum", yrs=1970:year.assessment )

  aegis::sizespectrum.db( DS="sizespectrum.by.set.redo", p=p ) #MG takes 1 minute
  aegis::sizespectrum.db( DS="sizespectrum.stats.redo", p=p )  #MG took 20 minutes
  aegis::sizespectrum.db( DS="sizespectrum.redo", p=p )  # all point data to be interpolated #MG took 5 minutes
# o = aegis::sizespectrum.db( DS="sizespectrum", p=p )



  # -----------------------------
  # stmv; vn="nss.b1"
  ncpus = parallel::detectCores()

  if (0) {
    # about 700 MB/process ..
    ram_required_per_process = 1  # about 600 MB per process in 2017 GB
    ncpus = min( parallel::detectCores(), floor( ram_local() / ram_required_per_process ) )
    ncpus.covars = min( parallel::detectCores(), floor( ram_local() / .7 ) )  # 700 GB in 2018 .. for prediction of global covars
  }

  for ( vn in p$varstomodel) {
    print(vn)

    p = aegis::aegis_parameters( DS="sizespectrum",
      variables=list(Y=vn),
      yrs = c(1999:year.assessment),  # years for modelling and interpolation
      stmv_dimensionality="space-year",
      stmv_global_modelengine = "gam",
      stmv_global_family = gaussian(link="identity"),
      stmv_global_modelformula = formula( paste(
        vn, '~ s(t, k=3, bs="ts") + s(tsd.climatology, k=3, bs="ts") + s(tmean.climatology, k=3, bs="ts") ',
        ' + s(t.range, k=3, bs="ts") + s( b.range, k=3, bs="ts") ',
        ' + s(log(z), k=3, bs="ts") + s( log(dZ), k=3, bs="ts") + s( log(ddZ), k=3, bs="ts")',
        ' + s(log(substrate.grainsize), k=3, bs="ts") ' )),
      stmv_local_modelengine ="twostep",
      stmv_twostep_space = "fft",
      stmv_twostep_time = "gam",
      stmv_local_modelformula_time = formula( paste(
        vn, '~ s(yr, k=10, bs="ts") + s(cos.w, k=3, bs="ts") + s(sin.w, k=3, bs="ts")  ',
        '+ s( cos.w, sin.w, yr, k=30, bs="ts")') ),

      stmv_local_model_distanceweighted = TRUE,

      stmv_rsquared_threshold = 0.2, # lower threshold
      stmv_distance_statsgrid = 4, # resolution (km) of data aggregation (i.e. generation of the ** statistics ** )
      stmv_distance_scale = c(40, 60, 80), # km ... approx guess of 95% AC range .. data tends to be sprse realtive to pure space models
      # stmv_distance_prediction_fraction = 1, # stmv_distance_prediction = stmv_distance_statsgrid * XX ..this is a half window km (default is 0.75)
      depth.filter = 0, # the depth covariate is input as log(depth) so, choose stats locations with elevation > log(1 m) as being on land
      stmv_nmin = 8*(year.assessment-1999),# floor( 7 * p$ny ) # min number of data points req before attempting to model timeseries in a localized space
      stmv_nmax = 8*(year.assessment-1999)*11, # max( floor( 7 * p$ny ) * 11, 8000), # no real upper bound
      stmv_clusters = list( scale=rep("localhost", ncpus), interpolate=rep("localhost", ncpus) )
    )

    stmv( p=p, runmode=c("globalmodel", "interpolate" )  ) # no global_model and force a clean restart

      if (0) {  # model testing
          o = aegis_db( p=p, DS="stmv_inputs" )  # create fields for
          B = o$input
          p$stmv_global_modelformula # = formula( t ~ s(z, bs="ts" + s(s.range, bs="ts") + s(dZ, bs="ts") + s(ddZ, bs="ts") + s(log(substrate.grainsize), bs="ts")  ) ), # marginally useful .. consider removing it and use "none",
          p$stmv_global_family = gaussian(link="identity")
          p = stmv_variablelist(p=p)  # decompose into covariates, etc
          ii = which( is.finite (rowSums(B[ , c(p$variables$Y,p$variables$COV) ])) )
#          wgts = 1/B$b.sdTotal[ii]
#          wgts = wgts / mean(wgts)  # makes loglik constant
          global_model = try( gam( formula=p$stmv_global_modelformula, data=B[ii,],
              optimizer= p$stmv_gam_optimizer, family=p$stmv_global_family)) #, weights=wgts ) )
          summary( global_model )
      }


    # if (really.finished) stmv_db( p=p, DS="cleanup.all" )

    aegis_db( p=p, DS="predictions.redo" ) # warp predictions to other grids
    aegis_db( p=p, DS="stmv.stats.redo" ) # warp stats to other grids
    aegis_db( p=p, DS="complete.redo" )
    aegis_db( p=p, DS="baseline.redo" )
    aegis_db_map( p=p )

    if (0) {
      global_model = stmv_db( p=p, DS="global_model")
      summary( global_model )
      plot(global_model)
    }
  }





  Family: gaussian
Link function: identity

Formula:
nss.b1 ~ s(t, k = 3, bs = "ts") + s(tsd.climatology, k = 3, bs = "ts") +
    s(tmean.climatology, k = 3, bs = "ts") + s(t.range, k = 3,
    bs = "ts") + s(b.range, k = 3, bs = "ts") + s(log(z), k = 3,
    bs = "ts") + s(log(dZ), k = 3, bs = "ts") + s(log(ddZ), k = 3,
    bs = "ts") + s(log(substrate.grainsize), k = 3, bs = "ts")

Parametric coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  -4.6908     0.0375    -125   <2e-16

Approximate significance of smooth terms:
                            edf Ref.df     F p-value
s(t)                       1.91      2  34.2 2.9e-16
s(tsd.climatology)         1.98      2 113.0 < 2e-16
s(tmean.climatology)       1.94      2  43.9 < 2e-16
s(t.range)                 1.48      2  12.9 2.4e-07
s(b.range)                 1.99      2  70.3 < 2e-16
s(log(z))                  1.81      2 119.3 < 2e-16
s(log(dZ))                 1.97      2  20.6 7.5e-10
s(log(ddZ))                1.94      2  14.2 4.5e-07
s(log(substrate.grainsize)) 1.91      2  37.1 < 2e-16

R-sq.(adj) =  0.167   Deviance explained = 16.9%
GCV = 10.106  Scale est. = 10.081    n = 7163
jae0/stmdat documentation built on May 28, 2019, 11 p.m.