# -----------------------------
# count and record rarification curves from all available data --- refresh "survey.db" ~/ecomod/bio/src/bio.r
# Default the assessment year to the PREVIOUS calendar year: assessments are
# run early in the year, before the current year's survey data are complete.
# (Original code assigned the current year and then immediately overwrote it
# with year - 1; the dead first assignment has been removed.)
if (!exists("year.assessment")) {
  year.assessment = lubridate::year(Sys.Date()) - 1
}
# Parameter list for the species-area analysis, spanning 1999 to the assessment year.
p = aegis::aegis_parameters( DS="speciesarea", yrs=1999:year.assessment )
# Rebuild intermediate data products, in dependency order:
speciesarea.db( DS="speciesarea.counts.redo", p=p ) # ~60 MB / process -- can use all cpus
speciesarea.db( DS="speciesarea.stats.redo", p=p ) # ~1 minute
speciesarea.db( DS="speciesarea.redo", p=p ) # intermediary file for modelling and interpolation ... looks up missing data and covariates
# o = aegis::speciesarea.db( DS="speciesarea", p=p )
# -----------------------------
# stmv
# vn="Npred"
# Resource sizing for the parallel runs below.
# BUG FIX: these assignments were wrapped in `if (0) { ... }` and therefore
# never executed, yet `ncpus` is referenced further down in `stmv_clusters`
# -- a clean (non-interactive) run would fail with "object 'ncpus' not found".
# The computation is now unconditional.
ram_required_per_process = 1 # GB per process (~600 MB observed in 2017)
# cap worker count by both available cores and available memory
# NOTE(review): ram_local() is a project helper; presumably returns GB of free RAM -- confirm
ncpus = min( parallel::detectCores(), floor( ram_local() / ram_required_per_process ) )
ncpus.covars = min( parallel::detectCores(), floor( ram_local() / .7 ) ) # ~700 MB/process in 2018 .. for prediction of global covars
# Fit one stmv space-time model per response variable and warp the results
# onto the project grids. Each iteration rebuilds `p` with a variable-specific
# global model formula, runs the global GAM + local two-step (fft space /
# gam time) interpolation, then regenerates derived products and maps.
# NOTE(review): this loop reads `ncpus` (in stmv_clusters below), which is
# assigned earlier inside an `if (0)` block -- ensure it is defined before
# running this loop non-interactively.
for ( vn in p$varstomodel) {
  print(vn)
  p = aegis::aegis_parameters( DS="speciesarea",
    variables=list(Y=vn),
    yrs = c(1999:year.assessment), # years for modelling and interpolation
    stmv_dimensionality="space-year",
    stmv_global_modelengine = "gam",
    stmv_global_family = gaussian(link="identity"),
    # global covariate model: smooths of climatology, depth/slope/curvature and substrate
    stmv_global_modelformula = formula( paste(
      vn, '~ s(t, k=3, bs="ts") + s(tsd.climatology, k=3, bs="ts") + s(tmean.climatology, k=3, bs="ts") ',
      ' + s(t.range, k=3, bs="ts") + s( b.range, k=3, bs="ts") ',
      ' + s(log(z), k=3, bs="ts") + s( log(dZ), k=3, bs="ts") + s( log(ddZ), k=3, bs="ts")',
      ' + s(log(substrate.grainsize), k=3, bs="ts") ' )),
    stmv_local_modelengine ="twostep",
    stmv_twostep_space = "fft", # other possibilities: "fft", "tps"
    stmv_twostep_time = "gam",
    # local temporal model: interannual trend plus seasonal (cos/sin) harmonics
    stmv_local_modelformula_time = formula( paste(
      vn, '~ s(yr, k=10, bs="ts") + s(cos.w, k=3, bs="ts") + s(sin.w, k=3, bs="ts") ',
      '+ s( cos.w, sin.w, yr, k=30, bs="ts")') ),
    stmv_local_model_distanceweighted = TRUE,
    stmv_rsquared_threshold = 0.2, # lower threshold
    stmv_distance_statsgrid = 4, # resolution (km) of data aggregation (i.e. generation of the ** statistics ** )
    stmv_distance_scale = c(40, 60, 80), # km ... approx guess of 95% AC range .. data tends to be sparse relative to pure space models
    # stmv_distance_prediction_fraction = 1, # stmv_distance_prediction = stmv_distance_statsgrid * XX ..this is a half window km (default is 0.75)
    depth.filter = 0, # the depth covariate is input as log(depth) so, choose stats locations with elevation > log(1 m) as being on land
    # nmin/nmax scale with the number of modelled years (~8 obs/yr minimum)
    stmv_nmin = 8*(year.assessment-1999),# floor( 7 * p$ny ) # min number of data points req before attempting to model timeseries in a localized space
    stmv_nmax = 8*(year.assessment-1999)*11, # max( floor( 7 * p$ny ) * 11, 8000), # no real upper bound
    stmv_clusters = list( scale=rep("localhost", ncpus), interpolate=rep("localhost", ncpus) ) # must have the same length as stmv_distance_scale
  )
  stmv( p=p, runmode=c("globalmodel", "interpolate") ) # no global_model and force a clean restart
  # warp model outputs to the other project grids and rebuild derived layers
  aegis_db( p=p, DS="predictions.redo" ) # warp predictions to other grids
  aegis_db( p=p, DS="stmv.stats.redo" ) # warp stats to other grids
  aegis_db( p=p, DS="complete.redo" )
  aegis_db( p=p, DS="baseline.redo" )
  aegis_db_map( p=p )
  # optional interactive diagnostics for the fitted global model
  if (0) {
    global_model = stmv_db( p=p, DS="global_model")
    summary( global_model )
    plot(global_model)
  }
}
# (non-code text below was scraped web-page footer residue; commented out so the file parses)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.