inst/scripts/99_example_bio_groundfish_spacetime.R

# Example analysis to model and create probability distributions of species abundance in some geometry.
# Borrows from CARstan; see 00_example.R for standard examples.

# This is a spatio-temporal example with multiple stations within areal units (AUs) and unique temporal units (TUs).


  require(sp)
  require(sf)        # used below for st_centroid / st_geometry
  require(spdep)
  require(rgeos)
  require(rstan)
  require(aegis.polygons)
  require(carstm)

  year.assessment=2017
  p = aegis.survey::survey_parameters( yrs=1970:year.assessment )

  # set up default map projection
  # p$areal_units_proj4string_planar_km = projection_proj4string("omerc_nova_scotia")   # oblique mercator, centred on Scotian Shelf rotated by 325 degrees
  p$areal_units_source="groundfish_polygons_inla_mesh"
  p$areal_units_overlay="none"
  p$areal_units_proj4string_planar_km=projection_proj4string("utm20")
  p$areal_units_resolution_km = 50  # length scale to base mesh sizes
  p$aegis_internal_resolution_km = 1

  p$boundingbox = list( xlim = c(-70.5, -56.5), ylim=c(39.5, 47.5)) # bounding box for plots using spplot
  p = c(p, aegis.coastline::coastline_layout( p=p ) )
  p$mypalette=RColorBrewer::brewer.pal(9, "YlOrRd")


  # choose one areal unit source; if both calls are run, the second overwrites the first
  sppoly = areal_units( p=p, areal_units_source="groundfish_polygons_inla_mesh", sa_threshold_km2=10, redo=TRUE )
  sppoly = areal_units( p=p, areal_units_source="groundfish_polygons_tesselation", sa_threshold_km2=10, redo=TRUE )

  if (0) {
    plot(sppoly[,"AUID"], col="orange")
    plot( sppoly@nb, coords=st_centroid(st_geometry( as(sppoly, "sf") )), add=TRUE, col="green" )
  }


groundfish_species_code = 10  #  10= cod

p$selection=list(
  label = "Cod summer standard tow",  # a label
  trawlable_units = "standardtow", # choices: "standardtow", "towdistance", "sweptarea"  # basis for estimating densities
  biologicals=list(
    spec = bio.taxonomy::taxonomy.recode( from="spec", to="parsimonious", tolookup=groundfish_species_code )
  ),
  survey=list(
    data.source="groundfish",
    yr = 1999:year.assessment,      # time frame for comparison
    dyear = c(150,250)/365, #  summer = which( (x>150) & (x<250) ) , spring = which(  x<149 ), winter = which(  x>251 )
    settype = 1,
    gear = c("Western IIA trawl", "Yankee #36 otter trawl"),
    strata_toremove=c("Gulf", "Georges_Bank", "Spring", "Deep_Water"),  # strata to remove from standard strata-based analysis
    polygon_enforce=TRUE,
    ranged_data="dyear"
  )
)
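
# the summer window above (dyear 150/365 to 250/365) corresponds roughly to
# late May through early September; a quick check, assuming a non-leap year:
if (0) {
  format( as.Date(c(150, 250), origin="2016-12-31"), "%d %b" )  # "30 May" "07 Sep"
}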

crs_lonlat = sp::CRS(projection_proj4string("lonlat_wgs84"))

set = survey.db( p=p, DS="filter", add_groundfish_strata=TRUE )
set$tag = "observations"

set$AUID = over( SpatialPoints( set[, c("lon", "lat")], crs_lonlat ), spTransform(sppoly, crs_lonlat ) )$AUID # match each datum to an area
set$uid = paste(set$AUID, set$year, set$dyear_discret, sep=".")


    APS = as.data.frame(sppoly)
    APS$AUID = as.character( APS$AUID )
    APS$tag ="predictions"
    APS[,p$variabletomodel] = NA


    # NOTE (assumption): pB, pS, pT, pPC1 and pPC2 are parameter lists for the
    # covariate models (e.g., bathymetry, substrate, temperature and the first two
    # species-composition PCA scores), set up elsewhere before running this script
    BI = carstm_summary ( p=pB )
    jj = match( as.character( APS$AUID), as.character( BI$AUID) )
    APS[, pB$variabletomodel] = BI[[ paste(pB$variabletomodel,"predicted",sep="." ) ]] [jj]
    jj =NULL
    BI = NULL

    SI = carstm_summary ( p=pS )
    jj = match( as.character( APS$AUID), as.character( SI$AUID) )
    APS[, pS$variabletomodel] = SI[[ paste(pS$variabletomodel,"predicted",sep="." )]] [jj]
    jj =NULL
    SI = NULL

    # to this point APS is static; now add the time dynamics (temperature)
    # ---------------------

    vn = c( p$variabletomodel, pB$variabletomodel,  pS$variabletomodel, "tag", "AUID" )
    APS = APS[, vn]

    # expand APS to all time slices
    n_aps = nrow(APS)
    APS = cbind( APS[ rep.int(1:n_aps, p$nt), ], rep.int( p$prediction_ts, rep(n_aps, p$nt )) )
    names(APS) = c(vn, "tiyr")
    APS$year = floor( APS$tiyr)
    APS$dyear = APS$tiyr - APS$year
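
    # A minimal toy illustration (hypothetical values) of the expansion above:
    # every areal unit row is repeated for each prediction time step and paired
    # with the corresponding fractional year ("tiyr")
    if (0) {
      toy = data.frame( AUID = c("A", "B") )
      ts_toy = c( 2000.25, 2000.75, 2001.25 )   # hypothetical prediction time steps
      nt_toy = length( ts_toy )
      toy_expanded = cbind(
        toy[ rep.int( 1:nrow(toy), nt_toy ), , drop=FALSE ],
        tiyr = rep.int( ts_toy, rep( nrow(toy), nt_toy ) )
      )
      toy_expanded  # 6 rows: A and B at each of the three time steps
    }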


    TI = carstm_summary ( p=pT )
    TI = TI[[ paste(pT$variabletomodel,"predicted",sep="." )]]
    au_map = match( APS$AUID, dimnames(TI)$AUID )
    year_map = match( as.character(APS$year), dimnames(TI)$year )
    dyear_breaks = c(p$dyears, p$dyears[length(p$dyears)]+ diff(p$dyears)[1] )
    dyear_map = as.numeric( cut( APS$dyear, breaks=dyear_breaks, include.lowest=TRUE, ordered_result=TRUE, right=FALSE ) )
    dindex = cbind(au_map, year_map, dyear_map )
    APS[, pT$variabletomodel] = TI[ dindex]
    TI = NULL
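
    # A small sketch of the seasonal binning used above, assuming p$dyears is a
    # vector of equally spaced season start fractions (e.g. ten bins per year):
    if (0) {
      dyears_toy = seq( 0, 0.9, by=0.1 )   # hypothetical p$dyears
      breaks_toy = c( dyears_toy, dyears_toy[length(dyears_toy)] + diff(dyears_toy)[1] )
      cut( c(0.05, 0.45, 0.95), breaks=breaks_toy, include.lowest=TRUE, right=FALSE, labels=FALSE )
      # -> 1, 5, 10 : each fractional year falls into its seasonal slice
    }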


    PI = carstm_summary ( p=pPC1 )
    PI = PI[[ paste(pPC1$variabletomodel,"predicted",sep="." )]]
    au_map = match( APS$AUID, dimnames(PI)$AUID )
    year_map = match( as.character(APS$year), dimnames(PI)$year )
    dindex = cbind(au_map, year_map )
    APS[, pPC1$variabletomodel] = PI [dindex]
    PI = NULL

    PI = carstm_summary ( p=pPC2 )
    PI = PI[[ paste(pPC2$variabletomodel,"predicted",sep="." )]]
    au_map = match( APS$AUID, dimnames(PI)$AUID )
    year_map = match( as.character(APS$year), dimnames(PI)$year )
    dindex = cbind(au_map, year_map  )
    APS[, pPC2$variabletomodel] = PI [dindex]
    PI = NULL

    # useful vars to have for analyses outside of carstm_summary
    varstoadd = c( "totwgt", "totno", "sa", "data_offset",  "zn", "qn" )

    for (vn in varstoadd) if (!exists( vn, APS)) APS[,vn] = NA
    APS$data_offset = 1  # force to solve for unit area

    M = rbind( set[, names(APS)], APS )
    APS = NULL
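
    # optional sanity check on the combined observation/prediction frame
    if (0) {
      table( M$tag )   # counts of "observations" vs "predictions" rows
    }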


fit = carstm_model( p=pB, M=M, DS="redo", carstm_model_label="test"  ) # run model and obtain predictions








  ## method 2: using the "traditional" strata-based polygons
  ## see https://cran.r-project.org/web/packages/spdep/vignettes/nb.pdf

  areal_units_timeperiod = "pre2014"  # "pre2014" refers to the older (pre-2014) strata definitions
  sppoly = maritimes_groundfish_strata( areal_units_timeperiod=areal_units_timeperiod, returntype="polygons" )
  sppoly = spTransform(sppoly, sp::CRS(p$areal_units_proj4string_planar_km) )
  sppoly$au_sa_km2 = gArea(sppoly, byid=TRUE)  # areas in km^2 (planar coordinates are in km)
  sppoly = spTransform(sppoly, sp::CRS(projection_proj4string("lonlat_wgs84")) )
  plot(sppoly)

  W.nb = maritimes_groundfish_strata( areal_units_timeperiod=areal_units_timeperiod, returntype="neighbourhoods" )
  W <- spdep::nb2mat(W.nb, style="B", zero.policy=TRUE) # adjacency matrix ; B = binary ; W=row standardized etc
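
  # optional: inspect the neighbourhood structure before using it in the model
  if (0) {
    summary( W.nb )      # distribution of neighbour counts per stratum
    isSymmetric( W )     # the binary adjacency matrix should be symmetric
  }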

  library(rgeos)
  # NOTE (assumption): sset is the survey set (e.g., derived from "set" above) with
  # strata identified; the commented call below shows one way to attach the AUIDs
  # sset = maritimes_groundfish_strata_identify( Y=sset, sppoly=sppoly, xyvars=c("lon", "lat"), planar_crs_km=p$areal_units_proj4string_planar_km, plotdata=TRUE )
  sset = sset[ which(!is.na(sset$AUID)), ]

  sdat = sset[ sset$yr %in% c(2010, 2011, 2012) ,]
  sdat$dummy = 1
  sdat$sa[which(!is.finite(sdat$sa))] = median(sdat$sa, na.rm=TRUE )
  sdat$totno = as.integer( sdat$totno * sdat$sa) # stored as density; convert back to counts
  nd = which(!is.finite(sdat$totno))
  sdat$totno[nd] = 0


  # sparse form of the CAR model; the design matrix is intercept-only (a single
  # column of ones), since "dummy" is a constant
  # X = model.matrix( ~ c(scale(sdat$dummy)) )
  X = model.matrix( ~ sdat$dummy -1 )

  DATAINPUT = c( list(
    nAU = length(sppoly),  # number of Areal Units
    nTU = length(unique(sdat$yr)),   # number of Temporal Units
    K = nrow(X),         # number of observations
    L = ncol(X),         # number of coefficients
    Wn = sum(W) / 2,     # number of neighbor pairs
    X = X,               # design matrix
    Y = as.integer( sdat$totno),               # observed number of cases
    log_offset = log(sdat$sa * 1000*1000) ,         # log offset: swept area converted from km^2 to m^2
    AU = as.integer(as.factor(sdat$AUID)), # numeric codes of strata (areal units)
    TU = as.integer(as.factor(sdat$yr)), # numeric codes of time units
    W = W,               # adjacency matrix
    scaling_factor = bym_scaling_factor(W.nb) ), # additional parameters required by the bym_scaled model (topology and variance scale factor)
    nb2edge(W.nb)
  )
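
  # A rough cross-check of the BYM2 scaling factor (a sketch only; carstm's
  # bym_scaling_factor() is the authoritative calculation). The factor is the
  # geometric mean of the marginal variances implied by the ICAR precision Q = D - W,
  # here approximated by jittering Q slightly so it can be inverted directly.
  if (0) {
    Q = diag( rowSums(W) ) - W
    Q_j = Q + diag( nrow(Q) ) * max( diag(Q) ) * sqrt( .Machine$double.eps )
    exp( mean( log( diag( solve(Q_j) ) ) ) )   # compare with DATAINPUT$scaling_factor
  }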



# choose one of the following model codes; both are derived from the basic scaled BYM form, allow multiple samples per AU, and take roughly 800 sec to fit
# stcode = stan_model_code( "bym_scaled_heirarchical_ar1_nonseparable" )
# stcode = stan_model_code( "bym_scaled_heirarchical_ar1_separable" )

# -----------------------------

  MS = rstan::stan_model( model_code=stcode )

  fit  = rstan::sampling(
      object=MS,
      data=DATAINPUT,
      iter=2000,            # total number of iterations per chain
      chains=4,               # number of Markov chains
      cores = 4,               # number of cores
      warmup = 1000,          # number of warmup iterations per chain
      # thin=20,
      control = list(adapt_delta = 0.975, max_treedepth=14),
      verbose=TRUE ); # ~ 1 min per chain

  pars = c('B[1]',  'rho[1,1]', 'sigma[1,1]',  'mu[1,1]', 'ar1[1]', 'lp__')

  print(fit, pars =pars)
  traceplot(fit, pars = pars)   # visualize results

  yhat = apply( rstan::extract( fit, "yhat")[["yhat"]], 2, mean )
  plot( yhat ~ log(DATAINPUT$Y+0.1) )
  cor(yhat , log(DATAINPUT$Y+0.1)  ) #  0.5772 ; R^2=0.333
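
  # optional graphical posterior predictive check (assumes the bayesplot package
  # is installed and that the Stan model stores replicate expectations as "yhat")
  if ( requireNamespace("bayesplot", quietly=TRUE) ) {
    yrep = rstan::extract( fit, "yhat" )[["yhat"]]   # draws x observations
    bayesplot::ppc_dens_overlay( log(DATAINPUT$Y + 0.1), log( yrep[1:50, , drop=FALSE] + 0.1 ) )
  }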


  library(loo)  # extract_log_lik(), relative_eff() and loo() are provided by the loo package
  log_lik1 <- extract_log_lik(fit, merge_chains = FALSE)
  rel_n_eff <- relative_eff(exp(log_lik1))
  fit_loo_1 = loo(log_lik1, r_eff = rel_n_eff, cores = 2)

fit_loo_1

# Computed from 2000 by 144 log-likelihood matrix
#
#          Estimate     SE
# elpd_loo  -2579.8  488.9
# p_loo      5358.5 1122.7
# looic      5159.5  977.9
# ------
# Monte Carlo SE of elpd_loo is NA.
#
# Pareto k diagnostic values:
#                          Count Pct.    Min. n_eff
# (-Inf, 0.5]   (good)     77    53.5%   106
#  (0.5, 0.7]   (ok)        9     6.2%   99
#    (0.7, 1]   (bad)       8     5.6%   2
#    (1, Inf)   (very bad) 50    34.7%   0
#
#   compare( fit_loo_1 , fit_loo_2 )




  tu  = 1 # time unit
  sppoly$mu = apply( rstan::extract( fit, "mu")[["mu"]][,,tu], 2, mean )

  vn = "mu"
  brks = interval_break(X= sppoly[[vn]], n=length(p$mypalette), style="quantile")
  spplot( sppoly, vn, col.regions=p$mypalette, main=vn, at=brks, sp.layout=p$coastLayout, col="transparent", xlim=p$boundingbox$xlim, ylim=p$boundingbox$ylim )

  dev.new()

  tu  = 2 # time unit
  sppoly$mu = apply( rstan::extract( fit, "mu")[["mu"]][,,tu], 2, mean )
  vn = "mu"
  brks = interval_break(X= sppoly[[vn]], n=length(p$mypalette), style="quantile")
  spplot( sppoly, vn, col.regions=p$mypalette, main=vn, at=brks, sp.layout=p$coastLayout, col="transparent", xlim=p$boundingbox$xlim, ylim=p$boundingbox$ylim )
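
  # the two mapping blocks above could be collapsed into a loop over time units,
  # e.g. (a sketch, using interval_break and p$coastLayout exactly as above):
  if (0) {
    for ( tu in 1:DATAINPUT$nTU ) {
      sppoly$mu = apply( rstan::extract( fit, "mu" )[["mu"]][,,tu], 2, mean )
      brks = interval_break( X=sppoly[["mu"]], n=length(p$mypalette), style="quantile" )
      print( spplot( sppoly, "mu", col.regions=p$mypalette, main=paste( "mu, time unit", tu ),
        at=brks, sp.layout=p$coastLayout, col="transparent",
        xlim=p$boundingbox$xlim, ylim=p$boundingbox$ylim ) )
    }
  }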