emgis: Package that interacts with stmv.

  speciesarea.db = function( DS="", p=NULL ) {

    outdir = file.path( project.datadirectory("aegis"), "speciesarea" )

    dir.create( outdir, showWarnings=FALSE, recursive=TRUE )
    infix = paste( p$spatial.domain, p$taxa, paste(p$data.sources, collapse="."), p$speciesarea.method, sep="." )


    if (DS %in% c("speciesarea.counts", "speciesarea.counts.ny", "speciesarea.counts.redo") ) {

      fn = file.path( outdir, paste( "speciesarea.counts", infix, "rdata", sep=".") )
      fn.ny = file.path( outdir, paste( "speciesarea.counts.ny", infix, "rdata", sep=".") )

      if (DS=="speciesarea.counts") {
        load( fn)
        return (SA)
      }
      if (DS=="speciesarea.counts.ny") {
        load( fn.ny)
        return (SA.ny)
      }

      set = survey.db (DS="set", p=p)
      scat = survey.db (DS="cat", p=p)

      p$nsets = nrow( set )
      p$nlengthscale = length(p$lengthscale)
      p$ntimescale = length(p$timescale)

      if (p$use.bigmemory.file.backing) {

        p$fn.tmp = file.path(  make.random.string("speciesarea.bigmemory.tmp" ))
        p$fn.desc = paste( p$fn.tmp, "desc", sep="." )
        p$fn.ny.tmp = file.path(  make.random.string("speciesarea.ny.bigmemory.tmp" ) )
        p$fn.ny.desc = paste( p$fn.ny.tmp, "desc", sep=".")

        sar = big.matrix(nrow=p$nsets, ncol=p$nlengthscale*p$ntimescale,
            type="double" , init=NA, backingfile=p$fn.tmp, descriptorfile=p$fn.desc  )

        sar.ny = big.matrix(nrow=p$nsets, ncol=p$nlengthscale*p$ntimescale,
            type="double" , init=NA, backingfile=p$fn.ny.tmp, descriptorfile=p$fn.ny.desc )

      } else {

        sar = big.matrix(nrow=p$nsets, ncol=p$nlengthscale*p$ntimescale, type="double" , init=NA  )
        sar.ny = big.matrix(nrow=p$nsets, ncol=p$nlengthscale*p$ntimescale, type="double", init=NA )

      }

      p$bigmem.desc = bigmemory::describe(sar)
      p$bigmem.ny.desc =  bigmemory::describe(sar.ny)  # counts the # of years of data

      parallel_run( 
        p=p, 
        set=set, 
        sc=scat, 
        runindex=list( nsets=1:p$nsets ), 
        FUNC =function( ip=NULL, p, set, sc ) {
           # define species counting mechanism
          if (exists( "libs", p)) RLibrary( p$libs )
          if ( is.null(ip) ) ip = 1:p$nruns
          sar <- attach.big.matrix( p$bigmem.desc )
          sar.ny <- attach.big.matrix( p$bigmem.ny.desc )
          coords = c("lon", "lat")
          c0 = 0  # counter
          for ( ii in ip ) {
            seti = set[ii,]
            t0 = seti$yr
            dist = geosphere::distCosine( seti[, coords], set[,coords] ) / 1000
            for (k in 1:p$nlengthscale) {
              qid = which(dist <= p$lengthscale[k])  # sets and taxa within target distance
              for (l in 1:p$ntimescale) {
                m = which ( set$yr[qid] %in% (t0 + c(-p$timescale[l] : p$timescale[l])) )
                u = l + (k-1)*p$ntimescale
                sar[ii, u] = length( unique( sc$spec[ which( sc$id %in% set$id[qid][m] ) ] ) ) # no. species
                sar.ny[ii, u] = length( unique( set$yr[qid][m] ) ) # no. years entering into the count
              }
            }
          }
          return(NULL)
        }
      )

      sar <- attach.big.matrix( p$bigmem.desc )
      sar.ny <- attach.big.matrix( p$bigmem.ny.desc )

      SA = array( data=sar[], dim=c( p$nsets, p$ntimescale, p$nlengthscale) )
      SA.ny = array( data=sar.ny[], dim=c( p$nsets, p$ntimescale, p$nlengthscale) )

      save( SA, file=fn, compress=T )
      save( SA.ny, file=fn.ny, compress=T )

      if (p$use.bigmemory.file.backing) {
        file.remove( p$fn.tmp )
        file.remove( p$fn.desc )
        file.remove( p$fn.ny.tmp )
        file.remove( p$fn.ny.desc )
      }

      return( fn )
    }

    # -------------------------
    
    if (DS %in% c("speciesarea.stats","speciesarea.stats.redo") ) {

      fn = file.path( outdir, paste("speciesarea.stats", infix, "rdata", sep=".") )

      if (DS=="speciesarea.stats") {
        load( fn)
        return ( sa )
      }

      SA = speciesarea.db( DS="speciesarea.counts", p=p )
      p$nvars = 9
      p$nsets = nrow(SA)
      rm(SA); gc()

      if (p$use.bigmemory.file.backing) {
        p$fn.tmp = file.path( make.random.string("speciesarea.stats.bigmemory.tmp") )
        p$fn.desc = paste( p$fn.tmp, "desc", sep="." )
        o = big.matrix(nrow=p$nsets, ncol=p$nvars,
            type="double" , init=NA, backingfile=p$fn.tmp, descriptorfile=p$fn.desc)
      } else {
        o = big.matrix(nrow=p$nsets, ncol=p$nvars, type="double", init=NA )
      }

      p$bigmem.desc = bigmemory::describe(o)

      parallel_run( 
        p=p, 
        runindex=list( nsets=1:p$nsets ),
        FUNC = function( ip=NULL, p ) {
          if (exists( "libs", p)) RLibrary( p$libs )
          if (is.null(ip)) ip = 1:p$nruns
          predict.param = data.frame( sa = pi * ( p$pred.radius ^ 2), nyrs=1 )   # km^2
          Y = speciesarea.db( DS="speciesarea.counts", p=p )
          nY = speciesarea.db( DS="speciesarea.counts.ny", p=p )
          o <- attach.big.matrix( p$bigmem.desc )
          for (ii in ip ) {
            # print(ii)
            y =  t( Y[ii,,] )
            ny =  t( nY[ii,,] )
            rownames(y) = rownames(ny) = p$lengthscale
            colnames(y) = colnames(ny) = p$timescale
            y = as.data.frame.table( y, stringsAsFactors=F )
            colnames(y) = c( "lengthscale", "timescale", "nspec" )
            ny = as.data.frame.table( ny, stringsAsFactors=F )
            colnames(ny) = c( "lengthscale", "timescale", "nyrs" )
            y$lengthscale = as.numeric( y$lengthscale )
            y$timescale = as.numeric( y$timescale )
            y$sa =  pi * (y$lengthscale ^ 2)
            y$nyrs.ee =  y$timescale*2 + 1  # number of years of data entering into the count
            ny$lengthscale = as.numeric( ny$lengthscale )
            ny$timescale = as.numeric( ny$timescale )
            y = merge( y, ny, by=c("lengthscale", "timescale" ), all.x=T, all.y=F, sort=F )
            iiyy = which( !is.finite( y$nyrs ) )
            if (length (iiyy) > 0 ) y$nyrs[iiyy] = y$nyrs.ee[iiyy]
            X = na.omit(y)
            X = X[ which(X$nspec>0), ]
            if ( nrow(X) < 5 ) next()
            if (p$speciesarea.method=="glm") {
              m0 = try ( glm( (nspec+1) ~ log(sa) + log(nyrs), data=X, family=gaussian("log") ) )
              if ("try-error" %in% class( m0)) next()
              m  = summary(m0)
              mod0 = coefficients(m)
              if (nrow(mod0) !=3 ) next()
              p0 = predict( m0, newdata=predict.param, se.fit=T, type="response" )
              out = c( mod0[1,1], mod0[2,1], mod0[3,1], mod0[1,2], mod0[2,2], mod0[3,2], summary.lm(m0)$r.squared, p0$fit-1, p0$se.fit )
            }
            o[ii,] =  out
          }
          return ( NULL )
        }

      )

      o <- attach.big.matrix( p$bigmem.desc )
      o = as.data.frame(o[])

      names( o ) = c( "C", "Z", "T", "C.se", "Z.se", "T.se", "sar.rsq", "Npred", "Npred.se"   )
      o = factor2number( o, c( "C", "Z", "T", "C.se", "Z.se", "T.se", "sar.rsq", "Npred", "Npred.se"   ) )

      # save ( o, file=fn, compress=T )

      set = survey.db (DS="set", p=p)

      if ( nrow(set) != nrow(o ) ) {
        print( "Error: data merge failure" )
        stop( nrow(o) )
      }

      sa = cbind( set, o )
      save ( sa, file=fn, compress=T )

      if (p$use.bigmemory.file.backing) {
        file.remove( p$fn.tmp )
        file.remove( p$fn.desc )
      }

      return( fn )
    }


    # --------------------


    if (DS %in% c( "speciesarea", "speciesarea.redo" ) ) {

      fn = file.path( outdir, paste( "set.speciesarea.merged", infix, "rdata", sep="." ) )

      if (DS=="speciesarea") {
        SA = NULL
        if (file.exists( fn) ) load( fn )
        return ( SA )
      }

      SA = speciesarea.db( DS="speciesarea.stats", p=p )
      SA = lonlat2planar( SA, proj.type=p$internal.crs )
      oo = which(!is.finite( SA$plon+SA$plat ) )
      if (length(oo)>0) SA = SA[ -oo , ]  # a required field for spatial interpolation

      # this forces resolution of p$pres=1km in SSE
      SA$lon = SA$lat = NULL

      yrs = sort( unique( SA$yr ) )

      # check for duplicates
      for ( y in p$yrs ) {
        yy = which (SA$yr == y)
        ii = which( duplicated( SA$id[yy] ) )

        if (length(ii) > 0) {
          print( "The following sets have duplicated positions. The first only will be retained" )
          print( SA[yy,] [ duplicates.toremove( SA$id[yy] ) ] )
          SA = SA[ - ii,]
        }
      }

      save( SA, file=fn, compress=T )
      return (fn)
    }

  }