ecmgis: Package that interacts with stmv.

  sizespectrum.db = function( DS="", p=NULL ) {
    ### dependency is only groundfish db for now. ...

    datadir = project.datadirectory("aegis", "sizespectrum" )
    dir.create( datadir, showWarnings=FALSE, recursive=TRUE )
    infix = paste( p$nss.taxa, p$nss.type, p$nss.base, sep="." )

    if (DS %in% c("sizespectrum.by.set", "sizespectrum.by.set.redo") ) {

      # make the base normalised size spectral statistics summaries

      if (DS == "sizespectrum.by.set" ) {
        fn = file.path( datadir, paste(  "sizespectrum.by.set", infix, "rdata", sep="." ) )
        load( fn )
        return (ss )
      }

      x =  survey.db( p=p, "det" )  # mass and length are not transformed
      x = x[ which( x$data.source=="groundfish"), ]

      # x = x[ which(x$settype %in% c(1,2,5) ), ]
      # settype: 1=stratified random, 2=regular survey, 3=unrepresentative(net damage),
      #  4=representative sp recorded(but only part of total catch), 5=comparative fishing experiment,
      #  6=tagging, 7=mesh/gear studies, 8=explorartory fishing, 9=hydrography

       for (tx in p$nss.taxa) {

        #i.tx = taxonomy.filter.taxa( x$spec_bio, taxafilter=tx)
       i.tx = taxonomy.filter.taxa( x$spec_bio, method=p$taxa, outtype="internalcodes" )
        if ( is.null(i.tx) || length(i.tx) < 30) next()
        XX = x[ i.tx, ]
        rm( i.tx ); gc()

        for (vname in p$nss.type) {

          XX.log = log( XX[,vname], base=p$nss.base )
          XX$sizeclass = cut( XX.log, breaks=p$nss.bins$lb, labels=F, include.lowest=F, right=T )

          jjj = which( is.finite(XX$sizeclass + XX$cf_det) )
          XX = XX[jjj,]

          XX$id=as.factor(XX$id)
          XX$sizeclass=as.factor(XX$sizeclass)

          # closed on the right: (x,x]
          # midpoints = (l.bound [2:n.size] + l.bound [1:(n.size-1)] ) /2
         infix = paste( tx, vname, p$nss.base, sep="." )

          fn = file.path( datadir, paste(  "sizespectrum.by.set", infix, "rdata", sep="." ) )

          ss = NULL
          tt = XX$cf_det*XX[,vname]
          ss = xtab.2way( xval=tt, factors=XX[,c("id", "sizeclass")] )

          ### ss contains number per km2 broken down by age class and weighted for sa, etc
          save( ss, file=fn, compress=T)

          rm (XX, ss); gc()
        }
      }
      return( "Done" )
    }


    # --------------------


    if (DS %in% c( "sizespectrum.stats", "sizespectrum.stats.redo" ) ) {


      fn = file.path( datadir, paste( "set.sizespectrum.stats", infix, "rdata", sep=".")  )

      if (DS=="sizespectrum.stats") {
        nss = NULL
        if (file.exists( fn) ) load( fn )
        return ( nss )
      }

      sm = survey.db( p=p, "set" )
      sm = sm[ which( sm$data.source=="groundfish") ,]
      sm$area = sm$sa
      sm$sa = NULL

      sm$id = as.character( sm$id )
      smdups = which( duplicated (sm$id ) )
      if (length(smdups) > 0) sm = sm[ -smdups, ]
      sm = sm[ order(sm$id) , ]
      gc()

      nss = NULL
      p$newvars = c( "id",  "nss.rsquared", "nss.df", "nss.b0", "nss.b1", "nss.shannon", "nss.evenness", "nss.Hmax" )

      p$nsets = nrow( sm )
      p$nlengthscale = length(p$nss.bins)
    
      if (p$use.bigmemory.file.backing) {
        p$fn.tmp = file.path(  make.random.string( "nss.bigmemory.tmp" ) )
        p$fn.desc = paste( p$fn.tmp, "desc", sep="." )
        nss = big.matrix( nrow=p$nsets, ncol=length(p$newvars), type="double" , init=NA, backingfile=p$fn.tmp, descriptorfile=p$fn.desc )
      } else {
        nss = big.matrix( nrow=p$nsets, ncol=length(p$newvars), type="double" , init=NA, shared=TRUE )
      }

      p$bigmem.desc = bigmemory::describe(nss)

      parallel_run( 
        p=p, 
        runindex=list( nsets=1:p$nsets ) ,
        sm=sm, 
        FUNC = function( ip=NULL, p=NULL, sm=NULL ) {
          if (exists( "libs", p)) RLibrary( p$libs )
          if ( is.null(ip) ) ip = 1:p$nruns
          coords = c("lon", "lat")
          nss <- attach.big.matrix( p$bigmem.desc )
          for ( iip in ip ) {
            sm0 = sm[ iip,]
            print( sm0 )
            smdi = geodist.test( point=sm0[coords], locations=sm[,coords], method="great.circle", threshold=p$nss.distances, type="le")
            smd  = sm[smdi,]
            # tdiff = abs( as.numeric(sm0$timestamp  ) - as.numeric(smd$chron))
            tdiff = abs( as.numeric(difftime(sm0$timestamp, smd$timestamp, units="days")) )
            smti = which(tdiff <= p$nss.stimes)
            good.id = smd$id[ sort( smti) ]
            midpoints=p$nss.bins$mids
            ss = sizespectrum.db( DS="sizespectrum.by.set", p=p )
            ### add an offset (1% of min nonzero value) and then log transform it
            ss0 = as.matrix(ss)
            offset = min(ss0[which(ss0>0)], na.rm=T) / 100
            ss = log( ss+offset, base=p$nss.base ) ## convert to same base as X-values
            variables = colnames(ss)
            vtest = colMeans(ss[, variables], na.rm=T)
            vmode = which.max( vtest )
            vmin = min(which( vtest > log(offset, base=p$nss.base )), na.rm=T )
            vmax = max(which( vtest > log(offset, base=p$nss.base )), na.rm=T )
            vgood = c(vmin:vmax)
            vi = c(vmode:vmax)
            ss$id = rownames(ss)
            ss.i = sort( which( ss$id %in% good.id ) )
            if (length(ss.i)==0) next
            ss = merge( ss[ss.i,], smd, by="id", all.x=T, all.y=F, sort=F )
            # take geometric weighted means  (area is the SA of a tow)
            v = NULL
            v = data.frame(
              logbaseN = colMeans( ss[,variables] * ss$area, na.rm=T ) / sum(ss$area, na.rm=T),
              sc = as.numeric(as.character(variables))
            )
            v$sizeclass = midpoints[v$sc]
            v$size = p$nss.base^v$sizeclass  # return to grams
            v$N = p$nss.base^v$logbaseN - offset
            v$N = zapsmall( v$N)
            v$N[ which(!is.finite(v$N)) ] = 0
            if (sum(v$N) ==0) next()
            si = shannon.diversity( x=array( v$N[vgood]+offset, dim=c(1,length(vgood))), base=2, getid=F )
            vi = intersect( vi, which(is.finite(v$logbaseN)) )
            lm.r = NULL
            lm.r = try( lm( v$sizeclass[vi] ~ v$logbaseN[vi]  ) )
            if (is.null(lm.r) | class(lm.r)=="try-error" | !is.finite(sum(coef(lm.r))) ) next()
            lm.s = summary( lm.r )
            out = NULL
            out = unlist( c( iip, lm.s$r.squared, lm.s$df[2], lm.s$coefficients[1], lm.s$coefficients[2], si ) )
            nss[iip,] = out
            gc()
          }
          return ( NULL )
        }
      )
      
      nssbm <- attach.big.matrix( p$bigmem.desc )
      nss = as.data.frame.matrix(as.matrix(nssbm[]) )
      names(nss) = p$newvars

      nss = factor2number( nss, c( "nss.rsquared", "nss.df", "nss.b0", "nss.b1","nss.shannon", "nss.evenness", "nss.Hmax" ) )
      nss = factor2character( nss, c("id") )

      nss$id = sort( unique(as.character( sm$id ) ) )  # overwrite

      save(nss, file=fn, compress=T)

      if (p$use.bigmemory.file.backing) {
        file.remove( p$fn.tmp )
        file.remove( p$fn.desc )
      }

      return ( "Done" )
    }


    # --------------------


    if (DS %in% c("sizespectrum", "sizespectrum.redo") ) {

      # make the base normalised size spectral statistics summaries

      fn = file.path( datadir, paste( "sizespectrum", infix, "rdata", sep="." ) )

      if ( DS=="sizespectrum" ) {
        SS = NULL
        if (file.exists( fn) ) load( fn )
        return ( SS )
      }

      sm = survey.db( p=p, "set" )
      sm = sm[ which( sm$data.source=="groundfish") ,]
      sm$id = as.character( sm$id )
      smdups = which( duplicated (sm$id ) )
      if (length(smdups) > 0) sm = sm[ -smdups, ]

      sms = sizespectrum.db( DS="sizespectrum.stats", p=p )

      sm = merge (sms, sm, by="id", all.x=T, all.y=F, sort= F)
      sm = lonlat2planar( sm, proj.type=p$internal.crs )
		  oo = which(!is.finite( sm$plon+sm$plat ) )
      if (length(oo)>0) sm = sm[ -oo , ]  # a required field for spatial interpolation

      # check for duplicates
      for ( y in p$yrs ) {
        yy = which (sm$yr == y)
        ii = which( duplicated( sm$id[yy] ) )

        if (length(ii) > 0) {
          print( "The following sets have duplicated positions. The first only will be retained" )
          print( sm[yy,] [ duplicates.toremove( sm$id[yy] ) ] )
          sm = sm[ - ii,]
        }
      }

      SS = sm[ which( is.finite(sm$nss.b0) ) ,]
      save(SS, file=fn, compress=T )
      return ( "Done" )
    }

  }