sizespectrum.db = function( DS="", p=NULL ) {
### dependency is only groundfish db for now. ...
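# DS options ("*.redo" variants rebuild and save; the plain forms load the saved file):
#   "sizespectrum.by.set" : set x size-class cross-tabulations, one file per taxon/variable
#   "sizespectrum.stats"  : per-set normalised size spectrum statistics (slope, intercept, diversity)
#   "sizespectrum"        : the stats merged back onto set-level survey data with planar coordinates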
datadir = project.datadirectory("aegis", "sizespectrum" )
dir.create( datadir, showWarnings=FALSE, recursive=TRUE )
infix = paste( p$nss.taxa, p$nss.type, p$nss.base, sep="." )
if (DS %in% c("sizespectrum.by.set", "sizespectrum.by.set.redo") ) {
# make the base normalised size spectral statistics summaries
if (DS == "sizespectrum.by.set" ) {
fn = file.path( datadir, paste( "sizespectrum.by.set", infix, "rdata", sep="." ) )
ss = NULL
if (file.exists( fn )) load( fn )
return (ss)
}
x = survey.db( p=p, "det" ) # mass and length are not transformed
x = x[ which( x$data.source=="groundfish"), ]
# x = x[ which(x$settype %in% c(1,2,5) ), ]
# settype: 1=stratified random, 2=regular survey, 3=unrepresentative(net damage),
# 4=representative sp recorded(but only part of total catch), 5=comparative fishing experiment,
# 6=tagging, 7=mesh/gear studies, 8=exploratory fishing, 9=hydrography
for (tx in p$nss.taxa) {
#i.tx = taxonomy.filter.taxa( x$spec_bio, taxafilter=tx)
i.tx = taxonomy.filter.taxa( x$spec_bio, method=p$taxa, outtype="internalcodes" )
if ( is.null(i.tx) || length(i.tx) < 30) next()
XX = x[ i.tx, ]
rm( i.tx ); gc()
for (vname in p$nss.type) {
YY = XX  # work on a copy so XX remains intact for the remaining nss.type variables
YY.log = log( YY[,vname], base=p$nss.base )
YY$sizeclass = cut( YY.log, breaks=p$nss.bins$lb, labels=F, include.lowest=F, right=T )
# closed on the right: (x,x]
# midpoints = (l.bound [2:n.size] + l.bound [1:(n.size-1)] ) /2
jjj = which( is.finite(YY$sizeclass + YY$cf_det) )
YY = YY[jjj,]
YY$id = as.factor(YY$id)
YY$sizeclass = as.factor(YY$sizeclass)
infix = paste( tx, vname, p$nss.base, sep="." )
fn = file.path( datadir, paste( "sizespectrum.by.set", infix, "rdata", sep="." ) )
tt = YY$cf_det * YY[,vname]
ss = xtab.2way( xval=tt, factors=YY[,c("id", "sizeclass")] )
### ss contains cf_det-weighted (per km2) values broken down by set (id) and size class
save( ss, file=fn, compress=T)
rm( YY, ss ); gc()
}
rm( XX ); gc()
}
return( "Done" )
}
# --------------------
if (DS %in% c( "sizespectrum.stats", "sizespectrum.stats.redo" ) ) {
fn = file.path( datadir, paste( "set.sizespectrum.stats", infix, "rdata", sep=".") )
if (DS=="sizespectrum.stats") {
nss = NULL
if (file.exists( fn) ) load( fn )
return ( nss )
}
sm = survey.db( p=p, "set" )
sm = sm[ which( sm$data.source=="groundfish") ,]
sm$area = sm$sa
sm$sa = NULL
sm$id = as.character( sm$id )
smdups = which( duplicated (sm$id ) )
if (length(smdups) > 0) sm = sm[ -smdups, ]
sm = sm[ order(sm$id) , ]
gc()
nss = NULL
p$newvars = c( "id", "nss.rsquared", "nss.df", "nss.b0", "nss.b1", "nss.shannon", "nss.evenness", "nss.Hmax" )
p$nsets = nrow( sm )
p$nlengthscale = length( p$nss.bins$mids )  # number of size classes
if (p$use.bigmemory.file.backing) {
p$fn.tmp = file.path( make.random.string( "nss.bigmemory.tmp" ) )
p$fn.desc = paste( p$fn.tmp, "desc", sep="." )
nss = big.matrix( nrow=p$nsets, ncol=length(p$newvars), type="double" , init=NA, backingfile=p$fn.tmp, descriptorfile=p$fn.desc )
} else {
nss = big.matrix( nrow=p$nsets, ncol=length(p$newvars), type="double" , init=NA, shared=TRUE )
}
p$bigmem.desc = bigmemory::describe(nss)
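# the descriptor lets each parallel worker attach (via attach.big.matrix) to the same
# shared or file-backed results matrix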
parallel_run(
p=p,
runindex=list( nsets=1:p$nsets ) ,
sm=sm,
FUNC = function( ip=NULL, p=NULL, sm=NULL ) {
if (exists( "libs", p)) RLibrary( p$libs )
if ( is.null(ip) ) ip = 1:p$nruns
coords = c("lon", "lat")
nss <- attach.big.matrix( p$bigmem.desc )
for ( iip in ip ) {
sm0 = sm[ iip,]
print( sm0 )
smdi = geodist.test( point=sm0[coords], locations=sm[,coords], method="great.circle", threshold=p$nss.distances, type="le")
smd = sm[smdi,]
# tdiff = abs( as.numeric(sm0$timestamp ) - as.numeric(smd$chron))
tdiff = abs( as.numeric(difftime(sm0$timestamp, smd$timestamp, units="days")) )
smti = which(tdiff <= p$nss.stimes)
good.id = smd$id[ sort( smti) ]
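# good.id: ids of sets within p$nss.distances (great-circle distance) and within
# p$nss.stimes days of the current set; these neighbours are pooled below into a local size spectrum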
midpoints=p$nss.bins$mids
ss = sizespectrum.db( DS="sizespectrum.by.set", p=p )
### add an offset (1% of min nonzero value) and then log transform it
ss0 = as.matrix(ss)
offset = min(ss0[which(ss0>0)], na.rm=T) / 100
ss = log( ss+offset, base=p$nss.base ) ## convert to same base as X-values
variables = colnames(ss)
vtest = colMeans(ss[, variables], na.rm=T)
vmode = which.max( vtest )
vmin = min(which( vtest > log(offset, base=p$nss.base )), na.rm=T )
vmax = max(which( vtest > log(offset, base=p$nss.base )), na.rm=T )
vgood = c(vmin:vmax)
vi = c(vmode:vmax)
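# vgood: size classes whose mean log abundance exceeds the log offset (i.e. classes with data);
# vi: classes from the modal class to the largest, used below for the linear (slope) fit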
ss$id = rownames(ss)
ss.i = sort( which( ss$id %in% good.id ) )
if (length(ss.i)==0) next
ss = merge( ss[ss.i,], smd, by="id", all.x=T, all.y=F, sort=F )
# take geometric weighted means (area is the SA of a tow)
v = NULL
v = data.frame(
logbaseN = colSums( ss[,variables] * ss$area, na.rm=T ) / sum(ss$area, na.rm=T),  # area-weighted mean of the log abundances
sc = as.numeric(as.character(variables))
)
v$sizeclass = midpoints[v$sc]
v$size = p$nss.base^v$sizeclass # return to grams
v$N = p$nss.base^v$logbaseN - offset
v$N = zapsmall( v$N)
v$N[ which(!is.finite(v$N)) ] = 0
if (sum(v$N) ==0) next()
si = shannon.diversity( x=array( v$N[vgood]+offset, dim=c(1,length(vgood))), base=2, getid=F )
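# si is assumed to hold Shannon diversity, evenness and Hmax (shannon.diversity is a project
# helper), matching the nss.shannon, nss.evenness and nss.Hmax columns defined in p$newvars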
vi = intersect( vi, which(is.finite(v$logbaseN)) )
lm.r = NULL
lm.r = try( lm( v$sizeclass[vi] ~ v$logbaseN[vi] ) )
if (is.null(lm.r) || inherits(lm.r, "try-error") || !is.finite(sum(coef(lm.r))) ) next()
lm.s = summary( lm.r )
out = NULL
out = unlist( c( iip, lm.s$r.squared, lm.s$df[2], lm.s$coefficients[1], lm.s$coefficients[2], si ) )
nss[iip,] = out
gc()
}
return ( NULL )
}
)
nssbm <- attach.big.matrix( p$bigmem.desc )
nss = as.data.frame.matrix(as.matrix(nssbm[]) )
names(nss) = p$newvars
nss = factor2number( nss, c( "nss.rsquared", "nss.df", "nss.b0", "nss.b1","nss.shannon", "nss.evenness", "nss.Hmax" ) )
nss = factor2character( nss, c("id") )
nss$id = sort( unique(as.character( sm$id ) ) ) # overwrite
save(nss, file=fn, compress=T)
if (p$use.bigmemory.file.backing) {
file.remove( p$fn.tmp )
file.remove( p$fn.desc )
}
return ( "Done" )
}
# --------------------
if (DS %in% c("sizespectrum", "sizespectrum.redo") ) {
# make the base normalised size spectral statistics summaries
fn = file.path( datadir, paste( "sizespectrum", infix, "rdata", sep="." ) )
if ( DS=="sizespectrum" ) {
SS = NULL
if (file.exists( fn) ) load( fn )
return ( SS )
}
sm = survey.db( p=p, "set" )
sm = sm[ which( sm$data.source=="groundfish") ,]
sm$id = as.character( sm$id )
smdups = which( duplicated (sm$id ) )
if (length(smdups) > 0) sm = sm[ -smdups, ]
sms = sizespectrum.db( DS="sizespectrum.stats", p=p )
sm = merge (sms, sm, by="id", all.x=T, all.y=F, sort= F)
sm = lonlat2planar( sm, proj.type=p$internal.crs )
oo = which(!is.finite( sm$plon+sm$plat ) )
if (length(oo)>0) sm = sm[ -oo , ] # a required field for spatial interpolation
# check for duplicates
for ( y in p$yrs ) {
yy = which (sm$yr == y)
ii = which( duplicated( sm$id[yy] ) )
if (length(ii) > 0) {
print( "The following sets have duplicated positions. The first only will be retained" )
print( sm[yy,] [ duplicates.toremove( sm$id[yy] ) ] )
sm = sm[ - ii,]
}
}
SS = sm[ which( is.finite(sm$nss.b0) ) ,]
save(SS, file=fn, compress=T )
return ( "Done" )
}
}
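# ------------------------------------------------------------------------------
# Example usage (a minimal sketch, not run): the parameter list below is hypothetical
# and only illustrates the fields referenced by this function (nss.taxa, nss.type,
# nss.base, nss.bins, nss.distances, nss.stimes, yrs, internal.crs,
# use.bigmemory.file.backing); real values, plus the other fields used by the aegis
# helpers (e.g. p$taxa, p$libs), come from the project parameter setup.
if (FALSE) {
  p = list(
    nss.taxa = "all",                 # taxon group(s) to summarise
    nss.type = "mass",                # variable defining size classes
    nss.base = 2,                     # log base for the size classes
    nss.bins = data.frame( lb=seq(-2, 10, by=1), mids=seq(-2, 10, by=1) + 0.5 ),  # hypothetical bin bounds and midpoints
    nss.distances = 50,               # spatial pooling radius for the stats
    nss.stimes = 50,                  # temporal pooling window (days)
    yrs = 1990:2020,
    internal.crs = "+proj=utm +zone=20 +datum=WGS84",
    use.bigmemory.file.backing = FALSE
  )
  sizespectrum.db( DS="sizespectrum.by.set.redo", p=p )  # set x size-class tables
  sizespectrum.db( DS="sizespectrum.stats.redo", p=p )   # per-set spectrum statistics
  sizespectrum.db( DS="sizespectrum.redo", p=p )         # merge stats onto set data
  SS = sizespectrum.db( DS="sizespectrum", p=p )
}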