In fishvice/husky: a collection of husky functions

knitr::opts_chunk$set(eval = TRUE, echo = TRUE,
                      message = FALSE, warning = FALSE)

Preamble

The calculation of the MRI groundfish survey indices have for quite some time been done via a bundle of R-scripts and data objects that reside in various .RData binary directories in the reikn/Splus5 space. These have been moved from the blackholes to the husky-package. What this means is that those interested in calculating survey indices no longer need to "attach" to .RData binary directories, only install and load the husky-package and run the usual code (examples provided below).

This move is just a stepping stone in the evolution of survey index calculation, the next generation is being developed within the pax-package.

library(ggplot2)
library(dplyr)
library(fjolst)
# devtools::install_github("fishvice/husky", dependencies = FALSE)
library(husky)

Length based SMB indices

Minimum fuzz adaptation

Specs:

SPECIES <- 1
YEARS <- 1985:2016
KYN <- FALSE # only for the huskyverse
lwcoeff <- LWCOEFF[[as.character(SPECIES)]]
lengdir <- LENGDIR[[as.character(SPECIES)]]

NB: The next generation of indices calculation has been seeded in /net/hafkaldi/export/u2/reikn/Splus5/SurveyWork. According to Husky (December 2016) this has not "formally" been adopted.

Script location: Husky's master scripts is /net/hafkaldi/export/u2/reikn/Splus5/SMB/allarteg.sh. This script calls for each species two additional shell scripts: /net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R.sh /net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R/BIOVISIT_R.sh

The difference in the two scripts is:

 diff BIOVISIT_R.sh BIOVISIT.fastar_R.sh 
10c10,11
< st1 <- STODVAR$y999[STODVAR$y999$tognumer %in% 1:39,]
---
> ind <- c(31931,31932,32131,36731,37031,37131,37132,37231,41431,41531,42231,42232,47431,52331)
> st1 <- STODVAR$y999[STODVAR$y999$tognumer %in% 1:19 | !is.na(match(STODVAR$y999$index,ind)) ,]
129,130c130,131
<     base.visit <- tmp.visit1a
<     aggr.visit <- tmp.visit2a
---
>     base.visit.fastar <- tmp.visit1a
>     aggr.visit.fastar <- tmp.visit2a
133,134c134,135
<   base.visit <- rbind(base.visit,tmp.visit1a)
<   aggr.visit <- rbind(aggr.visit,tmp.visit2a) 
---
>   base.visit.fastar <- rbind(base.visit.fastar,tmp.visit1a)
>   aggr.visit.fastar <- rbind(aggr.visit.fastar,tmp.visit2a) 
145,146c146,147
< aggr.visit$bio <- aggr.visit$fj*lwcoeff[1]*aggr.visit$lengd^lwcoeff[2]/1e3
< base.visit$bio <- base.visit$fj*lwcoeff[1]*base.visit$lengd^lwcoeff[2]/1e3
---
> aggr.visit.fastar$bio <- aggr.visit.fastar$fj*lwcoeff[1]*aggr.visit.fastar$lengd^lwcoeff[2]/1e3
> base.visit.fastar$bio <- base.visit.fastar$fj*lwcoeff[1]*base.visit.fastar$lengd^lwcoeff[2]/1e3

The following is based on the script /net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R.sh with some minimum adaptations.

base.visit2 <- list()
aggr.visit2 <- list()
STODVAR2 <- dplyr::bind_rows(STODVAR)  # EH added

for (j in 1:length(YEARS)) {
  #print(YEARS[j])
  #st1 <- STODVAR$'y1985'[STODVAR$'y1985'$tognumer %in% 1:39,]
  st1 <- STODVAR2[STODVAR2$ar == YEARS[j] & STODVAR2$tognumer %in% 1:39,] # EH added
  tmp <- lesa.lengdir(st1$synis.id,SPECIES,col.names="kyn")
  tmp1 <- lesa.numer(st1$synis.id,SPECIES) 
  tmp <- Skala.med.toldum(tmp,tmp1)
  i <- is.na(tmp$fj.alls) 
  tmp$fj.alls[i] <- 0
  tmp$bio <- tmp$fj.alls*lwcoeff[1]*tmp$lengd^lwcoeff[2]/1e6 # tonn
  tmp$fj.alls <- tmp$fj.alls/1e3 # þúsundir


  for( i in 1:length(lengdir)) {

    #print(i) 
    tmp1 <- tmp[tmp$lengd==lengdir[i],]

    if(nrow(tmp1) > 0) {
      x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) 
      names(x) <- c("synis.id","fj")
      st <- husky:::join(st1[,c("newstrata","toglengd","synis.id")],x,"synis.id",set=0)
    } else {
      st <- st1[,c("newstrata","toglengd","synis.id")]
      st$fj <- rep(0,nrow(st)) 
    }

    if(KYN) {
      tmp1 <- tmp[tmp$lengd==lengdir[i] & tmp$kyn==1 & !is.na(tmp$kyn),]
      if(nrow(tmp1) > 0) {
        x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) 
        names(x) <- c("synis.id","fjhaenga")
        st <- husky:::join(st,x,"synis.id",set=0)
      } else {
        st$fjhaenga <- rep(0,nrow(st)) 
      }

      tmp1 <- tmp[tmp$lengd==lengdir[i] & tmp$kyn==2 & !is.na(tmp$kyn),]
      if(nrow(tmp1) > 0) {
        x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) 
        names(x) <- c("synis.id","fjhrygna")
        st <- husky:::join(st,x,"synis.id",set=0)
      }
      else {
        st$fjhrygna <- rep(0,nrow(st)) 
      }
    }

    tmp1 <- tmp[tmp$lengd >= lengdir[i],]
    if(nrow(tmp1) > 0) {
      x <- apply.shrink(tmp1$bio,tmp1$synis.id,sum) 
      names(x) <- c("synis.id","bioge") 
      st <- husky:::join(st,x,"synis.id",set=0)
    } else {
      st$bioge <- rep(0,nrow(st)) 
    }

    tmp1 <- tmp[tmp$lengd <= lengdir[i],]
    if(nrow(tmp1) > 0) {
      x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) 
      names(x) <- c("synis.id","fjle")
      st <- husky:::join(st,x,"synis.id",set=0)
    } else {
      st$fjle <- rep(0,nrow(st)) 
    }


    tmp.visit <- Calc.index(st,"fj")

    tmp.biovisit <- Calc.index(st,"bioge")
    tmp.seidavisit <- Calc.index(st,"fjle")

    if(KYN) {
      tmp.haengavisit <- Calc.index(st,"fjhaenga")
      tmp.hrygnuvisit <- Calc.index(st,"fjhrygna")
    }

    tmp.visit1 <- tmp.visit$result[,c("strata","total","cv")]
    names(tmp.visit1)[2:3] <- c("fj","cv.fj")
    tmp.visit1$bio.staerri <- tmp.biovisit$result[,"total"]
    tmp.visit1$cv.bio.staerri <- tmp.biovisit$result[,"cv"]
    tmp.visit1$fj.minni <- tmp.seidavisit$result[,"total"]
    tmp.visit1$cv.fj.minni <- tmp.seidavisit$result[,"cv"]

    if(KYN) {
      tmp.visit1$fj.haenga <- tmp.haengavisit$result[,"total"]
      tmp.visit1$cv.fj.haenga <- tmp.haengavisit$result[,"cv"]
      tmp.visit1$fj.hrygna <- tmp.hrygnuvisit$result[,"total"]
      tmp.visit1$cv.fj.hrygna <- tmp.hrygnuvisit$result[,"cv"]
    }

    tmp.visit2 <- tmp.visit$aggr.output[,c("total","cv")]
    names(tmp.visit2) <- c("fj","cv.fj") 
    tmp.visit2$bio.staerri <- tmp.biovisit$aggr.output[,"total"]
    tmp.visit2$cv.bio.staerri <- tmp.biovisit$aggr.output[,"cv"]
    tmp.visit2$fj.minni <- tmp.seidavisit$aggr.output[,"total"]
    tmp.visit2$cv.fj.minni <- tmp.seidavisit$aggr.output[,"cv"]

    if(KYN) {
      tmp.visit2$fj.haenga <- tmp.haengavisit$aggr.output[,"total"]
      tmp.visit2$cv.fj.haenga <- tmp.haengavisit$aggr.output[,"cv"]
      tmp.visit2$fj.hrygna <- tmp.hrygnuvisit$aggr.output[,"total"]
      tmp.visit2$cv.fj.hrygna <- tmp.hrygnuvisit$aggr.output[,"cv"]
    }


    tmp.visit2$svaedi <- dimnames(tmp.visit2)[[1]]
    tmp.visit2$svaedisnr <- 1:nrow(tmp.visit2)
    dimnames(tmp.visit2)[[1]] <- 1:nrow(tmp.visit2)
    tmp.visit1$lengd <- rep(lengdir[i],nrow(tmp.visit1))
    tmp.visit2$lengd <- rep(lengdir[i],nrow(tmp.visit2))
    tmp.visit1$ar <- rep(YEARS[j],nrow(tmp.visit1))
    tmp.visit2$ar <- rep(YEARS[j],nrow(tmp.visit2))
    if(i == 1 ) {
      tmp.visit1a <- tmp.visit1
      tmp.visit2a <- tmp.visit2
    } else {
      tmp.visit1a <- rbind(tmp.visit1a,tmp.visit1)
      tmp.visit2a <- rbind(tmp.visit2a,tmp.visit2)
    }
  }
  base.visit2[[j]] <- tmp.visit1a
  aggr.visit2[[j]] <- tmp.visit2a
} # End year loop

base.visit2 <- dplyr::bind_rows(base.visit2)
aggr.visit2 <- dplyr::bind_rows(aggr.visit2)
# tonn því fjöldi er þegar í 1000 en lengd-þyngdar gefur grömm.  
aggr.visit2$bio <-
  aggr.visit2$fj*lwcoeff[1]*aggr.visit2$lengd^lwcoeff[2]/1e3
base.visit2$bio <-
  base.visit2$fj*lwcoeff[1]*base.visit2$lengd^lwcoeff[2]/1e3

Internal huskyverse comparison: rough double testing

Load the "official calculation":

attach('/net/hafkaldi/export/u2/reikn/Splus5/SMB/TORSKUR/.RData')

ggplot() +
  geom_pointrange(data = aggr.visit2 %>% filter(svaedi == "Heild", lengd == min(lengd)),
                  aes(ar, bio.staerri, 
                      ymin = bio.staerri * (1 - cv.bio.staerri),
                      ymax = bio.staerri * (1 + cv.bio.staerri)),
                  lwd = 1,
                  size = 4) +
  geom_pointrange(data = aggr.visit %>% filter(svaedi == "Heild", lengd == min(lengd)),
                  aes(ar, bio.staerri, 
                      ymin = bio.staerri * (1 - cv.bio.staerri),
                      ymax = bio.staerri * (1 + cv.bio.staerri)),
                  colour = "red") +
  labs(x = NULL, y = NULL, title = "Standardized biomass index")

In the above the black line is the calculation based on the function and objects in the husky-package, the red is the "official calculation". Bottom line: The two huskyverse things are, not surprisingly, the same (visally).

TODO: A more detailed comparison, including cv-calculations

detach("file:/net/hafkaldi/export/u2/reikn/Splus5/SMB/TORSKUR/.RData")

Note the difference in the "old" and new strata scrips

particularily the Calc.index function call, see below:

hafstokkur/net/hafkaldi/export/u2/reikn/Splus5 [1149] diff SMB/BIOVISIT_R.sh SMBNewstrata/BIOVISIT_R.sh 
1c1
< for yr in 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
---
> for yr in 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
6,7c6,8
< Rattach("..")
< Rattach("../GEOMETRY.NEW")
---
> Rattach("../../SMBNewstrata")
> Rattach("../../SMB/GEOMETRY.NEW")
> attach("../../HAUSTRALLNewStrata/Stratifiering/.RData",pos=2)
10,12c11,13
< st1 <- STODVAR$y999[STODVAR$y999$tognumer %in% 1:39,]
< tmp <- lesa.lengdir(st1$synis.id,TEG,col.names="kyn")
< tmp1 <- lesa.numer(st1$synis.id,TEG) 
---
> tmp8 <- STODVAR.all[(STODVAR.all$tognumer %in% 1:39 | is.na(STODVAR.all$tognumer)) & STODVAR.all$ar==999,]
> tmp <- lesa.lengdir(tmp8$synis.id,TEG,col.names="kyn")
> tmp1 <- lesa.numer(tmp8$synis.id,TEG) 
26c27
<      st <- join(st1[,c("newstrata","toglengd","synis.id")],x,"synis.id",set=0)
---
>      st <- join(tmp8[,c("newstrata","toglengd","synis.id")],x,"synis.id",set=0)
29c30
<     st <- st1[,c("newstrata","toglengd","synis.id")]
---
>     st <- tmp8[,c("newstrata","toglengd","synis.id")]
75,77c76,78
<   tmp.visit <- Calc.index(st,"fj")
<   tmp.biovisit <- Calc.index(st,"bioge")
<   tmp.seidavisit <- Calc.index(st,"fjle")
---
>   tmp.visit <- Calc.index(st,"fj",combine.output=SMBaggregation)
>   tmp.biovisit <- Calc.index(st,"bioge",combine.output=SMBaggregation)
>   tmp.seidavisit <- Calc.index(st,"fjle",combine.output=SMBaggregation)
79,80c80,81
<     tmp.haengavisit <- Calc.index(st,"fjhaenga")
<     tmp.hrygnuvisit <- Calc.index(st,"fjhrygna")
---
>     tmp.haengavisit <- Calc.index(st,"fjhaenga",combine.output=SMBaggregation)
>     tmp.hrygnuvisit <- Calc.index(st,"fjhrygna",combine.output=SMBaggregation)
141,142c142,144
< Rattach("..")
< Rattach("../GEOMETRY.NEW")
---
> Rattach("../../SMBNewstrata")
> Rattach("../../SMB/GEOMETRY.NEW")
> attach("../../HAUSTRALLNewStrata/Stratifiering/.RData",pos=2)

Age based SMB indices - thorskur

The following is based on the script /net/hafkaldi/export/u2/reikn/Splus5/GETALK/AGEVISIT/torskur.2016/torskur.visit.r with some minimum adaptations.

ind <- c(31931,31932,32131,36731,37031,37131,37132,37231,41431,41531,42231,42232,47431,52331)
fj  <- list()

years <- 1985:2016
regs <- list(S=c(1,9:10),N=2:8)
regname <- names(regs)
aldur <- 1:14
lengd <- c(seq(4.5,109.5,by=5),119.5,139.5)

for( i in 1:length(years) ) {
 # print(i) 
  fj[[i]] <- data.frame()
  if(i == 17)  stauka <- lesa.stodvar(leidangur="A4-2001")  

  for( j in 1:2) {
    st <- STODVAR[[i]][!is.na(match(STODVAR[[i]]$area,regs[[j]])) & (STODVAR[[i]]$tognumer < 20 | !is.na(match(STODVAR[[i]]$index,ind))) ,]
    st.all <- STODVAR[[i]][!is.na(match(STODVAR[[i]]$area,regs[[j]])),]
    if(i == 17) {
      cn <- c("lat","lon","synis.id")
      sttmp <- stauka[stauka$area %in% regs[[j]],cn]
      st.all <- rbind(st.all[,cn],sttmp[,cn])
    }

    le <- lesa.lengdir(st$synis.id,1)
    kv <- lesa.kvarnir(st.all$synis.id,1,col.names=c("kyn","kynthroski"),oracle=F)

    nu <- lesa.numer(st$synis.id,1)

    alk <- MakeAlk(kv, 1, lengd=lengd, aldur = aldur, FilterAldurLengd = F, kynth = T) # Tók filteraldurlengd af

    ar <- i+1984
    if(ar < 1993){ 
      tmp <- torskur.marsrall.wt.8592[torskur.marsrall.wt.8592$reg==regname[j],]
      } else {
      tmp <- torskur.marsrall.wt[torskur.marsrall.wt$reg==regname[j] & torskur.marsrall.wt$ar==ar,]}
    ldist <- MakeLdistbyStation(le,nu,1,lengd=lengd,Stodvar=st,talid=T,lengd.thyngd.data=tmp)
    fj[[i]]  <- rbind.alk(fj[[i]],Calc.fj.per.station(alk,ldist))
  }
}

torskur.fj <- fj
names(torskur.fj) <- as.character(years)

biomass <- kynthbiomass <-  meanlefj <- sdevfj <- kynthfj <- fj <- list()
biovisit <- fjvisit <- kynthvisit <- meanlevisit <-  sdevvisit <- kynthbiovisit  <- list()

for(i in 1:length(years)) {
  #print(i)
  st <- attributes(torskur.fj[[i]])$Stodvar
  fj <- kynthfj <- biomass <- meanlefj <- sdevfj <- kynthbiomass <- list()
  biom <- torskur.fj[[i]]$WtPerAldur*torskur.fj[[i]]$FjPerAldur
  biom[is.na(biom)] <- 0
  kynthbiom <- torskur.fj[[i]]$KynthWtPerAldur*torskur.fj[[i]]$KynthFjPerAldur
  kynthbiom[is.na(kynthbiom)] <- 0
  for(j in 1:14) {
    #cat(paste(j," "))
    fj[[j]] <- Calc.index(st,z=torskur.fj[[i]]$FjPerAldur[,j])$aggr.output
    kynthfj[[j]] <- Calc.index(st,z=torskur.fj[[i]]$KynthFjPerAldur[,j])$aggr.output
    biomass[[j]] <- Calc.index(st,z=biom[,j])$aggr.output
    meanlefj[[j]] <- Calc.index(st,z=torskur.fj[[i]]$LengdSinnumFjPerAldur[,j])$aggr.output
    sdevfj[[j]] <- Calc.index(st,z=torskur.fj[[i]]$Lengd2SinnumFjPerAldur[,j])$aggr.output
    kynthbiomass[[j]] <- Calc.index(st,z=kynthbiom[,j])$aggr.output
  }
  biovisit[[i]] <- biomass
  fjvisit[[i]] <- fj
  kynthvisit[[i]] <- kynthfj
  kynthbiovisit[[i]] <- kynthbiomass
  sdevvisit[[i]] <- sdevfj
  meanlevisit[[i]] <- meanlefj
}

# net/hafkaldi/export/u2/reikn/Splus5/GETALK/AGEVISIT/torskur.2016/combine.r
#Rattach("/home/hoski/GETALK")
torskur.visit.n <- combine.alk.visit(fjvisit, kynthvisit, biovisit,meanlevisit,sdevvisit,kynthbiovisit ,row = 13, aldur = 1:14,ar=1985:2016)
torskur.visit.s <- combine.alk.visit(fjvisit, kynthvisit, biovisit,meanlevisit,sdevvisit,kynthbiovisit,row = c(14), aldur = 1:14,ar=1985:2016)
torskur.visit.tot  <- combine.alk.visit(fjvisit, kynthvisit, biovisit, meanlevisit,sdevvisit,kynthbiovisit,row = c(22), aldur = 1:14,ar=1985:2016
)

torskur.visit.n$fj <- t(round(torskur.visit.n$fj,2))
torskur.visit.n$cv <- t(round(torskur.visit.n$cv,3))
torskur.visit.n$kynthhlutfall<- t(round(torskur.visit.n$kynthhlutfall*100,1))
torskur.visit.n$wt <- t(round(torskur.visit.n$wt*1000))
torskur.visit.n$kynthwt <- t(round(torskur.visit.n$kynthwt*1000))
torskur.visit.n$meanle <- t(round(torskur.visit.n$meanle,1))
torskur.visit.n$sdev <- t(round(torskur.visit.n$sdev,1))

torskur.visit.s$cv <- t(round(torskur.visit.s$cv,3))
torskur.visit.s$fj <- t(round(torskur.visit.s$fj,2))
torskur.visit.s$kynthhlutfall<- t(round(torskur.visit.s$kynthhlutfall*100,1))
torskur.visit.s$wt <- t(round(torskur.visit.s$wt*1000))
torskur.visit.s$kynthwt <- t(round(torskur.visit.s$kynthwt*1000))
torskur.visit.s$meanle <- t(round(torskur.visit.s$meanle,1))
torskur.visit.s$sdev <- t(round(torskur.visit.s$sdev,1))

torskur.visit.tot$cv <- t(round(torskur.visit.tot$cv,3))
torskur.visit.tot$fj <- t(round(torskur.visit.tot$fj,2))
torskur.visit.tot$kynthhlutfall<- t(round(torskur.visit.tot$kynthhlutfall*100,1))
torskur.visit.tot$wt <- t(round(torskur.visit.tot$wt*1000))
torskur.visit.tot$kynthwt <- t(round(torskur.visit.tot$kynthwt*1000))
torskur.visit.tot$meanle <- t(round(torskur.visit.tot$meanle,1))
torskur.visit.tot$sdev <- t(round(torskur.visit.tot$sdev,1))


# Setja saman fyrir ORACLE

age <- 1:14
year <- 1985:2016
n <- length(age)
age <- matrix(age,n,length(year)) 
year <- matrix(year,n,length(year),byrow=T)
yearage <- data.frame(year=c(year),age=c(age))

tmp <- yearage
tmp$reg <- rep("Tot",nrow(tmp))
tmp$fj <- c(torskur.visit.tot$fj)
tmp$cv <- c(torskur.visit.tot$cv)
tmp$wt <- c(torskur.visit.tot$wt)
tmp$kynthhlutfall <- c(torskur.visit.tot$kynthhlutfall)
tmp$meanle <- c(torskur.visit.tot$meanle)
tmp$sdev <- c(torskur.visit.tot$sdev)
tmp$kynthwt <- c(torskur.visit.tot$kynthwt)

tmp1 <- yearage
tmp1$reg <- rep("N",nrow(tmp1))
tmp1$fj <- c(torskur.visit.n$fj)
tmp1$cv <- c(torskur.visit.n$cv)
tmp1$wt <- c(torskur.visit.n$wt)
tmp1$kynthhlutfall <- c(torskur.visit.n$kynthhlutfall)
tmp1$meanle <- c(torskur.visit.n$meanle)
tmp1$sdev <- c(torskur.visit.n$sdev)
tmp1$kynthwt <- c(torskur.visit.n$kynthwt)

tmp2 <- yearage
tmp2$reg <- rep("S",nrow(tmp2))
tmp2$fj <- c(torskur.visit.s$fj)
tmp2$cv <- c(torskur.visit.s$cv)
tmp2$wt <- c(torskur.visit.s$wt)
tmp2$kynthhlutfall <- c(torskur.visit.s$kynthhlutfall)
tmp2$meanle <- c(torskur.visit.s$meanle)
tmp2$sdev <- c(torskur.visit.s$sdev)
tmp2$kynthwt <- c(torskur.visit.s$kynthwt)

torskur.visit.oracle <- rbind(tmp,tmp1,tmp2)