knitr::opts_chunk$set(eval = TRUE, echo = TRUE, message = FALSE, warning = FALSE)
NB: The next generation of indices calculation has been seeded in /net/hafkaldi/export/u2/reikn/Splus5/SurveyWork
. According to Husky (December 2016) this has not "formally" been adopted.
Husky's master scripts is /net/hafkaldi/export/u2/reikn/Splus5/SMB/allarteg.sh
. This script calls for each species two additional shell scripts:
/net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R.sh
/net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R/BIOVISIT_R.sh
The difference in the two scripts is:
diff BIOVISIT_R.sh BIOVISIT.fastar_R.sh 10c10,11 < st1 <- STODVAR$y999[STODVAR$y999$tognumer %in% 1:39,] --- > ind <- c(31931,31932,32131,36731,37031,37131,37132,37231,41431,41531,42231,42232,47431,52331) > st1 <- STODVAR$y999[STODVAR$y999$tognumer %in% 1:19 | !is.na(match(STODVAR$y999$index,ind)) ,] 129,130c130,131 < base.visit <- tmp.visit1a < aggr.visit <- tmp.visit2a --- > base.visit.fastar <- tmp.visit1a > aggr.visit.fastar <- tmp.visit2a 133,134c134,135 < base.visit <- rbind(base.visit,tmp.visit1a) < aggr.visit <- rbind(aggr.visit,tmp.visit2a) --- > base.visit.fastar <- rbind(base.visit.fastar,tmp.visit1a) > aggr.visit.fastar <- rbind(aggr.visit.fastar,tmp.visit2a) 145,146c146,147 < aggr.visit$bio <- aggr.visit$fj*lwcoeff[1]*aggr.visit$lengd^lwcoeff[2]/1e3 < base.visit$bio <- base.visit$fj*lwcoeff[1]*base.visit$lengd^lwcoeff[2]/1e3 --- > aggr.visit.fastar$bio <- aggr.visit.fastar$fj*lwcoeff[1]*aggr.visit.fastar$lengd^lwcoeff[2]/1e3 > base.visit.fastar$bio <- base.visit.fastar$fj*lwcoeff[1]*base.visit.fastar$lengd^lwcoeff[2]/1e3
The functions and data objects have been moved from the various .RData
spaces to the husky
-package. The following is based on the script /net/hafkaldi/export/u2/reikn/Splus5/SMB/BIOVISIT_R.sh
with some minimum adaptations.
library(tidyverse) library(lubridate) library(fjolst) library(husky) library(smx)
YEARS <- 1985:2016 TEG <- 1 KYN <- FALSE lwcoeff <- LWCOEFF[[as.character(TEG)]] lengdir <- LENGDIR[[as.character(TEG)]]
t1 <- now() base.visit2 <- list() aggr.visit2 <- list() STODVAR2 <- dplyr::bind_rows(STODVAR) # EH added for (j in 1:length(YEARS)) { #print(YEARS[j]) #st1 <- STODVAR$'y1985'[STODVAR$'y1985'$tognumer %in% 1:39,] st1 <- STODVAR2[STODVAR2$ar == YEARS[j] & STODVAR2$tognumer %in% 1:39,] # EH added tmp <- lesa.lengdir(st1$synis.id,TEG,col.names="kyn") tmp1 <- lesa.numer(st1$synis.id,TEG) tmp <- Skala.med.toldum(tmp,tmp1) i <- is.na(tmp$fj.alls) tmp$fj.alls[i] <- 0 tmp$bio <- tmp$fj.alls*lwcoeff[1]*tmp$lengd^lwcoeff[2]/1e6 # tonn tmp$fj.alls <- tmp$fj.alls/1e3 # þúsundir for( i in 1:length(lengdir)) { #print(i) tmp1 <- tmp[tmp$lengd==lengdir[i],] if(nrow(tmp1) > 0) { x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) names(x) <- c("synis.id","fj") st <- husky:::join(st1[,c("newstrata","toglengd","synis.id")],x,"synis.id",set=0) } else { st <- st1[,c("newstrata","toglengd","synis.id")] st$fj <- rep(0,nrow(st)) } if(KYN) { tmp1 <- tmp[tmp$lengd==lengdir[i] & tmp$kyn==1 & !is.na(tmp$kyn),] if(nrow(tmp1) > 0) { x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) names(x) <- c("synis.id","fjhaenga") st <- husky:::join(st,x,"synis.id",set=0) } else { st$fjhaenga <- rep(0,nrow(st)) } tmp1 <- tmp[tmp$lengd==lengdir[i] & tmp$kyn==2 & !is.na(tmp$kyn),] if(nrow(tmp1) > 0) { x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) names(x) <- c("synis.id","fjhrygna") st <- husky:::join(st,x,"synis.id",set=0) } else { st$fjhrygna <- rep(0,nrow(st)) } } tmp1 <- tmp[tmp$lengd >= lengdir[i],] if(nrow(tmp1) > 0) { x <- apply.shrink(tmp1$bio,tmp1$synis.id,sum) names(x) <- c("synis.id","bioge") st <- husky:::join(st,x,"synis.id",set=0) } else { st$bioge <- rep(0,nrow(st)) } tmp1 <- tmp[tmp$lengd <= lengdir[i],] if(nrow(tmp1) > 0) { x <- apply.shrink(tmp1$fj.alls,tmp1$synis.id,sum) names(x) <- c("synis.id","fjle") st <- husky:::join(st,x,"synis.id",set=0) } else { st$fjle <- rep(0,nrow(st)) } tmp.visit <- Calc.index(st,"fj") tmp.biovisit <- Calc.index(st,"bioge") tmp.seidavisit <- Calc.index(st,"fjle") if(KYN) { tmp.haengavisit <- Calc.index(st,"fjhaenga") tmp.hrygnuvisit <- Calc.index(st,"fjhrygna") } tmp.visit1 <- tmp.visit$result[,c("strata","total","cv")] names(tmp.visit1)[2:3] <- c("fj","cv.fj") tmp.visit1$bio.staerri <- tmp.biovisit$result[,"total"] tmp.visit1$cv.bio.staerri <- tmp.biovisit$result[,"cv"] tmp.visit1$fj.minni <- tmp.seidavisit$result[,"total"] tmp.visit1$cv.fj.minni <- tmp.seidavisit$result[,"cv"] if(KYN) { tmp.visit1$fj.haenga <- tmp.haengavisit$result[,"total"] tmp.visit1$cv.fj.haenga <- tmp.haengavisit$result[,"cv"] tmp.visit1$fj.hrygna <- tmp.hrygnuvisit$result[,"total"] tmp.visit1$cv.fj.hrygna <- tmp.hrygnuvisit$result[,"cv"] } tmp.visit2 <- tmp.visit$aggr.output[,c("total","cv")] names(tmp.visit2) <- c("fj","cv.fj") tmp.visit2$bio.staerri <- tmp.biovisit$aggr.output[,"total"] tmp.visit2$cv.bio.staerri <- tmp.biovisit$aggr.output[,"cv"] tmp.visit2$fj.minni <- tmp.seidavisit$aggr.output[,"total"] tmp.visit2$cv.fj.minni <- tmp.seidavisit$aggr.output[,"cv"] if(KYN) { tmp.visit2$fj.haenga <- tmp.haengavisit$aggr.output[,"total"] tmp.visit2$cv.fj.haenga <- tmp.haengavisit$aggr.output[,"cv"] tmp.visit2$fj.hrygna <- tmp.hrygnuvisit$aggr.output[,"total"] tmp.visit2$cv.fj.hrygna <- tmp.hrygnuvisit$aggr.output[,"cv"] } tmp.visit2$svaedi <- dimnames(tmp.visit2)[[1]] tmp.visit2$svaedisnr <- 1:nrow(tmp.visit2) dimnames(tmp.visit2)[[1]] <- 1:nrow(tmp.visit2) tmp.visit1$lengd <- rep(lengdir[i],nrow(tmp.visit1)) tmp.visit2$lengd <- rep(lengdir[i],nrow(tmp.visit2)) tmp.visit1$ar <- rep(YEARS[j],nrow(tmp.visit1)) tmp.visit2$ar <- rep(YEARS[j],nrow(tmp.visit2)) if(i == 1 ) { tmp.visit1a <- tmp.visit1 tmp.visit2a <- tmp.visit2 } else { tmp.visit1a <- rbind(tmp.visit1a,tmp.visit1) tmp.visit2a <- rbind(tmp.visit2a,tmp.visit2) } } base.visit2[[j]] <- tmp.visit1a aggr.visit2[[j]] <- tmp.visit2a } # End year loop base.visit2 <- bind_rows(base.visit2) aggr.visit2 <- bind_rows(aggr.visit2) # tonn því fjöldi er þegar / 1000 en lengd-þyngdar gefur grömm. aggr.visit2$bio <- aggr.visit2$fj*lwcoeff[1]*aggr.visit2$lengd^lwcoeff[2]/1e3 base.visit2$bio <- base.visit2$fj*lwcoeff[1]*base.visit2$lengd^lwcoeff[2]/1e3 print(paste("Duration:", round((now() - t1)/dminutes(1),2), "minutes"))
attach('/net/hafkaldi/export/u2/reikn/Splus5/SMB/TORSKUR/.RData') #detach("file:/net/hafkaldi/export/u2/reikn/Splus5/SMB/TORSKUR/.RData")
ggplot() + geom_pointrange(data = aggr.visit2 %>% filter(svaedi == "Heild", lengd == min(lengd)), aes(ar, bio.staerri, ymin = bio.staerri * (1 - cv.bio.staerri), ymax = bio.staerri * (1 + cv.bio.staerri)), lwd = 1, size = 4) + geom_pointrange(data = aggr.visit %>% filter(svaedi == "Heild", lengd == min(lengd)), aes(ar, bio.staerri, ymin = bio.staerri * (1 - cv.bio.staerri), ymax = bio.staerri * (1 + cv.bio.staerri)), colour = "red") + labs(x = NULL, y = NULL, title = "Standardized biomass index")
ERGO: The two huskyverse things are the same
A little demo on using only dplyr
verbs and ggplot2
to achieve quite a lot of things. Topic is a candidate for the upcoming ICES TCRENV course.
The data used (Icelandic spring groundfish survey) is just a placeholder for the important part - the generic algorithm used to achieve an objective. In this specific case one has a demonstration of a calculation and visualization of stratified survey indices. Of course with the inclusion of the associated uncertainties.
Some constants:
std.towlength <- 4 # seamiles std.towwidth <- 17 # meters std.area <- 4 * 17/1852 # standard area swept square nautical miles std.cv <- 1 # for setting the cv as the mean if number of tows in a # strata is one
Data for some 9 most common species from the Icelandic spring groundfish survey can be obtained by:
t1 <- now() #library(dplyrOracle) #library(mar) #db <- src_oracle("mar") Station <- #readr::read_csv("http://www.hafro.is/~einarhj/data/tcrenv2016/Station.csv") %>% STODVAR %>% bind_rows() %>% filter(tognumer %in% 1:39) %>% select(id = synis.id, year = ar, towlength = toglengd, strata = newstrata) %>% mutate(towlength = trim_towlength(towlength)) %>% arrange(id) Length <- #read.csv("http://www.hafro.is/~einarhj/data/tcrenv2016/Length.csv") lesa.lengdir(Station$id, TEG, col.names="kyn") %>% mutate(species = TEG) %>% select(id = synis.id, species, length = lengd, n = fjoldi, sex = kyn) Subsampling <- #read.csv("http://www.hafro.is/~einarhj/data/tcrenv2016/Subsampling.csv") lesa.numer(Station$id, TEG) %>% mutate(species = TEG) %>% select(id = synis.id, species, n.counted = fj.talid, n.measured = fj.maelt) %>% mutate(n.total = n.counted + n.measured) Stratas <- read.csv("http://www.hafro.is/~einarhj/data/tcrenv2016/Stratas.csv") %>% select(strata, area = rall.area)
For the record we have the following table dimensions:
d <- data_frame(table = c("Station","Length","Subsampling","Stratas"), rows = c(nrow(Station),nrow(Length),nrow(Subsampling),nrow(Stratas)), columns = c(ncol(Station),ncol(Length),ncol(Subsampling),ncol(Stratas))) d
Specify species:
SPECIES <- 1 # Cod
Filter the measurement data for the species in question
Get the raising factor:
ss <- Subsampling %>% filter(species %in% SPECIES) %>% mutate(r = n.total/n.measured) %>% select(id, r)
Get the length measurements and do the raising:
le <- Length %>% filter(species == SPECIES) %>% # a double precaution, in case length bins by sex group_by(id, length) %>% summarize(n = sum(n)) %>% ungroup() %>% left_join(ss, by="id") %>% mutate(n = n * r / 1e3) %>% # units of thousands select(-r)
d <- smx::calc_indices(st = Station, le = le, stratas = Stratas) print(paste("Duration:", round((now() - t1)/dminutes(1),2), "minutes"))
ggplot() + geom_pointrange(data = d$aggr %>% filter(length == min(length)), aes(year, cb, ymin = cb * (1 - cb.cv), ymax = cb * (1 + cb.cv)), col = "black", lwd = 1, size = 4) + geom_pointrange(data = aggr.visit2 %>% filter(svaedi == "Heild", lengd == min(lengd)), aes(ar, bio.staerri, ymin = bio.staerri * (1 - cv.bio.staerri), ymax = bio.staerri * (1 + cv.bio.staerri)), colour = "red") + labs(x = NULL, y = NULL, title = "Standardized biomass index", subtitle = "Tidyverse (black) vs. Höskiverse (red)")
ggplot() + geom_pointrange(data = d$aggr %>% filter(length == max(length)), aes(year, cn, ymin = cn * (1 - cn.cv), ymax = cn * (1 + cn.cv)), col = "black", lwd = 1, size = 4) + geom_pointrange(data = aggr.visit2 %>% filter(svaedi == "Heild", lengd == max(lengd)), aes(ar, fj.minni, ymin = fj.minni * (1 - cv.fj.minni), ymax = fj.minni * (1 + cv.fj.minni)), col = "red") + labs(x = NULL, y = NULL, title = "Standardized abundance index", subtitle = "Tidyverse (black) vs. Höskiverse (red)")
devtools::session_info()
# Storage base <- as_data_frame(expand.grid(length = c(5:140), id = Station$id)) %>% left_join(Station, by = "id") %>% left_join(le, by=c("id","length")) %>% arrange(id, length) %>% group_by(id) %>% # The grouping here is for the cumsum calculation mutate(n = ifelse(is.na(n),0,n) * std.towlength / towlength, # standardized to per 4 miles cn = cumsum(n), b = n * 0.01 * length^3/1e3, cb = sum(b) - cumsum(b) + b) %>% group_by(year, strata, length) %>% summarise(N = n(), n_m = mean(n), n_d = ifelse(N == 1, n_m * std.cv, sd(n)), cn_m = mean(cn), cn_d = ifelse(N == 1, cn_m * std.cv, sd(cn)), b_m = mean(b), b_d = ifelse(N == 1, b_m * std.cv, sd(b)), cb_m = mean(cb), cb_d = ifelse(N == 1, cb_m * std.cv, sd(cb))) %>% ungroup() %>% left_join(Stratas, by = "strata") %>% mutate(area = area/1.852^2 / std.area, n = n_m * area, cn = cn_m * area, b = b_m * area, cb = cb_m * area)
Combining strata:
aggr <- base %>% group_by(year, length) %>% # A la Höski: summarise(n = sum(n), n.cv = smx:::calc_cv(n_m,n_d,area,N), b = sum(b), b.cv = smx:::calc_cv(b_m,b_d,area,N), cn = sum(cn), cn.cv = smx:::calc_cv(cn_m, cn_d, area, N), cb = sum(cb), cb.cv = smx:::calc_cv(cb_m, cb_d, area, N))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.