The flow:
Stations %>% gather_length_data %>% calculate_biomass %>% summarise_by_station
Some stuff below may be an overkill, given the task at hand - but here seeding for other operations.
library(tidyverse) library(mar) con <- connect_mar()
A case example:
d <- les_stod(con) %>% left_join(les_syni(con)) %>% filter(synis_id == 41311) %>% select(synis_id, toglengd) %>% left_join(les_lengd(con)) %>% filter(tegund_nr == 2) %>% mar::skala_med_taldir() %>% collect(n = Inf) d %>% mutate(counted = fjoldi - fjoldi_oskalad, aaraised = fjoldi * 4 / toglengd - fjoldi) %>% select(lengd, measured = fjoldi_oskalad, counted, aaraised) %>% gather(variable, value, -lengd) %>% ggplot(aes(lengd, value, fill = variable)) + geom_col() + scale_fill_brewer(palette = "Set1")
I.e., we have:
Some functions:
gather_length_data <- function(Stations, Species) { # Here could create an switch, if Stations is connection do below # else read from fjolst Lengths <- les_lengd(Stations$src) %>% filter(tegund_nr %in% Species) %>% group_by(synis_id, tegund_nr, lengd) %>% summarise(fjoldi = sum(fjoldi, na.rm = TRUE)) %>% ungroup() Counts <- les_skala(Stations$src) %>% filter(tegund_nr %in% Species) %>% mutate(r = ifelse(taldir == 0 | is.na(taldir), 1, 1 + taldir / ifelse(maeldir == 0 | is.na(maeldir), 1, maeldir))) %>% select(synis_id, tegund_nr, fj_maelt = maeldir, fj_talid = taldir, r) Stations %>% #select(synis_id, ar, toglengd) %>% left_join(Lengths, by = "synis_id") %>% left_join(Counts, by = c("synis_id", "tegund_nr")) } calculate_biomass <- function(d) { lwcoeff <- tbl_mar(d$src, "ops$einarhj.lwcoeff") %>% rename(tegund_nr = tegund) d %>% left_join(lwcoeff, by = "tegund_nr") %>% # if lwcoefficient for species not specified use newtons law mutate(a = ifelse(is.na(a), 0.01, a), b = ifelse(is.na(b), 3.00, b)) %>% # Below statistic is by length class # If include lwcoeff then do a priori a left_join mutate(r = ifelse(is.na(r), 1, r), N = r * ifelse(is.na(fjoldi), 0, fjoldi), B = r * ifelse(is.na(fjoldi), 0, fjoldi) * a * lengd^b/1e3) } cumulate_data <- function(d) { d %>% arrange(synis_id, tegund_nr, lengd) %>% group_by(synis_id, tegund_nr) %>% mutate(cN = cumsum(N), cB = sum(B) - cumsum(B) + B) %>% ungroup() } catch_by_station <- function(Stations, Species, lwcoeff) { # NOTE: The function not standardized to towlength # TODO: Include the species specific lwcoefficient # Possibly allow binning by length class first, as done in pax::make_ldist_by_station d <- gather_length_data(Stations, Species) %>% # ned a better name calculate_biomass() %>% # next step is not neeed, but if used, the appropriate place is here # cumulate_data() %>% # would be nicer to have the collect() here group_by(synis_id, ar, tegund_nr) %>% summarise(toglengd = max(toglengd, na.rm = TRUE), r = max(r, na.rm = TRUE), fj_talid = max(fj_talid, na.rm = TRUE), # next two columns should be the same, kept here for debugging fj_maelt = max(fj_maelt, na.rm = TRUE), # as recorded in fiskar.numer n = sum(fjoldi, na.rm = TRUE), # as calculated in fiskar.lengdir N = sum(N, na.rm = TRUE), # raised numbers B = sum(B, na.rm = TRUE)) %>% # raised biomass collect(n = Inf) # Include stations with zero catch st <- Stations %>% select(synis_id) %>% collect(n = Inf) # If too many species, this could be a memory problem: # e.g. length(st$synis_id) * length(Species) * length(0:160) is 59 million rows! # the object d above (for cod and haddock) is only 0.9 million rows expand.grid(synis_id = st$synis_id, tegund_nr = Species) %>% tbl_df() %>% left_join(d, by = c("synis_id", "tegund_nr")) %>% mutate(N = ifelse(is.na(N), 0, N), B = ifelse(is.na(B), 0, B)) }
Operationally:
Stations <- les_stod(con) %>% left_join(les_syni(con)) %>% filter(veidarfaeri %in% 73, tog_nr %in% 1:39) %>% select(synis_id, ar, toglengd) d <- Stations %>% catch_by_station(Species = c(1,2), lwcoeff)
Some testing:
Below some comparison with the "official" catch per station. The difference is due to difference in the treatment of measured fish in cases where the number of fish measured in the length table is not the same as the number of fish measured in the number table.
#attach("/net/hafkaldi/export/u2/reikn/Splus5/SMB/UTBREIDSLA/.RData") attach("/net/hafkaldi/export/u2/reikn/R/SurveyWork/SMB/catchperstation.rdata") N <- utbrteg %>% tbl_df() %>% select(synis_id = synis.id, `1` = torskur.stk, `2` = ysa.stk) %>% gather(key = tegund_nr, value = N2, -synis_id, convert = TRUE) %>% mutate(N2 = as.numeric(N2)) B <- utbrteg %>% tbl_df() %>% select(synis_id = synis.id, `1` = torskur.kg, `2` = ysa.kg) %>% gather(key = tegund_nr, value = B2, -synis_id, convert = TRUE) %>% mutate(B2 = as.numeric(B2)) detach("file:/net/hafkaldi/export/u2/reikn/R/SurveyWork/SMB/catchperstation.rdata") d <- d %>% left_join(N) %>% left_join(B)
Cod:
d %>% filter(tegund_nr == 1, !near(N, N2)) %>% select(synis_id, toglengd, r, fj_talid, fj_maelt, n, N, N2)
d %>% filter(tegund_nr == 1, !near(B, B2)) %>% select(synis_id, toglengd, fj_talid, fj_maelt, n, B, B2)
Haddock:
d %>% filter(tegund_nr == 2, !near(N, N2)) %>% select(synis_id, toglengd, r, fj_talid, fj_maelt, n, N, N2)
d %>% filter(tegund_nr == 2, !near(B, B2)) %>% select(synis_id, toglengd, fj_maelt, n, B, B2)
rm(catch_by_station)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.