Catch by station

Preamble

The flow:

Stations %>% 
  gather_length_data %>% 
  calculate_biomass %>% 
  summarise_by_station

Some stuff below may be an overkill, given the task at hand - but here seeding for other operations.

Catch by stations

library(tidyverse)
library(mar)
con <- connect_mar()

A case example:

d <- 
  les_stod(con) %>% 
  left_join(les_syni(con)) %>% 
  filter(synis_id == 41311) %>% 
  select(synis_id, toglengd) %>% 
  left_join(les_lengd(con)) %>% 
  filter(tegund_nr == 2) %>% 
  mar::skala_med_taldir() %>% 
  collect(n = Inf)
d %>% 
  mutate(counted = fjoldi - fjoldi_oskalad,
         aaraised = fjoldi * 4 / toglengd - fjoldi) %>% 
  select(lengd, measured = fjoldi_oskalad, counted, aaraised) %>% 
  gather(variable, value, -lengd) %>% 
  ggplot(aes(lengd, value, fill = variable)) +
  geom_col() +
  scale_fill_brewer(palette = "Set1")

I.e., we have:

Some functions:

gather_length_data <- function(Stations, Species) {

  # Here could create an switch, if Stations is connection do below
  #   else read from fjolst
  Lengths <- 
    les_lengd(Stations$src) %>% 
    filter(tegund_nr %in% Species) %>% 
    group_by(synis_id, tegund_nr, lengd) %>% 
    summarise(fjoldi = sum(fjoldi, na.rm = TRUE)) %>% 
    ungroup()

  Counts <-
    les_skala(Stations$src) %>% 
    filter(tegund_nr %in% Species) %>% 
    mutate(r = ifelse(taldir == 0 | is.na(taldir),
                      1,
                      1 + taldir / ifelse(maeldir == 0 | is.na(maeldir), 1, maeldir))) %>% 
    select(synis_id, tegund_nr, fj_maelt = maeldir, fj_talid = taldir, r)

  Stations %>% 
    #select(synis_id, ar, toglengd) %>% 
    left_join(Lengths, by = "synis_id") %>% 
    left_join(Counts, by = c("synis_id", "tegund_nr"))

}

calculate_biomass <- function(d) {

  lwcoeff <- 
    tbl_mar(d$src, "ops$einarhj.lwcoeff") %>% 
    rename(tegund_nr = tegund)

  d %>% 
    left_join(lwcoeff, by = "tegund_nr") %>% 
    # if lwcoefficient for species not specified use newtons law
    mutate(a = ifelse(is.na(a), 0.01, a),
           b = ifelse(is.na(b), 3.00, b)) %>% 
    # Below statistic is by length class
    # If include lwcoeff then do a priori a left_join
    mutate(r = ifelse(is.na(r), 1, r),
           N  = r * ifelse(is.na(fjoldi), 0, fjoldi),
           B  = r * ifelse(is.na(fjoldi), 0, fjoldi) * a * lengd^b/1e3)


}

cumulate_data <- function(d) {

  d %>% 
    arrange(synis_id, tegund_nr, lengd) %>% 
    group_by(synis_id, tegund_nr) %>% 
    mutate(cN = cumsum(N),
           cB = sum(B) - cumsum(B) + B) %>% 
    ungroup()
}

catch_by_station <- function(Stations, Species, lwcoeff) {

  # NOTE: The function not standardized to towlength

  # TODO: Include the species specific lwcoefficient
  #       Possibly allow binning by length class first, as done in pax::make_ldist_by_station

  d <- 
    gather_length_data(Stations, Species) %>% 
    # ned a better name
    calculate_biomass() %>% 
    # next step is not neeed, but if used, the appropriate place is here
    #  cumulate_data() %>% 
    # would be nicer to have the collect() here 
    group_by(synis_id, ar, tegund_nr) %>% 
    summarise(toglengd = max(toglengd, na.rm = TRUE),
              r = max(r, na.rm = TRUE),
              fj_talid = max(fj_talid, na.rm = TRUE),
              # next two columns should be the same, kept here for debugging
              fj_maelt = max(fj_maelt, na.rm = TRUE),   # as recorded in fiskar.numer
              n = sum(fjoldi, na.rm = TRUE),            # as calculated in fiskar.lengdir
              N = sum(N, na.rm = TRUE),                 # raised numbers
              B = sum(B, na.rm = TRUE)) %>%             # raised biomass
    collect(n = Inf)


  # Include stations with zero catch
  st <- 
    Stations %>% 
    select(synis_id) %>% 
    collect(n = Inf)

  # If too many species, this could be a memory problem:
  # e.g. length(st$synis_id) * length(Species) * length(0:160) is 59 million rows!
  #  the object d above (for cod and haddock) is only 0.9 million rows
  expand.grid(synis_id = st$synis_id,
              tegund_nr = Species) %>% 
    tbl_df() %>% 
    left_join(d, by = c("synis_id", "tegund_nr")) %>% 
    mutate(N = ifelse(is.na(N), 0, N),
           B = ifelse(is.na(B), 0, B))

}

Operationally:

Stations <-
  les_stod(con) %>%
  left_join(les_syni(con)) %>% 
  filter(veidarfaeri %in% 73,
         tog_nr %in% 1:39) %>% 
  select(synis_id, ar, toglengd)

d <- 
  Stations %>% 
  catch_by_station(Species = c(1,2), lwcoeff)

Some testing:

Below some comparison with the "official" catch per station. The difference is due to difference in the treatment of measured fish in cases where the number of fish measured in the length table is not the same as the number of fish measured in the number table.

#attach("/net/hafkaldi/export/u2/reikn/Splus5/SMB/UTBREIDSLA/.RData")
attach("/net/hafkaldi/export/u2/reikn/R/SurveyWork/SMB/catchperstation.rdata")
N <- 
  utbrteg %>% 
  tbl_df() %>% 
  select(synis_id = synis.id, `1` = torskur.stk, `2` = ysa.stk) %>% 
  gather(key = tegund_nr, value = N2, -synis_id, convert = TRUE) %>% 
  mutate(N2 = as.numeric(N2))
B <- 
  utbrteg %>% 
  tbl_df() %>% 
  select(synis_id = synis.id, `1` = torskur.kg, `2` = ysa.kg) %>% 
  gather(key = tegund_nr, value = B2, -synis_id, convert = TRUE) %>% 
  mutate(B2 = as.numeric(B2))
detach("file:/net/hafkaldi/export/u2/reikn/R/SurveyWork/SMB/catchperstation.rdata")
d <- 
  d %>% 
  left_join(N) %>% 
  left_join(B)

Cod:

d %>%
  filter(tegund_nr == 1,
         !near(N, N2)) %>% 
  select(synis_id, toglengd, r, fj_talid, fj_maelt, n, N, N2)
d %>% 
  filter(tegund_nr == 1,
         !near(B, B2)) %>% 
  select(synis_id, toglengd, fj_talid, fj_maelt, n, B, B2)

Haddock:

d %>%
  filter(tegund_nr == 2,
         !near(N, N2)) %>% 
  select(synis_id, toglengd, r, fj_talid, fj_maelt, n, N, N2)
d %>% 
  filter(tegund_nr == 2,
         !near(B, B2)) %>% 
  select(synis_id, toglengd, fj_maelt, n, B, B2)
rm(catch_by_station)


einarhjorleifsson/fishdown documentation built on June 26, 2022, 2:31 p.m.