Emulating DATRAS service product

NOTE: Some intro notes here ...

library(tidyverse)
library(lubridate)
library(tidyices)
library(ggridges)
# Haul (hh) and length (hl) tables read from the prepared rds files
hh <- read_rds("data/ns-ibts_hh.rds")
hl <- read_rds("data/ns-ibts_hl.rds")

# Species (latin names) retained for the comparison
common <- c(
  "Clupea harengus",
  "Sprattus sprattus",
  "Gadus morhua",
  "Melanogrammus aeglefinus",
  "Merlangius merlangus",
  "Pollachius virens",
  "Trisopterus esmarkii",
  "Scomber scombrus",
  "Pleuronectes platessa"
)

# CPUE per length per haul, computed from valid hauls only.
# Note: this is the function built into tidyices - not exactly the same as
# above.
cpue <-
  hh %>%
  filter(haulval == "V", !is.na(nsarea)) %>%
  tidyices::cpue_per_length_per_haul(hh = ., hl = hl) %>%
  # dropping missing lengths is a debugging aid; this should be fixed upstream
  filter(latin %in% common, !is.na(length))
# Comparison with the DATRAS product for one species, year and quarter
Latin <- "Pleuronectes platessa"

# "tidy" side: the CPUE computed in this document.
# length is multiplied by 10 and truncated to integer, presumably to match
# the units of lngtclass in the DATRAS file - TODO confirm.
d1 <-
  cpue %>%
  id_separate(remove = FALSE) %>%
  filter(latin == Latin & year == 2016 & quarter == 1) %>%
  transmute(id,
            latin,
            length = as.integer(length * 10),
            n,
            source = "tidy") %>%
  arrange(id, length, n)

# "datras" side: the downloaded service product; "-9" is read as NA
fi <-
  "data-raw/CPUE per length per haul per hour_2018-06-30 15_34_50.csv"
d2 <-
  read_csv(fi, na = "-9") %>%
  rename_all(tolower) %>%
  filter(species == Latin & year == 2016 & quarter == 1) %>%
  id_unite() %>%
  transmute(id,
            latin = species,
            length = lngtclass,
            n = cpue_number_per_hour,
            source = "datras") %>%
  as_tibble() %>%
  arrange(id, length, n)

# One column per source, keeping only rows where the two values differ by
# more than the tolerance (rows with a missing value in either source are
# dropped by filter())
bind_rows(d1, d2) %>%
  spread(source, n) %>%
  mutate(diff = !near(tidy, datras, tol = 1e-4)) %>%
  filter(diff)
# "tidy" CPUE for 2005-2016, one row per haul id, species and length class.
# length is multiplied by 10 and truncated to integer, presumably to match
# the units of lngtclass in the DATRAS file - TODO confirm.
cpue2 <-
  cpue %>%
  id_separate(remove = FALSE) %>%
  filter(year %in% 2005:2016) %>%
  select(id, latin, length, cpue.tidy = n) %>%
  mutate(length = as.integer(length * 10))

# DATRAS service product (dsp) for the same years; "-9" is read as NA
fi <-
  "data-raw/CPUE per length per haul per hour_2018-06-30 15_34_50.csv"
cpue.datras.service.product <-
  read_csv(fi, na = "-9") %>%
  rename_all(tolower) %>%
  filter(year %in% 2005:2016) %>%
  id_unite() %>%
  select(id,
         latin = species,
         length = lngtclass,
         cpue.dsp = cpue_number_per_hour) %>%
  as_tibble()

# Full join so that records present in only one source are retained.
# The join keys are spelled out rather than left to implicit matching.
diff <-
  cpue2 %>%
  full_join(cpue.datras.service.product,
            by = c("id", "latin", "length")) %>%
  mutate(diff = !near(cpue.dsp, cpue.tidy, tol = 1e-4),
         # diff is NA when either cpue is missing; txt is then NA as well,
         # which is what the is.na(txt) filters further down rely on
         txt = if_else(diff, "Difference", "Same"))

# Number of records per comparison outcome (NA = missing in one source)
diff %>%
  group_by(txt) %>%
  summarise(n.records = n()) %>%
  knitr::kable()

We have 515584 records that are OK and 953 records that are different. A further 1642 records are "NA", meaning that one or both of the cpue values were missing.

# Records without a comparison outcome, cross-tabulated by whether the
# tidy value (rows) and the dsp value (columns) are present
x <- filter(diff, is.na(txt))
with(x, table(!is.na(cpue.tidy), !is.na(cpue.dsp)))

So we have 1374 observations that are in "tidy" but not in "dsp", and 268 observations in "dsp" that are not in "tidy".

Let's print the 268 observations that are not in "tidy":

# Records with a dsp value but no comparison outcome, ordered by species
diff %>%
  filter(is.na(txt) & !is.na(cpue.dsp)) %>%
  arrange(latin) %>%
  knitr::kable()

What is the story with the single haddock?:

# Raw download; the hl element gets lower-case column names
raw <- read_rds("data-raw/datras/ns-ibts_raw.rds")
hl_raw <- rename_all(raw$hl, tolower)

# Raw length records for the haul in question.
# 126437 is presumably the aphia id of haddock - TODO confirm.
hl_raw %>%
  as_tibble() %>%
  id_unite() %>%
  filter(id == "2009_1_THA2_GOV_26" & valid_aphia == 126437) %>%
  glimpse()

So, it does not appear in the hl_raw data frame. And because these records all have lngtcode == "0", these fish are in what is termed the 0.5 cm length class, reported in units of millimeters. For the time being this does not quite make sense in my head.

What do we have in tidy for the haul and species in question?:

# Tidy records for the same haul and species
cpue2 %>%
  filter(id == "2009_1_THA2_GOV_26" &
           latin == "Melanogrammus aeglefinus") %>%
  glimpse()

So there is no 20 cm fish.

And in the dsp:

# DATRAS service product records for the same haul and species
cpue.datras.service.product %>%
  filter(id == "2009_1_THA2_GOV_26" &
           latin == "Melanogrammus aeglefinus") %>%
  glimpse()

We have only 20, 30 and 40 cm fish. So it seems that in the dsp the length classes are treated as millimeters that are then converted to centimeters by rounding.

Field names




einarhjorleifsson/datrasdoodle documentation built on May 27, 2019, 2:09 p.m.