knitr::opts_chunk$set(echo=FALSE, warning = FALSE, 
                                            message = FALSE, cache = FALSE,
                                            progress = TRUE, verbose = FALSE, comment = F
                                            , error = FALSE, dev = 'png', dpi = 200)
# setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")

library(odbc)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
# library(config)
library(stringr)
library(discaRd)
options(scipen = 999)

# local run
dw_apsd <- config::get(value = "apsd", file = "K:/R_DEV/config.yml")

# if on server..
#dw_apsd <- config::get(value = "apsd", file = "~/config.yml")

bcon <- dbConnect(odbc::odbc(), 
                                    DSN = dw_apsd$dsn, 
                                    UID = dw_apsd$uid, 
                                    PWD = dw_apsd$pwd)

'%!in%' <- function(x,y)!('%in%'(x,y))

source('cams_discard_functions.R')
species_nespp3 = '012'  # monkfish

# define species stock if needed
# species_stock = 'GOM'  # GOM cod

# if using a unit stock, make this NULL!!
species_stock = NA  # all unit stocks

FY <- 2018
# Stat areas table

CAMS_STATAREA_STOCK = tbl(bcon, sql('select * from apsd.CAMS_STATAREA_STOCK')) 
# %>% collect()

# GEAR TABLE

  CAMS_GEAR_STRATA = tbl(bcon, sql('  select *   from APSD.CAMS_GEARCODE_STRATA')) %>% 
    collect()

# Mortality table
CAMS_DISCARD_MORTALITY_STOCK = tbl(bcon, sql("select *
from apsd.CAMS_DISCARD_MORTALITY_STOCK")) 
# %>% 
#   collect()

# unique stat areas for stock ID if needed
STOCK_AREAS = CAMS_STATAREA_STOCK %>% 
  filter(NESPP3 == species_nespp3) %>%  # removed  & AREA_NAME == species_stock
  distinct(STAT_AREA) %>% 
  collect()

CAMS_DISCARD_MORTALITY_STOCK = CAMS_DISCARD_MORTALITY_STOCK %>% 
  collect() %>% 
  mutate(SPECIES_STOCK = AREA_NAME) %>% 
  select(-AREA_NAME) %>% 
  filter(NESPP3 == species_nespp3) ## don't reallt want this here... 
# get catch and matched obs data together

c_o_dat2 <- tbl(bcon, sql(paste0("
with obs_cams as (
   select year
    , month
    --, case when region = 'N' then 'NE'
  --       when region = 'S' then 'MA'
  --       end as region
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end as halfofyear
    , carea
    , vtrserno
    , link1
    , docid
    , dmis_trip_id
    , nespp3
    , GEARCODE
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTOR_ID
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    , NVL(sum(discard),0) as discard
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    , NVL(sum(discard)/round(max(obs_kall)), 0) as dk
    from apsd.bg_cams_obs_catch
--  where nespp3 is not null
    group by year, carea, vtrserno, link1, nespp3, docid, GEARCODE, NEGEAR, GEARTYPE
    , MESHGROUP, dmis_trip_id, month
    --, region
    , halfofyear
    , sector_id
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    order by vtrserno asc
    ) 

, regions as (

select STAT_AREA 
    , AREA_NAME as REGION
    from APSD.CAMS_STATAREA_STOCK
    where NESPP3 = ", species_nespp3, "
)

, strata as ( 

select CAMS_GEAR_GROUP
     , VTR_GEAR_CODE
    from APSD.CAMS_GEARCODE_STRATA
    where NESPP3 = ", species_nespp3, "
)
--
--select count(distinct(vtrserno)) n_vtr
--, count(distinct(DMIS_TRIP_ID)) n_dmisid
--, count(distinct(LINK1)) n_link1
--, count(distinct(STRATA)) n_strata
----select distinct(GEARCODE)
--from (

     select case when MONTH in (1,2,3,4) then YEAR-1 else YEAR end as GF_YEAR
  , case when MONTH in (1,2,3) then YEAR-1 else YEAR end as SCAL_YEAR
  , o.*
  , r.region
  , c.match_nespp3
  , coalesce(c.match_nespp3, o.nespp3) as nespp3_final
  , NVL(s.CAMS_GEAR_GROUP, '0')||'-'||o.MESHGROUP||'-'||NVL(r.REGION,'na')||'-'||o.HALFOFYEAR as STRATA
  , NVL(s.CAMS_GEAR_GROUP, '0') CAMS_GEAR_GROUP
  from obs_cams o
  left join apsd.s_nespp3_match_conv c on o.nespp3 = c.nespp3
  left join (select * from strata) s 
  ON s.VTR_GEAR_CODE = o.GEARCODE
  left join (select * from regions) r 
  ON r.STAT_AREA = o.CAREA

"
    )
  )
) 
# %>% 
#   collect()
#---------------------------------------------------------------------------------------#
# Here is where various stratification would make sense
# DOCID is hard coded in discaRd.. in this case, CV is calculated using  N = subtrips
#---------------------------------------------------------------------------------------#

ddat_focal <- c_o_dat2 %>% 
  filter(GF_YEAR == FY) %>%   ## time element is here!!
  filter(CAREA %in% local(STOCK_AREAS$STAT_AREA)) %>% 
  # group_by(STRATA) %>% 
  collect() %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
  left_join(., y = CAMS_DISCARD_MORTALITY_STOCK %>% 
              dplyr::select(-NESPP3) %>% 
              mutate(REGION = SPECIES_STOCK)
            , by = c('REGION', 'CAMS_GEAR_GROUP')
            ) %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','REGION','CAMS_GEAR_GROUP','Discard_Mortality_Ratio')


ddat_prev <- c_o_dat2 %>% 
  filter(GF_YEAR == FY-1) %>%    ## time element is here!!
  filter(CAREA %in% local(STOCK_AREAS$STAT_AREA)) %>% 
  # group_by(STRATA) %>% 
  collect() %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
  left_join(., y = CAMS_DISCARD_MORTALITY_STOCK %>% 
              dplyr::select(-NESPP3) %>% 
              mutate(REGION = SPECIES_STOCK)
            , by = c('REGION', 'CAMS_GEAR_GROUP')
            ) %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','REGION','CAMS_GEAR_GROUP','Discard_Mortality_Ratio')

save(ddat_focal,file="ddat_focal_monkfishFY18.Rdata")
save(ddat_prev,file="ddat_prev_monkfishFY18.Rdata")

For species that have the same stock definition (i.e. unit stocks), these data pieces may be pulled at the same time. The next set of chunks can be run in sequence on a series of nespp3 (species_nespp3) codes.

load(file="ddat_focal_monkfishFY18.Rdata")
load(file="ddat_prev_monkfishFY18.Rdata")
# FIX THIS!!! this could be the duping KALL issue.. 

# need to slice the first record for each observed trip.. these trips are multi rowed while unobs trips are single row.. 
ddat_focal_spp = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
  slice(1) %>% 
  ungroup()

# and join to the unobserved trips

ddat_focal_spp = ddat_focal_spp %>% 
  union_all(ddat_focal %>% 
              filter(is.na(LINK1)) %>% 
               group_by(VTRSERNO) %>% 
               slice(1) %>% 
               ungroup()
            )


# if using the combined catch/obs table, which seems necessary for groundfish.. need to roll your own table to use with run_discard function

bdat_spp = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = CAREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)

gf_ex = run_discard(bdat = bdat_spp
                                             , ddat = ddat_focal_spp
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             , species_nespp3 = species_nespp3  #'081' #cod... 
                                             # , stratvars = c('CAMS_GEAR_GROUP', 'MESHGROUP', 'REGION', 'HALFOFYEAR')
                                             , stratvars = c('CAMS_GEAR_GROUP', 'MESHGROUP', 'REGION', 'HALFOFYEAR')  #CAMS_GEAR_GROUP
                                              # , stratvars = c('GEARTYPE', 'MESHGROUP', 'REGION', 'HALFOFYEAR')  #OLD GEAR TYPE
                                             , aidx = c(1,2)
                                             )

# sum of all discard, by subtrip, multiplied by mortality ratio
sum(gf_ex$res$DISCARD*gf_ex$res$Discard_Mortality_Ratio, na.rm = T) # 

# discard rates by strata
dest_strata = gf_ex$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata %>% slice(grep('50_*', dest_strata$STRATA))


gf_ex$res %>%  mutate(newd = DISCARD*Discard_Mortality_Ratio) %>% group_by(STRATA) %>% summarise(D=sum(newd,na.rm=T),d_rate=mean(DISC_RATE,na.rm=T)) %>% arrange(desc(D)) %>% print.data.frame()

The next chunk could be combined with the previous one if running several species in sequence.

# set up trips table for current year
# done above 


# set up trips table for previous year
ddat_prev_spp = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
  group_by(LINK1) %>% 
  slice(1)

ddat_prev_spp = ddat_prev_spp %>% 
  union_all(ddat_prev %>% filter(is.na(LINK1)))


# previous year observer data needed.. 
bdat_prev_spp = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = CAREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)

# Run the discaRd functions on previous year
d_prev = run_discard(bdat = bdat_prev_spp
                                             , ddat = ddat_prev_spp
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             , species_nespp3 = species_nespp3
                                             , stratvars = c('CAMS_GEAR_GROUP', 'MESHGROUP', 'REGION', 'HALFOFYEAR')
                                             , aidx = c(1,2)
                                             )


# Run the discaRd functions on current year
d_focal = run_discard(bdat = bdat_spp
                                             , ddat = ddat_focal_spp
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             , species_nespp3 = species_nespp3  #'081' #cod... 
                                             , stratvars = c('CAMS_GEAR_GROUP', 'MESHGROUP', 'REGION', 'HALFOFYEAR')
                                             , aidx = c(1,2)
                                             )

# summarize each result for convenience
dest_strata_p = d_prev$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f = d_focal$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df = dest_strata_f %>% 
  left_join(., dest_strata_p, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips = n.x
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
         , trans_rate = get.trans.rate(l_observed_trips = n_obs_trips
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips 
         , in_season_rate 
         , previous_season_rate 
         , trans_rate)


trans_rate_df = trans_rate_df %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df$final_rate = coalesce(trans_rate_df$final_rate, trans_rate_df$in_season_rate)

# compare subbed rate total to in season total. 
 trans_rate_df %>% 
   left_join(dest_strata_f, by = 'STRATA') %>% 
   summarise(in_season_discard = sum(in_season_rate*KALL, na.rm = T)
             , final_rate_discard = sum(final_rate*KALL, na.rm = T)
             ) %>% 
   knitr::kable()

# park the final rate into the trips table
 trans_rate_df %>% 
   right_join(., y = d_focal$res, by = 'STRATA') %>% 
   summarise(inseason_rate_d = sum(in_season_rate*SUBTRIP_KALL*Discard_Mortality_Ratio, na.rm = T)
             , trans_rate_d = sum(trans_rate*SUBTRIP_KALL*Discard_Mortality_Ratio, na.rm = T)
             , final_rate_d = sum(final_rate*SUBTRIP_KALL*Discard_Mortality_Ratio, na.rm = T))
assumed_discard %>% knitr::kable(caption = 'Assumed discard. This could be a generalized rate from current year. This could also be built from previous years discard info.')

dest_strata %>% knitr::kable(caption = 'Stratified Estimate From discaRd')

# out_tab %>% head() %>% 
    # knitr::kable(format= 'markdown', caption = "Exampe of tabular output")
# obs only 

obs = tbl(bcon, sql('
    select o.*
    , case when c.match_nespp3 is not null then c.match_nespp3 else o.nespp3 end as match_nespp3
    from obs_cams_prorate o
    left join (select * from apsd.s_nespp3_match_conv) c
    on o.nespp3 = c.nespp3'
))

bdat <- obs %>% 
    # filter(YEAR == 2019) %>% 
    collect()


bdat$MESHGROUP[bdat$MESHGROUP == 'na'] = NA


# catch only

# dat = tbl(bcon, sql('select * from bg_cams_catch_mock'))


dat = tbl(bcon, sql('
select DMIS_TRIP_ID
, YEAR
, RECORD_LAND 
, MONTH
, REGION
, case when month in (1,2,3,4,5,6) then 1
           when month in (7,8,9,10,11,12) then 2
           end as HALFOFYEAR
, VTRSERNO
, GEARNM
, GEARCODE
, GEARTYPE
, NEGEAR
, MESHGROUP
, CAREA
, sector_id
, activity_code_1
, permit_EFP_1
, permit_EFP_2
, permit_EFP_3
, permit_EFP_4
, redfish_exemption
, closed_area_exemption
, sne_smallmesh_exemption
, xlrg_gillnet_exemption
, tripcategory
, accessarea
, sum(pounds) as live_Pounds
from bg_cams_catch
group by DMIS_TRIP_ID, VTRSERNO, YEAR, GEARNM, GEARCODE, NEGEAR, GEARTYPE, MESHGROUP, CAREA, YEAR
, MONTH
, RECORD_LAND
, REGION
, sector_id
, activity_code_1
, permit_EFP_1
, permit_EFP_2
, permit_EFP_3
, permit_EFP_4
, redfish_exemption
, closed_area_exemption
, sne_smallmesh_exemption
, xlrg_gillnet_exemption
, tripcategory
, accessarea
, case when month in (1,2,3,4,5,6) then 1
           when month in (7,8,9,10,11,12) then 2
           end
'))

# %>% 
#   collect()
mesh = tbl(bcon, sql("
select meshgroup||'-'||obs_meshgroup as vtr_obs_meshes
, vtrserno
, link1
, subtrip_kall
from bg_cams_obs_catch
where link1 is not null
and negear = 50
--group by  meshgroup||'-'||obs_meshgroup
")
) %>% collect()

 mesh %>% 
   group_by(VTRSERNO) %>% 
   slice(1) %>% 
   ungroup() %>% 
   group_by(VTR_OBS_MESHES) %>% 
   dplyr::summarise(nvtrs = length(unique(VTRSERNO))
                                   , KALL = sum(SUBTRIP_KALL, na.rm = T))


noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.