knitr::opts_chunk$set(echo=FALSE, warning = FALSE, 
                                            message = FALSE, cache = FALSE,
                                            progress = TRUE, verbose = FALSE, comment = F
                                            , error = FALSE, dev = 'png', dpi = 200)
t1 = Sys.time()

# setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")
library(tidyverse)
library(odbc)
library(ROracle)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
# library(config)
library(stringr)
library(discaRd)
devtools::load_all()
options(scipen = 999)

# local run
#dw_maps <- config::get(value = "maps", file = "~/config.yml")

# # if on server..
# dw_maps <- config::get(value = "maps", file = "~/config.yml")

 dw_maps <- config::get(config = "maps", file = "~/config_group.yml")
# 
 # con_maps <- dbConnect(odbc::odbc(), 
 #                                  DSN = dw_maps$dsn, 
 #                                  UID = dw_maps$uid, 
 #                                  PWD = dw_maps$pwd)

# Connect to database - move this to config file in the future - quick addition for server
  connectString <- paste(
    "(DESCRIPTION=",
    "(ADDRESS=(PROTOCOL=tcp)(HOST=", dw_maps$host, ")(PORT=", dw_maps$port, "))",
    "(CONNECT_DATA=(SERVICE_NAME=",dw_maps$svc, ")))",
    sep = ""
  )

# Connect to oracle each loop in case of timeouts
    con_maps <- ROracle::dbConnect(
      drv = ROracle::Oracle(),
      username = dw_maps$uid,
      password = dw_maps$pwd,
      dbname = connectString
    )


'%!in%' <- function(x,y)!('%in%'(x,y))

source('~/discaRd/CAMS/R/cams_discard_functions.R')
FY <- 2019

FY_TYPE = 'MARCH START'

#--------------------------------------------------------------------------#
# group of species ITIS codes.. 
# SMB, river herring, bluefish, summer flounder, 
# black seabass and scup.
#Not sure what else is needed for herring and shad catch cap.
 itis <-  c('620992')

 #itis <- itis
 itis_num <- as.numeric(itis)


 species = tbl(con_maps, sql("select *
                                                from CAMS_DISCARD_MORTALITY_STOCK")) %>% 

    collect() %>% 

    filter(SPECIES_ITIS %in% itis_num) %>% group_by(SPECIES_ITIS) %>%
  slice(1)

 species$ITIS_TSN <- stringr::str_sort(itis)

#species$ITIS_TSN <- as.numeric(species$SPECIES_ITIS)
# species$ITIS_TSN <- as.character(species$SPECIES_ITIS)

#--------------------------------------------------------------------------#
# a sumamry table for comaprison

# final_discard_table = data.frame(YEAR = FY, SPECIES_ITIS = species$ITIS_TSN, COMNAME = species$COMMON_NAME, DISCARD = NA)
#--------------------------------------------------------------------------#
import_query = " with obs_cams as (
   select year
    , month
  , PERMIT
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end as halfofyear
    -- , carea
  , AREA
    , vtrserno
  , CAMS_SUBTRIP
    , link1
    , link3
    , docid
    , CAMSID
    , nespp3
  , itis_tsn as SPECIES_ITIS
  -- , itis_group1
    , SECGEAR_MAPPED as GEARCODE
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
  , GF
, case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end as SECTOR_TYPE
, case when PERMIT = '000000' then 'STATE'
       else 'FED' end as FED_OR_STATE
    , tripcategory
    , accessarea
    , activity_code_1
  --, permit_EFP_1
  --, permit_EFP_2
  --, permit_EFP_3
  --, permit_EFP_4
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    , NVL(sum(discard_prorate),0) as discard
    , NVL(sum(discard_prorate),0) as discard_prorate
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    ,  NVL(sum(discard)/nullif(round(max(obs_kall)), 0), 0) as dk
    from CAMS_OBS_CATCH

 WHERE YEAR >= 2017 
  and YEAR <= 2020

    group by year
  -- , carea
  , AREA
  , PERMIT
    , vtrserno
  , CAMS_SUBTRIP
    , link1
    , link3
    , docid
    , nespp3    
  , itis_tsn
  -- , itis_group1
    , SECGEAR_MAPPED
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
  , GF
  , case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end
  , case when PERMIT = '000000' then 'STATE'
       else 'FED' end
  , CAMSID
  , month
    , halfofyear
    , tripcategory
    , accessarea
    , activity_code_1
  --  , permit_EFP_1
  --, permit_EFP_2
  --, permit_EFP_3
  --, permit_EFP_4
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    order by vtrserno asc
    ) 

  select case when MONTH in (1,2,3,4) then YEAR-1 else YEAR end as GF_YEAR
  , case when MONTH in (1,2,3) then YEAR-1 else YEAR end as SCAL_YEAR
  , o.*
  , c.match_nespp3
  , coalesce(c.match_nespp3, o.nespp3) as nespp3_final
  from obs_cams o
  left join apsd.s_nespp3_match_conv c on o.nespp3 = c.nespp3         

"

c_o_dat2 <- ROracle::dbGetQuery(con_maps, import_query)

c_o_dat2 = c_o_dat2 %>% 
    mutate(PROGRAM = substr(ACTIVITY_CODE_1, 9, 10)) %>% 
  mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' & PROGRAM == 'OP' ~ 'OPEN' 
       , PROGRAM == 'NS' ~ 'NLS'
       , PROGRAM == 'NN' ~ 'NLSN'
       , PROGRAM == 'NH' ~ 'NLSS'  # includes the NLS south Deep
       , PROGRAM == 'NW' ~ 'NLSW'
       , PROGRAM == '1S' ~ 'CAI'
       , PROGRAM == '2S' ~ 'CAII'
       , PROGRAM %in% c('MA', 'ET', 'EF', 'HC', 'DM') ~ 'MAA'
       )
) %>% 
    mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' ~ dplyr::coalesce(SCALLOP_AREA, 'OPEN'))) %>% 
    mutate(DOCID = CAMS_SUBTRIP)

# NOTE: CAMS_SUBTRIP being defined as DOCID so the discaRd functions don't have to change!! DOCID hard coded in the functions..


# 4/13/22
# need to make LINK1 NA when LINK3 is null.. this is due to data mismatches in putting hauls at the subtrip level. If we don't do this step, OBS trips will get values of 0 for any evaluated species. this may or may not be correct.. it's not possible to know without a haul to subtrip match. This is a hotfix that may change in the future 

link3_na = c_o_dat2 %>% 
    filter(!is.na(LINK1) & is.na(LINK3))


# make these values 0 or NA or 'none' depending on the default for that field
link3_na = link3_na %>% 
    mutate(LINK1 = NA
                 , DISCARD = NA
                 , DISCARD_PRORATE = NA
                 , OBSRFLAG = NA
                 , OBSVTR = NA
                 , OBS_AREA = NA
                 , OBS_GEAR = NA
                 , OBS_HAUL_KALL_TRIP = 0
                 , OBS_HAUL_KEPT = 0
                 , OBS_KALL = 0
                 , OBS_LINK1 = NA
                 , OBSVTR = NA
                 , OBS_MESHGROUP = 'none'
                 , PRORATE = NA)


tidx = c_o_dat2$CAMSID %in% link3_na$CAMSID

c_o_dat2 = c_o_dat2[tidx == F,]

c_o_dat2 = c_o_dat2 %>% 
    bind_rows(link3_na)

# continue the data import


state_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'STATE')
fed_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'FED')

fed_trips = fed_trips %>% 
    mutate(ROWID = 1:nrow(fed_trips)) %>% 
    relocate(ROWID)

# filter out link1 that are doubled on VTR

multilink = fed_trips %>% 
    filter(!is.na(LINK1)) %>% 
    group_by(VTRSERNO) %>% 
    dplyr::summarise(nlink1 = n_distinct(LINK1)) %>% 
    arrange(desc(nlink1)) %>% 
    filter(nlink1>1)

remove_links = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
    dplyr::select(LINK1) %>% 
    distinct()

remove_id = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
      distinct(ROWID)

fed_trips =
    fed_trips %>% 
    filter(ROWID %!in% remove_id$ROWID)

c_o_dat2 = fed_trips %>% 
#   filter(GF == 0) %>% 
    bind_rows(., state_trips) %>% 
    mutate(GF = "0")

# gf_dat = fed_trips%>% 
#   filter(GF == 1)

rm(fed_trips, state_trips)
# Stratification variables

stratvars = c('SPECIES_STOCK'
              ,'CAMS_GEAR_GROUP'
                            , 'MESHGROUP'
                          , 'TRIPCATEGORY'
                          , 'ACCESSAREA')


# Begin loop


for(i in 1:length(species$SPECIES_ITIS)){

t1 = Sys.time() 

print(paste0('Running ', species$COMMON_NAME[i], ' For Fishing Year ', FY)) 

# species_nespp3 = species$NESPP3[i]  
#species_itis = species$ITIS_TSN[i] 

species_itis <- as.character(species$ITIS_TSN[i])
species_itis_srce = as.character(as.numeric(species$ITIS_TSN[i]))
#--------------------------------------------------------------------------#
# Support table import by species

# GEAR TABLE
CAMS_GEAR_STRATA = tbl(con_maps, sql('  select * from CAMS_GEARCODE_STRATA')) %>% 
    collect() %>% 
  dplyr::rename(GEARCODE = VTR_GEAR_CODE) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(SPECIES_ITIS == species_itis) %>%
  dplyr::select(-NESPP3, -SPECIES_ITIS)

# Stat areas table  
# unique stat areas for stock ID if needed
STOCK_AREAS = tbl(con_maps, sql('select * from CAMS_STATAREA_STOCK')) %>%
  # filter(NESPP3 == species_nespp3) %>%  # removed  & AREA_NAME == species_stock
    dplyr::filter(SPECIES_ITIS == species_itis) %>%
    collect() %>% 
  group_by(AREA_NAME) %>% 
  distinct(STAT_AREA) %>%
  mutate(AREA = as.character(STAT_AREA)
         , SPECIES_STOCK = AREA_NAME) %>% 
  ungroup() #%>% 
  #dplyr::select(SPECIES_STOCK, AREA)

# Mortality table
CAMS_DISCARD_MORTALITY_STOCK = tbl(con_maps, sql("select * from CAMS_DISCARD_MORTALITY_STOCK"))  %>%
  collect() %>%
  mutate(SPECIES_STOCK = AREA_NAME
         , GEARCODE = CAMS_GEAR_GROUP) %>%
  select(-AREA_NAME) %>%
   mutate(CAMS_GEAR_GROUP = as.character(CAMS_GEAR_GROUP)) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(SPECIES_ITIS == species_itis_srce)
 # dplyr::select(-NESPP3, -SPECIES_ITIS) %>% 
 # dplyr::rename(DISC_MORT_RATIO = Discard_Mortality_Ratio)

# Observer codes to be removed
OBS_REMOVE = tbl(con_maps, sql("select * from CAMS_OBSERVER_CODES"))  %>%
    collect() %>% 
    filter(SPECIES_ITIS == species_itis) %>% 
    distinct(OBS_CODES) 

#--------------------------------------------------------------------------------#
# make tables
ddat_focal <- all_dat %>% 
  filter(GF_YEAR == FY) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0) %>% 
             # , NESPP3 = NESPP3_FINAL
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>% 
    dplyr::select(-SPECIES_ITIS.y, -GEARCODE.y) %>% 
        dplyr::rename(COMMON_NAME= 'COMMON_NAME.x',SPECIES_ITIS = 'SPECIES_ITIS.x', NESPP3 = 'NESPP3.x',
                  GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')


ddat_prev <- all_dat %>% 
  filter(GF_YEAR == FY-1) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0) %>%
             # , NESPP3 = NESPP3_FINAL 
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>% 
    dplyr::select(-SPECIES_ITIS.y, -GEARCODE.y) %>% 
    dplyr::rename(COMMON_NAME= 'COMMON_NAME.x',SPECIES_ITIS = 'SPECIES_ITIS.x', NESPP3 = 'NESPP3.x',
                  GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')



# need to slice the first record for each observed trip.. these trips are multi rowed while unobs trips are single row.. 
ddat_focal_cy = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
    mutate(SPECIES_EVAL_DISCARD = case_when(SPECIES_ITIS == species_itis ~ DISCARD
                                                                                    )) %>% 
    mutate(SPECIES_EVAL_DISCARD = coalesce(SPECIES_EVAL_DISCARD, 0)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
    arrange(desc(SPECIES_EVAL_DISCARD)) %>% 
    slice(1) %>% 
  ungroup()

# and join to the unobserved trips

ddat_focal_cy = ddat_focal_cy %>% 
  union_all(ddat_focal %>% 
              filter(is.na(LINK1)))  
            #    group_by(VTRSERNO, CAMSID) %>% 
            #    slice(1) %>% 
            #    ungroup()
            # )


# if using the combined catch/obs table, which seems necessary for groundfish.. need to roll your own table to use with run_discard function
# DO NOT NEED TO FILTER SPECIES HERE. NEED TO RETAIN ALL TRIPS. THE MAKE_BDAT_FOCAL.R FUNCTION TAKES CARE OF THIS. 

bdat_cy = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
    filter(FISHDISP != '090') %>%
    filter(LINK3_OBS == 1) %>%
    filter(substr(LINK1, 1,3) %!in% OBS_REMOVE$OBS_CODES) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)


# set up trips table for previous year
ddat_prev_cy = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
    mutate(SPECIES_EVAL_DISCARD = case_when(SPECIES_ITIS == species_itis ~ DISCARD
                                                                                    )) %>% 
    mutate(SPECIES_EVAL_DISCARD = coalesce(SPECIES_EVAL_DISCARD, 0)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
    arrange(desc(SPECIES_EVAL_DISCARD)) %>% 
    slice(1) %>% 
  ungroup()

ddat_prev_cy = ddat_prev_cy %>% 
  union_all(ddat_prev %>% 
                         filter(is.na(LINK1))) #%>% 
               # group_by(VTRSERNO,CAMSID) %>% 
               # slice(1) %>% 
               # ungroup()



# previous year observer data needed.. 
bdat_prev_cy = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
    filter(FISHDISP != '090') %>%
    filter(LINK3_OBS == 1) %>%
    filter(substr(LINK1, 1,3) %!in% OBS_REMOVE$OBS_CODES) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)

# Run the discaRd functions on previous year
d_prev = run_discard(bdat = bdat_prev_cy
                                             , ddat = ddat_prev_cy
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             # , species_nespp3 = species_nespp3
                                           , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1:length(stratvars))
                                             )

# Run the discaRd functions on current year
d_focal = run_discard(bdat = bdat_cy
                                             , ddat = ddat_focal_cy
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             # , species_nespp3 = species_nespp3  #'081' #cod...
                                             , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1:length(stratvars))  # this makes sure this isn't used.. 
                                             )

# summarize each result for convenience
dest_strata_p = d_prev$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f = d_focal$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df = dest_strata_f %>% 
  left_join(., dest_strata_p, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips_f = n.x
             , n_obs_trips_p = n.y
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
  ) %>% 
    mutate(n_obs_trips_p = coalesce(n_obs_trips_p, 0)) %>% 
  mutate(trans_rate = get.trans.rate(l_observed_trips = n_obs_trips_f
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips_f
         , n_obs_trips_p
         , in_season_rate 
         , previous_season_rate 
         , trans_rate
         , CV_f = CV.x
         )


trans_rate_df = trans_rate_df %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df$final_rate = coalesce(trans_rate_df$final_rate, trans_rate_df$in_season_rate)


 trans_rate_df_full = trans_rate_df

 full_strata_table = trans_rate_df_full %>% 
   right_join(., y = d_focal$res, by = 'STRATA') %>% 
   as_tibble() %>% 
        mutate(SPECIES_ITIS_EVAL = species_itis
                 , COMNAME_EVAL = species$COMNAME[i]
                 , FISHING_YEAR = FY
                 , FY_TYPE = FY_TYPE) %>% 
       dplyr::rename(FULL_STRATA = STRATA) 

#
# SECTOR ROLLUP
#
# print(paste0("Getting rates across sectors for ", species_itis, " ", FY)) 

stratvars_assumed = c("SPECIES_STOCK"
                                            , "CAMS_GEAR_GROUP"
                                            , "MESHGROUP")


### All tables in previous run can be re-used wiht diff stratification

# Run the discaRd functions on previous year
d_prev_pass2 = run_discard(bdat = bdat_prev_cy
                                             , ddat = ddat_prev_cy
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             # , species_nespp3 = species_nespp3
                                           , species_itis = species_itis
                                             , stratvars = stratvars_assumed
                                             # , aidx = c(1:length(stratvars_assumed))  # this makes sure this isn't used.. 
                                            , aidx = c(1)  # this creates an unstratified broad stock rate
                                             )


# Run the discaRd functions on current year
d_focal_pass2 = run_discard(bdat = bdat_cy
                                             , ddat = ddat_focal_cy
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             # , species_nespp3 = species_nespp3  #'081' #cod...
                                             , species_itis = species_itis
                                             , stratvars = stratvars_assumed
                                             # , aidx = c(1:length(stratvars_assumed))  # this makes sure this isn't used.. 
                                            , aidx = c(1)  # this creates an unstratified broad stock rate
                                             )

# summarize each result for convenience
dest_strata_p_pass2 = d_prev_pass2$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f_pass2 = d_focal_pass2$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df_pass2 = dest_strata_f_pass2 %>% 
  left_join(., dest_strata_p_pass2, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips_f = n.x
             , n_obs_trips_p = n.y
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
  ) %>% 
    mutate(n_obs_trips_p = coalesce(n_obs_trips_p, 0)) %>% 
  mutate(trans_rate = get.trans.rate(l_observed_trips = n_obs_trips_f
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips_f
         , n_obs_trips_p
         , in_season_rate 
         , previous_season_rate 
         , trans_rate
         , CV_f = CV.x
         )


trans_rate_df_pass2 = trans_rate_df_pass2 %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df_pass2$final_rate = coalesce(trans_rate_df_pass2$final_rate, trans_rate_df_pass2$in_season_rate)


 # get a table of broad stock rates using discaRd functions. Previosuly we used sector rollupresults (ARATE in pass2)


bdat_2yrs = bind_rows(bdat_prev_cy, bdat_cy)
ddat_cy_2yr = bind_rows(ddat_prev_cy, ddat_focal_cy)
ddat_2yr = bind_rows(ddat_prev, ddat_focal)

mnk = run_discard( bdat = bdat_2yrs
            , ddat_focal = ddat_cy_2yr
            , c_o_tab = ddat_2yr
            , species_itis = species_itis
            , stratvars = stratvars[1:2]  #"SPECIES_STOCK"   "CAMS_GEAR_GROUP"
            )

# rate table
mnk$allest$C

SPECIES_STOCK <-sub("_.*", "", mnk$allest$C$STRATA)  

CAMS_GEAR_GROUP <- sub(".*?_", "", mnk$allest$C$STRATA) 

BROAD_STOCK_RATE <-  mnk$allest$C$RE_mean

CV_b <- round(mnk$allest$C$RE_rse, 2)

BROAD_STOCK_RATE_TABLE <- as.data.frame(cbind(SPECIES_STOCK, CAMS_GEAR_GROUP, BROAD_STOCK_RATE, CV_b))

BROAD_STOCK_RATE_TABLE$BROAD_STOCK_RATE <- as.numeric(BROAD_STOCK_RATE_TABLE$BROAD_STOCK_RATE)
BROAD_STOCK_RATE_TABLE$CV_b <- as.numeric(BROAD_STOCK_RATE_TABLE$CV_b)


names(trans_rate_df_pass2) = paste0(names(trans_rate_df_pass2), '_a')

#
# join full and assumed strata tables
#
# print(paste0("Constructing output table for ", species_itis, " ", FY)) 

joined_table = assign_strata(full_strata_table, stratvars_assumed) %>% 
    dplyr::select(-STRATA_ASSUMED) %>%  # not using this anymore here..
    dplyr::rename(STRATA_ASSUMED = STRATA) %>% 
    left_join(., y = trans_rate_df_pass2, by = c('STRATA_ASSUMED' = 'STRATA_a')) %>% 
    left_join(., y = BROAD_STOCK_RATE_TABLE, by = c('SPECIES_STOCK','CAMS_GEAR_GROUP')) %>% 
    mutate(COAL_RATE = case_when(n_obs_trips_f >= 5 ~ final_rate  # this is an in season rate
                                                             , n_obs_trips_f < 5 & 
                                                                n_obs_trips_p >=5 ~ final_rate  # this is a final IN SEASON rate taking transition into account
                                                             , n_obs_trips_f < 5 & 
                                                                n_obs_trips_p < 5 ~ trans_rate_a  # this is an final assumed rate taking trasnition into account
                                   )
    ) %>% 
    mutate(COAL_RATE = coalesce(COAL_RATE, BROAD_STOCK_RATE)) %>%
    mutate(SPECIES_ITIS_EVAL = species_itis
                 , COMNAME_EVAL = species$COMNAME[i]
                 , FISHING_YEAR = FY
                 , FY_TYPE = FY_TYPE) 

#
# add discard source
#


# >5 trips in season gets in season rate
# < 5 i nseason but >=5 past year gets transition
# < 5 and < 5 in season, but >= 5 sector rolled up rate (in season) gets get sector rolled up rate
# <5, <5,  and <5 gets broad stock rate

joined_table = joined_table %>% 
    mutate(DISCARD_SOURCE = case_when(!is.na(LINK1) & LINK3_OBS == 1 ~ 'O'  # observed with at least one obs haul
                                                                        , !is.na(LINK1) & LINK3_OBS == 0 ~ 'I'  # observed but no obs hauls..  
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f >= 5 ~ 'I'
                                                                        # , is.na(LINK1) & COAL_RATE == previous_season_rate ~ 'P'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 & 
                                                                            n_obs_trips_p >=5 ~ 'T'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 &
                                                                            n_obs_trips_p < 5 &
                                                                            n_obs_trips_f_a >= 5 ~ 'GM'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 &
                                                                            n_obs_trips_p < 5 &
                                                                            n_obs_trips_p_a >= 5 ~ 'G'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 & 
                                                                            n_obs_trips_p < 5 & 
                                                                            n_obs_trips_f_a < 5 & 
                                                                            n_obs_trips_p_a < 5 ~ 'G'))
                                                                #       , is.na(LINK1) & is.na(final_rate) ~ 'NA'
                                                                #       , is.na(LINK1) & is.nan(final_rate) ~ 'NA'))

#
# make sure CV type matches DISCARD SOURCE}
#

# obs trips get 0, broad stock rate is NA



joined_table = joined_table %>% 
    mutate(CV = case_when(DISCARD_SOURCE == 'O' ~ 0
                                                , DISCARD_SOURCE == 'I' ~ CV_f
                                                , DISCARD_SOURCE == 'T' ~ CV_f
                                                , DISCARD_SOURCE == 'GM' ~ CV_f_a
                                                , DISCARD_SOURCE == 'G' ~ CV_b
                                            #   , DISCARD_SOURCE == 'NA' ~ 'NA'
                                                )  # , DISCARD_SOURCE == 'B' ~ NA
                 )

# Make note of the stratification variables used according to discard source

stratvars_gear = c("SPECIES_STOCK"
                                            , "CAMS_GEAR_GROUP")

strata_f = paste(stratvars, collapse = ';')
strata_a = paste(stratvars_assumed, collapse = ';')
strata_b = paste(stratvars_gear, collapse = ';')

joined_table = joined_table %>% 
    mutate(STRATA_USED = case_when(DISCARD_SOURCE == 'O' & LINK3_OBS == 1 ~ ''
                                                , DISCARD_SOURCE == 'O' & LINK3_OBS == 0 ~ strata_f
                                                , DISCARD_SOURCE == 'I' ~ strata_f
                                                , DISCARD_SOURCE == 'T' ~ strata_f
                                                , DISCARD_SOURCE == 'GM' ~ strata_a
                                                , DISCARD_SOURCE == 'G' ~ strata_b
                                            #   , DISCARD_SOURCE == 'NA' ~ 'NA'
                                                ) 
                 )


#
# get the discard for each trip using COAL_RATE}
#

# discard mort ratio tht are NA for odd gear types (e.g. cams gear 0) get a 1 mort ratio. 
# the KALLs should be small.. 

joined_table = joined_table %>% 
    mutate(DISC_MORT_RATIO = coalesce(DISC_MORT_RATIO, 1)) %>%
    mutate(DISCARD = case_when(!is.na(LINK1) & LINK3_OBS == 1 ~ DISC_MORT_RATIO*OBS_DISCARD # observed with at least one obs haul
                                                         , !is.na(LINK1) & LINK3_OBS == 0 ~ DISC_MORT_RATIO*COAL_RATE*LIVE_POUNDS # observed but no obs hauls..
                                                         , is.na(LINK1) ~ DISC_MORT_RATIO*COAL_RATE*LIVE_POUNDS)
                 )


 fst::write_fst(x = joined_table, path = file.path(getOption("maps.discardsPath"), paste0('discard_est_', species_itis, '_trips', FY,'.fst')))

t2 = Sys.time()

print(paste('RUNTIME: ', round(difftime(t2, t1, units = "mins"),2), ' MINUTES',  sep = ''))
}
joined_table %>% 
    group_by(SPECIES_STOCK, DISCARD_SOURCE) %>% 
    dplyr::summarise(DISCARD_EST = sum(DISCARD)) %>% 
    pivot_wider(names_from = 'SPECIES_STOCK', values_from = 'DISCARD_EST') %>% 
    dplyr::select(-1) %>% 
    colSums(na.rm = T) %>% 
    round()

# saveRDS(joined_table, file = paste0('/home/bgaluardi/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/OUTPUT/discard_est_', species_itis, '_gftrips_only.RDS')
#               
# fst::write_fst(x = joined_table, path = paste0('/home/bgaluardi/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/OUTPUT/discard_est_', species_itis, '_gftrips_only', FY,'.fst'))
# 
#  t2 = Sys.time()
#   
# print(paste('RUNTIME: ', round(difftime(t2, t1, units = "mins"),2), ' MINUTES',  sep = ''))
#  
# }
# 
#          

 joined_table %>% 
  dplyr::group_by(FED_OR_STATE) %>%
  dplyr::summarise(Discard_total = sum(DISCARD, na.rm=TRUE), 
            Kall_total = sum(SUBTRIP_KALL, na.rm=TRUE))


 #add subtrip kall after CV

    cy_discard_example <- joined_table %>% 
        mutate(GF_STOCK_DEF = paste0(COMMON_NAME, '-', SPECIES_STOCK)) %>% 
        dplyr::select(-SPECIES_ITIS) %>%
        # dplyr::select(-COMMON_NAME, -SPECIES_ITIS) %>%
    dplyr::rename('STRATA_FULL' = 'FULL_STRATA'
                                , 'CAMS_DISCARD_RATE' = 'COAL_RATE'
                                # , 'COMMON_NAME' = 'COMNAME_EVAL'
                                , 'SPECIES_ITIS' = 'SPECIES_ITIS_EVAL'
                                , 'ACTIVITY_CODE' = 'ACTIVITY_CODE_1'
                                , 'N_OBS_TRIPS_F' = 'n_obs_trips_f'
                                ) %>% 
    mutate(DATE_RUN = as.character(Sys.Date())
                 , FY = as.integer(FY)) %>%
    dplyr::select(
        DATE_RUN,
        FY,
        YEAR,
        MONTH,
        SPECIES_ITIS,
        COMMON_NAME,
        FY_TYPE,
        ACTIVITY_CODE,
        VTRSERNO,
        CAMSID,
        FED_OR_STATE,
        GF,
        AREA,
        LINK1,
        N_OBS_TRIPS_F,
        STRATA_USED,
        STRATA_FULL,
        STRATA_ASSUMED,
        DISCARD_SOURCE,
        OBS_DISCARD,
        OBS_KALL,
        SUBTRIP_KALL,
        BROAD_STOCK_RATE,
        CAMS_DISCARD_RATE,
        DISC_MORT_RATIO,
        DISCARD,
        CV,
        SPECIES_STOCK,
        CAMS_GEAR_GROUP,
        MESHGROUP,
        SECTID,
        EM,
        REDFISH_EXEMPTION,
        SNE_SMALLMESH_EXEMPTION,
        XLRG_GILLNET_EXEMPTION,
        TRIPCATEGORY,
        ACCESSAREA,
        SCALLOP_AREA
      # eval(strata_unique)
    )

 cy_discard_example <- cy_discard_example %>% dplyr::mutate(DISCARD_SOURCE = case_when(is.na(DISCARD) ~ 'N',TRUE ~ DISCARD_SOURCE)) %>% dplyr::mutate(STRATA_USED = case_when(is.na(DISCARD) ~ 'NA',TRUE ~ STRATA_USED))


 cy_discard_example$CV[is.nan(cy_discard_example$CV)]<-NA

 cy_discard_example$CV[is.infinite(cy_discard_example$CV)] <- NA    

 cy_discard_example$CAMS_DISCARD_RATE[is.nan(cy_discard_example$CAMS_DISCARD_RATE)]<-NA

 cy_discard_example$CAMS_DISCARD_RATE[is.infinite(cy_discard_example$CAMS_DISCARD_RATE)] <- NA 

 cy_discard_example$BROAD_STOCK_RATE[is.nan(cy_discard_example$BROAD_STOCK_RATE)]<-NA

 cy_discard_example$BROAD_STOCK_RATE[is.infinite(cy_discard_example$BROAD_STOCK_RATE)] <- NA 

 cy_discard_example$DISCARD[is.nan(cy_discard_example$DISCARD)]<-NA

 cy_discard_example$DISCARD[is.infinite(cy_discard_example$DISCARD)] <- NA    

 species$COMMON_NAME[i]
#db_drop_table(con = con_maps, table = 'CAMS_DISCARD_EXAMPLE_CY_BLACKSEABASS_19', force = F)

dbWriteTable(con_maps, name = 'CAMS_DISCARD_REDCRAB_20', value = cy_discard_example, overwrite = T)

cy_discard_example %>% filter(FED_OR_STATE == 'FED') %>% group_by(DISCARD_SOURCE, CAMS_GEAR_GROUP, SPECIES_STOCK) %>% dplyr::summarise(D = sum(DISCARD), K = sum(SUBTRIP_KALL), Drate = mean(CAMS_DISCARD_RATE), Dmort = mean(DISC_MORT_RATIO)) %>% arrange(desc(D))%>% top_n(10) %>% dplyr::mutate(across(where(is.numeric), round, 4))%>%
        DT::datatable(caption = 'Discard rates by Strata for Monkfish')


# save trip by trip info to RDS 
 #saveRDS(final_table, file = paste0('discard_est_', species_itis, '.RDS'))
 # saveRDS(final_table, file = paste0('discard_est_', species_itis, '.RDS'))
#---------------------------------------------------------------------#
# End loop
#Need to modify this loop so it produces the oracle tables on each loop.

# t2 = Sys.time()
# 
# print(paste(species_itis, ' RAN IN ', t2-t1, ' SECONDS',  sep = ''))


noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.