knitr::opts_chunk$set(echo=FALSE, warning = FALSE, 
                                            message = FALSE, cache = FALSE,
                                            progress = TRUE, verbose = FALSE, comment = F
                                            , error = FALSE, dev = 'png', dpi = 200)
t1 = Sys.time()

# setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")
library(tidyverse)
library(odbc)
library(ROracle)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
# library(config)
library(stringr)
library(discaRd)
library(data.table)
devtools::load_all()
options(scipen = 999)

# local run
#dw_maps <- config::get(value = "maps", file = "~/config.yml")

# # if on server..
# dw_maps <- config::get(value = "maps", file = "~/config.yml")

 dw_maps <- config::get(config = "maps", file = "~/config_group.yml")
# 
 # con_maps <- dbConnect(odbc::odbc(), 
 #                                  DSN = dw_maps$dsn, 
 #                                  UID = dw_maps$uid, 
 #                                  PWD = dw_maps$pwd)

# Connect to database - move this to config file in the future - quick addition for server
  connectString <- paste(
    "(DESCRIPTION=",
    "(ADDRESS=(PROTOCOL=tcp)(HOST=", dw_maps$host, ")(PORT=", dw_maps$port, "))",
    "(CONNECT_DATA=(SERVICE_NAME=",dw_maps$svc, ")))",
    sep = ""
  )

# Connect to oracle each loop in case of timeouts
    con_maps <- ROracle::dbConnect(
      drv = ROracle::Oracle(),
      username = dw_maps$uid,
      password = dw_maps$pwd,
      dbname = connectString
    )


'%!in%' <- function(x,y)!('%in%'(x,y))

#source('~/discaRd/CAMS//R/cams_discard_functions.R') 
# TODO
# This section should move to the control script

FY <- 2019

FY_TYPE = 'JAN START'

#--------------------------------------------------------------------------#
# herring ITIS code
 itis <-  c('161722')   

 #itis <- itis
 itis_num <- as.numeric(itis)


 species = tbl(con_maps, sql("select *
                                                from MAPS.CAMS_DISCARD_MORTALITY_STOCK")) %>% 

    collect() %>% 

    filter(SPECIES_ITIS %in% itis_num) %>% group_by(SPECIES_ITIS) %>%
  slice(1)

 species$ITIS_TSN <- stringr::str_sort(itis)

#species$ITIS_TSN <- as.numeric(species$SPECIES_ITIS)
# species$ITIS_TSN <- as.character(species$SPECIES_ITIS)

#--------------------------------------------------------------------------#
# a summary table for comparison

# final_discard_table = data.frame(YEAR = FY, SPECIES_ITIS = species$ITIS_TSN, COMNAME = species$COMMON_NAME, DISCARD = NA)
#--------------------------------------------------------------------------#
# TODO
# This should probably be it's own function since it's so specific to herring fields

  import_query = " with obs_cams as (
   select year
    , month
  , PERMIT
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end as halfofyear
    -- , carea
  , AREA
    , vtrserno
  , CAMS_SUBTRIP
    , LINK1
    , link3
    , link3_obs
    , docid
    , CAMSID
    , nespp3
  , itis_tsn as SPECIES_ITIS
  -- , itis_group1
    , SECGEAR_MAPPED as GEARCODE
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    ,FISHDISP 
    , SECTID
  , GF
, case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end as SECTOR_TYPE
, case when PERMIT = '000000' then 'STATE'
       else 'FED' end as FED_OR_STATE
    , tripcategory
    , accessarea
    , activity_code_1
  --, permit_EFP_1
  --, permit_EFP_2
  --, permit_EFP_3
  --, permit_EFP_4
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    , NVL(sum(discard_prorate),0) as discard
    , NVL(sum(discard_prorate),0) as discard_prorate
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    ,  NVL(sum(discard)/nullif(round(max(obs_kall)), 0), 0) as dk
    from MAPS.CAMS_OBS_CATCH

 WHERE YEAR >= 2018
  and YEAR <= 2019

    group by year
  -- , carea
  , AREA
  , PERMIT
    , vtrserno
  , CAMS_SUBTRIP
    , LINK1
    , link3
    , link3_obs
    , docid
    , nespp3    
  , itis_tsn
  -- , itis_group1
    , SECGEAR_MAPPED
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
    ,FISHDISP
  , GF
  , case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end
  , case when PERMIT = '000000' then 'STATE'
       else 'FED' end
  , CAMSID
  , month
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end  , tripcategory
    , accessarea
    , activity_code_1
  --  , permit_EFP_1
  --, permit_EFP_2
  --, permit_EFP_3
  --, permit_EFP_4
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    order by vtrserno asc
    ) , cams_obs_spp as( 

  select case when MONTH in (1,2,3,4) then YEAR-1 else YEAR end as GF_YEAR
  , case when MONTH in (1,2,3) then YEAR-1 else YEAR end as SCAL_YEAR
  , o.*
  , c.match_nespp3
  , coalesce(c.match_nespp3, o.nespp3) as nespp3_final
  from obs_cams o
  left join apsd.s_nespp3_match_conv c on o.nespp3 = c.nespp3)  

  , cams_herr as(
  select distinct (cl.camsid||'_'||cl.subtrip) cams_subtrip
  ,case when itis_tsn = '161722' then 'HERR_TRIP' else NULL end herr_targ
  ,cl.lat_dd
  ,cl.lon_dd
  ,(select hs.area_herr from cams_garfo.cfg_fed_area hs where cl.area = hs.area) stat_area_hma
  from maps.cams_landings cl
  WHERE YEAR >= 2018
  and YEAR <= 2019)

  select 
   cos.*
  ,ch.cams_subtrip landings_subt
  ,ch.herr_targ
  , case when ch.herr_targ = 'HERR_TRIP' --or cos.species_itis = '161722' 
  then 'HERR' else 'NON_HERR' end HERR_FLAG
  , ch.lat_dd
  , ch.lon_dd
  ,nvl((select hma.area from apsd.gis_herring_mgmt_areas hma where sdo_contains(hma.ora_geometry,sdo_geometry(2001,8307,sdo_point_type(NVL(ch.lon_dd,0),NVL(ch.lat_dd,0),NULL),NULL,NULL)) = 'TRUE'),stat_area_hma) herr_area
  from cams_obs_spp cos
  left join cams_herr ch
  on cos.cams_subtrip = ch.cams_subtrip

"


c_o_dat2 <- ROracle::dbGetQuery(con_maps, import_query)

c_o_dat2 = c_o_dat2 %>% 
    mutate(PROGRAM = substr(ACTIVITY_CODE_1, 9, 10)) %>% 
  mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' & PROGRAM == 'OP' ~ 'OPEN' 
       , PROGRAM == 'NS' ~ 'NLS'
       , PROGRAM == 'NN' ~ 'NLSN'
       , PROGRAM == 'NH' ~ 'NLSS'  # includes the NLS south Deep
       , PROGRAM == 'NW' ~ 'NLSW'
       , PROGRAM == '1S' ~ 'CAI'
       , PROGRAM == '2S' ~ 'CAII'
       , PROGRAM %in% c('MA', 'ET', 'EF', 'HC', 'DM') ~ 'MAA'
       )
) %>% 
    mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' ~ dplyr::coalesce(SCALLOP_AREA, 'OPEN'))) %>% 
    mutate(DOCID = CAMS_SUBTRIP)

# NOTE: CAMS_SUBTRIP being defined as DOCID so the discaRd functions don't have to change!! DOCID hard coded in the functions..


# 4/13/22
# need to make LINK1 NA when LINK3 is null.. this is due to data mismatches in putting hauls at the subtrip level. If we don't do this step, OBS trips will get values of 0 for any evaluated species. this may or may not be correct.. it's not possible to know without a haul to subtrip match. This is a hotfix that may change in the future 

link3_na = c_o_dat2 %>% 
    filter(!is.na(LINK1) & is.na(LINK3))


# make these values 0 or NA or 'none' depending on the default for that field
link3_na = link3_na %>% 
    mutate(LINK1 = NA
                 , DISCARD = NA
                 , DISCARD_PRORATE = NA
                 , OBSRFLAG = NA
                 , OBSVTR = NA
                 , OBS_AREA = NA
                 , OBS_GEAR = NA
                 , OBS_HAUL_KALL_TRIP = 0
                 , OBS_HAUL_KEPT = 0
                 , OBS_KALL = 0
                 , LINK1 = NA
                 , OBSVTR = NA
                 , OBS_MESHGROUP = 'none'
                 , PRORATE = NA)


tidx = c_o_dat2$CAMSID %in% link3_na$CAMSID

c_o_dat2 = c_o_dat2[tidx == F,]

c_o_dat2 = c_o_dat2 %>% 
    bind_rows(link3_na)

# continue the data import


state_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'STATE')
fed_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'FED')

fed_trips = fed_trips %>% 
    mutate(ROWID = 1:nrow(fed_trips)) %>% 
    relocate(ROWID)

# filter out link1 that are doubled on VTR

multilink = fed_trips %>% 
    filter(!is.na(LINK1)) %>% 
    group_by(VTRSERNO) %>% 
    dplyr::summarise(nlink1 = n_distinct(LINK1)) %>% 
    arrange(desc(nlink1)) %>% 
    filter(nlink1>1)

remove_links = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
    dplyr::select(LINK1) %>% 
    distinct()

remove_id = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
      distinct(ROWID)

fed_trips =
    fed_trips %>% 
    filter(ROWID %!in% remove_id$ROWID)

c_o_dat2 = fed_trips %>% 
#   filter(GF == 0) %>% 
    bind_rows(., state_trips) %>% 
    mutate(GF = "0")

# gf_dat = fed_trips%>% 
#   filter(GF == 1)

rm(fed_trips, state_trips)

#get trips that caught herring  
#targ_trips <- c_o_dat2 %>%
#  filter(NESPP3 == 168) %>% select(CAMS_SUBTRIP)

# add new column for whether it was a herring trip or not
#c_o_dat2 <- c_o_dat2 %>%
#  mutate(HERR_TARG = case_when(CAMS_SUBTRIP %in% unlist(targ_trips) ~ 'HERR'
#                               , TRUE ~ 'NON_HERR'))
# Stratification variables

stratvars = c(#'SPECIES_STOCK'
              'HERR_FLAG' # target vs non target herring
              ,'HERR_AREA' # herring management area
              ,'CAMS_GEAR_GROUP')# gear 
                            #, 'MESHGROUP'
                          #, 'TRIPCATEGORY'
                          #, 'ACCESSAREA')


# Begin loop

i <- 1
for(i in 1:length(species$ITIS_TSN)){

t1 = Sys.time() 

print(paste0('Running ', species$ITIS_NAME[i], " for Fishing Year ", FY))   

# species_nespp3 = species$NESPP3[i]  
#species_itis = species$ITIS_TSN[i] 

species_itis <- as.character(species$ITIS_TSN[i])
species_itis_srce = as.character(as.numeric(species$ITIS_TSN[i]))

#--------------------------------------------------------------------------#
# Support table import by species

# GEAR TABLE
CAMS_GEAR_STRATA = tbl(con_maps, sql('  select * from MAPS.CAMS_GEARCODE_STRATA')) %>% 
    collect() %>% 
  dplyr::rename(GEARCODE = VTR_GEAR_CODE) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(SPECIES_ITIS == species_itis) %>%
  dplyr::select(-NESPP3, -SPECIES_ITIS) %>%
## AWA change gear group here, ask to have base table changed?
#  test <- CAMS_GEAR_STRATA %>%
mutate_all(function(x) ifelse(str_detect(x, '^0_'), 'other', x))


# Stat areas table  
# unique stat areas for stock ID if needed
STOCK_AREAS = tbl(con_maps, sql('select * from MAPS.CAMS_STATAREA_STOCK')) %>%
  # filter(NESPP3 == species_nespp3) %>%  # removed  & AREA_NAME == species_stock
    dplyr::filter(SPECIES_ITIS == species_itis) %>%
    collect() %>% 
  group_by(AREA_NAME) %>% 
  distinct(STAT_AREA) %>%
  mutate(AREA = as.character(STAT_AREA)
         , SPECIES_STOCK = AREA_NAME) %>% 
  ungroup() #%>% 
  #dplyr::select(SPECIES_STOCK, AREA)

# Mortality table
CAMS_DISCARD_MORTALITY_STOCK = tbl(con_maps, sql("select * from MAPS.CAMS_DISCARD_MORTALITY_STOCK"))  %>%
  collect() %>%
  mutate(SPECIES_STOCK = AREA_NAME
         , GEARCODE = CAMS_GEAR_GROUP) %>%
  select(-AREA_NAME) %>%
   mutate(CAMS_GEAR_GROUP = as.character(CAMS_GEAR_GROUP)) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(ITIS_TSN == species_itis_srce)
 # dplyr::select(-NESPP3, -SPECIES_ITIS) %>% 
 # dplyr::rename(DISC_MORT_RATIO = Discard_Mortality_Ratio)

# Observer codes to be removed
OBS_REMOVE = tbl(con_maps, sql("select * from MAPS.CAMS_OBSERVER_CODES"))  %>%
    collect() %>% 
    filter(SPECIES_ITIS == species_itis) %>% 
    distinct(OBS_CODES)

#--------------------------------------------------------------------------------#
# make tables
ddat_focal <- alldat %>% 
  filter(YEAR == FY) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>% 
    dplyr::select(-GEARCODE.y) %>% 
        dplyr::rename(COMMON_NAME= 'COMMON_NAME.x',SPECIES_ITIS = 'SPECIES_ITIS', NESPP3 = 'NESPP3.x',
                  GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')


ddat_prev <- alldat %>% 
  filter(YEAR == FY-1) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>% 
    dplyr::select( -GEARCODE.y) %>% 
    dplyr::rename(COMMON_NAME= 'COMMON_NAME.x',SPECIES_ITIS = 'SPECIES_ITIS', NESPP3 = 'NESPP3.x',
                  GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')



# need to slice the first record for each observed trip.. these trips are multi rowed while unobs trips are single row.. 
ddat_focal_cy = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
    mutate(SPECIES_EVAL_DISCARD = case_when(SPECIES_ITIS == species_itis ~ DISCARD
                                                                                    )) %>% 
    mutate(SPECIES_EVAL_DISCARD = coalesce(SPECIES_EVAL_DISCARD, 0)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
    arrange(desc(SPECIES_EVAL_DISCARD)) %>% 
    slice(1) %>% 
  ungroup()

# and join to the unobserved trips

ddat_focal_cy = ddat_focal_cy %>% 
  union_all(ddat_focal %>% 
              filter(is.na(LINK1)))  
            #    group_by(VTRSERNO, CAMSID) %>% 
            #    slice(1) %>% 
            #    ungroup()
            # )


# if using the combined catch/obs table, which seems necessary for groundfish.. need to roll your own table to use with run_discard function
# DO NOT NEED TO FILTER SPECIES HERE. NEED TO RETAIN ALL TRIPS. THE MAKE_BDAT_FOCAL.R FUNCTION TAKES CARE OF THIS. 

bdat_cy = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
    filter(FISHDISP != '090') %>%
    filter(LINK3_OBS == 1) %>%
    filter(substr(LINK1, 1,3) %!in% OBS_REMOVE$OBS_CODES) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)


# set up trips table for previous year
ddat_prev_cy = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
    mutate(SPECIES_EVAL_DISCARD = case_when(SPECIES_ITIS == species_itis ~ DISCARD
                                                                                    )) %>% 
    mutate(SPECIES_EVAL_DISCARD = coalesce(SPECIES_EVAL_DISCARD, 0)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
    arrange(desc(SPECIES_EVAL_DISCARD)) %>% 
    slice(1) %>% 
  ungroup()

ddat_prev_cy = ddat_prev_cy %>% 
  union_all(ddat_prev %>% 
                         filter(is.na(LINK1))) #%>% 
               # group_by(VTRSERNO,CAMSID) %>% 
               # slice(1) %>% 
               # ungroup()



# previous year observer data needed.. 
bdat_prev_cy = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
    filter(FISHDISP != '090') %>%
    filter(LINK3_OBS == 1) %>%
    filter(substr(LINK1, 1,3) %!in% OBS_REMOVE$OBS_CODES) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)

# Run the discaRd functions on previous year
d_prev = run_discard(bdat = bdat_prev_cy
                                             , ddat = ddat_prev_cy
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             # , species_nespp3 = species_nespp3
                                           , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1:length(stratvars))
                                             )

# Run the discaRd functions on current year
d_focal = run_discard(bdat = bdat_cy
                                             , ddat = ddat_focal_cy
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             # , species_nespp3 = species_nespp3  #'081' #cod...
                                             , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1:length(stratvars))  # this makes sure this isn't used.. 
                                             )

# summarize each result for convenience
dest_strata_p = d_prev$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f = d_focal$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df = dest_strata_f %>% 
  left_join(., dest_strata_p, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips_f = n.x
             , n_obs_trips_p = n.y
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
  ) %>% 
    mutate(n_obs_trips_p = coalesce(n_obs_trips_p, 0)) %>% 
  mutate(trans_rate = get.trans.rate(l_observed_trips = n_obs_trips_f
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips_f
         , n_obs_trips_p
         , in_season_rate 
         , previous_season_rate 
         , trans_rate
         , CV_f = CV.x
         )


trans_rate_df = trans_rate_df %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df$final_rate = coalesce(trans_rate_df$final_rate, trans_rate_df$in_season_rate)


 trans_rate_df_full = trans_rate_df

 full_strata_table = trans_rate_df_full %>% 
   right_join(., y = d_focal$res, by = 'STRATA') %>% 
   as_tibble() %>% 
        mutate(SPECIES_ITIS_EVAL = species_itis
                 , COMNAME_EVAL = species$ITIS_NAME[i]
                 , FISHING_YEAR = FY
                 , FY_TYPE = FY_TYPE) %>% 
       dplyr::rename(FULL_STRATA = STRATA) 

#
# Target/non-target and gear stratification
#
# print(paste0("Getting rates across sectors for ", species_itis, " ", FY)) 

stratvars_assumed = c("HERR_FLAG"
                                            , "CAMS_GEAR_GROUP") #AWA
                                            #, "MESHGROUP")


### All tables in previous run can be re-used with diff stratification

# Run the discaRd functions on previous year
d_prev_pass2 = run_discard(bdat = bdat_prev_cy
                                             , ddat = ddat_prev_cy
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             # , species_nespp3 = species_nespp3
                                           , species_itis = species_itis
                                             , stratvars = stratvars_assumed
                                             # , aidx = c(1:length(stratvars_assumed))  # this makes sure this isn't used.. 
                                            , aidx = c(1)  # this creates an unstratified broad stock rate
                                             )


# Run the discaRd functions on current year
d_focal_pass2 = run_discard(bdat = bdat_cy
                                             , ddat = ddat_focal_cy
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             # , species_nespp3 = species_nespp3  #'081' #cod...
                                             , species_itis = species_itis
                                             , stratvars = stratvars_assumed
                                             # , aidx = c(1:length(stratvars_assumed))  # this makes sure this isn't used.. 
                                            , aidx = c(1)  # this creates an unstratified broad stock rate
                                             )

# summarize each result for convenience
dest_strata_p_pass2 = d_prev_pass2$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f_pass2 = d_focal_pass2$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df_pass2 = dest_strata_f_pass2 %>% 
  left_join(., dest_strata_p_pass2, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips_f = n.x
             , n_obs_trips_p = n.y
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
  ) %>% 
    mutate(n_obs_trips_p = coalesce(n_obs_trips_p, 0)) %>% 
  mutate(trans_rate = get.trans.rate(l_observed_trips = n_obs_trips_f
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips_f
         , n_obs_trips_p
         , in_season_rate 
         , previous_season_rate 
         , trans_rate
         , CV_f = CV.x
         )


trans_rate_df_pass2 = trans_rate_df_pass2 %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df_pass2$final_rate = coalesce(trans_rate_df_pass2$final_rate, trans_rate_df_pass2$in_season_rate)


 # get a table of broad stock rates using discaRd functions. Previously we used sector rollupresults (ARATE in pass2)
 # herring uses this as target vs non target level rate 

bdat_2yrs = bind_rows(bdat_prev_cy, bdat_cy)
ddat_cy_2yr = bind_rows(ddat_prev_cy, ddat_focal_cy)
ddat_2yr = bind_rows(ddat_prev, ddat_focal)

#mnk = run_discard( bdat = bdat_2yrs
#           , ddat_focal = ddat_cy_2yr
#           , c_o_tab = ddat_2yr
#           , species_itis = species_itis
#           , stratvars = stratvars[1]  #"SPECIES_STOCK"   "CAMS_GEAR_GROUP" #AWA just target vs non target
#           )

# rate table
#mnk$allest$C

# previous year broad stock rate
mnk_prev = run_discard(bdat = bdat_prev_cy
                                             , ddat = ddat_prev_cy
                                             , c_o_tab = ddat_prev
                                           , species_itis = species_itis
                                             , stratvars = stratvars[1]
                                             )


# Run the discaRd functions on current year broad stock
mnk_current = run_discard(bdat = bdat_cy
                                             , ddat = ddat_focal_cy
                                             , c_o_tab = ddat_focal
                                             , species_itis = species_itis
                                             , stratvars = stratvars[1]
                                             )

#SPECIES_STOCK <-sub("_.*", "", mnk$allest$C$STRATA)  
HERR_FLAG <- mnk_prev$allest$C$STRATA

#CAMS_GEAR_GROUP <- sub(".*?_", "", mnk$allest$C$STRATA) 

BROAD_STOCK_RATE <-  mnk_prev$allest$C$RE_mean
BROAD_STOCK_RATE_CUR <-  mnk_current$allest$C$RE_mean


CV_b <- round(mnk_prev$allest$C$RE_rse, 2)
CV_b_cur <- round(mnk_current$allest$C$RE_rse, 2)


BROAD_STOCK_RATE_TABLE <- as.data.frame(cbind(HERR_FLAG, BROAD_STOCK_RATE, CV_b))
BROAD_STOCK_RATE_TABLE_CUR <- as.data.frame(cbind(HERR_FLAG, BROAD_STOCK_RATE_CUR, CV_b))


BROAD_STOCK_RATE_TABLE$BROAD_STOCK_RATE <- as.numeric(BROAD_STOCK_RATE_TABLE$BROAD_STOCK_RATE)
BROAD_STOCK_RATE_TABLE$CV_b <- as.numeric(BROAD_STOCK_RATE_TABLE$CV_b)
BROAD_STOCK_RATE_TABLE_CUR$BROAD_STOCK_RATE_CUR <- as.numeric(BROAD_STOCK_RATE_TABLE_CUR$BROAD_STOCK_RATE_CUR)
BROAD_STOCK_RATE_TABLE_CUR$CV_b <- as.numeric(BROAD_STOCK_RATE_TABLE_CUR$CV_b)


names(trans_rate_df_pass2) = paste0(names(trans_rate_df_pass2), '_a')

#
# join full and assumed strata tables
#
# print(paste0("Constructing output table for ", species_itis, " ", FY)) 

joined_table = assign_strata(full_strata_table, stratvars_assumed) %>% 
    dplyr::select(-STRATA_ASSUMED) %>%  # not using this anymore here..
    dplyr::rename(STRATA_ASSUMED = STRATA) %>% 
    left_join(., y = trans_rate_df_pass2, by = c('STRATA_ASSUMED' = 'STRATA_a')) %>% 
    left_join(., y = BROAD_STOCK_RATE_TABLE, by = c('HERR_FLAG')) %>% 
    mutate(COAL_RATE = case_when(n_obs_trips_f >= 5 ~ final_rate  # this is an in season rate (target,gear, HMA)
                                                             , n_obs_trips_f < 5 & 
                                                                n_obs_trips_p >=5 ~ final_rate  # in season transition (target, gear, HMA)
                                                             , n_obs_trips_f < 5 & 
                                                                n_obs_trips_p < 5 & n_obs_trips_p_a > 5 ~ trans_rate_a  #in season gear, HMA transition
                                   )
    ) %>% 
    mutate(COAL_RATE = coalesce(COAL_RATE, BROAD_STOCK_RATE)) %>%
    mutate(SPECIES_ITIS_EVAL = species_itis
                 , COMNAME_EVAL = species$COMMON_NAME[i]
                 , FISHING_YEAR = FY
                 , FY_TYPE = FY_TYPE) 

#
# add discard source
#


#AWA check this
# >5 trips in season gets in season rate
# < 5 inseason but >=5 past year gets transition
# < 5 and < 5 in season, but >= 5 sector rolled up rate (in season) gets get sector rolled up rate
# <5, <5,  and <5 gets broad stock rate

joined_table = joined_table %>% 
    mutate(DISCARD_SOURCE = case_when(!is.na(LINK1) & LINK3_OBS == 1 ~ 'O'  # observed with at least one obs haul
                                                                        , !is.na(LINK1) & LINK3_OBS == 0 ~ 'I'  # observed but no obs hauls..  
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f >= 5 ~ 'I'
                                                                        # , is.na(LINK1) & COAL_RATE == previous_season_rate ~ 'P'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 & 
                                                                            n_obs_trips_p >=5 ~ 'T'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 &
                                                                            n_obs_trips_p < 5 &
                                                                            n_obs_trips_f_a >= 5 ~ 'A' 
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 &
                                                                            n_obs_trips_p < 5 &
                                                                            n_obs_trips_f_a <= 5 &
                                                                          n_obs_trips_p_a >= 5 ~ 'G'
                                                                        , is.na(LINK1) & 
                                                                            n_obs_trips_f < 5 & 
                                                                            n_obs_trips_p < 5 & 
                                                                            n_obs_trips_f_a < 5 & 
                                                                            n_obs_trips_p_a < 5 ~ 'B'))
                                                                #       , is.na(LINK1) & is.na(final_rate) ~ 'NA'
                                                                #       , is.na(LINK1) & is.nan(final_rate) ~ 'NA'))

#
# make sure CV type matches DISCARD SOURCE}
#

# obs trips get 0, broad stock rate is NA



joined_table = joined_table %>% 
    mutate(CV = case_when(DISCARD_SOURCE == 'O' ~ 0
                                                , DISCARD_SOURCE == 'I' ~ CV_f
                                                , DISCARD_SOURCE == 'T' ~ CV_f
                                                , DISCARD_SOURCE == 'A' ~ CV_f_a
                                                , DISCARD_SOURCE == 'G' ~ CV_b
                                            #   , DISCARD_SOURCE == 'NA' ~ 'NA'
                                                )  # , DISCARD_SOURCE == 'B' ~ NA
                 )

# Make note of the stratification variables used according to discard source

stratvars_gear = c(#"SPECIES_STOCK", #AWA 
                    "HERR_FLAG")

strata_f = paste(stratvars, collapse = ';')
strata_a = paste(stratvars_assumed, collapse = ';')
strata_b = paste(stratvars_gear, collapse = ';')

joined_table = joined_table %>% 
    mutate(STRATA_USED = case_when(DISCARD_SOURCE == 'O' & LINK3_OBS == 1 ~ ''
                                                , DISCARD_SOURCE == 'O' & LINK3_OBS == 0 ~ strata_f
                                                , DISCARD_SOURCE == 'I' ~ strata_f
                                                , DISCARD_SOURCE == 'T' ~ strata_f
                                                , DISCARD_SOURCE == 'A' ~ strata_a
                                                , DISCARD_SOURCE == 'G' ~ strata_b
                                            #   , DISCARD_SOURCE == 'NA' ~ 'NA'
                                                ) 
                 )


#
# get the discard for each trip using COAL_RATE}
#

# discard mort ratio tht are NA for odd gear types (e.g. cams gear 0) get a 1 mort ratio. 
# the KALLs should be small.. 

joined_table = joined_table %>% 
    mutate(DISC_MORT_RATIO = coalesce(DISC_MORT_RATIO, 1)) %>%
    mutate(DISCARD = case_when(!is.na(LINK1) & LINK3_OBS == 1 ~ DISC_MORT_RATIO*OBS_DISCARD # observed with at least one obs haul
                                                         , !is.na(LINK1) & LINK3_OBS == 0 ~ DISC_MORT_RATIO*COAL_RATE*LIVE_POUNDS # observed but no obs hauls..
                                                         , is.na(LINK1) ~ DISC_MORT_RATIO*COAL_RATE*LIVE_POUNDS)
                 )

joined_table %>% 
    group_by(SPECIES_STOCK, DISCARD_SOURCE) %>% 
    dplyr::summarise(DISCARD_EST = sum(DISCARD)) %>% 
    pivot_wider(names_from = 'SPECIES_STOCK', values_from = 'DISCARD_EST') %>% 
    dplyr::select(-1) %>% 
    colSums(na.rm = T) %>% 
    round()

# saveRDS(joined_table, file = paste0('/home/bgaluardi/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/OUTPUT/discard_est_', species_itis, '_gftrips_only.RDS')
#               
# fst::write_fst(x = joined_table, path = paste0('/home/bgaluardi/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/OUTPUT/discard_est_', species_itis, '_gftrips_only', FY,'.fst'))
# 
#  t2 = Sys.time()
#   
# print(paste('RUNTIME: ', round(difftime(t2, t1, units = "mins"),2), ' MINUTES',  sep = ''))
#  
# }
# 
#          

 joined_table %>% 
  dplyr::group_by(FED_OR_STATE) %>%
  dplyr::summarise(Discard_total = sum(DISCARD, na.rm=TRUE), 
            Kall_total = sum(SUBTRIP_KALL, na.rm=TRUE))


 #add subtrip kall after CV

    cy_discard_example <- joined_table %>% 
        mutate(GF_STOCK_DEF = paste0(COMMON_NAME, '-', SPECIES_STOCK)) %>% 
        dplyr::select(-SPECIES_ITIS) %>%
        # dplyr::select(-COMMON_NAME, -SPECIES_ITIS) %>%
    dplyr::rename('STRATA_FULL' = 'FULL_STRATA'
                                , 'CAMS_DISCARD_RATE' = 'COAL_RATE'
                                # , 'COMMON_NAME' = 'COMNAME_EVAL'
                                , 'SPECIES_ITIS' = 'SPECIES_ITIS_EVAL'
                                , 'ACTIVITY_CODE' = 'ACTIVITY_CODE_1'
                                , 'N_OBS_TRIPS_F' = 'n_obs_trips_f'
                                ) %>% 
    mutate(DATE_RUN = as.character(Sys.Date())
                 , FY = as.integer(FY)) %>%
    dplyr::select(
        DATE_RUN,
        FY,
        YEAR,
        MONTH,
        SPECIES_ITIS,
        COMMON_NAME,
        FY_TYPE,
        ACTIVITY_CODE,
        VTRSERNO,
        CAMSID,
        FED_OR_STATE,
        GF,
        AREA,
        LINK1,
        N_OBS_TRIPS_F,
        STRATA_USED,
        STRATA_FULL,
        STRATA_ASSUMED,
        DISCARD_SOURCE,
        OBS_DISCARD,
        OBS_KALL,
        SUBTRIP_KALL,
        BROAD_STOCK_RATE,
        CAMS_DISCARD_RATE,
        DISC_MORT_RATIO,
        DISCARD,
        CV,
        SPECIES_STOCK,
        CAMS_GEAR_GROUP,
        MESHGROUP,
        SECTID,
        EM,
        REDFISH_EXEMPTION,
        SNE_SMALLMESH_EXEMPTION,
        XLRG_GILLNET_EXEMPTION,
        TRIPCATEGORY,
        ACCESSAREA,
        SCALLOP_AREA
        ,HERR_FLAG
        ,HERR_AREA
      # eval(strata_unique)
    )

 cy_discard_example <- cy_discard_example %>% dplyr::mutate(DISCARD_SOURCE = case_when(is.na(DISCARD) ~ 'N',TRUE ~ DISCARD_SOURCE)) %>% dplyr::mutate(STRATA_USED = case_when(is.na(DISCARD) ~ 'NA',TRUE ~ STRATA_USED))



 cy_discard_example$CV[is.nan(cy_discard_example$CV)]<-NA

 cy_discard_example$CV[is.infinite(cy_discard_example$CV)] <- NA    

 cy_discard_example$CAMS_DISCARD_RATE[is.nan(cy_discard_example$CAMS_DISCARD_RATE)]<-NA

 cy_discard_example$CAMS_DISCARD_RATE[is.infinite(cy_discard_example$CAMS_DISCARD_RATE)] <- NA 

 cy_discard_example$BROAD_STOCK_RATE[is.nan(cy_discard_example$BROAD_STOCK_RATE)]<-NA

 cy_discard_example$BROAD_STOCK_RATE[is.infinite(cy_discard_example$BROAD_STOCK_RATE)] <- NA 

 cy_discard_example$DISCARD[is.nan(cy_discard_example$DISCARD)]<-NA

 cy_discard_example$DISCARD[is.infinite(cy_discard_example$DISCARD)] <- NA     

 species$COMMON_NAME[i]
#db_drop_table(con = con_maps, table = 'MAPS.CAMS_DISCARD_EXAMPLE_CY_BLACKSEABASS_19', force = F)

dbWriteTable(con_maps, name = 'CAMS_DISCARD_ATLANTICHERRING_CY_19', value = cy_discard_example, overwrite = T)

#cy_discard_example %>% filter(FED_OR_STATE == 'STATE') %>% group_by(DISCARD_SOURCE, CAMS_GEAR_GROUP, SPECIES_STOCK) %>% dplyr::summarise(D = sum(DISCARD), K = sum(SUBTRIP_KALL), Drate = mean(CAMS_DISCARD_RATE), Dmort = mean(DISC_MORT_RATIO)) %>% arrange(desc(D))%>% top_n(10) %>% dplyr::mutate(across(where(is.numeric), round, 4))%>%
#       DT::datatable(caption = 'Discard rates by Strata for Atlantic Herring')


# save trip by trip info to RDS 
 #saveRDS(final_table, file = paste0('discard_est_', species_itis, '.RDS'))
 # saveRDS(final_table, file = paste0('discard_est_', species_itis, '.RDS'))
#---------------------------------------------------------------------#
# End loop
#Need to modify this loop so it produces the oracle tables on each loop.

# t2 = Sys.time()
# 
# print(paste(species_itis, ' RAN IN ', t2-t1, ' SECONDS',  sep = ''))

# save summary table of observed trips #AWA needs work here
obs_trips <- bdat_2yrs %>%
  filter(HERR_FLAG == 'HERR' & LINK3_OBS == 1) %>%
  select(LINK1, PERMIT, YEAR, GEARCODE, OBSRFLAG, AREA, NESPP3.y, FISHDISP, LIVE_POUNDS, LAT_DD, LON_DD, HERR_AREA)

# join pass 1 (target, area, gear) with pass 2 (target, gear) and broad stock rate to create bm_herr_discard_rate table for use in QM

# change syntax
full_strata_rates <- trans_rate_df_full %>%
  mutate(TARGET = case_when(STRATA %like% 'NON' ~ 'NON_HERR',
                            TRUE ~ 'HERR')) %>%
  mutate(GEAR = case_when(STRATA %like% '050' ~ 'BT'
                          ,STRATA %like% '120' ~ 'PS'
                          ,STRATA %like% '170' ~ 'MW'
                          ,TRUE ~ 'OTHER')) %>%
  unite("TARG_GEAR_CAT", TARGET:GEAR, remove = FALSE)

#change syntax to match
gear_rates <- trans_rate_df_pass2 %>%
  mutate(TARGET = case_when(STRATA_a %like% 'NON' ~ 'NON_HERR',
                            TRUE ~ 'HERR')) %>%
  mutate(GEAR = case_when(STRATA_a %like% '050' ~ 'BT'
                          ,STRATA_a %like% '120' ~ 'PS'
                          ,STRATA_a %like% '170' ~ 'MW'
                          ,TRUE ~ 'OTHER')) %>%
  unite("TARG_GEAR_CAT", TARGET:GEAR, remove = FALSE)

# match tables on TARGET and GEAR CAT

rates <- full_strata_rates %>% 
  left_join(gear_rates, by = "TARG_GEAR_CAT") %>%
  left_join(BROAD_STOCK_RATE_TABLE, by = c("TARGET.x" = "HERR_FLAG")) %>%
  left_join(BROAD_STOCK_RATE_TABLE_CUR, by = c("TARGET.x" = "HERR_FLAG"))


# same logic as above to get discard rates
disc_rates <- rates %>%
  mutate(CURRENT_RATE = case_when(n_obs_trips_f >= 5 ~ in_season_rate
                        ,n_obs_trips_f < 5 & n_obs_trips_p >=5 ~ trans_rate
                        ,n_obs_trips_f < 5 & n_obs_trips_p < 5 & n_obs_trips_f_a >= 5 ~ in_season_rate_a
                        ,n_obs_trips_f < 5 & n_obs_trips_p < 5 & n_obs_trips_f_a < 5 & n_obs_trips_p_a >= 5 ~ trans_rate_a
                                            ,n_obs_trips_f < 5 & n_obs_trips_p < 5 & n_obs_trips_f_a < 5 & n_obs_trips_p_a < 5 ~ BROAD_STOCK_RATE))



# get herring discard rates and format
herr_disc_rts <- disc_rates %>%
  filter(TARGET.x == 'HERR') %>% 
  separate(STRATA, c('TARG', 'AREA_GEAR'), sep = 'HERR_') %>%
  separate(AREA_GEAR, c('AREA', 'GEAR_CODE'), sep = '_') %>%
  unite(GEAR_CAT_AREA, c("GEAR.x", "AREA")) %>%
  mutate(INSEASON_GLOBAL_TRIPS = sum(n_obs_trips_f)) %>%
  mutate(ASSUMED_GLOBAL_TRIPS = sum(n_obs_trips_p)) %>%
  select(CURRENT_RATE, GEAR_CAT_AREA, GEAR.y, n_obs_trips_f, n_obs_trips_f_a,INSEASON_GLOBAL_TRIPS, in_season_rate, in_season_rate_a, BROAD_STOCK_RATE_CUR, n_obs_trips_p, n_obs_trips_p_a, ASSUMED_GLOBAL_TRIPS, previous_season_rate, previous_season_rate_a, BROAD_STOCK_RATE) %>%
  dplyr::rename(GEAR_CAT = GEAR.y, INSEASON_GEAR_CAT_AREA_TRIPS = n_obs_trips_f, INSEASON_GEAR_CAT_TRIPS = n_obs_trips_f_a, INSEASON_GEAR_CAT_AREA_RATE = in_season_rate, INSEASON_GEAR_CAT_RATE = in_season_rate_a, INSEASON_GLOBAL_RATE = BROAD_STOCK_RATE_CUR, ASSUMED_GEAR_CAT_AREA_TRIPS = n_obs_trips_p, ASSUMED_GEAR_CAT_TRIPS = n_obs_trips_p_a, ASSUMED_GEAR_CAT_AREA_RATE = previous_season_rate, ASSUMED_GEAR_CAT_RATE = previous_season_rate_a, ASSUMED_GLOBAL_RATE = BROAD_STOCK_RATE)

# fill out other strata that had no coverage with broad stock rate
hma <- c('1A', '1B', '2', '3')
gears <- c('BT', 'MW', 'PS', 'other')
hma_gears <- crossing(hma, gears) %>%
  unite(all_hma_gears, c('gears', 'hma'))

bm_herr_discard_rate <- hma_gears %>%
  left_join(herr_disc_rts, by = c('all_hma_gears' = 'GEAR_CAT_AREA')) %>%
  dplyr::rename(GEAR_CAT_AREA = all_hma_gears) %>%
  mutate(CURRENT_RATE = case_when(is.na(CURRENT_RATE) ~ BROAD_STOCK_RATE_TABLE[BROAD_STOCK_RATE_TABLE$HERR_FLAG == 'HERR',]$BROAD_STOCK_RATE
                                  ,TRUE ~ CURRENT_RATE))

#dbWriteTable(con_maps, name = 'BM_HERR_DISCARD_RATE', value = bm_herr_discard_rate, overwrite = T)

### AWA test code
other_1A <- cy_discard_example %>%
  filter(STRATA_FULL == 'HERR_1A_other')

PUR_1A_minusone <- ddat_prev_cy %>%
  filter(CAMS_GEAR_GROUP == '120')#& LINK1 == '000201806P53021')

MW_2 <- cy_discard_example %>%
  filter(STRATA_FULL == 'HERR_2_170' & DISCARD_SOURCE == 'O')

disc <- cy_discard_example %>%
  filter( HERR_FLAG == 'HERR') %>%
  summarise(DISC = sum(DISCARD))

} 

rm(list = ls())

.rs.restartR()



noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.