knitr::opts_chunk$set(echo=FALSE, warning = FALSE, 
                                            message = FALSE, cache = FALSE,
                                            progress = TRUE, verbose = FALSE, comment = F
                                            , error = FALSE, dev = 'png', dpi = 200)
t1 = Sys.time()

# setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")

library(odbc)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
# library(config)
library(stringr)
library(discaRd)
options(scipen = 999)

# local run
# dw_apsd <- config::get(value = "apsd", file = "K:/R_DEV/config.yml")

# if on server..
dw_apsd <- config::get(value = "maps", file = "~/config.yml")

bcon <- dbConnect(odbc::odbc(), 
                                    DSN = dw_apsd$dsn, 
                                    UID = dw_apsd$uid, 
                                    PWD = dw_apsd$pwd)

'%!in%' <- function(x,y)!('%in%'(x,y))

setwd('../')
source('cams_discard_functions.R')
# species_nespp3 = '012'  # monkfish
# species_nespp3 = '335'  # black seabass

# define species stock if needed
# species_stock = 'GOM'  # GOM cod

# if using a unit stock, make this NULL!!
species_stock = NA  # all unit stocks

FY <- 2019
FY_TYPE = 'MAY START'

#--------------------------------------------------------------------------#
# group of species
species = tbl(bcon, sql("
select distinct(b.species_itis)
    , COMNAME
    , a.nespp3
from fso.v_obSpeciesStockArea a
left join (select *  from APSD.CAMS_GEARCODE_STRATA) b on a.nespp3 = b.nespp3
where stock_id not like 'OTHER'
and b.species_itis is not null
")
) %>% 
    collect()


# species = tbl(bcon, sql("
#     select distinct(nespp3) as nespp3
#     from fso.v_obSpeciesStockArea 
#     where stock_id not like 'OTHER'
#     ")) %>% 
#   collect() %>% 
#   filter(NESPP3 != '269') %>% # cod is also 081
#   filter(NESPP3 != '082') %>% # cod is also 081
#   filter(NESPP3 != '119') %>% # winter flounder is also 120
#   filter(NESPP3 != '148') %>%  # haddock is also 147
#   filter(NESPP3 != '153') %>%  # white hake is also 154..
#   arrange(NESPP3)


# species = as.character(c(335, 212, 801, 802, '051'))

final_discard_table = data.frame(YEAR = FY, SPECIES_ITIS = species$SPECIES_ITIS, COMNAME = species$COMNAME, DISCARD = NA)

#--------------------------------------------------------------------------#
# get catch and matched obs data together

c_o_dat2 <- tbl(bcon, sql(
" with obs_cams as (
   select year
    , month
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end as halfofyear
    -- , carea
  , AREA
    , vtrserno
    , link1
    , docid
    , CAMSID
    , nespp3
  , itis_tsn as SPECIES_ITIS
  , itis_group1
    , SECGEAR_MAPPED as GEARCODE
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    , NVL(sum(discard),0) as discard
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    , NVL(sum(discard)/round(max(obs_kall)), 0) as dk
    from MAPS.CAMS_OBS_CATCH
    group by year
  -- , carea
  , AREA
    , vtrserno
    , link1
    , docid
    , nespp3
  , itis_tsn
  , itis_group1
    , SECGEAR_MAPPED
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
  , CAMSID
  , month
    , halfofyear
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    order by vtrserno asc
    ) 

  select case when MONTH in (1,2,3,4) then YEAR-1 else YEAR end as GF_YEAR
  , case when MONTH in (1,2,3) then YEAR-1 else YEAR end as SCAL_YEAR
  , o.*
  , c.match_nespp3
  , coalesce(c.match_nespp3, o.nespp3) as nespp3_final
  from obs_cams o
  left join apsd.s_nespp3_match_conv c on o.nespp3 = c.nespp3        

"
  )
) %>%
    collect() %>% 
    filter(substr(ACTIVITY_CODE_1, 1,3) == 'NMS')
# Stratification variables

stratvars = c('SPECIES_STOCK'
              , 'CAMS_GEAR_GROUP'
              , 'MESHGROUP'
              # , 'CAREA'
              , 'SECTID'
              , "PERMIT_EFP_1"
              , "PERMIT_EFP_2"
              , "PERMIT_EFP_3"
              , "PERMIT_EFP_4"
              , "REDFISH_EXEMPTION"
              , "SNE_SMALLMESH_EXEMPTION"
              , "XLRG_GILLNET_EXEMPTION"
              )


# Begin loop

for(i in 1:length(species$SPECIES_ITIS)){

print(paste0('Running ', species$COMNAME[i]))   

# species_nespp3 = species$NESPP3[i]  
species_itis = species$SPECIES_ITIS[i] 
#--------------------------------------------------------------------------#
# Support table import by species

# GEAR TABLE
CAMS_GEAR_STRATA = tbl(bcon, sql('  select * from APSD.CAMS_GEARCODE_STRATA')) %>% 
    collect() %>% 
  dplyr::rename(GEARCODE = VTR_GEAR_CODE) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(SPECIES_ITIS == species_itis) %>%
  dplyr::select(-NESPP3, -SPECIES_ITIS)

# Stat areas table  
# unique stat areas for stock ID if needed
STOCK_AREAS = tbl(bcon, sql('select * from apsd.CAMS_STATAREA_STOCK')) %>%
  # filter(NESPP3 == species_nespp3) %>%  # removed  & AREA_NAME == species_stock
    filter(SPECIES_ITIS == species_itis) %>%
    collect() %>% 
  group_by(AREA_NAME, SPECIES_ITIS) %>% 
  distinct(STAT_AREA) %>%
  mutate(AREA = as.character(STAT_AREA)
         , SPECIES_STOCK = AREA_NAME) %>% 
  ungroup() 
# %>% 
#   dplyr::select(SPECIES_STOCK, AREA)

# Mortality table
CAMS_DISCARD_MORTALITY_STOCK = tbl(bcon, sql("select * from apsd.CAMS_DISCARD_MORTALITY_STOCK"))  %>%
  collect() %>%
  mutate(SPECIES_STOCK = AREA_NAME
         , GEARCODE = CAMS_GEAR_GROUP) %>%
  select(-AREA_NAME) %>%
  # mutate(CAREA = as.character(STAT_AREA)) %>% 
  # filter(NESPP3 == species_nespp3) %>% 
    filter(SPECIES_ITIS == species_itis) %>%
  dplyr::select(-NESPP3, -SPECIES_ITIS) %>% 
  dplyr::rename(DISC_MORT_RATIO = Discard_Mortality_Ratio)

#--------------------------------------------------------------------------------#
# make tables
ddat_focal <- c_o_dat2 %>% 
  filter(GF_YEAR == FY) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>% 
    dplyr::select(-SPECIES_ITIS.y, -GEARCODE.y) %>% 
    dplyr::rename(SPECIES_ITIS = 'SPECIES_ITIS.x', GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')


ddat_prev <- c_o_dat2 %>% 
  filter(GF_YEAR == FY-1) %>%   ## time element is here!!
  filter(AREA %in% STOCK_AREAS$AREA) %>% 
  mutate(LIVE_POUNDS = SUBTRIP_KALL
         ,SEADAYS = 0
             , NESPP3 = NESPP3_FINAL) %>% 
   left_join(., y = STOCK_AREAS, by = 'AREA') %>% 
   left_join(., y = CAMS_GEAR_STRATA, by = 'GEARCODE') %>% 
   left_join(., y = CAMS_DISCARD_MORTALITY_STOCK
            , by = c('SPECIES_STOCK', 'CAMS_GEAR_GROUP')
            ) %>%  
        dplyr::select(-SPECIES_ITIS.y, -GEARCODE.y) %>% 
    dplyr::rename(SPECIES_ITIS = 'SPECIES_ITIS.x', GEARCODE = 'GEARCODE.x') %>% 
  relocate('COMMON_NAME','SPECIES_ITIS','NESPP3','SPECIES_STOCK','CAMS_GEAR_GROUP','DISC_MORT_RATIO')



# need to slice the first record for each observed trip.. these trips are multi rowed while unobs trips are single row.. 
ddat_focal_gf = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
  group_by(LINK1, VTRSERNO) %>% 
  slice(1) %>% 
  ungroup()

# and join to the unobserved trips

ddat_focal_gf = ddat_focal_gf %>% 
  union_all(ddat_focal %>% 
              filter(is.na(LINK1)) %>% 
               group_by(VTRSERNO) %>% 
               slice(1) %>% 
               ungroup()
            )


# if using the combined catch/obs table, which seems necessary for groundfish.. need to roll your own table to use with run_discard function

bdat_gf = ddat_focal %>% 
  filter(!is.na(LINK1)) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)


# set up trips table for previous year
ddat_prev_gf = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
  group_by(LINK1) %>% 
  slice(1)

ddat_prev_gf = ddat_prev_gf %>% 
  union_all(ddat_prev %>% filter(is.na(LINK1)))


# previous year observer data needed.. 
bdat_prev_gf = ddat_prev %>% 
  filter(!is.na(LINK1)) %>% 
  mutate(DISCARD_PRORATE = DISCARD
         , OBS_AREA = AREA
         , OBS_HAUL_KALL_TRIP = OBS_KALL
         , PRORATE = 1)

# Run the discaRd functions on previous year
d_prev = run_discard(bdat = bdat_prev_gf
                                             , ddat = ddat_prev_gf
                                             , c_o_tab = ddat_prev
                                             # , year = 2018
                                             # , species_nespp3 = species_nespp3
                                           , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1,2, 3)
                                             )


# Run the discaRd functions on current year
d_focal = run_discard(bdat = bdat_gf
                                             , ddat = ddat_focal_gf
                                             , c_o_tab = ddat_focal
                                             # , year = 2019
                                             # , species_nespp3 = '081' # haddock...
                                             # , species_nespp3 = species_nespp3  #'081' #cod...
                                             , species_itis = species_itis
                                             , stratvars = stratvars
                                             , aidx = c(1,2,3)
                                             )

# summarize each result for convenience
dest_strata_p = d_prev$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata_f = d_focal$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# substitute transition rates where needed

trans_rate_df = dest_strata_f %>% 
  left_join(., dest_strata_p, by = 'STRATA') %>% 
  mutate(STRATA = STRATA
         , n_obs_trips = n.x
         , in_season_rate = drate.x
         , previous_season_rate = drate.y
         , trans_rate = get.trans.rate(l_observed_trips = n_obs_trips
                                         , l_assumed_rate = previous_season_rate
                                         , l_inseason_rate = in_season_rate
                                         )
         ) %>% 
  dplyr::select(STRATA
         , n_obs_trips 
         , in_season_rate 
         , previous_season_rate 
         , trans_rate)


trans_rate_df = trans_rate_df %>% 
  mutate(final_rate = case_when((in_season_rate != trans_rate & !is.na(trans_rate)) ~ trans_rate)) 

 trans_rate_df$final_rate = coalesce(trans_rate_df$final_rate, trans_rate_df$in_season_rate)

# compare subbed rate total to in season total. 
     # trans_rate_df %>% 
     #   left_join(dest_strata_f, by = 'STRATA') %>% 
     #   summarise(in_season_discard = sum(in_season_rate*KALL, na.rm = T)
     #             , final_rate_discard = sum(final_rate*KALL, na.rm = T)
     #             ) %>% 
     #   knitr::kable()

# park the final rate into the trips table
 final_table = trans_rate_df %>% 
   right_join(., y = d_focal$res, by = 'STRATA') %>% 
   as_tibble() %>% 
        mutate(SPECIES_ITIS_EVAL = species_itis
                 , COMNAME_EVAL = species$COMNAME[i]
                 , FISHING_YEAR = FY
                 , FY_TYPE = FY_TYPE) %>% 
    mutate(DISCARD_SOURCE = case_when(!is.na(LINK1) ~ 'O'
                                                                        , is.na(LINK1) & CRATE == DISC_RATE ~ 'E'
                                                                        , is.na(LINK1) & CRATE == previous_season_rate ~ 'P'
                                                                        , is.na(LINK1) & CRATE == trans_rate ~ 'T'
                                                                        , is.na(LINK1) & CRATE == ARATE ~ 'A'
                                                                        , is.na(LINK1) & is.na(ARATE) ~ 'A')  # this may be replaced with model estimate!
                 )


 # final_table %>%
 #  dplyr::select(DISCARD_SOURCE, ACTIVITY_CODE_1, VTRSERNO, ARATE, CRATE, DISC_RATE, STRATA_ASSUMED, STRATA, ARATE_IDX, LINK1, EST_DISCARD, DISCARD, OBS_DISCARD) %>% 
 #  View()
 #       # select(FISHING_YEAR, FY_TYPE, DISCARD_SOURCE, VTRSERNO, CAMSID, eval(stratvars),  STRATA, DISC_MORT_RATIO, CV, ACTIVITY_CODE_1, SPECIES_ITIS_EVAL, COMNAME_EVAL) %>% 
 #  relocate(FISHING_YEAR, DISCARD_SOURCE, STRATA, FY_TYPE, SPECIES_ITIS_EVAL, COMNAME_EVAL)
 # 


#    mutate(INSEASON_RATE_DISCARD = in_season_rate*SUBTRIP_KALL*DISC_MORT_RATIO
#              , TRANS_RATE_DISCARD =  trans_rate*SUBTRIP_KALL*DISC_MORT_RATIO
#              , FINAL_RATE_DISCARD = final_rate*SUBTRIP_KALL*DISC_MORT_RATIO
#           ) %>%
#   
#    select(VTRSERNO, CAMSID, eval(stratvars), INSEASON_RATE_DISCARD, TRANS_RATE_DISCARD, FINAL_RATE_DISCARD, STRATA, DISC_MORT_RATIO, CV, ACTIVITY_CODE_1) %>% 
# %>% 
#   relocate(FISHING_YEAR, FY_TYPE, SPECIES_ITIS_EVAL, COMNAME_EVAL)
#  # 
# save total results to summary table
 final_discard_table$DISCARD[i] = final_table %>% 
    filter(substr(ACTIVITY_CODE_1, 1, 3) =='NMS') %>% 
    dplyr::summarise(TOTAL = DISCARD*DISC_MORT_RATIO) %>% 
    dplyr::select(TOTAL) %>% 
    sum(., na.rm = T)

# save trip by trip info to RDS 
 saveRDS(final_table, file = paste0('discard_est_', species_itis, '_gftrips_only.RDS'))

#---------------------------------------------------------------------#
# End loop

t2 = Sys.time()

print(paste(species_itis, ' RAN IN ', t2-t1, ' SECONDS',  sep = ''))

} 
res_list = NULL

ii = 1

for(j in species$SPECIES_ITIS){

    res_list[[ii]] <- readRDS(paste0("discard_est_", j, "_gftrips_only.RDS"))
    ii = ii+1

}

allgf = do.call(rbind, res_list) %>% 
    filter(substr(ACTIVITY_CODE_1, 1, 3) =='NMS')

allgf %>% 
    group_by(SPECIES_STOCK, SPECIES_ITIS_EVAL) %>% 
    dplyr::summarise(D = round(sum(DISCARD*DISC_MORT_RATIO, na.rm = T))) %>% 
    # filter(SPECIES_ITIS %in% species$SPECIES_ITIS) %>% 
    pivot_wider(names_from = 'SPECIES_STOCK', values_from = 'D') %>% 
    mutate(SPECIES_ITIS = SPECIES_ITIS_EVAL) %>% 
    left_join(., species, by = 'SPECIES_ITIS') %>% 
    dplyr::select(-SPECIES_ITIS, -NESPP3) %>% 
    relocate('COMNAME','SPECIES_ITIS_EVAL') %>% 
    write.csv('groundfish_loop_results_gftrips_only_012622.csv', row.names = F)
    # write.csv('groundfish_loop_results_gftrips_only_012122.csv', row.names = F)
    # View()


# gfidx = grep('NMS*', d_focal$res$ACTIVITY_CODE_1)
# 
# res$GFIDX = substr(res$ACTIVITY_CODE_1, 1,3) == 'NMS'
# 
#   res %>% 
#   filter(GFIDX == T ) %>% 
#    group_by(SPECIES_STOCK) %>% 
#    dplyr::summarise(inseason_rate_d = sum(in_season_rate*SUBTRIP_KALL*DISC_MORT_RATIO, na.rm = T)
#              , trans_rate_d = sum(trans_rate*SUBTRIP_KALL*DISC_MORT_RATIO, na.rm = T)
#              , final_rate_d = sum(final_rate*SUBTRIP_KALL*DISC_MORT_RATIO, na.rm = T))
discard2019 = tbl(bcon, sql("
select round(sum(POKGMASS_DISCARD)) POKGMASS_DISCARD
,round(sum(CODGMSS_DISCARD)) CODGMSS_DISCARD
,round(sum(CODGBE_DISCARD)) CODGBE_DISCARD
,round(sum(CODGBW_DISCARD)) CODGBW_DISCARD
,round(sum(FLDSNEMA_DISCARD)) FLDSNEMA_DISCARD
,round(sum(FLWGB_DISCARD)) FLWGB_DISCARD
,round(sum(FLWGMSS_DISCARD)) FLWGMSS_DISCARD
,round(sum(PLAGMMA_DISCARD)) PLAGMMA_DISCARD
,round(sum(YELCCGM_DISCARD)) YELCCGM_DISCARD
,round(sum(HADGBW_DISCARD)) HADGBW_DISCARD
,round(sum(WITGMMA_DISCARD)) WITGMMA_DISCARD
-- ,round(sum(FLWGMSS_DISCARD)) FLWGMSS_DISCARD
,round(sum(HALGMMA_DISCARD)) HALGMMA_DISCARD
,round(sum(YELGB_DISCARD)) YELGB_DISCARD
,round(sum(FLGMGBSS_DISCARD)) FLGMGBSS_DISCARD
,round(sum(HKWGMMA_DISCARD)) HKWGMMA_DISCARD
,round(sum(REDGMGBSS_DISCARD)) REDGMGBSS_DISCARD
-- ,round(sum(FLWGB_DISCARD)) FLWGB_DISCARD
,round(sum(HADGM_DISCARD)) HADGM_DISCARD
,round(sum(OPTGMMA_DISCARD)) OPTGMMA_DISCARD
,round(sum(WOLGMMA_DISCARD)) WOLGMMA_DISCARD
,round(sum(FLWSNEMA_DISCARD)) FLWSNEMA_DISCARD
,round(sum(HADGBE_DISCARD)) HADGBE_DISCARD
-- ,round(sum(CODGBW_DISCARD)) CODGBW_DISCARD
,round(sum(YELSNE_DISCARD)) YELSNE_DISCARD

from apsd.dmis_all_years
where fishing_year = 2019
"))  %>% 
  collect() %>% 
  t() %>% 
  as.data.frame() %>% 
  mutate(stock = row.names(.))

names(discard2019)[1] = 'DMIS_DISCARD'

discard2019$STOCK_ID = unlist(lapply(strsplit(discard2019$stock, split = '_'), function(x) x[[1]]))

# get stock names and nespp3

stock_nespp3 = tbl(bcon, sql("
    select max(nespp3) as nespp3
    , stock_id
    , comname
    from fso.v_obSpeciesStockArea 
    where stock_id not like 'OTHER'
    group by stock_id, comname
")
) %>% 
    collect()

stock_discard_2019 = stock_nespp3 %>% 
  left_join(., discard2019, by = 'STOCK_ID') %>% 
  dplyr::select(-stock)

write.csv(stock_discard_2019, 'dmis2019_groundfish_discard.csv', row.names = F)
db_example = final_table %>%
    mutate(DATE_RUN = as.character(lubridate::today())
                 , FY = as.integer(FY)) %>%
    dplyr::select(
    DATE_RUN,
    FY,
    SPECIES_ITIS_EVAL,
    FY_TYPE,
    DISCARD_SOURCE,
    ACTIVITY_CODE_1,
    VTRSERNO,
    ARATE,
    CRATE,
    DISC_RATE,
    STRATA,
    STRATA_ASSUMED,
    LINK1,
    OBS_DISCARD,
    EST_DISCARD,
    DISCARD,
    n_obs_trips,
    CV,
    eval(stratvars)
    )

dw_apsd <- config::get(value = "apsd", file = "~/config.yml")

acon <- dbConnect(odbc::odbc(), 
                                    DSN = dw_apsd$dsn, 
                                    UID = dw_apsd$uid, 
                                    PWD = dw_apsd$pwd)
db_drop_table(acon, 'CAMS_DISCARD_EXMAPLE_GF19')

dbWriteTable(acon, name = 'CAMS_DISCARD_EXAMPLE_GF19', value = db_example, overwrite = T)


noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.