Discard Rate Current Method Summary

1) Rates determine by observer reported values (gear, area, etc) 2) Incomplete observed trips have missing 'hauls' prorated by observed information from that trip 3) Trips with observer get reported/calculated observed discards of that specific trip 4) Unobserved trips get discards from the rate calculated from 1) 5) QM is only interested in the summary total of discards for each trip, not subtrips. Often the interested number is a summary of trips, ie. the herring total of bycatch for an area/season or a sector's season's total of GB Cod.

6) Other others are driven by regs. Here I would place transition rates and EM methods.

I recommend separating out issues such as mismatching gear and area for the future QA data system.

And of course, any change management must be worked through proper and transparent interaction with all end users and clients including council and SFD.

knitr::opts_chunk$set(echo=FALSE, warning = FALSE, 
                                            message = FALSE, cache = FALSE,
                                            progress = TRUE, verbose = FALSE, comment = F
                                            , error = FALSE, dev = 'png', dpi = 200)
setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")

library(odbc)
# library(RODBC)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
library(rgdal)
library(raster)
library(config)
library(stringr)
library(discaRd)

dw_apsd <- config::get(value = "apsd", file = "K:/R_DEV/config.yml")

bcon <- dbConnect(odbc::odbc(), 
                                    DSN = dw_apsd$dsn, 
                                    UID = dw_apsd$uid, 
                                    PWD = dw_apsd$pwd)
'%!in%' <- function(x,y)!('%in%'(x,y))

# get catch and matched obs data together

# rolled up by trip..

# apsd.bg_obs_cams_tmp1 has only trips with multiple subtrips
# apsd.bg_obs_cams_tmp2 has all trips.. 

# in this table, DISACRD is already PRORATED

c_o_dat2 <- tbl(bcon, sql('
with obs_cams as (
   select year
    , month
    , region
    , halfofyear
    , area
    , vtrserno
    , link1
    , docid
    , dmis_trip_id
    , nespp3
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTOR_ID
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
    , NVL(sum(discard),0) as discard
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    , NVL(sum(discard)/round(max(obs_kall)), 0) as dk
    from apsd.bg_obs_cams_tmp3
    where nespp3 is not null
    group by year, area, vtrserno, link1, nespp3, docid, NEGEAR, GEARTYPE
    , MESHGROUP, dmis_trip_id, month
    , region
    , halfofyear
    , sector_id
    , tripcategory
    , accessarea
    , activity_code_1
    , permit_EFP_1
  , permit_EFP_2
  , permit_EFP_3
  , permit_EFP_4
    order by vtrserno desc
    ) 

 select o.*
, case when c.match_nespp3 is not null then c.match_nespp3 else o.nespp3 end as match_nespp3
from obs_cams o
left join (select * from apsd.s_nespp3_match_conv) c
on o.nespp3 = c.nespp3 
')) %>% 
    collect()

# obs only 

obs = tbl(bcon, sql('
    select o.*
    , case when c.match_nespp3 is not null then c.match_nespp3 else o.nespp3 end as match_nespp3
    from obs_cams_prorate o
    left join (select * from apsd.s_nespp3_match_conv) c
    on o.nespp3 = c.nespp3'
))



# catch only

# dat = tbl(bcon, sql('select * from bg_cams_catch_mock'))


dat = tbl(bcon, sql('
select DMIS_TRIP_ID
, YEAR
, MONTH
, REGION
, case when month in (1,2,3,4,5,6) then 1
           when month in (7,8,9,10,11,12) then 2
           end as HALFOFYEAR
, VTRSERNO
, GEARNM
, GEARCODE
, GEARTYPE
, NEGEAR
, MESHGROUP
, CAREA
, sector_id
, activity_code_1
, permit_EFP_1
, permit_EFP_2
, permit_EFP_3
, permit_EFP_4
, tripcategory
, accessarea
, sum(pounds) as live_Pounds
from bg_cams_catch_ta_mock
group by DMIS_TRIP_ID, VTRSERNO, YEAR, GEARNM, GEARCODE, NEGEAR, GEARTYPE, MESHGROUP, CAREA, YEAR
, MONTH
, REGION
, sector_id
, activity_code_1
, permit_EFP_1
, permit_EFP_2
, permit_EFP_3
, permit_EFP_4
, tripcategory
, accessarea
, case when month in (1,2,3,4,5,6) then 1
           when month in (7,8,9,10,11,12) then 2
           end
'))

# %>% 
#   collect()

# Stat areas table

stat_area_sp = tbl(bcon, sql('select * from apsd.stat_areas_def'))
# SQUID EXAMPLE
# pull table as is for a species
# this can be used to assign discard directly to subtrip

# bdat <- c_o_dat2 %>% 
#   # group_by(DMIS_TRIP_ID, NESPP3) %>% 
#   mutate(SPECIES_DISCARD = case_when(NESPP3 == '802' ~ DISCARD)) %>% 
#   mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0))
#   # filter(NESPP3 == 802) %>% 
#   # mutate(SEADAYS = 0
#   #            , DISCARD = sum(DISCARD, na.rm = T)
#   #            , KALL = sum(OBS_HAUL_KEPT, na.rm = T)
#   #            ) %>% 
#   # collect()

# bdat$MESHGROUP[bdat$MESHGROUP == 'na'] = NA


# Use this to park discard into subtrips..
obs_discard = c_o_dat2 %>% 
    # group_by(DMIS_TRIP_ID, NESPP3) %>% 
    mutate(SPECIES_DISCARD = case_when(NESPP3 == '802' ~ DISCARD)) %>% 
    mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0)) %>%  
    group_by(YEAR
                     , VTRSERNO
                     # , NEGEAR
                     , GEARTYPE
                     , MESHGROUP
                     ) %>% 
    dplyr::summarise(KALL = sum(SUBTRIP_KALL), DISCARD = sum(SPECIES_DISCARD), dk = sum(SPECIES_DISCARD, na.rm = T)/sum(SUBTRIP_KALL, na.rm = T)) %>% 
    ungroup()

# Make an assumed rate by gear/mesh
## Use OBS table directly... not the joined catch/obs

bdat <- obs %>% 
    filter(YEAR == 2019) %>% 
    collect()


bdat$MESHGROUP[bdat$MESHGROUP == 'na'] = NA
#---------------------------------------------------------------------------------------#
# Here is where various stratification would make sense
# DOCID is hard coded in discaRd.. in this case, CV is calculated using  N = subtrips
#---------------------------------------------------------------------------------------#

ddat_focal <- dat %>% 
        filter(YEAR == 2019) %>% 
      collect() %>% 
      # mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, sep = '_')) %>% 
    ungroup() %>%
    mutate(DOCID = DMIS_TRIP_ID)

# Region, halfof year, sector id ,  etc. already added
# add REGION is desired..
    # ddat_focal = ddat_focal %>% 
    #   mutate(REGION = ifelse(CAREA < 600, 'N', 'S')) %>% 
    #   mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_'))
# choose species here
bdat_focal = bdat %>% 
    mutate(SPECIES_DISCARD = case_when(NESPP3 == '802' ~ DISCARD_PRORATE)) %>% 
    mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0)) %>% 
        mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')) %>% 
  dplyr::group_by(LINK1
                                # , NEGEAR
                                , GEARTYPE
                                , MESHGROUP
                                , STRATA
                                ) %>% 
    # be careful here... need to take the max values since they are repeated..
  dplyr::summarise(KALL = sum(max(OBS_HAUL_KALL_TRIP, na.rm = T)*max(PRORATE, na.rm = T)), BYCATCH = sum(SPECIES_DISCARD, na.rm = T)) %>% 
    mutate(KALL = replace_na(KALL, 0), BYCATCH = replace_na(BYCATCH, 0)) %>% 
    ungroup()


#---------------------------------------------------------------------------------------#
# trips data: rolled up by sub-trip
# This could be a generic table of KALL by subtrip..
#---------------------------------------------------------------------------------------#
# choose species here
bdat_focal = c_o_dat2 %>% 
    filter(!is.na(LINK1) & YEAR == 2019) %>%
    mutate(MESHGROUP = ifelse(MESHGROUP == 'na', NA, MESHGROUP)) %>% 
    mutate(SPECIES_DISCARD = case_when(NESPP3 == '802' ~ DISCARD)) %>% 
    mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0)) %>% 
        # mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')) %>% 
    mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, ACCESSAREA, TRIPCATEGORY, sep = '_')) %>% 
    # mutate(STRATA = paste(GEARTYPE, MESHGROUP, AREA, SECTOR_ID, sep = '_')) %>% 
  dplyr::group_by(LINK1
                                # , NEGEAR
                                # , GEARTYPE
                                # , MESHGROUP
                                , STRATA
                                ) %>% 
    # be careful here... need to take the max values since they are repeated..
  dplyr::summarise(KALL = max(OBS_KALL, na.rm = T), BYCATCH = sum(SPECIES_DISCARD, na.rm = T)) %>% 
    mutate(KALL = replace_na(KALL, 0), BYCATCH = replace_na(BYCATCH, 0)) %>% 
    ungroup()
# this may need to use the previous time period

assumed_discard = bdat_focal %>% 
  dplyr::group_by(
    # LINK1
                                # , NEGEAR
                                 GEARTYPE
                                , MESHGROUP
                                # , STRATA
                                ) %>% 
    # be careful here... need to take the max values since they are repeated..
  dplyr::summarise(KALL = sum(KALL, na.rm = T), BYCATCH = sum(BYCATCH, na.rm = T)) %>% 
    mutate(KALL = replace_na(KALL, 0), BYCATCH = replace_na(BYCATCH, 0)) %>% 
    ungroup() %>% 
    mutate(dk = BYCATCH/KALL)
# Get complete strata
strata_complete = unique(c(bdat_focal$STRATA, ddat_focal$STRATA))

allest = get.cochran.ss.by.strat(bydat = bdat_focal, trips = ddat_focal, strata_name = 'STRATA', targCV = .3, strata_complete = strata_complete)        

# discard rates by strata
dest_strata = allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# look at the small mesh numbers
allest$C %>% slice(grep('Otter Trawl_sm*', allest$C$STRATA))

# plug in estimated rates to the unobserved trips
ddat_rate = ddat_focal

ridx = match(ddat_rate$STRATA, dest_strata$STRATA)

ddat_rate$DISC_RATE = dest_strata$drate[ridx]   
ddat_rate$CV = dest_strata$CV[ridx] 


# substitute assumed rate where we can
assumed_discard = assumed_discard %>% 
    mutate(STRATA = paste(GEARTYPE, MESHGROUP, sep = '_'))
# mutate(STRATA = paste(NEGEAR, MESHGROUP, sep = '_'))

ddat_rate = ddat_rate %>% 
    mutate(STRATA_ASSUMED = paste(GEARTYPE, MESHGROUP, sep = '_')) %>% 
    # mutate(STRATA = paste(NEGEAR, MESHGROUP, sep = '_'))
    mutate(ARATE_IDX = match(STRATA_ASSUMED, assumed_discard$STRATA)) 

# ddat_rate$ARATE_IDX[is.na(ddat_rate$ARATE_IDX)] = 0
ddat_rate$ARATE = assumed_discard$dk[ddat_rate$ARATE_IDX]


# incorporate teh assumed rate into the calculated discard rates
ddat_rate <- ddat_rate %>% 
    mutate(CRATE = coalesce(DISC_RATE, ARATE)) %>%
    mutate(CRATE = replace_na(CRATE, 0)) 


# merge observed discards with estimated discards
# Use the observer tables created, NOT the merged trips/obs table.. 
# match on VTRSERNO? 

out_tab = obs_discard %>% 
    ungroup() %>% 
    dplyr::select(VTRSERNO, DISCARD) %>% 
    right_join(x = ., y = ddat_rate, by = 'VTRSERNO') %>% 
  mutate(EST_DISCARD = CRATE*LIVE_POUNDS) %>% 
    mutate(DISCARD = if_else(!is.na(DISCARD), DISCARD, EST_DISCARD)
                 ) 

# check how much discard is observed directly vs. estimated

obs_discard %>% 
    ungroup() %>% 
    dplyr::select(VTRSERNO, DISCARD) %>% 
    right_join(x = ., y = ddat_rate, by = 'VTRSERNO') %>% 
  mutate(EST_DISCARD = CRATE*LIVE_POUNDS) %>% dplyr::select(DISCARD, EST_DISCARD) %>% colSums(na.rm = T)

# not a good way to do this... 
out_tab %>%
    dplyr::summarise(d = sum(DISCARD, na.rm = T)
                        ,est_d = sum(EST_DISCARD, na.rm = T)
                        , OBS_DISCARD = sum(DISCARD, na.rm = T) - sum(EST_DISCARD, na.rm = T)) %>% 
        knitr::kable(format = 'markdown', col.names = c('Total Discard','Portion that was Estimated','Difference between estimated and observed on observed trips only'))

# compare to obs_discard standalone table
obs_discard %>% 
    filter(YEAR == 2019) %>% 
    ungroup() %>% 
    dplyr::select(DISCARD) %>% 
    sum() %>% 
    knitr::kable(format = 'markdown', col.names = 'Observed Discard from OBS table')
assumed_discard %>% knitr::kable(caption = 'Assumed discard. This could be a generalized rate from current year. This could also be built from previous years discard info.')

dest_strata %>% knitr::kable(caption = 'Stratified Estimate From discaRd')

# out_tab %>% head() %>% 
    # knitr::kable(format= 'markdown', caption = "Exampe of tabular output")
# make_bdat_cams <- function(input_table = bdat, species_nespp3 = '802') {

    # this need to sue the obs table explicitly.. 

#   bdat <- input_table %>% 
#   # group_by(DMIS_TRIP_ID, NESPP3) %>% 
#   mutate(SPECIES_DISCARD = case_when(NESPP3 == species_nespp3 ~ DISCARD)) %>% 
#   mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0))
# 
# 
#  bdat$MESHGROUP[bdat$MESHGROUP == 'na'] = NA
# 
#  bdat
#  


#  bdat = input_table %>% 
#   mutate(SPECIES_DISCARD = case_when(NESPP3 == species_nespp3 ~ DISCARD_PRORATE)) %>% 
#   mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0)) %>% 
#       mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')) %>% 
#   dplyr::group_by(LINK1
#                                   # , NEGEAR
#                                   , GEARTYPE
#                                   , MESHGROUP
#                                   , STRATA
#                                   ) %>% 
#   # be careful here... need to take the max values since they are repeated..
#   dplyr::summarise(KALL = sum(max(OBS_HAUL_KALL_TRIP, na.rm = T)*max(PRORATE)), BYCATCH = sum(SPECIES_DISCARD, na.rm = T)) %>% 
#   mutate(KALL = replace_na(KALL, 0), BYCATCH = replace_na(BYCATCH, 0)) %>% 
#   ungroup()
#  
#  bdat
#  
#  }

# Use this to park discard into subtrips..
get_obs_disc_vals <- function(c_o_tab = c_o_dat2, species_nespp3 = '802', year = 2019){

    codat <- c_o_tab %>%
        filter(YEAR == year) %>% 
    # group_by(DMIS_TRIP_ID, NESPP3) %>%
    mutate(SPECIES_DISCARD = case_when(MATCH_NESPP3 == species_nespp3 ~ DISCARD)) %>%
    mutate(SPECIES_DISCARD = tidyr::replace_na(SPECIES_DISCARD, 0))


 codat$MESHGROUP[codat$MESHGROUP == 'na'] = NA

obs_discard = codat %>% 
    group_by(YEAR
                     , VTRSERNO
                     # , NEGEAR
                     , GEARTYPE
                     , MESHGROUP
                     ) %>% 
    dplyr::summarise(KALL = sum(SUBTRIP_KALL), DISCARD = sum(SPECIES_DISCARD), dk = sum(SPECIES_DISCARD, na.rm = T)/sum(SUBTRIP_KALL, na.rm = T))

obs_discard

}

# Make an assumed rate by gear/mesh

make_assumed_rate <- function(bdat_focal, year = 2019){
assumed_discard = bdat_focal %>% 
  dplyr::group_by( GEARTYPE
                                , MESHGROUP
                                ) %>% 
    # be careful here... max values already represented from bdat_focal. So, take the sum here
  dplyr::summarise(KALL = sum(KALL, na.rm = T), BYCATCH = sum(BYCATCH, na.rm = T)) %>% 
    mutate(KALL = tidyr::replace_na(KALL, 0), BYCATCH = tidyr::replace_na(BYCATCH, 0)) %>% 
    ungroup() %>% 
    mutate(dk = BYCATCH/KALL)

 assumed_discard

}   

# set up bdat for discaRd: rolled up by sub-trip
# bdat is acquired outside of the function snce it's a large table of all species

make_bdat_focal <- function(bdat, year = 2019, species_nespp3 = '802'){ #, strata = paste(GEARTYPE, MESHGROUP, AREA, sep = '_')

    # choose species here
# bdat_focal = bdat %>% 
#   mutate(SPECIES_DISCARD = case_when(NESPP3 == '802' ~ DISCARD_PRORATE)) %>% 
#   mutate(SPECIES_DISCARD = replace_na(SPECIES_DISCARD, 0)) %>% 
#       mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')) %>% 
#   dplyr::group_by(LINK1
#                                   # , NEGEAR
#                                   , GEARTYPE
#                                   , MESHGROUP
#                                   , STRATA
#                                   ) %>% 
#   # be careful here... need to take the max values since they are repeated..
#   dplyr::summarise(KALL = sum(max(OBS_HAUL_KALL_TRIP, na.rm = T)*max(PRORATE, na.rm = T)), BYCATCH = sum(SPECIES_DISCARD, na.rm = T)) %>% 
#   mutate(KALL = replace_na(KALL, 0), BYCATCH = replace_na(BYCATCH, 0)) %>% 
#   ungroup()



 bdat_focal = bdat %>% 
    filter(YEAR == year) %>% 
    mutate(SPECIES_DISCARD = case_when(MATCH_NESPP3 == species_nespp3 ~ DISCARD_PRORATE)) %>% 
    mutate(SPECIES_DISCARD = tidyr::replace_na(SPECIES_DISCARD, 0)) %>% 
        mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')) %>% 
  dplyr::group_by(LINK1
                                # , NEGEAR
                                , GEARTYPE
                                , MESHGROUP
                                , STRATA
                                ) %>% 
    # be careful here... need to take the max values since they are repeated..
  dplyr::summarise(KALL = sum(max(OBS_HAUL_KALL_TRIP, na.rm = T)*max(PRORATE)), BYCATCH = sum(SPECIES_DISCARD, na.rm = T)) %>% 
    mutate(KALL = tidyr::replace_na(KALL, 0), BYCATCH = tidyr::replace_na(BYCATCH, 0)) %>% 
    ungroup()

 bdat_focal

}


run_discard <- function(bdat, ddat_focal, c_o_tab = c_o_dat2, year = 2019, species_nespp3 = '802'){ #, strata = paste(GEARTYPE, MESHGROUP, AREA, sep = '_')
    # bdat = make_bdat_cams(obstab, species_nespp3 = species_nespp3)
    bdat_focal = make_bdat_focal(bdat, year = year, species_nespp3 = species_nespp3)
    obs_discard = get_obs_disc_vals(c_o_tab, species_nespp3 = species_nespp3, year = year)
    assumed_discard = make_assumed_rate(bdat_focal, year = year)

    # Get complete strata
strata_complete = unique(c(bdat_focal$STRATA, ddat_focal$STRATA))

allest = get.cochran.ss.by.strat(bydat = bdat_focal, trips = ddat_focal, strata_name = 'STRATA', targCV = .3, strata_complete = strata_complete)        

# discard rates by strata
dest_strata = allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

# plug in estimated rates to the unobserved
ddat_rate = ddat_focal
ddat_rate$DISC_RATE = dest_strata$drate[match(ddat_rate$STRATA, dest_strata$STRATA)]    
ddat_rate$CV = dest_strata$CV[match(ddat_rate$STRATA, dest_strata$STRATA)]  


# substitute assumed rate where we can
assumed_discard = assumed_discard %>% 
    mutate(STRATA = paste(GEARTYPE, MESHGROUP, sep = '_'))
# mutate(STRATA = paste(NEGEAR, MESHGROUP, sep = '_'))

ddat_rate = ddat_rate %>% 
    mutate(STRATA_ASSUMED = paste(GEARTYPE, MESHGROUP, sep = '_')) %>% 
    # mutate(STRATA = paste(NEGEAR, MESHGROUP, sep = '_'))
    mutate(ARATE_IDX = match(STRATA_ASSUMED, assumed_discard$STRATA)) 

# ddat_rate$ARATE_IDX[is.na(ddat_rate$ARATE_IDX)] = 0
ddat_rate$ARATE = assumed_discard$dk[ddat_rate$ARATE_IDX]


# incorporate teh assumed rate into the calculated discard rates
ddat_rate <- ddat_rate %>% 
    mutate(CRATE = coalesce(DISC_RATE, ARATE)) %>%
    mutate(CRATE = tidyr::replace_na(CRATE, 0)) 


# merge observed discards with estimated discards
# Use the observer tables created, NOT the merged trips/obs table.. 
# match on VTRSERNO? 

out_tab = obs_discard %>% 
    ungroup() %>% 
    dplyr::select(VTRSERNO, DISCARD) %>% 
    right_join(x = ., y = ddat_rate, by = 'VTRSERNO') %>% 
  mutate(EST_DISCARD = CRATE*LIVE_POUNDS) %>% 
    mutate(DISCARD = if_else(!is.na(DISCARD), DISCARD, EST_DISCARD)
                 ) 

list(species = species_nespp3
         , allest = allest
         , res = out_tab
 )

}
# ddat_focal is run by itself since it takes a longtime.. and can be repeated

ddat_focal <- ddat_focal %>% 
      mutate(STRATA = paste(GEARTYPE, MESHGROUP, REGION, HALFOFYEAR, sep = '_')
                 , SEADAYS = 0
                 , DOCID = DMIS_TRIP_ID)

# squid_ex = run_discard(obstab = c_o_dat2, ddat = ddat_focal, year = 2019, species_nespp3 = '802')
squid_ex = run_discard(bdat = bdat, ddat = ddat_focal, c_o_tab = c_o_dat2, year = 2019, species_nespp3 = '802')


squid_ex$res$DISCARD %>% sum(na.rm = T)

# discard rates by strata
dest_strata = squid_ex$allest$C %>% summarise(STRATA = STRATA
                       , N = N
                       , n = n
                       , orate = round(n/N, 2)
                       , drate = RE_mean
                       , KALL = K, disc_est = round(D)
                       , CV = round(RE_rse, 2)
                                        )

dest_strata %>% slice(grep('Otter Trawl_sm*', dest_strata$STRATA))
spec_list = c('801','802','212','051', '012', '125')

out_list = list()

ii = 1

for(i in spec_list){
    print(paste0('Estimating discaRd for NESPP3 ', i))
    out_list[[ii]] = run_discard(bdat = bdat, ddat = ddat_focal, c_o_tab = c_o_dat2, year = 2019, species_nespp3 = i)
    out_list[[ii]]$res$MATCH_NESPP3 = i
    ii = ii+1

}

names(out_list) = spec_list


# pull out list parts and rearrange

out_res = out_list[[1]]$res %>% 
    dplyr::select(DMIS_TRIP_ID, MATCH_NESPP3 , DISCARD)

for(i in 2:length(out_list)){
    tmp = out_list[[i]]$res %>% 
        dplyr::select(DMIS_TRIP_ID, MATCH_NESPP3 , DISCARD)

    out_res = rbind(out_res, tmp)

}

# get SMB ACL accounting numbers for 2019
acl_tab = readr::read_csv('H:/Hocking/smb/smb_acl_accounting/smb_acl_2019/summary_table.csv') %>% 
    mutate(SPPNM = c("BUTTERFISH",  "ATLANTIC MACKEREL" ,  "LONGFIN SQUID"  ,   "ILLEX SQUID"))

# get species names
species = offshoreWind::SPECIES %>% 
    mutate(NESPP3 = stringr::str_pad(NESPP3, width = 3, side = 'left', pad = 0))

out_res %>% 
    mutate(NESPP3 = MATCH_NESPP3) %>% 
    group_by(NESPP3) %>% 
    dplyr::summarise(`discaRd CAMS` = sum(DISCARD, na.rm = T)) %>% 
    left_join(., species, by = 'NESPP3') %>% 
        dplyr::select(1,3,2) %>% 
     left_join(acl_tab, by = 'SPPNM') %>% 
    dplyr::select(1,2,3,7) %>% 
    dplyr::rename(`ACL Discard` = Discards) 
# %>% 
#   knitr::kable(caption = 'Comaprison of CAMS discaRd and ACL accounting for 2019')

Squid Example

NOTES:

The discard estimation runs normally using discaRd.

The stratification scheme for this example was simplified to use only gear and mesh.

For Quota Monitoring ACL accounting, region (North/south), time of year, and (usually) different scallop trip types are designated.

Observed Discard is not typically assigned..

I seem to be getting much lower trip and obs trip counts than what exists in the ACL annual reports. This may be due to the apportionment processing.. SOLVED! it's how trips are counted in ACL reports..

I am using subtrip as the N unit.

the total number of observed trips is comparable in the newly created tables but something is getting lost on the import step.. ACL report: 4299 total from new table: 4210 total in imported table 4000 total from discaRd is r dest_strata$n %>% sum()

hard match on LINK1, NEGEAR, MESHGROUP, CAREA is causing some obs trips to drop.. Example: scallop dredge has a total of 562 observed trips in the 2019 final ACL accounting but here there are only 515. I think this is likely due to CAREA more than anything.

example of output table

This can be modified to perhaps save only the dmis_trip_id, strata, and discard estimate.

out_tab %>% 
    head() %>% 
    DT::datatable(., filter = "top")
# need to add gear here

# mgear = readxl::read_xlsx('C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/GAR master gear codes.xlsx')

mgear = tbl(bcon, sql('select * from apsd.master_gear'))

mgear = mgear %>% 
    group_by(VTR_GEAR_CODE) %>% 
    distinct(NEGEAR, VTR_GEAR_CODE) %>% 
  ungroup() %>% 
    collect()
cdat %>% 
    mutate(MESHGROUP = case_when(NEGEAR %in% c('054','057') ~ 'lg'
                                                        )
                 , GEARTYPE = case_when( GEARTYPE == 'Unknown' & is.na(GEARTYPE) ~ 'Other'
                                                  , NESPP3 == 800 & GEARTYPE == 'Unknown' & is.na(GEARTYPE) ~ 'Scallop Dredge')
                 ) %>%
    filter(NESPP3 == 800) %>% 
    group_by(GEARTYPE, MESHGROUP) %>% 
    summarise(psum = sum(POUNDS, na.rm = T))

# UPDATE bg_total_landings_1 a
# SET a.geartype = 'Scallop Dredge'
# WHERE a.nespp3 = '800' 
# AND a.geartype = 'Unknown'
# AND a.gearcode IS NULL    
# /
# UPDATE bg_total_landings_1 a
# SET a.geartype = 'Other'
# WHERE a.geartype = 'Unknown'
# AND a.gearcode IS NULL    


noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.