knitr::opts_chunk$set(echo=FALSE
                                            , warning = FALSE
                                            , message = FALSE
                                            , cache = FALSE
                                            , progress = TRUE
                                            , verbose = FALSE
                                            , comment = F
                                            , error = FALSE
                                            , dev = 'png'
                                            , dpi = 200
                                            , prompt = F
                                            , results='hide')

options(dplyr.summarise.inform = FALSE)
# setwd("C:/Users/benjamin.galuardi/Documents/GitHub/discaRd/CAMS/")

# library(odbc)
library(ROracle)
library(keyring)
library(apsdFuns)
library(dplyr, warn.conflicts = FALSE)
# library(dbplyr)
library(ggplot2)
# library(config)
library(stringr)
library(discaRd)
library(fst)
options(scipen = 999)

# unlock keyring
keyring::keyring_unlock("apsd_ma")

# local run
# dw_apsd <- config::get(value = "apsd", file = "K:/R_DEV/config.yml")

# if on server..
# dw_apsd <- config::get(value = "maps", file = "~/config.yml")


# connect to MAPS

con_maps = apsdFuns::roracle_login(key_name = 'apsd_ma', key_service = 'maps')

# 
# con_maps <- ROracle::dbConnect(
#     drv = ROracle::Oracle(),
#     username = dw_apsd$uid,
#     password = dw_apsd$pwd,
#     dbname = "NERO.world"
# )

'%!in%' <- function(x,y)!('%in%'(x,y))

source('~/PROJECTS/discaRd/CAMS/R/cams_discard_functions.R')

setwd('~/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/')
# get catch and matched obs data together

import_query = "  with obs_cams as (
   select year
    , month
    , date_trip
  , PERMIT
    , case when month in (5,6,7,8,9,10) then 1
           when month in (11,12,1,2,3,4) then 2
           end as halfofyear
  , AREA
    , vtrserno
    , CAMS_SUBTRIP
    , link1
    , link3
    , docid
    , CAMSID
    , nespp3
  , itis_tsn as SPECIES_ITIS
  , SECGEAR_MAPPED as GEARCODE
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
  , GF
, case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end as SECTOR_TYPE
, case when PERMIT = '000000' then 'STATE'
       else 'FED' end as FED_OR_STATE
    , tripcategory
    , accessarea
    , activity_code_1
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    , NVL(sum(discard),0) as discard
    , NVL(sum(discard_prorate),0) as discard_prorate
    , NVL(round(max(subtrip_kall)),0) as subtrip_kall
    , NVL(round(max(obs_kall)),0) as obs_kall
    from MAPS.CAMS_OBS_CATCH
    group by year

  , AREA
  , PERMIT
    , vtrserno
    , CAMS_SUBTRIP
    , link1
    , link3
    , docid
    , nespp3    
  , itis_tsn
    , SECGEAR_MAPPED
    , NEGEAR
    , GEARTYPE
    , MESHGROUP
    , SECTID
  , GF
  , case when activity_code_1 like 'NMS-COM%' then 'COMMON_POOL'
       when activity_code_1 like 'NMS-SEC%' then 'SECTOR'
             else 'non_GF' end
  , case when PERMIT = '000000' then 'STATE'
       else 'FED' end
  , CAMSID
  , month
  , date_trip
    , halfofyear
    , tripcategory
    , accessarea
    , activity_code_1
  , EM
  , redfish_exemption
    , closed_area_exemption
    , sne_smallmesh_exemption
    , xlrg_gillnet_exemption
    order by vtrserno asc
    ) 

  select case when o.MONTH in (1,2,3,4) then o.YEAR-1 else o.YEAR end as GF_YEAR
  , case when o.MONTH in (1,2,3) then o.YEAR-1 else o.YEAR end as SCAL_YEAR
  , o.*
  , c.date_trip
  from obs_cams o
  left join (
      select 
      distinct(camsid)
      , date_trip
      from maps.cams_landings
  ) c 
  on o.camsid = c.camsid  


"

c_o_dat2 <- ROracle::dbGetQuery(con_maps, import_query)

c_o_dat2 = c_o_dat2 %>% 
    mutate(PROGRAM = substr(ACTIVITY_CODE_1, 9, 10)) %>% 
  mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' & PROGRAM == 'OP' ~ 'OPEN' 
       , PROGRAM == 'NS' ~ 'NLS'
       , PROGRAM == 'NN' ~ 'NLSN'
       , PROGRAM == 'NH' ~ 'NLSS'  # includes the NLS south Deep
       , PROGRAM == 'NW' ~ 'NLSW'
       , PROGRAM == '1S' ~ 'CAI'
       , PROGRAM == '2S' ~ 'CAII'
       , PROGRAM %in% c('MA', 'ET', 'EF', 'HC', 'DM') ~ 'MAA'
       )
) %>% 
    mutate(SCALLOP_AREA = case_when(substr(ACTIVITY_CODE_1,1,3) == 'SES' ~ dplyr::coalesce(SCALLOP_AREA, 'OPEN'))) %>% 
    mutate(DOCID = CAMS_SUBTRIP)

# NOTE: CAMS_SUBTRIP being defined as DOCID so the discaRd functions don't have to change!! DOCID hard coded in the functions..


# 4/13/22
# need to make LINK1 NA when LINK3 is null.. this is due to data mismatches in putting hauls at the subtrip level. If we don't do this step, OBS trips will get values of 0 for any evaluated species. this may or may not be correct.. it's not possible to know without a haul to subtrip match. This is a hotfix that may change in the future 

link3_na = c_o_dat2 %>% 
    filter(!is.na(LINK1) & is.na(LINK3))


# make these values 0 or NA or 'none' depending on the default for that field
link3_na = link3_na %>% 
    mutate(LINK1 = NA
                 , DISCARD = NA
                 , DISCARD_PRORATE = NA
                 , OBSRFLAG = NA
                 , OBSVTR = NA
                 , OBS_AREA = NA
                 , OBS_GEAR = NA
                 , OBS_HAUL_KALL_TRIP = 0
                 , OBS_HAUL_KEPT = 0
                 , OBS_KALL = 0
                 , OBS_LINK1 = NA
                 , OBSVTR = NA
                 , OBS_MESHGROUP = 'none'
                 , PRORATE = NA)


tidx = c_o_dat2$CAMSID %in% link3_na$CAMSID

c_o_dat2 = c_o_dat2[tidx == F,]

c_o_dat2 = c_o_dat2 %>% 
    bind_rows(link3_na)

# continue the data import


state_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'STATE')
fed_trips = c_o_dat2 %>% filter(FED_OR_STATE == 'FED')

fed_trips = fed_trips %>% 
    mutate(ROWID = 1:nrow(fed_trips)) %>% 
    relocate(ROWID)

# filter out link1 that are doubled on VTR

multilink = fed_trips %>% 
    filter(!is.na(LINK1)) %>% 
    group_by(VTRSERNO) %>% 
    dplyr::summarise(nlink1 = n_distinct(LINK1)) %>% 
    arrange(desc(nlink1)) %>% 
    filter(nlink1>1)

remove_links = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
    dplyr::select(LINK1) %>% 
    distinct()

remove_id = fed_trips %>% 
    filter(is.na(SPECIES_ITIS) & !is.na(LINK1) & VTRSERNO %in% multilink$VTRSERNO) %>% 
      distinct(ROWID)

fed_trips =
    fed_trips %>% 
    filter(ROWID %!in% remove_id$ROWID)

non_gf_dat = fed_trips %>% 
    filter(GF == 0) %>% 
    bind_rows(., state_trips) %>% 
    mutate(GF = "0")

gf_dat = fed_trips%>% 
    filter(GF == 1)

rm(c_o_dat2, fed_trips, state_trips)

gc()
# this section may be repeated for other modules with other lists of species

#--------------------------------------------------------------------------#
# get groundfish species list
species = tbl(con_maps, sql("
select distinct(b.species_itis)
    , COMNAME
    , a.nespp3
from fso.v_obSpeciesStockArea a
left join (select *  from MAPS.CAMS_GEARCODE_STRATA) b on a.nespp3 = b.nespp3
where stock_id not like 'OTHER'
and b.species_itis is not null
")
) %>% 
    collect()

# make a script from RMD.. 
knitr::purl('groundfish_loop_050422.Rmd', documentation = 0)

# run two years worth of GF
for(jj in 2020:2021){
    FY <- jj
    FY_TYPE = 'MAY START'
  source('groundfish_loop_050422.R')
}

# clean the workspace; restart likely not necessary anymore 
rm(list = ls())
gc()
.rs.restartR()
# this section may be repeated for other modules with other lists of species

#--------------------------------------------------------------------------#
# group of species


# make a script from RMD.. 
knitr::purl('<PATH TO CALENDAR YEAR RMD>', documentation = 0)

# Define year to run
for(jj in <DEFINE YEAR TO RUN>){
    FY <- jj
    FY_TYPE = 'JANUARY START'
  source('<PATH TO CALENDAR YEAR RMD>.R')
}

# clean the workspace; restart likely not necessary anymore 
rm(list = ls())
gc()
# .rs.restartR()
#build one big table for a FY for each species in a folder of results

source('~/PROJECTS/discaRd/CAMS/R/cams_discard_functions.R')

# connect to maps again
keyring::keyring_unlock("apsd_ma")

con_maps = apsdFuns::roracle_login(key_name = 'apsd_ma', key_service = 'maps')

 Sys.setenv(TZ = "America/New_York")
 Sys.setenv(ORA_SDTZ = "America/New_York")

# dw_apsd <- config::get(value = "maps", file = "~/config.yml")
# 
# con_maps <- ROracle::dbConnect(
#     drv = ROracle::Oracle(),
#     username = dw_apsd$uid,
#     password = dw_apsd$pwd,  
#     dbname = "NERO.world"
# )

for (FY in 2018:2021){

parse_upload_discard(con_maps, filepath = '~/PROJECTS/discaRd/CAMS/MODULES/GROUNDFISH/OUTPUT/', FY = FY)

}

gc()

### Add lines for uploading from multiple folders of discard .fst results
tab_list = ROracle::dbGetQuery(con_maps, " 
SELECT object_name, object_type
    FROM all_objects
    WHERE object_type = 'TABLE'
    and owner = 'MAPS'
and object_name like 'CAMS_DISCARD%'
and object_name not like '%DISCARD_MORTALITY%'
                                                             ")

  sq = paste0("GRANT SELECT ON MAPS.", tab_list$OBJECT_NAME," TO CAMS_GARFO")

  # sq = stringr::str_flatten(sq)

  for(i in 1:nrow(tab_list)){
    ROracle::dbSendQuery(con_maps, sq[i])
  }
 # now make the tables on CAMS_GARFO

con_cams = apsdFuns::roracle_login(key_name = 'apsd_ma', key_service = 'maps')

# condat <- config::get(value = "bgaluardi_cams_garfo", file = "~/config.yml")
# 
# con_cams = ROracle::dbConnect(
#     drv = ROracle::Oracle(),
#     username = condat$uid,
#     password = condat$pwd,  
#     dbname = "NERO.world"
# )

# make a list of tables to  add
tab_list_cm = ROracle::dbGetQuery(con_cams, " 
SELECT object_name, object_type
    FROM all_objects
    WHERE object_type = 'TABLE'
    and owner = 'MAPS'
and object_name like 'CAMS_DISCARD%'
and object_name not like '%DISCARD_MORTALITY%'
 -- and created >= SYSDATE - 10   --include if we want to only copy over newer tables. should work well for a weekly run or similar.. 
                                                             "
)

# remove all the old tables
 for(i in 1:nrow(tab_list_cm)){
        if(ROracle::dbExistsTable(con_cams, tab_list_cm$OBJECT_NAME[i])){
            ROracle::dbRemoveTable(con_cams, tab_list_cm$OBJECT_NAME[i])
        }
 }


# make a script version of the list of tables to  add
  make_tab_sq = paste0("CREATE TABLE CAMS_GARFO.", tab_list$OBJECT_NAME," AS SELECT * FROM MAPS.", tab_list$OBJECT_NAME)

# make the tables from MAPS
  for(i in 1:nrow(tab_list)){
    if(DBI::dbExistsTable(con_cams, tab_list$OBJECT_NAME[i])) next
    print(paste0("MAKING TABLE ",  tab_list$OBJECT_NAME[i], " ON CAMS_GARFO"))
    ROracle::dbSendQuery(con_cams, make_tab_sq[i])
  }
#===============================================
# comments

print(paste("Updating Oracle comments"))

definitions <-
    googlesheets4::read_sheet(
        "https://docs.google.com/spreadsheets/d/1YorwnjozdPwVFJPabC7Ikta6wNzlhzkUtO1TPRbR9-s/edit?usp=sharing"
    )

# save(definitions, file = "data/definitions.rda")
devtools::load_all('~/PROJECTS/MAPS/')

# con_maps <- apsdFuns::roracle_login("apsd_ma", key_service = "maps")

add_comments(con = con_maps, schema = "MAPS", definitions = definitions) # 3-10 minutes

add_comments(con = con_cams, schema = "CAMS_GARFO") # 3-10 minutes

# dbDisconnect(con_maps)
# get list of discard tables on CAMS_GARFO

tab_list = ROracle::dbGetQuery(con_maps, " 
SELECT object_name, object_type
    FROM all_objects
    WHERE object_type = 'TABLE'
    and owner = 'MAPS'
and object_name like 'CAMS_DISCARD%'
and object_name not like '%DISCARD_MORTALITY%'
and object_name not like '%CY%'  -- gets rid of experimental tables
                                                             ")

st = "CREATE OR REPLACE VIEW MAPS.CAMS_DISCARD_ALL_YEARS AS "

tab_line = paste0("select * from MAPS.", tab_list$OBJECT_NAME," UNION ALL " )  # [22:23]  # groundfish only.. 

# bidx = grep('*MORTALITY*', tab_line)
# 
# tab_line = tab_line[-bidx]

tab_line[length(tab_line)] = gsub(replacement = "", pattern = "UNION ALL", x = tab_line[length(tab_line)])


# create a script to pass to SQL

sq = stringr::str_c(st, stringr::str_flatten(tab_line))

# pass the script to make a view
ROracle::dbSendQuery(con_maps, sq)

# test it!


ROracle::dbGetQuery(con_maps, "
    select round(sum(discard)) as total_discard
    , species_stock
    , COMMON_NAME
    , species_itis
    , FY
    , GF
    from MAPS.CAMS_DISCARD_ALL_YEARS
    group by species_itis, fy, species_stock, GF, COMMON_NAME
    order by COMMON_NAME
    "
)
# get list of discard tables on CAMS_GARFO

tab_list = ROracle::dbGetQuery(con_cams, " 
SELECT object_name, object_type
    FROM all_objects
    WHERE object_type = 'TABLE'
    and owner = 'CAMS_GARFO'
and object_name like 'CAMS_DISCARD%'
and object_name not like '%DISCARD_MORTALITY%'
and object_name not like '%CY%'  -- gets rid of experimental tables
                                                             ")

st = "CREATE OR REPLACE VIEW CAMS_GARFO.CAMS_DISCARD_ALL_YEARS AS "

tab_line = paste0("select * from CAMS_GARFO.", tab_list$OBJECT_NAME," UNION ALL " )  # [22:23]  # groundfish only.. 

# bidx = grep('*MORTALITY*', tab_line)
# 
# tab_line = tab_line[-bidx]

tab_line[length(tab_line)] = gsub(replacement = "", pattern = "UNION ALL", x = tab_line[length(tab_line)])


# create a script to pass to SQL

sq = stringr::str_c(st, stringr::str_flatten(tab_line))

# pass the script to make a view
ROracle::dbSendQuery(con_cams, sq)


# Grant to CAMS_GARFO @NOVA

# ROracle::dbSendQuery(con_maps, "GRANT SELECT ON MAPS.CAMS_DISCARD_ALL_YEARS TO CAMS_GARFO")

# Grant to CAMS_GARFO_FOR_NEFSC

ROracle::dbSendQuery(con_cams, "GRANT SELECT ON CAMS_GARFO.CAMS_DISCARD_ALL_YEARS TO CAMS_GARFO_FOR_NEFSC")

# test it!


ROracle::dbGetQuery(con_cams, "
    select round(sum(discard)) as total_discard
    , species_stock
    , COMMON_NAME
    , species_itis
    , FY
    , GF
    from CAMS_GARFO.CAMS_DISCARD_ALL_YEARS
    group by species_itis, fy, species_stock, GF, COMMON_NAME
    order by COMMON_NAME
    "
)


noaa-garfo/discaRd documentation built on April 17, 2025, 10:32 p.m.