knitr::opts_chunk$set(echo=FALSE , warning = FALSE , message = FALSE , cache = FALSE , progress = TRUE , verbose = FALSE , comment = F , error = FALSE , dev = 'png' , dpi = 200 , prompt = F , results='hide') options(dplyr.summarise.inform = FALSE)
library(ROracle) library(keyring) library(apsdFuns) library(dplyr, warn.conflicts = FALSE) # library(dbplyr) library(ggplot2) library(stringr) library(discaRd) library(fst) options(scipen = 999) # unlock keyring keyring::keyring_unlock("apsd_ma") # connect to MAPS con_maps = apsdFuns::roracle_login(key_name = 'apsd_ma', key_service = 'maps') con_cams = apsdFuns::roracle_login(key_name = 'apsd_ma', key_service = 'bgaluardi_cams_garfo') '%!in%' <- function(x,y)!('%in%'(x,y)) source('~/PROJECTS/discaRd/CAMS/R/cams_discard_functions.R')
new_link1_sq <- " with obs1 as ( select a.* from ( select o.link3 , link1 -- , vtrserno , extract(year from dateland) as year -- , o.month , o.obsrflag , o.area as obs_area , o.negear as obs_gear , o.geartype , round(o.meshsize, 0) as obs_mesh , o.meshgroup , substr(nespp4, 1, 3) as NESPP3 , SUM(case when catdisp = 0 then o.livewt else 0 end) as discard , SUM(case when catdisp = 1 then o.livewt else 0 end) as obs_haul_kept from ( cams_obdbs_all_years ) o where tripext in ('C','X') group by o.link3 , link1 , o.obsrflag , o.area , o.geartype , o.negear , round(o.meshsize, 0) , o.meshgroup , substr(nespp4, 1, 3) , extract(year from dateland) ) a group by link3 , link1 -- , vtrserno , year -- , month , obsrflag , obs_area , obs_gear , geartype , obs_mesh , meshgroup , NESPP3 , discard , obs_haul_kept ) , land as ( select distinct(camsid), subtrip, negear, vtr_mesh, mesh_cat, area, area_source, gear_source, activity_code_1, secgear_mapped--, year from cams_garfo.cams_landings c left join ( select distinct(NEGEAR) as VTR_NEGEAR , SECGEAR_MAPPED from MAPS.STG_OBS_VTR_GEARMAP where NEGEAR is not null ) g on c.NEGEAR = g.VTR_NEGEAR ) ,ulink as ( select permit , FIRST_VALUE(obs_link1) OVER (partition by camsid) AS min_link1 , count(distinct(obs_link1)) over(partition by camsid) as N_link1_camsid , obs_link1 , camsid from MATCH_OBS a where extract(year from a.obs_land) > 2016 group by permit, camsid, obs_link1 order by permit ) select a.* , case when secgear_obs = secgear_mapped then 'match' else 'no match' end as gear_match , case when meshgroup = mesh_cat then 'match' else 'no match' end as mesh_match , case when obs_area = area then 'match' else 'no match' end as area_match from ( select o.*, c.* , g.secgear_mapped as secgear_obs from obs1 o left join ( select distinct(NEGEAR) as VTR_NEGEAR , SECGEAR_MAPPED from MAPS.STG_OBS_VTR_GEARMAP where NEGEAR is not null ) g on o.OBS_GEAR = g.VTR_NEGEAR left join ulink m on o.link1 = m.obs_link1 left join land c on c.camsid = m.camsid -- where o.link1 not in (select distinct link1 from cams_obs_catch where link1 is not null) -- and year = 2020) ) a " new_link1_df = tbl(con_cams, sql(new_link1_sq)) # get unique link1 and link3 from original cams_obs_catch co_links = tbl(con_cams, sql('select distinct(link3), link1, year from cams_obs_catch')) %>% collect() # same thing but from test table built with new join order (on maps) co_links_test = tbl(con_maps, sql('select distinct(link3), link1, year from cams_obs_catch_test')) %>% collect()
CAMS_OBS_CATCH
What rises to the top?
a = new_link1_df %>% # filter(YEAR < 2022) %>% dplyr::select(LINK1, LINK3, YEAR) %>% distinct() %>% collect() # how many link1 were not in original cams_obs_catch? a %>% dplyr::select(LINK1, YEAR) %>% distinct() %>% mutate(nomatch = LINK1 %!in% co_links$LINK1) %>% group_by(YEAR) %>% dplyr::summarise(no_match = sum(nomatch)) # how many link1 were in cams_obs_catch incorrectly? co_links %>% dplyr::select(LINK1, YEAR) %>% distinct() %>% mutate(nomatch = LINK1 %!in% a$LINK1) %>% group_by(YEAR) %>% dplyr::summarise(no_match = sum(nomatch)) # how many link1 are not in cams_obs_catch_test? a %>% dplyr::select(LINK1, YEAR) %>% distinct() %>% mutate(nomatch = LINK1 %!in% co_links_test$LINK1) %>% group_by(YEAR) %>% dplyr::summarise(no_match = sum(nomatch)) # wouldn't we expect to see more link1 in the old cams_obs_catch due to the matching order?? the opposite seems true. # also, it seems that there are link1 missing in the original cams_obs_catch which makes no sense
# Link3 a %>% filter(LINK3 %!in% co_links$LINK3) %>% group_by(YEAR) %>% n_distinct(LINK3) group_by(YEAR, GEAR_MATCH, MESH_MATCH, AREA_MATCH) %>% dplyr::summarise(`Number of LINK1` = n_distinct(LINK1)) %>% collect() %>% DT::datatable()
nolink1_df %>% filter(YEAR < 2022) %>% filter(GEAR_MATCH == 'match' & MESH_MATCH == 'match' & AREA_MATCH == 'match') %>% collect() %>% # group_by(YEAR, CAMSID) %>% # dplyr::summarise(nlink1 = n_distinct(LINK1)) %>% # collect() %>% DT::datatable()
nolink3_df %>% filter(YEAR == 2021) %>% # filter(GEAR_MATCH == 'match' & MESH_MATCH == 'match' & AREA_MATCH == 'match') %>% collect() %>% # group_by(YEAR, CAMSID) %>% # dplyr::summarise(nlink1 = n_distinct(LINK1)) %>% # collect() %>% DT::datatable()
obs1 = tbl(con_maps, sql(" select o.link3 , link1 -- , vtrserno , extract(year from dateland) as year -- , o.month , o.obsrflag , o.area as obs_area , o.negear as obs_gear , o.geartype , round(o.meshsize, 0) as obs_mesh , o.meshgroup , substr(nespp4, 1, 3) as NESPP3 , SUM(case when catdisp = 0 then o.livewt else 0 end) as discard , SUM(case when catdisp = 1 then o.livewt else 0 end) as obs_haul_kept from ( select * from cams_obdbs_all_years ) o group by o.link3 , link1 -- , vtrserno -- , o.month , o.obsrflag , o.area , o.geartype , o.negear , round(o.meshsize, 0) , o.meshgroup , substr(nespp4, 1, 3) , extract(year from dateland) " )) # -- this is supposed to use only link1s that had hauls... no hauls, no need for anything. ulink = tbl(con_maps, sql(" select permit , min(obs_link1) as min_link1 -- this is the minimum link1 for the vtr , obs_link1 , camsid from MAPS.MATCH_OBS a where extract(year from a.obs_land) > 2016 group by permit, camsid, obs_link1 order by permit ")) land = tbl(con_maps, sql(" select d.permit , d.camsid , d.year , d.month , d.date_trip , d.docid , d.vtrserno , d.camsid || '_' || d.subtrip as cams_subtrip , d.geartype , d.negear -- , NVL(g.SECGEAR_MAPPED, 'OTH') as SECGEAR_MAPPED , NVL(d.mesh_cat, 'na') as meshgroup , d.area , round(sum(d.LIVLB)) as subtrip_kall , d.sectid , d.GF , d.activity_code_1 , d.activity_code_2 , d.EM , redfish_exemption , closed_area_exemption , sne_smallmesh_exemption , xlrg_gillnet_exemption , d.tripcategory , d.accessarea -- , o.link1 -- , v.min_link1 -- , count(distinct(d.vtrserno)) over(partition by link1) as nvtr_link1 -- count how many vtrs for each link1 -- , count(distinct(d.area)) over(partition by link1) as narea_link1 from MAPS.CAMS_LANDINGS d group by d.permit , d.camsid , d.year , d.month , d.date_trip , d.docid , d.vtrserno , d.camsid || '_' || d.subtrip , d.geartype , d.negear , NVL(d.mesh_cat, 'na') , d.area , d.sectid , d.GF , d.activity_code_1 , d.activity_code_2 , d.EM , redfish_exemption , closed_area_exemption , sne_smallmesh_exemption , xlrg_gillnet_exemption , d.tripcategory , d.accessarea ")) # join original way join1 = land %>% left_join(., ulink, by = 'CAMSID', keep = T) %>% left_join(., obs1, by = c('OBS_LINK1'='LINK1'), keep = TRUE) #--- # join a different way.. link1's are being added from match_obs where they have been omitted from obdbs_all_years... join2 = obs1 %>% left_join(., ulink, by = c('LINK1' = 'OBS_LINK1'), keep = T) %>% right_join(., land, by = 'CAMSID') #--- # put the results together t1 = join1 %>% filter(YEAR.x >= 2017 & YEAR.x <2022) %>% group_by(YEAR.x) %>% dplyr::summarise(n_link1_v1 = n_distinct(OBS_LINK1)) %>% collect() t2 = join2 %>% filter(YEAR.x >= 2017 & YEAR.x <2022) %>% group_by(YEAR.x) %>% dplyr::summarise(n_link1_v2 = n_distinct(LINK1)) %>% collect() %>% arrange(YEAR.x) t3 = obs1 %>% filter(YEAR >= 2017 & YEAR <2022) %>% group_by(YEAR) %>% dplyr::summarise(n_link1_obs = n_distinct(LINK1)) %>% collect() %>% arrange(YEAR) t1 %>% left_join(., t2, by = 'YEAR.x') %>% dplyr::rename('YEAR' = 'YEAR.x') %>% left_join(., t3, by = 'YEAR') %>% arrange(YEAR) # look at dropped trips, if any obs_l = obs1 %>% filter(YEAR >= 2017 & YEAR <2022) %>% dplyr::select(LINK1) %>% distinct() %>% collect() %>% mutate(OBS = 1) j2_l = join2 %>% filter(YEAR.x >= 2017 & YEAR.x <2022) %>% dplyr::select(LINK1) %>% distinct() %>% collect() %>% mutate(JOIN2 = 1) obs_l %>% full_join(., j2_l, by = 'LINK1') missing = obs_l %>% full_join(., j2_l, by = 'LINK1') %>% filter(is.na(JOIN2)) %>% left_join(., ulink, by = c('LINK1' = 'OBS_LINK1'), copy = T) # full_join(., join2 %>% filter(YEAR.x >= 2017 & YEAR.x <2022), by = 'LINK1') %>% # filter(is.na(LINK1)) %>% # group_by(YEAR) %>% # summarise(dropped_trips = n_distinct(CAMS_SUBTRIP)) # look at dropped hauls: OBS to join2 obs1 %>% filter(YEAR >= 2017 & YEAR <2022) %>% full_join(., join2 %>% filter(YEAR.x >= 2017 & YEAR.x <2022), by = 'LINK1') %>% filter(is.na(LINK3.y)) %>% group_by(YEAR) %>% dplyr::summarise(dropped_hauls = n_distinct(LINK3.x)) %>% collect() %>% arrange(YEAR) # look at dropped hauls: OBS to join1... EXACTLY the same! obs1 %>% filter(YEAR >= 2017 & YEAR <2022) %>% full_join(., join1 %>% filter(YEAR.x >= 2017 & YEAR.x <2022), by = c('LINK1'='OBS_LINK1')) %>% filter(is.na(LINK3.y)) %>% group_by(YEAR) %>% dplyr::summarise(dropped_hauls = n_distinct(LINK3.x)) %>% collect() %>% arrange(YEAR) # cams_obs_catch cams_obs = tbl(con_maps, sql("select * from cams_obs_catch where year < 2022")) cams_obs_test = tbl(con_maps, sql("select * from cams_obs_catch_test where year < 2022")) # look at one year # c1 = cams_obs %>% filter(YEAR == 2021 & !is.na(LINK1)) %>% dplyr::distinct(LINK1) %>% collect() # -- looks good! all are there... c2 = join1 %>% filter(YEAR.x == 2021 & !is.na(OBS_LINK1)) %>% dplyr::distinct(OBS_LINK1) %>% collect() c3 = cams_obs_test %>% filter(YEAR == 2021 & !is.na(LINK1)) %>% dplyr::distinct(LINK1) %>% collect() # now look at join2 c4 = join2 %>% filter(YEAR.x == 2021 & !is.na(LINK1)) %>% dplyr::distinct(LINK1) %>% collect() # which ones didn't make it .. c2 %>% full_join(., c1, by = c('OBS_LINK1' = 'LINK1')) %>% View() merge(c2, c1, by.x = 'OBS_LINK1', by.y = 'LINK1', all = T) %>% View() tbl(con_maps, sql("select * from cams_obdbs_all_years where link1 = '230202103D30003'")) tbl(con_maps, sql("select * from cams_obs_catch where link1 = '230202103D30003'")) tbl(con_maps, sql("select * from cams_obs_catch_test where link1 = '230202103D30003'")) --select count(distinct(link1)) --from join_1 --where link1 not in (select distinct(link1) from cams_obs_catch where year < 2022) -- also, good... all are there... WTF?! --select count(distinct(link3)) --from join_1 --where link3 not in (select distinct(link3) from cams_obs_catch where year < 2022) , trips as (select j.* , NVL(g.SECGEAR_MAPPED, 'G_OTH') as SECGEAR_MAPPED from join_1 j left join ( select distinct(NEGEAR) as VTR_NEGEAR , SECGEAR_MAPPED from MAPS.STG_OBS_VTR_GEARMAP where NEGEAR is not null ) g on j.NEGEAR = g.VTR_NEGEAR ) -- get observer data -- join to gearmapping for match , obs as ( select a.* , NVL(g.SECGEAR_MAPPED, 'G_OTH') as SECGEAR_MAPPED , i.ITIS_TSN -- , i.ITIS_GROUP1 from OBS1 a left join ( select distinct(NEGEAR) as OBS_NEGEAR , SECGEAR_MAPPED from maps.STG_OBS_VTR_GEARMAP where NEGEAR is not null ) g on a.OBS_GEAR = g.OBS_NEGEAR left join(select * from maps.CFG_NESPP3_ITIS ) i --where SRCE_ITIS_STAT = 'valid' on a.NESPP3 = i.DLR_NESPP3 ) -- /* staged matching trips with no link1 (unobserved) */ , trips_null as ( select t.* -- , o.vtrserno as obsvtr , o.link1 as obs_link1 , o.link3 , o.obsrflag , o.obs_area as obs_area , o.nespp3 , o.ITIS_TSN -- , o.ITIS_GROUP1 -- , o.discard_prorate as discard , o.discard , o.obs_haul_kept -- , o.obs_haul_kall_trip+o.obs_nohaul_kall_trip as obs_kall , o.obs_gear as obs_gear , o.obs_mesh as obs_mesh , NVL(o.meshgroup, 'none') as obs_meshgroup from trips t left join (select * from obs ) o on (t.link1 = o.link1) where (t.LINK1 is null) ) -- trips with no VTR but still observed i.e. clam trips.. , trips_0 as ( select t.* -- , o.vtrserno as obsvtr , o.link1 as obs_link1 , o.link3 , o.obsrflag , o.obs_area as obs_area , o.nespp3 , o.ITIS_TSN -- , o.ITIS_GROUP1 -- , o.discard_prorate as discard , o.discard , o.obs_haul_kept -- , o.obs_haul_kall_trip+o.obs_nohaul_kall_trip as obs_kall , o.obs_gear as obs_gear , o.obs_mesh as obs_mesh , NVL(o.meshgroup, 'none') as obs_meshgroup from trips t left join (select * from obs ) o on (t.link1 = o.link1) where nvtr_link1 = 1 and t.link1 is not null ) -- obs trips with exactly one VTR , trips_1 as ( select t.* -- , o.obsvtr as obsvtr , o.link1 as obs_link1 , o.link3 , o.obsrflag , o.obs_area as obs_area , o.nespp3 , o.ITIS_TSN -- , o.ITIS_GROUP1 -- , o.discard_prorate as discard , o.discard , o.obs_haul_kept -- , o.obs_haul_kall_trip+o.obs_nohaul_kall_trip as obs_kall , o.obs_gear as obs_gear , o.obs_mesh as obs_mesh , NVL(o.meshgroup, 'none') as obs_meshgroup from ( select t.* from trips t ) t left join (select * from obs ) o on (t.link1 = o.link1) where nvtr_link1 = 1 and t.link1 is not null -- and t.vtrserno is not null -- take this out.. -- and t.year = 2019 ) -- trips with >1 link1 , trips_2_area_1 as ( select t.* -- , o.obsvtr as obsvtr , o.link1 as obs_link1 , o.link3 , o.obsrflag , o.obs_area as obs_area , o.nespp3 , o.ITIS_TSN , o.discard , o.obs_haul_kept , o.obs_gear as obs_gear , o.obs_mesh as obs_mesh , NVL(o.meshgroup, 'none') as obs_meshgroup from ( select t.* from trips t ) t left join (select * from obs ) o on (o.link1 = t.link1 AND o.SECGEAR_MAPPED = t.SECGEAR_MAPPED AND o.meshgroup = t.meshgroup) -- don't use area when narea = 1 where (nvtr_link1 > 1 AND nvtr_link1 < 20) -- there should never be more than a few subtrips.. zeros add up to lots, so we dont' want those here and narea_link1 = 1 and t.link1 is not null and t.cams_subtrip is not null ) -- trips with >1 link1 and multiple areas on link1s , trips_2_area_2 as ( select t.* -- , o.obsvtr as obsvtr , o.link1 as obs_link1 , o.link3 , o.obsrflag , o.obs_area as obs_area , o.nespp3 , o.ITIS_TSN , o.discard , o.obs_haul_kept , o.obs_gear as obs_gear , o.obs_mesh as obs_mesh , NVL(o.meshgroup, 'none') as obs_meshgroup from ( select t.* from trips t ) t left join (select * from obs ) o on (o.link1 = t.link1 AND o.SECGEAR_MAPPED = t.SECGEAR_MAPPED AND o.meshgroup = t.meshgroup AND o.OBS_AREA = t.AREA) -- use area when narea >1 where (nvtr_link1 > 1 AND nvtr_link1 < 20) -- there should never be more than a few subtrips.. zeros add up to lots, so we dont' want those here and narea_link1 > 1 and t.link1 is not null and t.cams_subtrip is not null ) , obs_catch as ( select * from trips_null union all select * from trips_0 union all select * from trips_1 union all select * from trips_2_area_1 union all select * from trips_2_area_2 ) , obs_catch_2 as ( /* add OBS KALL amounts, prorated discard and find duped subtrips */ select a.* , count(distinct(a.cams_subtrip)) OVER(PARTITION BY a.link3) as n_subtrips_link3 -- finds duped link3.. , round(SUM(case when a.obsrflag = 1 then a.obs_haul_kept else 0 end) OVER(PARTITION BY a.cams_subtrip)) as obs_haul_kall_trip , round(SUM(case when a.obsrflag = 0 then a.obs_haul_kept else 0 end) OVER(PARTITION BY a.cams_subtrip)) as obs_nohaul_kall_trip --, round(SUM(a.obs_haul_kept) OVER(PARTITION BY a.cams_subtrip)) , round(SUM(case when a.obsrflag = 1 then a.obs_haul_kept else 0 end) OVER(PARTITION BY a.cams_subtrip)) as OBS_KALL -- will be the same as obs_haul_kall_trip , SUM(a.obs_haul_kept) OVER(PARTITION BY a.cams_subtrip) / NULLIF(SUM(case when a.obsrflag = 1 then a.obs_haul_kept else 0 end) OVER(PARTITION BY a.cams_subtrip),0) as prorate , round((SUM(a.obs_haul_kept) OVER(PARTITION BY a.cams_subtrip) / NULLIF(SUM(case when a.obsrflag = 1 then a.obs_haul_kept else 0 end) OVER(PARTITION BY a.cams_subtrip),0)*a.discard), 2) as discard_prorate from obs_catch a ) /* now deal with link3 dupes */ select ACCESSAREA ,ACTIVITY_CODE_1 ,AREA ,CAMSID ,CAMS_SUBTRIP ,CLOSED_AREA_EXEMPTION , case when n_subtrips_link3 > 1 THEN DISCARD/n_subtrips_link3 ELSE DISCARD end as DISCARD , case when n_subtrips_link3 > 1 THEN DISCARD_PRORATE/n_subtrips_link3 ELSE DISCARD_PRORATE end as DISCARD_PRORATE ,DATE_TRIP ,DOCID ,EM ,GEARTYPE ,GF --,HALFOFYEAR ,ITIS_TSN ,LINK1 ,LINK3 ,MESHGROUP ,MONTH ,NEGEAR ,NESPP3 ,NVTR_LINK1 ,OBSRFLAG --,OBSVTR ,OBS_AREA ,OBS_GEAR , case when n_subtrips_link3 > 1 THEN OBS_HAUL_KALL_TRIP/n_subtrips_link3 ELSE OBS_HAUL_KALL_TRIP end as OBS_HAUL_KALL_TRIP , case when n_subtrips_link3 > 1 THEN OBS_HAUL_KEPT/n_subtrips_link3 ELSE OBS_HAUL_KEPT end as OBS_HAUL_KEPT , case when n_subtrips_link3 > 1 THEN OBS_NOHAUL_KALL_TRIP/n_subtrips_link3 ELSE OBS_NOHAUL_KALL_TRIP end as OBS_NOHAUL_KALL_TRIP , case when n_subtrips_link3 > 1 THEN OBS_KALL/n_subtrips_link3 ELSE OBS_KALL end as OBS_KALL ,OBS_LINK1 ,OBS_MESH ,OBS_MESHGROUP ,PERMIT ,PRORATE ,REDFISH_EXEMPTION ,SECGEAR_MAPPED ,SECTID ,SNE_SMALLMESH_EXEMPTION ,SUBTRIP_KALL ,TRIPCATEGORY ,VTRSERNO ,XLRG_GILLNET_EXEMPTION ,YEAR from obs_catch_2 "
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.