```r
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(mgcv)
library(hookCompetition)
```
This R Markdown document runs the analysis used in the paper *A statistical censoring approach accounts for hook competition in abundance indices from longline surveys*.
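The analysis requires the `hookCompetition` package. If it is not already installed, a minimal setup sketch follows; the GitHub repository path is an assumption and may need adjusting.

```r
# install.packages("remotes")
# The repository path below is an assumption; adjust it if the package
# is hosted elsewhere.
remotes::install_github("pbs-assess/hookCompetition")
```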
First, we load the data.
species_vec = c("yelloweye rockfish", "arrowtooth flounder", "lingcod", "north pacific spiny dogfish", "pacific cod", "pacific halibut", "redbanded rockfish", "sablefish", "big skate", "longnose skate", "shortspine thornyhead") simple_species_vec = c("yelloweye", "arrowtooth", "lingcod", "dogfish", "cod", "halibut", "redbanded", "sablefish", "bigskate", "longnose", "shortspine") data <- hookCompetition::read_Data_hookcomp( species_vec = species_vec, simple_species_vec = simple_species_vec, at_PBS = F, data_wd = './', min_dist = 50, min_dist_to_boundary = 0, years_all_vec = c(1995,1996,2003:2012, 2014:2019), years_20_vec = c(1997:2002, 2013, 2020, 2021), survey_boundaries=NULL ) list2env(data, globalenv()) rm(data)
Next, we plot the observed catch counts $n_{i,t,k}$ against the proportion of baits removed during each fishing event, $p_{i,t,k}$, after controlling for the effects of year and fishing station.
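Concretely, the exploratory model fit below for each species is a negative binomial GAM of the form

$$
n_{i,t,k} \sim \mathrm{NB}(\mu_{i,t,k}, \theta), \qquad
\log \mu_{i,t,k} = \log\left(E_{i,t,k}\right) + \alpha_t + f\left(p_{i,t,k}\right) + \gamma_k,
$$

where $E_{i,t,k}$ is the effective skate (entering as an offset), $\alpha_t$ is a fixed year effect, $f(\cdot)$ is a penalized spline in the proportion of baits removed, and $\gamma_k$ is a station-level random effect (writing $k$ for the fishing station). The plotted curves are the estimated $f(\cdot)$ terms with pointwise 95% confidence intervals.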
```r
Reduced_data_sp <- Reduced_data_sp %>% filter(year >= 1998)
Reduced_data_sp$year <- Reduced_data_sp$year - min(Reduced_data_sp$year) + 1

# Restrict ourselves to the usable data
Reduced_data_sp <- Reduced_data_sp[Reduced_data_sp$usable == 'Y', ]
# Restrict ourselves to the 'standard stations'
Reduced_data_sp <- Reduced_data_sp[Reduced_data_sp$standard == 'Y', ]

Reduced_data_sp$region_INLA <- 1 # Estimate coastwide indices only
survey_boundaries <- sf::st_as_sfc(sf::st_bbox(Reduced_data_sp))

Reduced_data_sp$logeffSkateIPHC <- log(Reduced_data_sp$effSkateIPHC)
Reduced_data_sp$year_fac <- factor(Reduced_data_sp$year)
Reduced_data_sp$station <- factor(Reduced_data_sp$station)

# Create data frames for storing the results
exploratory_results <- expand.grid(
  Species = species_vec,
  Comparison = c('p=1 vs p=0.95', 'p=1 vs p=0.85'),
  Percent_Decline = 0,
  Significant_diff_0 = 0,
  Significant_higher_10perc = 0
)
propsat_results <- expand.grid(
  Species = species_vec,
  Prop_Sat = seq(from = 0.6, to = 1, by = 0.005),
  Mean = 0,
  UCL = 0,
  LCL = 0
)
effskate_results <- expand.grid(
  Species = species_vec,
  logeffSkateIPHC = seq(from = log(min(Reduced_data_sp$effSkateIPHC)),
                        to = log(max(Reduced_data_sp$effSkateIPHC)),
                        length.out = 100),
  Mean = 0,
  UCL = 0,
  LCL = 0
)

# Indices of the grid points at p = 1, 0.95, and 0.85
ind_100 <- which(seq(from = 0.6, to = 1, by = 0.005) == 1)
ind_95  <- which(seq(from = 0.6, to = 1, by = 0.005) == 0.95)
ind_85  <- which(seq(from = 0.6, to = 1, by = 0.005) == 0.85)

count <- 1
for (i in simple_species_vec) {
  mod <- mgcv::gam(
    formula = as.formula(paste0(
      'N_it_', i,
      ' ~ -1 + s(prop_removed) + offset(logeffSkateIPHC) + year_fac + s(station, bs="re")')),
    family = 'nb',
    data = Reduced_data_sp
  )
  mod2 <- mgcv::gam(
    formula = as.formula(paste0(
      'N_it_', i,
      ' ~ -1 + s(logeffSkateIPHC, k=4) + year_fac + s(station, bs="re")')),
    family = 'nb',
    data = Reduced_data_sp
  )

  pred_df <- expand.grid(prop_removed = seq(from = 0.6, to = 1, by = 0.005),
                         logeffSkateIPHC = 0, year_fac = 1, station = 1)
  pred_df2 <- expand.grid(
    logeffSkateIPHC = seq(from = log(min(Reduced_data_sp$effSkateIPHC)),
                          to = log(max(Reduced_data_sp$effSkateIPHC)),
                          length.out = 100),
    year_fac = 1, station = 1
  )

  pred_mod <- predict.gam(mod, newdata = pred_df, se.fit = TRUE,
                          type = 'terms', terms = 's(prop_removed)')
  pred_mod2 <- predict.gam(mod2, newdata = pred_df2, se.fit = TRUE,
                           type = 'terms', terms = 's(logeffSkateIPHC)')

  exploratory_results[
    exploratory_results$Species == species_vec[count],
    c('Percent_Decline', 'Significant_diff_0', 'Significant_higher_10perc')
  ] <- c(
    1 - exp(pred_mod$fit[c(ind_100, ind_100)] - pred_mod$fit[c(ind_95, ind_85)]),
    ifelse(1.96 * sqrt(pred_mod$se.fit[c(ind_100, ind_100)]^2 +
                         pred_mod$se.fit[c(ind_95, ind_85)]^2) <=
             abs(pred_mod$fit[c(ind_100, ind_100)] - pred_mod$fit[c(ind_95, ind_85)]),
           1, 0),
    ifelse(1.96 * sqrt(pred_mod$se.fit[c(ind_100, ind_100)]^2 +
                         pred_mod$se.fit[c(ind_95, ind_85)]^2) <=
             abs(pred_mod$fit[c(ind_100, ind_100)] - pred_mod$fit[c(ind_95, ind_85)] -
                   log(0.9)),
           1, 0)
  )

  propsat_results[propsat_results$Species == species_vec[count],
                  c('Mean', 'LCL', 'UCL')] <-
    c(pred_mod$fit,
      pred_mod$fit - 1.96 * pred_mod$se.fit,
      pred_mod$fit + 1.96 * pred_mod$se.fit)

  effskate_results[effskate_results$Species == species_vec[count],
                   c('Mean', 'LCL', 'UCL')] <-
    c(pred_mod2$fit,
      pred_mod2$fit - 1.96 * pred_mod2$se.fit,
      pred_mod2$fit + 1.96 * pred_mod2$se.fit)

  print(paste0('Finished processing species number ', count,
               ' out of ', length(simple_species_vec)))
  count <- count + 1
}

exploratory_results

# Estimated change in log mean catch count vs. proportion of baits removed,
# with means and 95% confidence intervals for the penalized splines
propsat_results %>%
  mutate(P_hat_star = ifelse(Species %in% c('north pacific spiny dogfish',
                                            'shortspine thornyhead'), 1,
                             ifelse(Species %in% c('redbanded rockfish', 'lingcod'),
                                    0.85, 0.95)),
         Species = str_to_title(Species)) %>%
  ggplot(aes(x = Prop_Sat, y = Mean, ymax = UCL, ymin = LCL,
             colour = Species, fill = Species)) +
  geom_line() +
  geom_ribbon(alpha = 0.2) +
  geom_vline(mapping = aes(xintercept = P_hat_star),
             data = propsat_results %>%
               mutate(P_hat_star = ifelse(Species %in% c('north pacific spiny dogfish',
                                                         'shortspine thornyhead'), 1,
                                          ifelse(Species %in% c('redbanded rockfish',
                                                                'lingcod'),
                                                 0.85, 0.95)),
                      Species = str_to_title(Species))) +
  facet_wrap(~Species, nrow = 4, ncol = 3) +
  theme(legend.position = 'none') +
  xlab('Proportion of baits removed') +
  ylab('Model-estimated residual effect of hook competition on log mean catch count')

# Estimated change in log mean catch count vs. log effective skate, with a
# fitted linear regression line (free intercept, slope fixed at 1) overlaid
effskate_results %>%
  mutate(Species = str_to_title(Species)) %>%
  ggplot(aes(x = logeffSkateIPHC, y = Mean, ymax = UCL, ymin = LCL,
             colour = Species, fill = Species)) +
  geom_line() +
  geom_ribbon(alpha = 0.2) +
  geom_smooth(formula = y ~ 1 + offset(x), method = 'lm',
              colour = 'black', se = FALSE) +
  facet_wrap(~Species, nrow = 4, ncol = 3, scales = 'free') +
  theme(legend.position = 'none') +
  xlab('Log of the effective skate') +
  ylab('Model-estimated residual effect of effective skate on log mean catch count')
```
We now fit all three indices (CPUE-based, ICR-based, and censored Poisson) to each species and plot the results.
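The censored Poisson approach is only sketched here; see the paper for the exact formulation. In outline, when the proportion of baits removed at an event is below the species-specific threshold $p^\star$, the count enters the likelihood as usual; otherwise it is treated as censored, with an optional upper bound $U_{i,t,k}$ (the `use_upper_bound = TRUE` fallback used below):

$$
L_{i,t,k} =
\begin{cases}
\Pr\left(N_{i,t,k} = n_{i,t,k}\right), & p_{i,t,k} < p^\star,\\
\Pr\left(n_{i,t,k} \le N_{i,t,k} \le U_{i,t,k}\right), & p_{i,t,k} \ge p^\star,
\end{cases}
$$

with $U_{i,t,k} = \infty$ giving the pure right-censored case recorded as 'right-censored' in `censored_poisson_convergence`.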
```r
# Create a list for storing the results
results_list <- vector('list', length = length(simple_species_vec))
names(results_list) <- simple_species_vec

# Create a character vector storing the convergence status of the censored Poisson model
censored_poisson_convergence <- rep('right-censored', length(simple_species_vec))

# Which values of p_hat_star should be used for each species, based on the earlier plots?
cprop_values <- c(0.95, 0.95, 0.85, 1, 0.95, 0.95, 0.85, 0.95, 0.95, 0.95, 1)
upper_quantile_values <- c(1.1, 1.1, 1.1, 0.85, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 0.85)

fit_mods <- FALSE
if (fit_mods) {
  count <- 1
  for (i in simple_species_vec) {
    CPUE_ind <- hookCompetition::censored_index_fun_sdmTMB(
      data = Reduced_data_sp,
      survey_boundaries = survey_boundaries,
      species = i,
      M = 1000,
      return = TRUE,
      ICR_adjust = FALSE,
      cprop = 1.1,
      keep = TRUE,
      use_upper_bound = FALSE,
      upper_bound_quantile = 1.1,
      plot = FALSE,
      allyears = FALSE,
      station_effects = TRUE,
      seed = 0,
      verbose = FALSE,
      n_trajectories = 0,
      preserve_inter_regional_differences = FALSE,
      time_effect = 'unstructured',
      twentyhook_adjust = FALSE
    )
    if (!is.null(CPUE_ind$pred_overdisp)) {
      CPUE_ind$pred_overdisp$Species <- species_vec[count]
      CPUE_ind$pred_overdisp$Method <- 'CPUE-based'
    }

    ICR_ind <- hookCompetition::censored_index_fun_sdmTMB(
      data = Reduced_data_sp,
      survey_boundaries = survey_boundaries,
      species = i,
      M = 1000,
      return = TRUE,
      ICR_adjust = TRUE,
      cprop = 1.1,
      keep = FALSE,
      use_upper_bound = FALSE,
      upper_bound_quantile = 1.1,
      plot = FALSE,
      allyears = FALSE,
      station_effects = TRUE,
      seed = 0,
      verbose = FALSE,
      n_trajectories = 0,
      preserve_inter_regional_differences = FALSE,
      time_effect = 'unstructured',
      twentyhook_adjust = FALSE
    )
    if (!is.null(ICR_ind$pred_overdisp)) {
      ICR_ind$pred_overdisp$Species <- species_vec[count]
      ICR_ind$pred_overdisp$Method <- 'ICR-based'
    }

    # First try to fit the censored Poisson model without any upper bound
    censored_ind <- hookCompetition::censored_index_fun_sdmTMB(
      data = Reduced_data_sp,
      survey_boundaries = survey_boundaries,
      species = i,
      M = 1000,
      return = TRUE,
      ICR_adjust = FALSE,
      cprop = cprop_values[count],
      keep = FALSE,
      use_upper_bound = FALSE,
      upper_bound_quantile = upper_quantile_values[count],
      plot = FALSE,
      allyears = FALSE,
      station_effects = TRUE,
      seed = 0,
      verbose = FALSE,
      n_trajectories = 0,
      prev_fit = CPUE_ind$mod,
      preserve_inter_regional_differences = FALSE,
      time_effect = 'unstructured',
      twentyhook_adjust = FALSE
    )

    # If it fails to converge, try adding an upper bound and removing
    # censorship from the upper 5% of the data
    if (is.null(censored_ind$pred_overdisp)) {
      censored_ind <- hookCompetition::censored_index_fun_sdmTMB(
        data = Reduced_data_sp,
        survey_boundaries = survey_boundaries,
        species = i,
        M = 1000,
        return = TRUE,
        ICR_adjust = FALSE,
        cprop = cprop_values[count],
        keep = FALSE,
        use_upper_bound = TRUE,
        upper_bound_quantile = 0.95,
        plot = FALSE,
        allyears = FALSE,
        station_effects = TRUE,
        seed = 0,
        verbose = FALSE,
        n_trajectories = 0,
        prev_fit = CPUE_ind$mod,
        preserve_inter_regional_differences = FALSE,
        time_effect = 'unstructured',
        twentyhook_adjust = FALSE
      )
      censored_poisson_convergence[count] <-
        'interval-censored and upper 5% of data uncensored'

      # If it fails to converge again, remove censorship from the upper 15% of the data
      if (is.null(censored_ind$pred_overdisp)) {
        censored_ind <- hookCompetition::censored_index_fun_sdmTMB(
          data = Reduced_data_sp,
          survey_boundaries = survey_boundaries,
          species = i,
          M = 1000,
          return = TRUE,
          ICR_adjust = FALSE,
          cprop = cprop_values[count],
          keep = FALSE,
          use_upper_bound = TRUE,
          upper_bound_quantile = 0.85,
          plot = FALSE,
          allyears = FALSE,
          station_effects = TRUE,
          seed = 0,
          verbose = FALSE,
          n_trajectories = 0,
          prev_fit = CPUE_ind$mod,
          preserve_inter_regional_differences = FALSE,
          time_effect = 'unstructured',
          twentyhook_adjust = FALSE
        )
        censored_poisson_convergence[count] <-
          'interval-censored and upper 15% of data uncensored'
      }
    }

    if (!is.null(censored_ind$pred_overdisp)) {
      censored_ind$pred_overdisp$Species <- species_vec[count]
      censored_ind$pred_overdisp$Method <- 'Censored'
    }

    results_list[[count]] <- rbind(CPUE_ind$pred_overdisp,
                                   ICR_ind$pred_overdisp,
                                   censored_ind$pred_overdisp)

    print(paste0('Finished processing species number ', count,
                 ' out of ', length(simple_species_vec)))
    count <- count + 1
  }
}

# Add some bootstrap indices too, since this is done in practice.
# Note that, unlike the model-based approaches, which can control for the
# effects of different stations being added and dropped each year, the
# bootstrapping approach is sensitive to such changes. Restrict the analysis
# to only the stations online for all 23 years.
if (fit_mods) {
  count <- 1
  for (i in simple_species_vec) {
    Boot_ind <- hookCompetition:::bootstrap_index_fun(
      data = Reduced_data_sp %>%
        group_by(station) %>%
        mutate(N = n()) %>%
        ungroup() %>%
        filter(N == 23),
      species = i,
      R = 1000,
      return = TRUE,
      ICR_adjust = FALSE,
      plot = FALSE,
      ncpus = 6
    )
    if (!is.null(Boot_ind)) {
      Boot_ind$Species <- species_vec[count]
      Boot_ind$Method <- 'Bootstrap CPUE'
    }
    results_list[[count]] <- rbind(results_list[[count]],
                                   Boot_ind[, names(results_list[[count]])])
    print(paste0('Finished processing species number ', count,
                 ' out of ', length(simple_species_vec)))
    count <- count + 1
  }
}

if (!fit_mods) {
  results_list <- readRDS(file = 'C:/Users/WATSONJOE/OneDrive - DFO-MPO/Documents/hookcompetition/Case Study/Case_Study_Models.rds')
}

# Plot the results
results <- do.call('rbind', results_list)

results %>%
  filter(Species %in% c('arrowtooth flounder', 'sablefish',
                        'north pacific spiny dogfish'),
         Method %in% c('Bootstrap CPUE', 'Censored')) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Species = 'Proportion of Events with >85% Baits Removed',
                        mean = mean(prop_removed > 0.85),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        Method = 'Bootstrap CPUE') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Species = 'Proportion of Events with >95% Baits Removed',
                        mean = mean(prop_removed > 0.95),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        Method = 'Bootstrap CPUE') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Species = 'Proportion of Events with 100% Baits Removed',
                        mean = mean(prop_removed == 1),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        Method = 'Bootstrap CPUE') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         Species = factor(Species,
                          levels = c('Arrowtooth Flounder', 'Sablefish',
                                     'North Pacific Spiny Dogfish',
                                     'Proportion Of Events With >85% Baits Removed',
                                     'Proportion Of Events With >95% Baits Removed',
                                     'Proportion Of Events With 100% Baits Removed'),
                          ordered = TRUE),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method, shape = Method)) +
  geom_point(position = position_dodge(width = 0.7), size = 2) +
  geom_errorbar(position = position_dodge(width = 0.7)) +
  facet_wrap(~Species, nrow = 4, ncol = 3, scales = 'free') +
  theme(legend.position = 'left') +
  xlab('Year') +
  ylab('Estimated relative abundance or proportion of events') +
  ggtitle('Relative abundance vs. year for competing estimators',
          subtitle = 'Mean and 95% confidence intervals shown')

results %>%
  filter(Species %in% c('lingcod', 'arrowtooth flounder',
                        'north pacific spiny dogfish')) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored', Species = 'lingcod',
                        mean = mean(prop_removed > 0.85),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored',
                        Species = 'arrowtooth flounder',
                        mean = mean(prop_removed > 0.95),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored',
                        Species = 'north pacific spiny dogfish',
                        mean = mean(prop_removed == 1),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         Species = factor(Species,
                          levels = c('Lingcod', 'Arrowtooth Flounder',
                                     'North Pacific Spiny Dogfish',
                                     'Proportion Of Events With >85% Baits Removed',
                                     'Proportion Of Events With >95% Baits Removed',
                                     'Proportion Of Events With 100% Baits Removed'),
                          ordered = TRUE),
         year = as.numeric(year) + 1997,
         Method = factor(Method,
                         levels = c('Bootstrap CPUE', 'CPUE-based', 'ICR-based',
                                    'Censored', 'Proportion Censored'),
                         ordered = TRUE)) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method)) +
  geom_point(position = position_dodge(width = 0.7), size = 2) +
  geom_errorbar(position = position_dodge(width = 0.7)) +
  facet_wrap(Species ~ Method, scales = 'free', nrow = 3, ncol = 5) +
  theme(legend.position = 'none') +
  xlab('Year') +
  ylab('Estimated relative abundance or proportion of events') +
  ggtitle('Relative abundance vs. year for competing estimators',
          subtitle = 'Mean and 95% confidence intervals shown. Exploratory analysis has determined that a fishing event is censored if > 85%, \n> 95%, and 100% of baits have been removed for Lingcod, Arrowtooth Flounder, and Spiny Dogfish respectively.')

results %>%
  filter(Species %in% c('lingcod', 'arrowtooth flounder',
                        'north pacific spiny dogfish')) %>%
  group_by(Species) %>%
  mutate(min = 0, max = max(q0.975) + 0.1) %>%
  ungroup() %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored', Species = 'lingcod',
                        mean = mean(prop_removed > 0.85),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        max = 1, min = 0) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored',
                        Species = 'arrowtooth flounder',
                        mean = mean(prop_removed > 0.95),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        max = 1, min = 0) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Method = 'Proportion Censored',
                        Species = 'north pacific spiny dogfish',
                        mean = mean(prop_removed == 1),
                        region = 'All', q0.975 = NA, q0.025 = NA, sd = NA,
                        max = 1, min = 0) %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         Species = factor(Species,
                          levels = c('Lingcod', 'Arrowtooth Flounder',
                                     'North Pacific Spiny Dogfish',
                                     'Proportion Of Events With >85% Baits Removed',
                                     'Proportion Of Events With >95% Baits Removed',
                                     'Proportion Of Events With 100% Baits Removed'),
                          ordered = TRUE),
         year = as.numeric(year) + 1997,
         Method = factor(Method,
                         levels = c('Bootstrap CPUE', 'CPUE-based', 'ICR-based',
                                    'Censored', 'Proportion Censored'),
                         ordered = TRUE)) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method)) +
  geom_point(position = position_dodge(width = 0.7), size = 2) +
  geom_errorbar(position = position_dodge(width = 0.7)) +
  facet_wrap(Species ~ Method, scales = 'free', nrow = 3, ncol = 5,
             labeller = labeller(
               Method = c(`Bootstrap CPUE` = 'Bootstrap CPUE',
                          `CPUE-based` = 'CPUE',
                          `ICR-based` = 'ICR',
                          Censored = 'Censored',
                          `Proportion Censored` = 'Proportion Censored'))) +
  geom_hline(mapping = aes(yintercept = max), colour = 'white', size = 1e-3) +
  geom_hline(mapping = aes(yintercept = min), colour = 'white', size = 1e-3) +
  scale_colour_viridis_d(begin = 0, end = 0.9, option = 'A') +
  theme(legend.position = 'none') +
  xlab('Year') +
  ylab('Estimated relative abundance or proportion of events')

# Percentage change in relative abundance estimates vs. the proportion of
# fishing events with > 95% of baits removed. Percentage change is computed
# for the censored Poisson estimates relative to the CPUE-based estimates.
results %>%
  filter(Species %in% species_vec[cprop_values == 0.95],
         Method %in% c('CPUE-based', 'Censored')) %>%
  group_by(year, Species) %>%
  summarise(Diff = 100 * ((mean[Method == 'Censored'] -
                             mean[Method == 'CPUE-based']) /
                            mean[Method == 'CPUE-based'])) %>%
  ungroup() %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Prop_Removed_85 = mean(prop_removed > 0.85),
                        Prop_Removed_95 = mean(prop_removed > 0.95),
                        Prop_Removed_100 = mean(prop_removed == 1),
                        region = 'All') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = Prop_Removed_95, y = Diff,
             colour = Species, group = Species, fill = Species)) +
  geom_smooth(alpha = 0.4) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = mean(Reduced_data_sp$prop_removed > 0.95)) +
  theme(legend.position = 'none') +
  ylab('Percentage change in estimated relative abundance') +
  xlab('Proportion of fishing events with > 95% baits removed') +
  facet_wrap(~Species)

# As above, for the species censored once > 85% of baits have been removed
results %>%
  filter(Species %in% species_vec[cprop_values == 0.85],
         Method %in% c('CPUE-based', 'Censored')) %>%
  group_by(year, Species) %>%
  summarise(Diff = 100 * ((mean[Method == 'Censored'] -
                             mean[Method == 'CPUE-based']) /
                            mean[Method == 'CPUE-based'])) %>%
  ungroup() %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Prop_Removed_85 = mean(prop_removed > 0.85),
                        Prop_Removed_95 = mean(prop_removed > 0.95),
                        Prop_Removed_100 = mean(prop_removed == 1),
                        region = 'All') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = Prop_Removed_85, y = Diff,
             colour = Species, group = Species, fill = Species)) +
  geom_smooth(alpha = 0.4) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = mean(Reduced_data_sp$prop_removed > 0.85)) +
  theme(legend.position = 'none') +
  ylab('Percentage change in estimated relative abundance') +
  xlab('Proportion of fishing events with > 85% baits removed') +
  facet_wrap(~Species)

# As above, for the species censored only once 100% of baits have been removed
results %>%
  filter(Species %in% species_vec[cprop_values == 1],
         Method %in% c('CPUE-based', 'Censored')) %>%
  group_by(year, Species) %>%
  summarise(Diff = 100 * ((mean[Method == 'Censored'] -
                             mean[Method == 'CPUE-based']) /
                            mean[Method == 'CPUE-based'])) %>%
  ungroup() %>%
  full_join(sf::st_drop_geometry(Reduced_data_sp) %>%
              group_by(year) %>%
              summarize(Prop_Removed_85 = mean(prop_removed > 0.85),
                        Prop_Removed_95 = mean(prop_removed > 0.95),
                        Prop_Removed_100 = mean(prop_removed == 1),
                        region = 'All') %>%
              mutate(year = factor(year)) %>%
              ungroup()) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = Prop_Removed_100, y = Diff,
             colour = Species, group = Species, fill = Species)) +
  geom_smooth(alpha = 0.4) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = mean(Reduced_data_sp$prop_removed == 1)) +
  theme(legend.position = 'none') +
  ylab('Percentage change in estimated relative abundance') +
  xlab('Proportion of fishing events with 100% of baits removed') +
  facet_wrap(~Species)

# Relative abundance estimates vs. year for the species judged to be affected
# by hook competition once 95% of baits have been removed
results %>%
  filter(Species %in% species_vec[cprop_values == 0.95],
         !(Method %in% c('Bootstrap CPUE'))) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method, shape = Method)) +
  geom_point(position = position_dodge(width = 1), size = 2) +
  geom_errorbar(position = position_dodge(width = 1)) +
  facet_wrap(~Species, scales = 'free') +
  theme(legend.position = 'left', axis.title.x = element_blank()) +
  ylab('Estimated relative abundance') +
  theme(legend.position = c(0.5, 0.15))

# Relative abundance estimates vs. year for the species judged to be affected
# by hook competition once 85% of baits have been removed
results %>%
  filter(Species %in% species_vec[cprop_values == 0.85],
         !(Method %in% c('Bootstrap CPUE'))) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method, shape = Method)) +
  geom_point(position = position_dodge(width = 1), size = 2) +
  geom_errorbar(position = position_dodge(width = 1)) +
  facet_wrap(~Species, scales = 'free') +
  theme(legend.position = 'left') +
  xlab('Year') +
  ylab('Estimated relative abundance') +
  theme(legend.position = c(0.9, 0.8))

# Relative abundance estimates vs. year for the species judged to be affected
# by gear saturation only once 100% of baits have been removed
results %>%
  filter(Species %in% species_vec[cprop_values == 1],
         !(Method %in% c('Bootstrap CPUE'))) %>%
  mutate(Species = str_to_title(Species),
         year = as.numeric(year) + 1997) %>%
  ggplot(aes(x = year, y = mean, ymax = q0.975, ymin = q0.025,
             colour = Method, group = Method, shape = Method)) +
  geom_point(position = position_dodge(width = 1), size = 2) +
  geom_errorbar(position = position_dodge(width = 1)) +
  facet_wrap(~Species, scales = 'free') +
  theme(legend.position = 'left') +
  xlab('Year') +
  ylab('Estimated relative abundance') +
  theme(legend.position = c(0.9, 0.8))
```
Next, we compute metrics to compare the estimators. Because data are limited in the first two years, we restrict our comparisons to the years 1998-2021.
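As a toy illustration of the interval-overlap metric computed below (the numbers are hypothetical): an interval from one method counts as overlapping when either of its endpoints falls strictly inside the other method's interval.

```r
# Hypothetical 95% interval endpoints for two methods across four years
a_lo <- c(0.8, 0.9, 0.7, 1.0); a_hi <- c(1.2, 1.4, 1.1, 1.6)
b_lo <- c(0.9, 1.3, 0.6, 1.1); b_hi <- c(1.3, 1.8, 0.9, 1.5)
# Mirrors the Percentage_Overlap_* calculations below: an endpoint of method A
# lying strictly inside method B's interval counts as an overlap
mean((a_lo > b_lo & a_lo < b_hi) | (a_hi > b_lo & a_hi < b_hi))
#> 0.75 for these toy intervals
```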
```r
results_summary <- results %>%
  group_by(Species, region) %>%
  summarise(
    Correlation_PI = ifelse(
      sum(Method == 'CPUE-based') > 0 & sum(Method == 'ICR-based') > 0,
      cor(mean[Method == 'CPUE-based'], mean[Method == 'ICR-based'],
          use = 'pairwise.complete.obs', method = 'spearman'), NA),
    Correlation_PC = ifelse(
      sum(Method == 'CPUE-based') > 0 & sum(Method == 'Censored') > 0,
      cor(mean[Method == 'CPUE-based'], mean[Method == 'Censored'],
          use = 'pairwise.complete.obs', method = 'spearman'), NA),
    Correlation_IC = ifelse(
      sum(Method == 'Censored') > 0 & sum(Method == 'ICR-based') > 0,
      cor(mean[Method == 'Censored'], mean[Method == 'ICR-based'],
          use = 'pairwise.complete.obs', method = 'spearman'), NA),
    Percentage_Overlap_PI = ifelse(
      sum(Method == 'CPUE-based') > 0 & sum(Method == 'ICR-based') > 0,
      mean((q0.025[Method == 'CPUE-based'] > q0.025[Method == 'ICR-based'] &
              q0.025[Method == 'CPUE-based'] < q0.975[Method == 'ICR-based']) |
             (q0.975[Method == 'CPUE-based'] > q0.025[Method == 'ICR-based'] &
                q0.975[Method == 'CPUE-based'] < q0.975[Method == 'ICR-based'])), NA),
    Percentage_Overlap_PC = ifelse(
      sum(Method == 'CPUE-based') > 0 & sum(Method == 'Censored') > 0,
      mean((q0.025[Method == 'CPUE-based'] > q0.025[Method == 'Censored'] &
              q0.025[Method == 'CPUE-based'] < q0.975[Method == 'Censored']) |
             (q0.975[Method == 'CPUE-based'] > q0.025[Method == 'Censored'] &
                q0.975[Method == 'CPUE-based'] < q0.975[Method == 'Censored'])), NA),
    Percentage_Overlap_IC = ifelse(
      sum(Method == 'Censored') > 0 & sum(Method == 'ICR-based') > 0,
      mean((q0.025[Method == 'ICR-based'] > q0.025[Method == 'Censored'] &
              q0.025[Method == 'ICR-based'] < q0.975[Method == 'Censored']) |
             (q0.975[Method == 'ICR-based'] > q0.025[Method == 'Censored'] &
                q0.975[Method == 'ICR-based'] < q0.975[Method == 'Censored'])), NA),
    Interval_Width_Poisson = ifelse(
      sum(Method == 'CPUE-based') > 0,
      median(q0.975[Method == 'CPUE-based'] / q0.025[Method == 'CPUE-based']), NA),
    Interval_Width_ICR = ifelse(
      sum(Method == 'ICR-based') > 0,
      median(q0.975[Method == 'ICR-based'] / q0.025[Method == 'ICR-based']), NA),
    Interval_Width_Censored = ifelse(
      sum(Method == 'Censored') > 0,
      median(q0.975[Method == 'Censored'] / q0.025[Method == 'Censored']), NA),
    MAD_Poisson = ifelse(sum(Method == 'CPUE-based') > 0,
                         mad(mean[Method == 'CPUE-based']), NA),
    MAD_ICR = ifelse(sum(Method == 'ICR-based') > 0,
                     mad(mean[Method == 'ICR-based']), NA),
    MAD_Censored = ifelse(sum(Method == 'Censored') > 0,
                          mad(mean[Method == 'Censored']), NA),
    Range_Poisson = ifelse(
      sum(Method == 'CPUE-based') > 0,
      range(mean[Method == 'CPUE-based'])[2] / range(mean[Method == 'CPUE-based'])[1], NA),
    Range_ICR = ifelse(
      sum(Method == 'ICR-based') > 0,
      range(mean[Method == 'ICR-based'])[2] / range(mean[Method == 'ICR-based'])[1], NA),
    Range_Censored = ifelse(
      sum(Method == 'Censored') > 0,
      range(mean[Method == 'Censored'])[2] / range(mean[Method == 'Censored'])[1], NA),
    Percentage_Unchanged_Poisson = ifelse(
      sum(Method == 'CPUE-based') > 0,
      mean(q0.975[Method == 'CPUE-based'] >= 1 & q0.025[Method == 'CPUE-based'] <= 1), NA),
    Percentage_Unchanged_ICR = ifelse(
      sum(Method == 'ICR-based') > 0,
      mean(q0.975[Method == 'ICR-based'] >= 1 & q0.025[Method == 'ICR-based'] <= 1), NA),
    Percentage_Unchanged_Censored = ifelse(
      sum(Method == 'Censored') > 0,
      mean(q0.975[Method == 'Censored'] >= 1 & q0.025[Method == 'Censored'] <= 1), NA)
  ) %>%
  ungroup() %>%
  summarise(
    Comparison = rep(c('CPUE-based vs ICR-based', 'CPUE-based vs Censored',
                       'ICR-based vs Censored'), times = 6),
    Measure = rep(c('Correlation', 'Percentage Overlap', 'Interval Width',
                    'MAD', 'Range', 'Percentage Unchanged'), each = 3),
    Median = c(
      median(Correlation_PI, na.rm = TRUE),
      median(Correlation_PC, na.rm = TRUE),
      median(Correlation_IC, na.rm = TRUE),
      median(Percentage_Overlap_PI, na.rm = TRUE),
      median(Percentage_Overlap_PC, na.rm = TRUE),
      median(Percentage_Overlap_IC, na.rm = TRUE),
      median(Interval_Width_Poisson - Interval_Width_ICR, na.rm = TRUE),
      median(Interval_Width_Poisson - Interval_Width_Censored, na.rm = TRUE),
      median(Interval_Width_ICR - Interval_Width_Censored, na.rm = TRUE),
      median(MAD_Poisson - MAD_ICR, na.rm = TRUE),
      median(MAD_Poisson - MAD_Censored, na.rm = TRUE),
      median(MAD_ICR - MAD_Censored, na.rm = TRUE),
      median(Range_Poisson - Range_ICR, na.rm = TRUE),
      median(Range_Poisson - Range_Censored, na.rm = TRUE),
      median(Range_ICR - Range_Censored, na.rm = TRUE),
      median(Percentage_Unchanged_Poisson - Percentage_Unchanged_ICR, na.rm = TRUE),
      median(Percentage_Unchanged_Poisson - Percentage_Unchanged_Censored, na.rm = TRUE),
      median(Percentage_Unchanged_ICR - Percentage_Unchanged_Censored, na.rm = TRUE)),
    MAD = c(
      mad(Correlation_PI, na.rm = TRUE),
      mad(Correlation_PC, na.rm = TRUE),
      mad(Correlation_IC, na.rm = TRUE),
      mad(Percentage_Overlap_PI, na.rm = TRUE),
      mad(Percentage_Overlap_PC, na.rm = TRUE),
      mad(Percentage_Overlap_IC, na.rm = TRUE),
      mad(Interval_Width_Poisson - Interval_Width_ICR, na.rm = TRUE),
      mad(Interval_Width_Poisson - Interval_Width_Censored, na.rm = TRUE),
      mad(Interval_Width_ICR - Interval_Width_Censored, na.rm = TRUE),
      mad(MAD_Poisson - MAD_ICR, na.rm = TRUE),
      mad(MAD_Poisson - MAD_Censored, na.rm = TRUE),
      mad(MAD_ICR - MAD_Censored, na.rm = TRUE),
      mad(Range_Poisson - Range_ICR, na.rm = TRUE),
      mad(Range_Poisson - Range_Censored, na.rm = TRUE),
      mad(Range_ICR - Range_Censored, na.rm = TRUE),
      mad(Percentage_Unchanged_Poisson - Percentage_Unchanged_ICR, na.rm = TRUE),
      mad(Percentage_Unchanged_Poisson - Percentage_Unchanged_Censored, na.rm = TRUE),
      mad(Percentage_Unchanged_ICR - Percentage_Unchanged_Censored, na.rm = TRUE)),
    N_comparisons = c(
      sum(!is.na(Correlation_PI)),
      sum(!is.na(Correlation_PC)),
      sum(!is.na(Correlation_IC)),
      sum(!is.na(Percentage_Overlap_PI)),
      sum(!is.na(Percentage_Overlap_PC)),
      sum(!is.na(Percentage_Overlap_IC)),
      sum(!is.na(Interval_Width_Poisson - Interval_Width_ICR)),
      sum(!is.na(Interval_Width_Poisson - Interval_Width_Censored)),
      sum(!is.na(Interval_Width_ICR - Interval_Width_Censored)),
      sum(!is.na(MAD_Poisson - MAD_ICR)),
      sum(!is.na(MAD_Poisson - MAD_Censored)),
      sum(!is.na(MAD_ICR - MAD_Censored)),
      sum(!is.na(Range_Poisson - Range_ICR)),
      sum(!is.na(Range_Poisson - Range_Censored)),
      sum(!is.na(Range_ICR - Range_Censored)),
      sum(!is.na(Percentage_Unchanged_Poisson - Percentage_Unchanged_ICR)),
      sum(!is.na(Percentage_Unchanged_Poisson - Percentage_Unchanged_Censored)),
      sum(!is.na(Percentage_Unchanged_ICR - Percentage_Unchanged_Censored))
    )
  )

results_summary$Comparison <- factor(results_summary$Comparison,
                                     levels = c('CPUE-based vs ICR-based',
                                                'CPUE-based vs Censored',
                                                'ICR-based vs Censored'),
                                     ordered = TRUE)
results_summary$Measure <- factor(results_summary$Measure,
                                  levels = c('Correlation', 'Percentage Overlap',
                                             'Interval Width', 'MAD', 'Range',
                                             'Percentage Unchanged'),
                                  ordered = TRUE)
```
Finally, we plot our results.
```r
# Results are aggregated over the 11 species and 23 years, with 95% confidence
# intervals computed assuming the 11 species are a random sample from the
# population of all species caught by the IPHC survey
results_summary %>%
  mutate(Median = ifelse(Measure == 'Percentage Unchanged', 100 * Median, Median),
         MAD = ifelse(Measure == 'Percentage Unchanged', 100 * MAD, MAD)) %>%
  ggplot(aes(x = Comparison, y = Median,
             ymax = Median + 1.96 * (MAD / sqrt(N_comparisons)),
             ymin = Median - 1.96 * (MAD / sqrt(N_comparisons)),
             colour = Comparison, group = Comparison, shape = Comparison)) +
  geom_errorbar() +
  geom_point(size = 3) +
  geom_hline(mapping = aes(yintercept = ifelse(
    Measure %in% c('Correlation', 'Percentage Overlap'), 1, 0))) +
  facet_wrap(~Measure, scales = 'free_y', nrow = 3, ncol = 2) +
  ylab('Median correlation, percentage overlap, or difference') +
  ggtitle('Comparison of relative abundance estimates from the three estimators') +
  xlab('Estimators Being Compared') +
  theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
```