#Working script for Chapter 4
#Evaluating catch-quota balancing under different TAC configurations in a catch
#share fishery
setwd('/Users/peterkuriyama/School/Research/ch4')
library(devtools)
devtools::install_github('peterkuriyama/ch4', auth_token = '99e5af72394589ca13957dcc6c0e411fd8d047e6')
#Make sure that packages are loaded
library(ch4)
library(plyr)
library(ggplot2)
library(dplyr)
library(reshape2)
library(parallel)
library(lubridate)
#---------------------------------------------------------------------------------
#Start of Analysis
#Load the Tow Data
# wc_data <- load_tow_data() #Data should have spatial ids with years and without years
# #Data Processing initially
# #Try sampling of data to stay within the TAC of multiple species
# wc_data <- wc_data[-grep("\\.", wc_data$row_name), ]
# save(wc_data, file = 'output/unique_wc_tows.Rdata')
#Load Unique tows
load('output/unique_wc_tows.Rdata')
#Load states map
states_map <- map_data("state")
#Plot clusters function
# plot_clusts()
#Do the bin2d thing
names(wc_data)
write.csv(names(wc_data), row.names = FALSE, file = 'output/logbook_names.csv')
#---------------------------------------------------------------------------------
#Filter so that only tows with ha_ratio between 0.6 and 1.2 are kept
wc_data$ha_ratio <- wc_data$hpounds / wc_data$apounds
#Lose about half of the tows
wc_data_orig <- wc_data
#---------------------------------------------------------------------------------
load("/Users/peterkuriyama/School/Research/ch2_vms_old/data/LBKDATA_Barnett_Logbook_Data_2002_2014_2015-11-24.Rda")
lewis <- LBK.out
names(lewis) <- tolower(names(lewis))
lewis <- lewis %>% filter(ryear >= 2008 & ryear <= 2013)
paste(c(1:5), collapse = ' ')
peter_logbook <- lewis %>% group_by(agid, ryear) %>% summarize(peter_nunq_tows = length(unique(haul_id)),
nunq_vessels = length(unique(drvid)), avg_duration = round(mean(duration, na.rm = T), digits = 1),
avg_uplat = round(mean(up_lat, na.rm = T), digits = 1),
avg_uplong = round(mean(up_long, na.rm = TRUE), digits = 1), avg_depth1 = round(mean(depth1), digits = 1),
targets = paste0(unique(target), collapse = " ")) %>%
as.data.frame
names(peter_logbook) <- c('agid', 'ryear', 'peter_nunq_tows', 'nunq_vessels', "peter_avg_duration",
'peter_avg_uplat', 'peter_avg_uplong', 'peter_avg_depth1', 'targets')
targs <- peter_logbook$targets
peter_logbook$targets <- NULL
#Read in Dan's data
dan_logbook <- read.csv('output/dan_logbook.csv')
names(dan_logbook) <- c('agid', 'ryear', 'dan_nunq_tows', 'sumofduration', 'dan_avg_duration', 'dan_avg_uplat',
"dan_avg_uplong", 'dan_avg_depth1')
comp <- left_join(peter_logbook, dan_logbook, by = c('agid', 'ryear'))
#Calculate differences
comp$diff_nunq_tows <- comp$peter_nunq_tows - comp$dan_nunq_tows
comp$diff_avg_duration <- comp$peter_avg_duration - comp$dan_avg_duration
comp$diff_avg_uplat <- comp$peter_avg_uplat - comp$dan_avg_uplat
comp$diff_avg_uplong <- comp$peter_avg_uplong - comp$dan_avg_uplong
comp$diff_avg_depth1 <- comp$peter_avg_depth1 - comp$dan_avg_depth1
#Organize the column by metric
comp1 <- comp[, c(1, 2, 4, grep('nunq_tows', names(comp)),
c(5, 11, 16),
grep('uplat', names(comp)),
grep('uplong', names(comp)),
grep('depth1', names(comp)))]
comp1$targets <- targs
write.csv(comp1, 'output/dan_peter_comparison.csv', row.names = FALSE)
#Combine the two
#Check numbers with dan holland
wc_data_orig %>% group_by(agid, ryear) %>% summarize(count = n(), nunq_tos = length(unique(haul_id)),
avgduration = mean(duration), avguplat = mean(up_lat), avguplong = mean(up_long), avgdepth = mean(depth1))
unique(lewis$gr_sector)
#Filter data based on ratios, removes about 50% of the data
# filt_dat <- wc_data %>% filter(ha_ratio >= 0.6 & ha_ratio <= 1.2)
# input <- filt_dat
#---------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
#Analaysis
#Data are now filtered so hailed and adjusted pounds in relative agreement
#ch4_format_data pulls only the top100 clusters (by number of tows)
#takes 50 seconds for top100, 1.3 minutes for unfiltereed
top100 <- ch4_format_data(filt_dat, top100 = TRUE)
# clusts <- ch4_format_data(filt_dat, top100 = FALSE)
#Add date
top100_clusts$date <- paste0(top100_clusts$dyear, "-", top100_clusts$dmonth, "-",
top100_clusts$dday)
top100_clusts$date <- ymd(top100_clusts$date)
#Table of significance; slightly different, look into at some point
top100_clusts %>% distinct(unq_clust, sig_ntows) %>% nrow
top100_clusts %>% distinct(unq_clust, sig_ntows) %>% group_by(sig_ntows) %>%
summarize(sigtows = length(unique(unq_clust)) / 100)
top100_clusts %>% distinct(unq_clust, sig_nvess) %>% group_by(sig_nvess) %>%
summarize(sigtows = length(unique(unq_clust)) / 100)
#Add on species category percentages
top100_clusts <- top100_clusts %>% group_by(dyear, unq_clust, type) %>%
mutate(avg_perc = mean(perc)) %>% as.data.frame
top100_clusts <- ch4_perm_test(input = top100_clusts, column = 'avg_perc', ndraws = 1000,
gb = "dyear, unq_clust, type", clust_cat = 'unq_clust, type', summ = "mean(avg_perc)",
crit = "<" )
start_time <- Sys.time()
top100_clusts <- ch4_perm_test(input = top100_clusts, column = "hpounds", ndraws = 1000,
gb = "dyear, unq_clust, species", clust_cat = 'unq_clust, species',
summ = "hpounds", crit = "<", annual = FALSE, save_resamps = TRUE)
runtime <- Sys.time() - start_time
(runtime)
#took 26 minutes
save(top100_clusts, file = 'output/top100_clusts.Rdata')
#Remove old duplicated rows
top100_clusts[, c(47, 48)] <- NULL
top100_clusts %>% filter(sig_hpounds == 'sig increase', type == 'targets') %>%
ggplot(aes())
#---------------------------------------------------------------------------------------------
#Which clusters had increases in vessels and target catches?
top100_clusts %>% filter(sig_nvess == 'sig increase')
top100_clusts %>% group_by(sig_nvess) %>% summarize(nclusts = length(unique(unq_clust)))
top100_clusts %>% group_by(sig_ntows) %>% summarize(nclusts = length(unique(unq_clust)))
unique(top100_clusts$sig_ntows)
top100_clusts %>% filter(sig_nvess == 'sig increase')
#Which clusters had significant increases in target species
top100_clusts %>% filter(sig_hpounds == 'sig increase', type == 'targets') %>%
ggplot() + geom_segment(aes(x = -set_long, xend = -up_long, y = set_lat,
yend = up_lat, colour = unq_clust)) +
facet_wrap(~ species)
top100_clusts %>% filter(sig_hpounds == 'sig increase', type == 'targets')
#Load the data here to save time
unique(top100_clusts$sig_hpounds)
#-------------------------------------------------
#-------------------------------------------------
# gb <- "dyear, unq_clust, type"
# summ <- "unique(avg_perc)"
# clust_cat <- 'unq_clust, type'
#Summarize significance
#Check that things are working right
temp <- top100_clusts %>% filter(sig_avg_perc == 'sig increase', type == 'weaks')
#-------------------------------------------------
top100_clusts %>% filter(species == 'Yelloweye Rockfish')
top100_clusts %>% filter(species == 'Widow Rockfish')
#-------------------------------------------------
#Increase in catch percentages
dec_targets <- top100_clusts %>% filter(sig_avg_perc == 'sig decrease', type == 'targets')
dec_weaks <- top100_clusts %>% filter(sig_avg_perc == 'sig decrease', type == 'weaks')
dec_targets %>% distinct(unq_clust, sig_ntows) %>% group_by(sig_ntows) %>
dec_targets %>% distinct(unq_clust, sig_ntows) %>% group_by(sig_ntows) %>%
summarize(sigs = length(unique(unq_clust))) %>%
#Histograms of catch category percentages by port
top100_clusts %>% filter(species == 'Petrale Sole') %>% ggplot(aes(x = perc)) +
geom_histogram() + facet_wrap(~ dport_desc)
#Constraining Species
#Were certain ports more selective
top100_clusts %>% filter(type %in% c('targets', 'weaks')) %>%
ggplot(aes(x = prop_zero, y = skew)) + geom_point(aes(colour = type)) +
facet_wrap(~ dport_desc)
#Target species only
top100_clusts %>% filter(type %in% c('targets')) %>%
ggplot(aes(x = prop_zero, y = skew)) + geom_point(aes(colour = dport_desc)) +
facet_wrap(~ species + dport_desc)
#Weak stock species
top100_clusts %>% filter(type %in% c('weaks')) %>%
ggplot(aes(x = prop_zero, y = skew)) + geom_point(aes(colour = dport_desc)) +
facet_wrap(~ species + dport_desc)
ggplot(aes(x = prop_zero, y = skew)) + geom_point(aes(colour))
top100_clusts %>% filter(species == 'Sablefish') %>%
ggplot(aes(x = prop_zero, y = skew)) + geom_point(aes(colour = type)) +
facet_wrap(~ dport_desc)
ggplot(top100_clusts, aes(x = prop_zero, y = skew)) + geom_point()
top100_clusts %>% group_by(unq_clust, species) %>% mutate(avg_perc = mean(perc)) %>%
select(dport_desc, type, perc, avg_perc) %>% filter(dport_desc %in% ports$dport_desc,
type %in% c('targets', 'weaks')) %>%
ggplot() + geom_histogram(aes(x = perc)) + facet_wrap(~ type + dport_desc, ncol = 2)
%>% summarize(avg_perc = mean(perc))
dec_ <- plot_clusts(dat = dcatch, show_fig = F, specify_lims = T)
dd + facet_wrap(~ type)
itow_plot <- pl
plot_clusts(dat = temp, show_fig = T, specify_lims = F)
select(unq_clust, dyear, type, when, avg_perc) %>% distinct() %>%
arrange(unq_clust, dyear)
temp
# ch4_perm_test(input = temp, column = 'avg_perc', ndraws = 1000,
# gb = 'dyear, unq_clust, type', summ = 'mean(avg_perc)',
# clust_cat = 'unq_clust, type', crit = "<")
arrange(unq_clust, dyear) %>%
ggplot(aes(x = dyear, y = avg_perc)) + geom_line(aes(colour = unq_clust, group = unq_clust))
#Resample these tows
dcast(unq_clust + type ~ when, value.var = "avg_perc")
temp %>% filter(unq_clust == 2, type == 'targets') %>% distinct() %>% head
dcast(unq_clust, )
plot_clusts(dat = temp, show_fig = TRUE, specify_lims = TRUE)
#--------------------------------------------------------------------------------
#Plot clusters on a map
xx <- plot_clusts(dat = top100_clusts, show_fig = TRUE, specify_lims = TRUE)
#--------------------------------------------------------------------------------
#Sampling in clusters, way to move among them
#pick port
astoria <- top100_clusts %>% filter(dport_desc == 'ASTORIA')
astoria %>% geom
#Come up with rules to move among clusters
#Need to look into how individual vessels move among clusters,
# is there a fixed order for each vessel?
tt <- astoria %>% filter(drvid == '626614')
tt <- tt %>% arrange(date)
#Did effort for individual vessels contract or exapnd? Is there a metric for this?
#Pinsky centroid stuff
geom_segment(aes(x = set_long, xend = up_long, y = set_lat, yend = up_lat, colour = clust)) +
facet_wrap(~ dyear)
#how much variability is there in sablefish catches, for example
png(width = 13, height = 10, file = 'figs/cluster_delta_plots.png', units = 'in',
res = 200)
top100_clusts %>% filter(type %in% c('targets', 'weaks')) %>% ggplot() +
geom_point(aes(x = prop_zero, y = skew, colour = unq_clust)) + facet_wrap(~ species)
dev.off()
#Did individual vessels fish in fewer clusters over time due to catch shares?
top100_clusts %>% group_by(drvid, dyear) %>% summarize(nclusts = length(unique(unq_clust))) %>%
ggplot(aes(x = dyear, y = nclusts)) + geom_line(aes(colour = drvid))
#Write function to run a permutation test for different columns
#Could be number of clusters per vessel, number of vessels per cluster, number of tows
#per cluster for example
#--------------------------------------------------------------------------------
#Way to look at species interactions
#top
high <- top100_clusts %>% filter(high_weak_perc == max(top100_clusts$high_weak_perc),
type %in% c('targets', 'weaks'))
pp <- high %>% select(perc, species, haul_id) %>% dcast(haul_id ~ species, value.var = 'perc')
#Replace all NAs with 0
pp <- melt(pp)
pp[is.na(pp$value), 'value'] <- 0
pp <- dcast(pp, haul_id ~ variable, value.var = 'value')
pp$haul_id <- factor(pp$haul_id)
pairs(pp[, c(3, 4, 7, 8)])
ggplot(high) + geom_point(aes(x = date, y = perc)) + facet_wrap(~ species)
high %>% select(perc, species) %>% pairs
pairs(high[c("perc", "species")])
%>%
ggplot() + geom_point(aes(x = date, y = perc)) + facet_wrap(~ species)
ggplot(top100_clusts, )
#Add temporal values
top100_clusts %>% filter(high_weak_perc >= .001) %>%
arrange(dyear, dmonth, dday) %>% filter(unq_clust == 1073) %>%
ggplot() +
length(unique(top100_clusts$unq_clust))
#Number of tows histogram
top100_clusts %>% select(unq_clust, ntows) %>% distinct() %>% ggplot() +
geom_histogram(aes(x = ntows))
#Number of vessels in each cluster histogram
top100_clusts %>% select(unq_clust, nvess) %>% distinct() %>% ggplot() +
geom_histogram(aes(x = nvess))
#Look into clusters that high bycatch percentages of weak stock,
top100_clusts %>% select(unq_clust, high_weak_perc) %>% distinct() %>% ggplot() +
geom_histogram(aes(x = high_weak_perc))
hist
#Avoidance of certain locations after large haul of overfished species?
top100_clusts %>% filter(perc == )
#--------------------------------------------------------------------------------
#Figures
#Plot of vessels fishing in clusters
clust_tows %>% group_by(drvid, dyear) %>% summarize(nclusts = length(unique(unq_clust))) %>%
arrange(desc(nclusts)) %>% group_by(drvid) %>% mutate(avg_nclusts = mean(nclusts)) %>%
arrange(desc(avg_nclusts)) %>% ggplot(aes(x = dyear, y = nclusts)) + geom_line(aes(colour = drvid))
#General plot of tows
clust_tows %>% filter(drvid == 511697) %>% ggplot() +
geom_segment(aes(x = set_long, xend = up_long, y = set_lat, yend = up_lat, colour = clust)) +
facet_wrap(~ dyear)
#look at order of tows after catch of weak stock
top100_clusts <- top100_clusts %>% arrange(desc(high_weak_perc))
top100_clusts %>% filter(unq_clust == 37, type == 'weaks', perc >= 0.5)
top100_clusts %>% filter(unq_clust == 37, dyear == 2012, type == 'weaks', perc >= 0.5)
top100_clusts %>% filter(drvid == "511697", dyear == 2012) %>% ggplot() +
geom_segment(aes(x = set_long, xend = up_long, y = set_lat, yend = up_lat)) +
facet_wrap(~ dmonth)
#--------------------------------------------------------------------------------
#Do certain tows catch a higher proportion of certain species?
#Is this due to catch shares?
#Look at median and mean values of certain species catch in each cluster
# temp <- top100_clusts %>% filter(unq_clust == 5)
top100_clusts %>% group_by(when, unq_clust, species) %>% mutate(mn = mean(hpounds, na.rm = T),
med = median(hpounds))
med_mean_spp <- top100_clusts %>% group_by(unq_clust, when, species) %>% mutate(med = median(hpounds))
,
mn = mean(hpounds))
%>% select(med, mn, type)
#Median values
meds <- med_mean_spp %>% dcast(unq_clust + species ~ when, value.var = 'med')
meds$diffs <- meds$after - meds$before
#Mean values
means <- med_mean_spp %>% dcast(unq_clust + species ~ when, value.var = 'mn')
means$diffs <- means$after - means$before
hist(meds$diffs, breaks = 30)
hist(means$diffs, breaks = 30)
#Relationship between number of tows and proportion of tows with bycatch?
type == 'weaks', perc >= 0.5)
top100_clusts %>% filter(unq_clust == 37)
#Test hypothesis,
#
#Fishers move towards areas with low bycatch
#Relationship between number of tows and proportion of tows with bycatch?
top100_clusts %>% distinct() %>% ggplot(aes(x = ntows, y = high_weak_perc)) + geom_point() +
facet_wrap(~ sig_vess)
#Look at delta plots for tows with sig increase in number of
top100_clusts %>% filter(sig_vess == 'sig increase', type == 'targets') %>%
ggplot(aes(x = hpounds)) + geom_histogram() + facet_wrap(~ species + when, ncol = 2)
ggplot(aes(x = prop_zero, y = skew)) + geom_point() + facet_wrap(~ species)
#Which
top100_clusts %>% filter(type == 'targets', dport_desc == "ASTORIA") %>%
ggplot(aes(x = prop_zero, y = skew)) +
geom_point(aes(size = ntows, colour = high_weak_perc)) + facet_wrap(~ species)
ggplot(top100_clusts, aes(x = prop_zero, y = skew)) + geom_point(aes(size = high_weak_perc))
top100_clusts %>% arrange(desc(high_weak_perc)) %>% head
top100_clusts$high_weak_perc <- top100_clusts$nhigh / top100_clusts$ntows
,
high_weak_perc = nhigh/ntows)
,
high_weak_perc = nhigh / ntows) %>% select(high_weak_perc)
top100_clusts$weak_perc <-
temp <- top100_clusts %>% filter(unq_clust == 5)
#What percentage of the tows caught a lot of a weak stock species?
top100_clusts %>% filter(unq_clust == 5, type == 'targets', high_perc == 'no',
haul_id == '10906516')
top100_clusts %>% group_by(haul_id, type, high_perc) %>% mutate(prop = length(high_perc) / ntows) %>%
select(prop)
length(unique(temp$haul_id))
temp %>% filter(type == 'weaks', high_perc == 'yes') %>% nrow / length(unique(temp$haul_id))
select(high_perc) %>% filter == 'yes'
top100_clusts %>% group_by(unq_clust) %>% mutate(numrow = length(perc)) %>%
group_by(unq_clust, type) %>% mutate(perc_high = sum(which(high_perc == 'yes')) ) %>%
select(perc_high)
select(numrow)
#Calculate frequency of high proportion of weaks
top100_clusts <- top100_clusts %>% group_by(clust, type) %>% mutate(nn = length(perc >= 0.5), freq_high = length(perc >= 0.5) / ntows) %>%
as.data.frame
temp <- top100_clusts %>% filter(clust == 5)
top100_clusts %>% distinct(species, prop_zero, skew) %>% ggplot(aes(x = prop_zero, y = skew, label = species)) +
geom_point() + geom_text(hjust = -.05)
#Rule to be delta plot targets is less than 0.5 prop_zero and lower than 0 skew
#--------------------------------------------------------------------------------
#Was there a change in catch compositions before and after in sig decrease clusters?
#Connect back with
temp %>% group_by(species, when) %>% summarize(pounds = sum(hpounds)) %>%
arrange(desc(pounds)) %>% dcast(species ~ when, value.var = 'pounds')
temp %>% filter(species == "Dover Sole") %>% ggplot(aes(x = hpounds)) + geom_histogram() +
facet_wrap(~ when)
#Look at differences in species-specific histograms
#Look at delta plots
#--------------------------------------------------------------------------------
#Find which clusters are visited the most
clust_tows %>%
# summarize(nclusts = length(unique(clust))) %>%
group_by(drvid, dport_desc, clust) %>% mutate(nhauls_per_clust = length(unique(haul_id))) %>%
as.data.frame -> xx
#tc for test cluster
tc <- xx %>% filter(dport_desc == 'ASTORIA' & clust == 41)
tc %>% group_by(species) %>% summarize(tot_hpounds = sum(hpounds), ntows = length(unique(haul_id))) %>%
arrange(desc(tot_hpounds)) %>% as.data.frame
tc %>% filter(species == 'Sablefish') %>% ggplot(aes(x = hpounds)) + geom_histogram() +
facet_wrap(~ dyear)
#Do Delta plots for each cluster, without time
deltas <- xx %>% group_by(dport_desc, clust, species) %>% mutate(prop_zero = 1 - (length(hpounds) / unique(ntows)),
skew = calc_skew(log(hpounds))) %>% as.data.frame()
#Delta plots for all species
ggplot(deltas, aes(x = prop_zero, y = skew)) + geom_point(aes(colour = clust)) +
facet_wrap(~ species)
#Have to filter these by species,
%>% ggplot(aes(prop_zero, y = skew)) + geom_point(aes(colour = species)) +
geom_label()
1 - (length(hpounds) / ntows))
#Is it the total pounds that are consistent
#Or the proportion of certain target species?
#Pick vessel 629368 and dig into clusters that it visited every year
clust_tows %>% filter(drvid == 629368) %>% distinct(dport_desc)
clust_tows %>% filter(drvid == 511697) %>% group_by(clust) %>%
summarize(nyears = length(unique(dyear))) %>% arrange(desc(nyears))
clust_tows %>% filter(drvid == 511697, clust) %>% ggplot() +
geom_segment(aes(x = set_long, xend = up_long, y = set_lat, yend = up_lat, colour = clust)) +
facet_wrap(~ dyear)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.