#' Create real diet datasets for mskeyrun-hydra
#'
#' Diet dataset built by Brian Smith on github is accessed, mean wt and proportion
#' of prey in diet by predator species and length with variance is calculated
#'
#' Modified \code{get_diet} function by Sean Hardison, original here
#' https://github.com/NOAA-EDAB/ECSA/blob/master/R/get_diet.R
#' Creates a dataset of annual and seasonal weighted diet composition with sample sizes.
#' Datasets are for a selected species and set of seasonal survey strata.
#' Need to clearly define what each data column is, render to tidy data.
#' This is a first draft that will be revised with dplyr functions later.
#' n.b., Original file from Brian Smith, August 23 2018 was allwt_nstoms.R, renamed as get_diet.R
#'
#'
#'@param focalspp mskeyrun dataset, output of \code\link{{create_focal_species}} or vector of ALL CAPS predator scientific names
#'@param survstrat list of NEFSC trawl survey strata
#'@param saveToData Boolean. Export to data folder (Default = T)
#'
#'@return A tibble (Also written to \code{data} folder)
#'dataframe with variables svspp, year, season, meansw, num_tows, variance, cv, prey, totwt, relmsw, ci, relci, nstom
#'\item{ModSim}{Atlantis model name and simulation id}
#'\item{year}{year diet data collected}
#'\item{Code}{SPECIES_ITIS code for predator species}
#'\item{Name}{Common name for predator species}
#'\item{season}{season diet data collected}
#'\item{pdlen}{predator length bin (1 cm)}
#'\item{prey}{Common name for prey item}
#'\item{variable}{mean prey item weight in year/species/season/pdlen (meansw),
#' variance of meansw (variance),
#' number of tows in year/species/season/prey/pdlen (num_tows),
#' cv of meansw (cv),
#' confidence interval for mean prey weight in diet (ci),
#' total prey weight in year/species/season/pdlen (totwt),
#' percent of prey item in diet in year/species/season/pdlen (relmsw),
#' confidence interval for percent prey in diet (relci),
#' number of stomachs in year/species/season/pdlen (nstom)}
#'\item{value}{value of the variable}
#'\item{units}{units of the variable; grams for meansw, variance, ci, totwt,
#' percent for relmsw, relci, numbers for num_tows, nstom, unitless cv}
#'
library(magrittr)
library(dplyr)
create_real_lendietcomp <- function(focalspp, survstrat, saveToData = T){
# code similar to create_real_dietcomp but keeping pdlen (predator length) as grouping variable throughout
# object is called `allfh`
load(url("https://github.com/Laurels1/Condition/raw/master/data/allfh.RData"))
survdietraw <- allfh %>%
dplyr::filter(pdscinam %in% unique(focalspp$SCIENTIFIC_NAME),
stratum %in% survstrat,
pynam != 'BLOWN',
pynam != 'PRESERVED',
pynam != ' ',
!is.na(year),
purcode == 10)
## Number of stomachs by length and weight
## Sum by length
allsum_len <- survdietraw %>%
dplyr::group_by(year,
season,
svspp,
cruise,
station,
pdid,
pdsex,
pdlen) %>%
dplyr::summarise(totwt = sum(pyamtw)) %>%
na.omit() %>%
dplyr::group_by(year,
season,
svspp,
pdlen) %>% #added
dplyr::summarise(nstom = n(),
meanstom = mean(totwt, na.rm = TRUE),
varstom = var(totwt, na.rm = TRUE),
meanlen = mean(pdlen, na.rm = TRUE),
numlen = n(),
minstom = min(totwt, na.rm = TRUE),
maxstom = max(totwt, na.rm = TRUE),
minlen = min(pdlen, na.rm = TRUE),
maxlen = max(pdlen, na.rm = TRUE))
## Sum by weight
allsum_wgt <- survdietraw %>%
dplyr::group_by(year,
season,
svspp,
cruise,
station,
pdid,
pdsex,
pdwgt,
pdlen) %>%
dplyr::summarise(totwt = sum(pyamtw, na.rm = TRUE)) %>%
na.omit() %>%
dplyr::group_by(year,
season,
svspp,
pdlen) %>% #added
dplyr::summarise(meanwgt = mean(pdwgt, na.rm = TRUE),
numwgt = n(),
minwgt = min(pdwgt, na.rm = TRUE),
maxwgt = max(pdwgt, na.rm = TRUE))
#stomstats2
nstom_df <- allsum_len %>%
dplyr::left_join(allsum_wgt, by = c("year", "season", "svspp",
"pdlen")) %>% #added
# mutate(std.error = sqrt(varstom/nstom)) %>%
dplyr::select(svspp, year, season, pdlen, nstom)
#----------------------------------Start weighted diet proportion code-------------------------------------#
## Select and rename the appropriate columns
allsum_strat <- survdietraw %>%
select(cruise6, stratum, station,
year,
season,
svspp, pdid,
pdsex, pdlen,
tax = collsci,
pyamt = pyamtw,
catnum, numlen,
tot_catnum_stratum, tot_catwgt_stratum, tot_tows_spp_stratum,
stratum_area)
## Group into seasonal length-class and remove NAs
num_strat <- allsum_strat %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdid,
pdsex,
pdlen) %>%
summarise(xxxx = n()) %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdsex,
pdlen) %>%
summarise(numlen2 = n()) %>%
left_join(allsum_strat, by = c("cruise6", "station", "year",
"season", "svspp", "pdsex", "pdlen")) %>%
mutate(numlen_fin = case_when(numlen < numlen2 ~ numlen2,
numlen >= numlen2 ~ numlen,
TRUE ~ NA_integer_)) %>%
select(c(cruise6, stratum, station,
year,
season,
svspp, pdid, pdsex, pdlen, tax, pyamt,
catnum, tot_catnum_stratum, tot_catwgt_stratum,
tot_tows_spp_stratum, stratum_area, numlen, numlen_fin))
sum_strat <- num_strat %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdsex,
pdlen,
numlen_fin) %>%
summarise(dummy_var = sum(numlen, na.rm = TRUE)) %>%
na.omit() %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdlen) %>%
summarise(rnumlen_fin = sum(numlen_fin, na.rm = TRUE)) %>%
left_join(num_strat, by = c("cruise6", "station", "year", "season", "svspp", "pdlen")) %>%
mutate(rnumlen_fin = ifelse(svspp !=13 | svspp !=15,
numlen_fin,
rnumlen_fin)) %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdsex,
pdlen, #added
catnum) %>%
summarise(dummy_var = sum(numlen)) %>%
na.omit()
sum_catnum <- sum_strat %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdlen) %>% #added
summarise(rcatnum = sum(catnum, na.rm = TRUE)) %>%
na.omit() %>%
left_join(num_strat, by = c("cruise6", "station", "year", "season", "svspp",
"pdlen")) %>% #added
mutate(rcatnum = ifelse(svspp != 13 | svspp != 15,
catnum,
rcatnum))
sum_catstratum <- sum_catnum %>%
group_by(cruise6, stratum,
year,
season,
svspp,
pdsex,
pdlen, #added
tot_tows_spp_stratum,
tot_catnum_stratum,
tot_catwgt_stratum) %>%
summarise(dum_var = sum(rcatnum, na.rm = TRUE)) %>%
na.omit()
####################################################################################################################################
max_tot_tows_spp_stratum <- sum_catstratum %>%
group_by(cruise6,
stratum,
year,
season,
svspp,
pdlen) %>% #added
summarise(tot_tows_spp_stratum = max(tot_tows_spp_stratum, na.rm = TRUE))
sum_rcatstratum <- sum_catstratum %>%
group_by(cruise6,
stratum,
year,
season,
svspp,
pdlen) %>% #added
summarise(rtot_catnum_stratum = sum(tot_catnum_stratum, na.rm = TRUE),
rtot_catwgt_stratum = sum(tot_catwgt_stratum, na.rm = TRUE)) %>%
na.omit() %>%
left_join(max_tot_tows_spp_stratum, by = c("cruise6", "stratum", "year", "season", "svspp",
"pdlen")) %>% #added
rename(rtot_tows_spp_stratum = tot_tows_spp_stratum)
final_strat <- sum_rcatstratum %>%
left_join(sum_catnum, by = c("cruise6", "stratum", "year", "season", "svspp",
"pdlen")) %>% #added
mutate(rtot_catnum_stratum = ifelse(svspp != 13 | svspp != 15,
tot_catnum_stratum,
rtot_catnum_stratum),
rtot_catwgt_stratum = ifelse(svspp != 13 | svspp != 15,
tot_catwgt_stratum,
rtot_catwgt_stratum),
rtot_tows_spp_stratum = ifelse(svspp != 13 | svspp != 15,
tot_tows_spp_stratum,
rtot_tows_spp_stratum))
####################################################################################################################################
py_raw <- final_strat %>%
group_by(cruise6,
station,
year,
season,
svspp, pdid, pdlen, tax) %>%
summarise(pysum = sum(pyamt, na.rm = TRUE)) %>%
na.omit()
py_nostom <- py_raw %>%
ungroup() %>%
arrange(cruise6, station,
year,
season,
svspp,
pdid) %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdid) %>%
top_n(-1, wt = pysum) %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdlen) %>% #added
summarise(nostom = n())
py_list <- data.frame(tax = unique(py_raw$tax),
pycode = paste0('prey', 1:length(unique(py_raw$tax))))
py_all <- py_nostom %>%
left_join(py_raw, by = c("cruise6", "station", "year", "season", "svspp",
"pdlen")) %>% #added
left_join(py_list, by = "tax")
pd_strat <- final_strat %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdid,
pdlen) %>%
top_n(-1, wt = stratum_area)
pd_nas <- pd_strat %>%
group_by(cruise6,
station,
year,
season,
svspp,
pdlen) %>%
summarise(rnumlen_fin = sum(numlen_fin, na.rm = TRUE)) %>%
na.omit()
pd_strat <- pd_strat %>%
left_join(pd_nas, by = c("cruise6", "year", "season", "svspp", "station", "pdlen")) %>%
select(cruise6, station, stratum,
year,
season,
svspp, pdid, pdlen,
rcatnum, rtot_catnum_stratum,
tot_catwgt_stratum, rtot_catwgt_stratum,
rtot_tows_spp_stratum, stratum_area, rnumlen_fin)
#Working with fish data.frame----------------------------------------------------------------------------------#
fish_ply <- py_all %>%
select(-tax) %>%
left_join(pd_strat, by = c("cruise6", "station", "year", "season", "svspp", "pdid", "pdlen"))
####################################################################################################################################
## Take the median value for each prey group
trans_ply <- fish_ply %>%
group_by(cruise6, stratum, station,
year, season,
svspp, pdlen, #added
rcatnum,
rtot_catnum_stratum,
rtot_catwgt_stratum,
rtot_tows_spp_stratum,
stratum_area, rnumlen_fin, pycode) %>%
summarize(pysum = median(pysum, na.rm = FALSE))
## Weighted mean and mean of pysum
pysum_wm <- trans_ply %>%
group_by(cruise6, stratum, station,
year, season,
svspp, pdlen, #added
rcatnum,
rtot_catnum_stratum,
rtot_tows_spp_stratum,
stratum_area, pycode) %>%
summarize(wmean = weighted.mean(pysum, w = rnumlen_fin))# %>%
## Mean
pysum_m <- pysum_wm %>%
group_by(cruise6, stratum, station,
year, season,
svspp, pdlen, #added
rcatnum,
rtot_catnum_stratum,
rtot_tows_spp_stratum,
stratum_area,
pycode) %>%
summarize(musw = mean(wmean, na.rm = TRUE),
musw2 = wmean * rcatnum)
## by station
pysum_wm_s <- pysum_m %>%
group_by(cruise6, stratum,
year, season,
svspp, pdlen, #added
rtot_catnum_stratum,
rtot_tows_spp_stratum,
stratum_area,
pycode) %>%
summarize(tmsw_strat = sum(musw2, na.rm = TRUE))
musw_strat_ply <- pysum_wm_s %>%
group_by(cruise6, stratum,
year, season,
svspp, pdlen, #added
rtot_catnum_stratum,
rtot_tows_spp_stratum,
stratum_area,
pycode) %>%
summarize(musw_strat = tmsw_strat/rtot_tows_spp_stratum) %>%
ungroup() %>%
mutate(munfish_strat = rtot_catnum_stratum / rtot_tows_spp_stratum)
munfish_stratdat_ply <- musw_strat_ply %>%
ungroup() %>%
mutate(munfish_strat = rtot_catnum_stratum / rtot_tows_spp_stratum) %>%
select(cruise6, stratum,
year, season,
svspp, pdlen, #added
munfish_strat)
#weight by stratum area
pymean_strat <- musw_strat_ply %>%
group_by(svspp, year, season, pdlen, #added
pycode) %>%
summarize(msw_strat = weighted.mean(musw_strat, stratum_area, na.rm = TRUE),
m_nfish_strat = weighted.mean(munfish_strat, stratum_area, na.rm = TRUE),
num_stra = n())
meansw_s_ply <- pymean_strat %>%
group_by(svspp, year, season, pdlen, #added
pycode) %>%
summarize(meansw_s = msw_strat/m_nfish_strat)
meansw_ply <- pysum_m %>%
group_by(svspp, year, season, pdlen, #added
pycode) %>%
summarize(meansw = weighted.mean(musw, rcatnum, na.rm = TRUE),
num_tows = n())
master_ply <- pysum_m %>% #musw and musw2
left_join(pysum_wm_s) %>% #tmsw_strat
left_join(musw_strat_ply) %>% # musw_strat
left_join(meansw_ply) %>% # meansw
left_join(meansw_s_ply) %>% # meansw_s
left_join(pymean_strat) %>% # msw_strat
select(-munfish_strat) %>%
mutate(prod = rcatnum^2 * ((musw - meansw))^2,
prodf = (rcatnum - m_nfish_strat)^2,
prodd = (musw2 - musw_strat)^2,
prod_cov = (rcatnum - m_nfish_strat) * (musw2 - musw_strat))# %>%
mprod_mnumfish2_ply <- master_ply %>%
group_by(svspp, year, season, pdlen, #added
pycode) %>%
summarize(mprod = mean(prod, na.rm = TRUE),
mnumfish = mean(rcatnum, na.rm = TRUE))
new4_ply <- master_ply %>%
group_by(svspp, year, season,
pdlen, #added
pycode) %>%
summarize(sprod = sum(prod, na.rm = TRUE),
mprod = mean(prod, na.rm = TRUE),
mnumfish = mean(rcatnum, na.rm = TRUE))
new6_ply <- master_ply %>%
group_by(svspp, year, season, pdlen, #added
pycode, cruise6, stratum,
stratum_area, rtot_tows_spp_stratum,
m_nfish_strat) %>%
summarize(sprodf = sum(prodf, na.rm = TRUE),
sprodd = sum(prodd, na.rm = TRUE),
sprod_cov = sum(prod_cov, na.rm = TRUE)) %>%
ungroup() %>%
mutate(dfntows_strat = rtot_tows_spp_stratum - 1,
varprodf = (stratum_area^2) * ((sprodf/dfntows_strat)/rtot_tows_spp_stratum),
varprodd = (stratum_area^2) * ((sprodd/dfntows_strat)/rtot_tows_spp_stratum),
varprod_cov = (stratum_area^2) * ((sprod_cov/dfntows_strat)/rtot_tows_spp_stratum),
varprodf = if_else(rtot_tows_spp_stratum > 1,
varprodf,
0),
varprodd = if_else(rtot_tows_spp_stratum > 1,
varprodd,
0),
varprod_cov = if_else(rtot_tows_spp_stratum > 1,
varprod_cov,
0))
sumvarprod_ply <- new6_ply %>%
group_by(svspp, year, season, pdlen, #added
pycode) %>%
summarize(svarprodf = sum(varprodf, na.rm = TRUE),
svarprodd = sum(varprodd, na.rm = TRUE),
svarprod_cov = sum(varprod_cov, na.rm = TRUE),
stratum_area = sum(stratum_area, na.rm = TRUE)) %>%
ungroup() %>%
mutate(varf = svarprodf / stratum_area^2,
vard = svarprodd/ stratum_area^2,
var_cov = svarprod_cov/ stratum_area^2) %>%
select(-stratum_area)
new4_strat_ply <- new4_ply %>%
left_join(sumvarprod_ply)
#merge with select master columns
six_ply <- new4_strat_ply %>%
left_join(master_ply) %>%
mutate(variance = ifelse(num_tows > 1 & mnumfish != 0 & meansw!=0 & m_nfish_strat !=0 & meansw_s!=0,
1/(num_tows * (mnumfish)^2) * (sprod/(num_tows-1)),
0),
cv = ifelse(num_tows > 1 & mnumfish != 0 & meansw != 0 & m_nfish_strat != 0 & meansw_s != 0,
((variance)^0.5)/meansw,
0),
var_s = ifelse(num_tows > 1 & mnumfish != 0 & meansw != 0 & m_nfish_strat != 0 & meansw_s != 0,
(meansw_s^2) * ((varf/(m_nfish_strat^2)) + (vard/(msw_strat^2)) - (2 * var_cov/m_nfish_strat/msw_strat)),
0),
cv_s = ifelse(num_tows > 1 & mnumfish != 0 & meansw != 0 & m_nfish_strat != 0 & meansw_s != 0,
((var_s)^0.5)/meansw_s,0)) %>%
select(pycode, svspp, year, season, pdlen, #added
meansw,
meansw_s, num_tows, m_nfish_strat, num_stra,
variance, cv, var_s, cv_s, pycode) %>%
distinct(.keep_all = TRUE)
final_ply <- six_ply %>%
mutate(pycode = as.factor(pycode)) %>%
left_join(py_list) %>%
left_join(nstom_df) %>%
group_by(svspp, year, season, pdlen) %>% #added
mutate(totwt = sum(meansw, na.rm = TRUE),
totwt_s = sum(meansw_s, na.rm = TRUE),
relmsw = 100*(meansw/totwt),
relmsw_s = 100*(meansw_s/totwt_s),
ci = sqrt(variance/num_tows)*2,
relci = (ci/totwt)*100,
ci_s = sqrt(var_s/num_tows)*2,
relci_s = (ci_s/totwt_s)*100) %>%
select(svspp, year, season, pdlen, #added
meansw, num_tows,
variance, cv, prey = tax, totwt,
relmsw, ci, relci, nstom)
survdietlen <- final_ply %>%
dplyr::left_join(focalspp, by = c("svspp" = "SVSPP")) %>%
dplyr::mutate(ModSim = "Actual",
Code = SPECIES_ITIS,
Name = modelName) %>%
dplyr::ungroup() %>%
dplyr::select(ModSim, year, Code, Name, season, pdlen, prey,
meansw, num_tows, variance, cv, totwt, relmsw,
ci, relci, nstom) %>%
tidyr::pivot_longer(cols = c("meansw", "num_tows", "variance", "cv", "totwt", "relmsw",
"ci", "relci", "nstom"),
names_to = "variable",
values_to = "value") %>%
dplyr::mutate(units = case_when(variable %in% c("meansw", "variance","totwt","ci") ~ "grams",
variable %in% c("relmsw", "relci") ~ "percent",
variable %in% c("num_tows","nstom") ~ "number",
variable %in% c("cv") ~ "unitless",
TRUE ~ as.character("NA")))
# correct prey scientific names for mskeyrun species
survdietlen$prey[survdietlen$prey == "PLEURONECTES AMERICANUS"] <- "PSEUDOPLEURONECTES AMERICANUS"
survdietlen$prey[survdietlen$prey == "PLEURONECTES FERRUGINUS"] <- "LIMANDA FERRUGINEA"
surveyLenDietcomp <- survdietlen
if (saveToData) {
usethis::use_data(surveyLenDietcomp, overwrite = TRUE)
}
return(surveyLenDietcomp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.