#' @title Multi-year time stratified Bayesian estimator (cohort summary)
#' @description MYTSBE utilizes hierarchical Bayesian methods to estimate abundances from capture-mark-recapture (CMR) data using a temporally-stratified Lincoln-Petersen estimator. The between-year hierarchical
#' structures allows for annually recurring species characteristics to be incorporated into capture probabilities and abundance estimates.
#'
#' Summary output is by cohort year.
#'
#' Peer reviewed publication can be found at ... yadda yadda
#'
#' @param data capture-mark-recapture data frame
#' @param burnin number of initial MCMC chain iterations to be discarded
#' @param chains number of MCMC chains (>1)
#' @param iterations number of MCMC iterations per chain
#' @param thin thin rate for MCMC chains
#' @param sel.years selected year(s) to compute abundance estimates
#' @param boot number of boot strap iterations to calculate yearly and life stage abundance estimates
#' @param species character string used for titles and descriptions of reports
#' @param model.params parameters to be returned from MCMC simulation
#' @param trap.name character string used for titles and descriptions of reports
#' @param effort.cor expands the number of unmarked fish captured by a sampling effort
#' @param strata.length number of days in strata
#' @param smolt.parr.date "MM-DD" date to partition smolt life stage
#' @param parr.presmolt.date "MM-DD" date to partition parr life stage
#' @param strata.op.min minimum number of years data need to have been collected in a stratum to be included in summary
#' @param den.plot return density plots of MCMC chains
#' @param trace.plot return trace plots of MCMC chains
#' @import plyr
#' @importFrom lubridate hour
#' @importFrom R2jags jags
#' @import coda
#' @import lattice
#' @import superdiag
#' @import mcmcplots
#' @import ggmcmc
#' @importFrom data.table as.data.table
#' @export
#' @return NULL
MYTSBE_Cohort <- function(data,
effort.cor = FALSE,
sel.years = currentyear,
strata.op.min = 1,
smolt.parr.date = "07-01",
parr.presmolt.date = "09-01",
species = "",
trap.name = "",
den.plot = TRUE,
trace.plot = FALSE,
strata.length = s.length,
burnin = 100000,
chains = 3,
iterations = 400000,
thin = 100,
boot = 5000,
model.params = c("p", "U", "etaP1", "etaU1", "sigmaU", "sigmaP")) {
currentyear <- max(data$year)-2
s.length <- max(data$days)
smolt.date <- paste("2010-",smolt.parr.date, sep ="")
parr.date <- paste("2010-",parr.presmolt.date, sep ="")
model <- function() {
for(i in 1:strata){
for(j in 1:year){
m[i,j] ~ dbin(p[i,j],n[i,j])
logit(p[i,j]) <- etaP[i,j]
etaP[i,j] ~ dnorm(etaP1[i],tauP)
u[i,j] ~ dbin(p[i,j],U[i,j])
U[i,j] <- round(exp(etaU[i,j]))
etaU[i,j]~ dnorm(etaU1[i],tauU)
}
etaP1[i] ~ dnorm(np,tp)
etaU1[i] ~ dnorm(nu,tu)
}
tauP ~ dgamma(.001,.001) #equal to 1/epsilon^2_lambda_mu
sigmaP <- 1/sqrt(tauP)
tauU~ dgamma(.001,.001) #equal to 1/epsilon^2_lambda_mu
sigmaU <- 1/sqrt(tauU) #variance
np ~ dnorm(-2,.666) #variance=1.5, precision=1/1.5->.666
tp ~ dgamma(.001,.001) #equal to 1/epsilon^2_lambda_mu
sigmatp <- 1/sqrt(tp) #variance
nu ~ dnorm(10,.25) #variance=4, precision=1/4=.25 , Increased mean to 10 due to initializing issues
tu ~ dgamma(.001,.001) #equal to 1/epsilon^2_lambda_mu
sigmatu <- 1/sqrt(tu) #variance
}
#Input Data######################################################################################
#U updated for effort correction factor
data$cor.factor <-data$effort/data$days
data$u.cor <- round(data$u*(1/(data$effort/data$days)))
if(effort.cor == TRUE) {
datau=matrix(round(data$u*(1/(data$effort/data$days))), nrow=length(unique(data$strata)), ncol=length(unique(data$year)))
} else {
datau=matrix(data$u,nrow=length(unique(data$strata)), ncol=length(unique(data$year)))
}
datan=matrix(data$n,nrow=length(unique(data$strata)), ncol=length(unique(data$year)))
datam=matrix(data$m,nrow=length(unique(data$strata)), ncol=length(unique(data$year)))
strata <- as.numeric(nrow(datan))# number of unique stratum
year <- as.numeric(ncol(datan)) # number of unique years
n <- as.matrix(datan) # number of marked individuals available for recapture during the stratum
m <- as.matrix(datam) # number of marked individuals recaptured during the stratum
u <- as.matrix(datau) # number of unmarked individuals captured during the stratum
options(max.print=100000)
model.data <- list("strata", "year", "n", "m", "u")
model.fit <- jags(data = model.data, inits = NULL,
parameters.to.save = model.params, n.chains = chains, n.iter = iterations,
n.burnin = burnin, model.file = model)
#######################################################################################################################
#Add in progress bar here!
model.fit.mcmc <- as.mcmc(model.fit) #call model.fit from rjags
model.fit.gg <- suppressWarnings(ggs(model.fit.mcmc)) #convert model.fit.mcmc to gg output (useable dataframe)
options(scipen = 999) #change scientific notation
#export data used for the model
sink("Input Data.txt", append=FALSE, split=FALSE)
print(data)
sink()
#export mcmc chains
#need a for loop here############################################
for(i in 1:chains){
chain <- model.fit.gg[model.fit.gg$Chain == i,]
write.table(chain, file = paste("MCMC Chains", i,".txt"), sep="\t")
}
#############
#Diagnostics#
#############
gd <- gelman.diag(model.fit.mcmc, confidence = 0.95, transform=FALSE, autoburnin=TRUE,
multivariate=FALSE)
gdata <-as.data.frame(gd[1])
gdata <- cbind(Parameter = rownames(gdata), gdata)
colnames(gdata) <- c("Parameter", "Point.est", "Upper.CI")
nonconv.gd <- gdata[gdata$Point.est > 1.1,]
nonconv.gd <- droplevels(nonconv.gd)
nc.gd<-as.character(nonconv.gd$Parameter)
sink("GR Diagnostic Test Results.txt", append=FALSE, split=FALSE)
writeLines(paste(Sys.Date(),"\n",
"\n",
"Number of chains =", chains,"\n",
"Number of iterations per chain =", iterations, "\n",
"Burn in =", burnin,"\n",
"\n",
"********** The Gelman-Rubin diagnostic: **********","\n","\n",
"\n",
"\n",
" * Flagged parameters with point est. >1.1 * ", "\n",
"\n",
paste(nc.gd, collapse=', ' ),
"\n",
"\n"))
print(gd)
sink()
###################
#Manual Statistics#
###################
#apply descriptive statistic functions to parameter values over all chains & iterations
means<-ddply(model.fit.gg, "Parameter", summarise,
mode=names(which.max(table(value))),
mean = mean(value),sd = sd(value),
niaveSE = sd(value)/sqrt(length(value)))
#get quantiles for each parameter
means$mode<-as.numeric(means$mode)
quantiles<-ddply(model.fit.gg, "Parameter", function (x) quantile(x$value, c(0.025, 0.25, 0.5, 0.75, 0.975)))
#add year variable
quantiles$Year<-NA
for(j in 1:length(unique(data$year))){
quantiles$Year<-ifelse(grepl(paste(",",j,"]$" , sep = ""), quantiles$Parameter), min(data$year-1)+j, quantiles$Year)
}
#add strata variable
quantiles$strata<-NA
for(s in 1:length(unique(data$strata))){
quantiles$strata<-ifelse(grepl(paste(s,",", sep = ""), quantiles$Parameter), min(data$strata-1)+s, quantiles$strata)
}
#merge summary statistics and quantiles
outputsummary <- merge(quantiles, means, by.x="Parameter") # Merge statistics
is.num <- sapply(outputsummary, is.numeric) #reduce digits
outputsummary[is.num] <- lapply(outputsummary[is.num], round, 3) #reduce digits
outputsummary<-outputsummary[with(outputsummary, order(Year,Parameter)), ] #reorder by year and parameter
ag.strata <- aggregate(ceiling(data$cor.factor), by=list(strata=data$strata), FUN=sum)
excl.strata <- ag.strata[ag.strata$x < strata.op.min,]
x.strata<-as.character(excl.strata$strata)
outputsummary <- outputsummary[,c(7:8,1:6,9:12)]
sink(paste("All",species,trap.name,"Results.txt"), append=FALSE, split=FALSE)
writeLines(paste( Sys.Date(),"\n",
"\n",
species, trap.name,"\n",
"Number of chains =", chains,"\n",
"Number of iterations per chain =", iterations, "\n",
"Burn in =", burnin,"\n",
"* Strata with <", strata.op.min, "year(s) of operation * ", "\n",
paste(x.strata, collapse=', ' ),
"\n",
"* Flagged parameters with point est. >1.1 * ", "\n",
paste(nc.gd, collapse=', ' ),
"\n"
)
)
print(outputsummary)
sink()
write.csv(outputsummary, file = paste("All",species,trap.name,"Results.csv"))
#########################################################################################
############# Cohort Summary Loop #########
#########################################################################################
smolt.strata <- round(yday(as.Date(smolt.date))/strata.length) - (min(data$strata)-1)# convert to smolt cutoff date to strata
parr.strata <- round(yday(as.Date(parr.date))/strata.length) - (min(data$strata)-1) # convert parr cutoff date to strata
for(selectyr in sel.years){
#remove U parameters for parr and presmolt
pyear <- (selectyr+2) - (as.numeric(min(data$year)))
usep <- subset(model.fit.gg,grepl("^U", Parameter)) #subset all U's
usep.p <- subset(usep,grepl(paste(",",pyear,"]$" , sep = ""), Parameter)) #subset U's for first year
usep.p$strata <- sub(".*?U(.*?)(,.*|$)", "\\1", usep.p$Parameter) #create strata variable
usep.p$strata <- as.numeric(substr(usep.p$strata, 2, 5)) #clean strata variable
parr<- subset(usep.p, strata >= (smolt.strata) & strata <= (parr.strata-1))
parr<-subset(parr, !(parr$strata %in% excl.strata$strata)) # exclude strata under the strata.op.min threshold
parrdt<-as.data.table(parr)
parrUdist = numeric(boot); # set aside an empty vector for bootstrap disribution of the mean
for (i in 1:boot)
{parrUdist[i] <- sum(parrdt[, value[sample.int(.N, 1, TRUE)], by = strata]) - sum(unique(parrdt$strata))
}
parrUdist<-as.data.frame(parrUdist) #change output to dataframe
parrUdist$parrUdist<-as.numeric(parrUdist$parrUdist) #change output to numeric
write.table(parrUdist, file = paste(selectyr,"Parr U distribution.txt"), sep="\t")
#Get descriptive statistics mode, mean, sd, niaveSE or U bootstrap distribution
parrUsum<-adply(parrUdist, 2, summarise,
mode=names(which.max(table(parrUdist))),
mean = mean(parrUdist),sd = sd(parrUdist),
niaveSE = sd(parrUdist)/sqrt(length(parrUdist)))
parrUsum$mode <- as.numeric(parrUsum$mode)
#Get quantiles for U bootstrap distribution
parrUquantiles<-adply(parrUdist, 2, function (x) quantile(x$parrUdist, c(0.025, 0.25, 0.5, 0.75, 0.975)))
parrUoutputsummary <- merge(parrUquantiles, parrUsum, by.x="X1")
parrUoutputsummary$X1 <- NULL #removed X1 variable
parrUoutputsummary$Parameter<-paste("Parr U Cohort",selectyr) # add Parameter variable
parrUoutputsummary$Year<- selectyr+1 #add year variable
#########Pre smolt###############
presmolt<- subset(usep.p, strata >= parr.strata)
presmolt<-subset(presmolt, !(presmolt$strata %in% excl.strata$strata)) # exclude strata under the strata.op.min threshold
presmoltdt<-as.data.table(presmolt)
presmoltUdist = numeric(boot); # set aside an empty vector for bootstrap disribution of the mean
for (i in 1:boot)
{presmoltUdist[i] <- sum(presmoltdt[, value[sample.int(.N, 1, TRUE)], by = strata]) - sum(unique(presmoltdt$strata))
}
presmoltUdist<-as.data.frame(presmoltUdist) #change output to dataframe
presmoltUdist$presmoltUdist<-as.numeric(presmoltUdist$presmoltUdist) #change output to numeric
write.table(presmoltUdist, file = paste(selectyr,"Presmolt U distribution.txt"), sep="\t")
#Get descriptive statistics mode, mean, sd, niaveSE or U bootstrap distribution
presmoltUsum<-adply(presmoltUdist, 2, summarise,
mode=names(which.max(table(presmoltUdist))),
mean = mean(presmoltUdist),sd = sd(presmoltUdist),
niaveSE = sd(presmoltUdist)/sqrt(length(presmoltUdist)))
presmoltUsum$mode <- as.numeric(presmoltUsum$mode)
#Get quantiles for U bootstrap distribution
presmoltUquantiles<-adply(presmoltUdist, 2, function (x) quantile(x$presmoltUdist, c(0.025, 0.25, 0.5, 0.75, 0.975)))
presmoltUoutputsummary <- merge(presmoltUquantiles, presmoltUsum, by.x="X1")
presmoltUoutputsummary$X1 <- NULL #removed X1 variable
presmoltUoutputsummary$Parameter<-paste("Presmolt U Cohort",selectyr) # add Parameter variable
presmoltUoutputsummary$Year<- selectyr+1 #add year variable
########## Smolt #############
syear <- (selectyr+3) - (as.numeric(min(data$year)))
usep.s <- subset(usep,grepl(paste(",",syear,"]$" , sep = ""), Parameter)) #subset U's for first year
usep.s$strata <- sub(".*?U(.*?)(,.*|$)", "\\1", usep.s$Parameter) #create strata variable
usep.s$strata <- as.numeric(substr(usep.s$strata, 2, 5)) #clean strata variable
smolt<- subset(usep.s, strata < smolt.strata)
smolt<-subset(smolt, !(smolt$strata %in% excl.strata$strata)) # exclude strata under the strata.op.min threshold
smoltdt<-as.data.table(smolt)
smoltUdist = numeric(boot); # set aside an empty vector for bootstrap disribution of the mean
for (i in 1:boot)
{smoltUdist[i] <- sum(smoltdt[, value[sample.int(.N, 1, TRUE)], by = strata]) - sum(unique(smoltdt$strata))
}
write.table(smoltUdist, file = paste(selectyr,"Smolt U distribution.txt"), sep="\t")
smoltUdist<-as.data.frame(smoltUdist) #change output to dataframe
smoltUdist$smoltUdist<-as.numeric(smoltUdist$smoltUdist) #change output to numeric
#Get descriptive statistics mode, mean, sd, niaveSE or U bootstrap distribution
smoltUsum<-adply(smoltUdist, 2, summarise,
mode=names(which.max(table(smoltUdist))),
mean = mean(smoltUdist),sd = sd(smoltUdist),
niaveSE = sd(smoltUdist)/sqrt(length(smoltUdist)))
smoltUsum$mode <- as.numeric(smoltUsum$mode)
#Get quantiles for U bootstrap distribution
smoltUquantiles<-adply(smoltUdist, 2, function (x) quantile(x$smoltUdist, c(0.025, 0.25, 0.5, 0.75, 0.975)))
smoltUoutputsummary <- merge(smoltUquantiles, smoltUsum, by.x="X1")
smoltUoutputsummary$X1 <- NULL #removed X1 variable
smoltUoutputsummary$Parameter<-paste("Smolt U Cohort",selectyr) # add Parameter variable
smoltUoutputsummary$Year<- selectyr+2 #add year variable
#setup bootstrap to randomly draw samples from each distribution in order to obtain total U statistics
totUdist = numeric(boot); # set aside an empty vector for bootstrap disribution of the mean
for (i in 1:boot)
{totUdist[i] <- sum(parrdt[, value[sample.int(.N, 1, TRUE)], by = strata]) +
sum(presmoltdt[, value[sample.int(.N, 1, TRUE)], by = strata]) +
sum(smoltdt[, value[sample.int(.N, 1, TRUE)], by = strata]) -
sum(unique(parrdt$strata)) -
sum(unique(presmoltdt$strata)) -
sum(unique(smoltdt$strata))
}
#Get summary statistics for U bootstrapped distribution
totUdist<-as.data.frame(totUdist) #change output to dataframe
totUdist$totUdist<-as.numeric(totUdist$totUdist) #change output to numeric
#Get descriptive statistics mode, mean, sd, niaveSE or U bootstrap distribution
totUsum<-adply(totUdist, 2, summarise,
mode=names(which.max(table(totUdist))),
mean = mean(totUdist),sd = sd(totUdist),
niaveSE = sd(totUdist)/sqrt(length(totUdist)))
totUsum$mode <- as.numeric(totUsum$mode)
#Get quantiles for U bootstrap distribution
totUquantiles<-adply(totUdist, 2, function (x) quantile(x$totUdist, c(0.025, 0.25, 0.5, 0.75, 0.975)))
#merge all summary statistics together
totUoutputsummary <- merge(totUquantiles, totUsum, by.x="X1")
totUoutputsummary$X1 <- NULL #removed X1 variable
totUoutputsummary$Parameter<-paste("Total Cohort",selectyr) # add Parameter variable
totUoutputsummary$Year<- NA #add year variable
######### Cohort strata used #########
parr1<- subset(usep.p, strata >= (smolt.strata) & strata <= (parr.strata-1))
presmolt1<- subset(usep.p, strata >= parr.strata)
smolt1<- subset(usep.s, strata < smolt.strata)
cohort.strata <- rbind(parr1,presmolt1,smolt1)
cmeans<-ddply(cohort.strata, "Parameter", summarise,
mode=names(which.max(table(value))),
mean = mean(value),sd = sd(value),
niaveSE = sd(value)/sqrt(length(value)))
#get quantiles for each parameter
cmeans$mode<-as.numeric(cmeans$mode)
cquantiles<-ddply(cohort.strata, "Parameter", function (x) quantile(x$value, c(0.025, 0.25, 0.5, 0.75, 0.975)))
#add year variable
cquantiles$Year<-NA
for(j in 1:length(unique(data$year))){
cquantiles$Year<-ifelse(grepl(paste(",",j,"]$" , sep = ""), cquantiles$Parameter), min(data$year-1)+j, cquantiles$Year)
}
cquantiles$strata<-NA
for(s in 1:length(unique(data$strata))){
cquantiles$strata<-ifelse(grepl(paste(s,",", sep = ""), cquantiles$Parameter), min(data$strata-1)+s, cquantiles$strata)
}
#merge summary statistics and quantiles
coutputsummary <- merge(cquantiles, means, by.x="Parameter") # Merge statistics
is.num <- sapply(coutputsummary, is.numeric) #reduce digits
coutputsummary[is.num] <- lapply(coutputsummary[is.num], round, 3) #reduce digits
coutputsummary<-coutputsummary[with(coutputsummary, order(Year,Parameter)), ] #reorder by year and parameter
cohortsummary<-rbind.fill(totUoutputsummary,parrUoutputsummary,presmoltUoutputsummary, smoltUoutputsummary, coutputsummary) #merge outputs
cohortsummary <- cohortsummary[,c(10:12,1:9)]
#read out files
sink(paste(selectyr, species, trap.name, "Cohort Results.txt"), append=FALSE, split=FALSE)
writeLines(paste( Sys.Date(),"\n",
"\n",
selectyr, species, trap.name,"\n",
"Boot strap iterations =", boot,"\n",
"Specified smolt date:", smolt.parr.date, "\n",
"Specified parr date:", parr.presmolt.date, "\n",
"\n",
"Strata EXCLUDED from results due to <", strata.op.min, "year(s) of operation ","\n",
paste(x.strata, collapse=', '), "\n",
"\n"
)
)
print(cohortsummary)
sink()
write.csv(cohortsummary, file = paste(selectyr, species, trap.name, "Cohort Results.csv"))
options(scipen = 0)
}
if(den.plot == TRUE) {
ggmcmc(model.fit.gg, file="U Density Plots.pdf", family="U",plot="density")
}
if(trace.plot == TRUE) {
ggmcmc(model.fit.gg, file="Trace Plot.pdf", plot="traceplot")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.