R/TimeStratPetersenNonDiagError_fit.R


# 2021-10-23 CJS Added trunc.logitP to deal with extreme values of logitP during plotting
# 2020-12-15 CJS Removed sampfrac from code
# 2020-12-14 CJS All bad.n1 or bad.m2 are set to 0. No NAs allowed for n1 or m2
#                Fixed problem with some u2=NA in the GOF computations and plots
# 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP
# 2018-12-19 CJS deprecated use of sampling fraction
# 2018-12-15 CJS converted the muTT and sdTT plots to ggplot objects
# 2018-12-15 CJS tested fixing logitP to certain values especially at the end of the sampling chain
# 2018-12-06 CJS converted report to textConnection() object
# 2018-12-03 CJS converted fit plot to ggplot2
# 2018-12-02 CJS converted trace plots to ggplot2
# 2018-12-01 CJS Converted acf, posterior plots to ggplot2
# 2018-11-28 CJS Fixed problem of large printouts being cut off in results
# 2018-11-25 CJS Removed all references to OpenBugs
# 2014-09-01 CJS conversion to JAGS
# 2012-08-30 CJS fixed NAs problem in any() and all() in error checking
# 2011-06-13 CJS added p-values to results
# 2010-02-21 CJS changed u2 to new.u2 in expanded.m2 section
# 2010-11-25 CJS pretty printing of final population estimates
# 2010-09-06 CJS forced input vectors to be vectors
# 2010-08-06 CJS produced traceplot
# 2010-08-04 CJS added version/date to final result
# 2010-03-12 CJS added n.chains etc to calling arguments
# 2010-03-03 CJS allowed the user to fix some logitPs to account for times when no sampling (or other events
#                with known probability of capture) takes place
# 2009-12-08 CJS added some basic error checking on arguments
# 2009-12-07 CJS added bad.n1, bad.u2 arguments and fixups
# 2009-12-01 CJS added open/winbugs directory to argument list




#' Wrapper (*_fit) to fit the Time Stratified Petersen Estimator
#' with NON Diagonal Entries function.
#' 
#' Takes the number of marked fish released, the number of recaptures, and the
#' number of unmarked fish and uses Bayesian methods to fit a spline
#' through the population numbers and a hierarchical model for the trap
#' efficiencies over time.  The output is written to files and an MCMC object
#' is also created with samples from the posterior.
#' 
#' Normally the user makes a call to the *_fit function which then calls the
#' fitting function.
#' 
#' Use the \code{\link{TimeStratPetersenDiagError_fit}} function for cases
#' where recaptures take place ONLY in the stratum of release, i.e. the
#' diagonal case.
#' 
#' The non-diagonal case fits a log-normal distribution for the travel time.
#' The *NP functions fit a non-parametric distribution for the travel times.
#' The *MarkAvail functions extend the *NP functions to allow for reductions in
#' mark availability because of fall back, immediate tagging mortality, etc.
#' 
#' 
#' @template title
#' @template prefix
#' @template time
#' @template n1 
#' @param m2 A numeric matrix of the number of fish released in stratum [i] and
#' recovered [j-1] strata later.  For example, m2[3,5] is the number of
#' marked fish released in stratum 3 and recovered 4 strata later in stratum 7.
#' The first column is the number of marked fish recovered in the stratum of
#' release, i.e. 0 strata later.  Use the
#' \code{\link{TimeStratPetersenDiagError_fit}} function for cases where
#' recaptures take place ONLY in the stratum of release, i.e. the diagonal
#' case.
#' @template u2.ND
#' @template sampfrac
#' @template jump.after
#' @template bad.n1
#' @template bad.m2 
#' @template bad.u2 
#' @template logitP.cov
#' @param logitP.fixed A numeric vector (could be null) of the time strata
#' where the logit(P) would be fixed. Typically, this is used when the capture
#' rates for some strata are 0 and logit(P) is set to -10 for these strata. The
#' fixed values are given in \code{logitP.fixed.values}
#' @param logitP.fixed.values A numerical vector (could be null) of the fixed
#' values for logit(P) at strata given by logitP.fixed. Typically this is used
#' when certain strata have a 0 capture rate and the fixed value is set to -10
#' which on the logit scale gives p[i] essentially 0. Don't specify values such
#' as -50 because numerical problems could occur in JAGS.
#' @template mcmc-parms
#' @template tauU.alpha.beta
#' @template taueU.alpha.beta
#' @template prior.beta.logitP.mean.sd
#' @template tauP.alpha.beta
#' @template run.prob 
#' @template debug
#' @template InitialSeed
#' @template save.output.to.files
#' @template trunc.logitP

#' @return An MCMC object with samples from the posterior distribution. A
#' series of graphs and a text file are also created in the working directory.
#' @template author 
#' @template references
#' @keywords ~models ~smooth
#' @examples
#'  
#' ##---- See the vignettes for examples of how to use this package
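#'
#' ## A minimal sketch with hypothetical (made-up) data, only to illustrate the
#' ## shape of the inputs: n1 releases, a non-diagonal m2 matrix whose first
#' ## column holds recaptures in the stratum of release, and a u2 vector that
#' ## is longer than n1 so that late recoveries are covered.
#' \dontrun{
#' n1 <- c(100, 150, 120)                    # marked fish released in strata 1-3
#' m2 <- matrix(c(10, 5, 2,                  # row i = recaptures 0, 1, 2 strata after release i
#'                12, 7, 3,
#'                 9, 6, 1), nrow=3, byrow=TRUE)
#' u2 <- c(300, 350, 400, 380, 250)          # unmarked catch in strata 1-5
#' fit <- TimeStratPetersenNonDiagError_fit(
#'          title="Example", prefix="ex",
#'          time=1:5, n1=n1, m2=m2, u2=u2,
#'          n.chains=3, n.iter=5000, n.burnin=2500, n.sims=500)
#' }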
#' 
#' @export TimeStratPetersenNonDiagError_fit
#' @importFrom stats runif var sd

TimeStratPetersenNonDiagError_fit <-
  function( title="TSPNDE",
           prefix="TSPNDE-", time, n1, m2, u2,
           sampfrac=rep(1,length(u2)), jump.after=NULL,
           bad.n1=c(), bad.m2=c(), bad.u2=c(),
           logitP.cov=as.matrix(rep(1,length(u2))),
           logitP.fixed=NULL, logitP.fixed.values=NULL, 
           n.chains=3, n.iter=200000, n.burnin=100000, n.sims=2000, 
           tauU.alpha=1,tauU.beta=.05,
           taueU.alpha=1, taueU.beta=.05,
           prior.beta.logitP.mean = c(logit(sum(m2,na.rm=TRUE)/sum(n1,na.rm=TRUE)),rep(0,  ncol(as.matrix(logitP.cov))-1)),
           prior.beta.logitP.sd   = c(2,                                           rep(10, ncol(as.matrix(logitP.cov))-1)), 
           tauP.alpha=.001,
           tauP.beta=.001,
           run.prob=seq(0,1,.1), # what percentiles of run timing are wanted
           debug=FALSE, debug2=FALSE,
           InitialSeed=ceiling(stats::runif(1,min=0,1000000)),
           save.output.to.files=TRUE,
           trunc.logitP=15) {
# Fit a Time Stratified Petersen model with NON-diagonal entries and with smoothing on U allowing for random error
# This is the classical stratified Petersen model where recoveries can take place in the stratum of
# release and in later strata
#
    version <- '2021-11-02'
    options(width=200)

# Input parameters are
#    title  - title for the analysis (character string)
#    prefix - prefix used for files created with the analysis results
#             this should be in standard Windows format, e.g. JC-2002-ST-TSPNDE
#             to which is appended various suffixes for plots etc (character string)
#    time   - vector of stratum numbers. For example, 9:38 would indicate that the
#             Trinity River system sampled weeks 9 to 38.
#             These values are never used in the analysis and only serve as labels for the weeks and for plotting purposes.
#             They should be contiguous, equally spaced values and be the same length as u2.
#    n1, m2, u2 - the input data consisting of fish marked and released, recaptured, and unmarked fish captured
#             Note that m2 is a MATRIX. The first column are those fish recaptured in the stratum of release
#             and the subsequent columns are those recoveries in later strata.
#             This is expanded to the full matrix [i,j] for fish released in stratum i and recovered in stratum j
#             The vector u2 should be long enough to account for any fish that are recaptured later on
#             from releases late in the season. The bottom right diagonal of m2 may be all zeros - that is ok
#             Notice that length(u2) can be longer than length(n1) (= nrow(m2)).
#    sampfrac - Deprecated. DO NOT USE ANYMORE. 
#    jump.after - in some cases, a single spline is still not flexible enough to cope with rapid
#                 changes in the run curve. For example, in the Trinity River project, a larger
#                 hatchery release occurs around stratum 14. This is a vector indicating the
#                 strata AFTER which the spline curve is allowed to jump.
#                 null or vector of arbitrary length.
#    bad.n1  - vector of stratum numbers where the value of n1 is suspect.
#    bad.m2  - vector of stratum numbers where the value of m2 is suspect.
#              For example, the capture rate could be extremely low.
#              These are set to NA prior to the call to  JAGS
#    bad.u2  - vector of stratum numbers where the value of u2 is suspect.
#    logitP.cov - matrix of covariates for logit(P). If the strata times are "missing" some values, an intercept is assumed
#               for the first column of the covariate matrix and 0 for the rest of the covariates.
#               CAUTION - this MAY not be what you want to do. It is likely best to enter ALL strata
#               if you have any covariates. The default, if not specified, is a constant (the mean logit)
#    logitP.fixed - vector of time values where the logitP will be specified in advance. Typically this occurs
#                   when no sampling takes place and logitP is fixed at -10 (essentially logit(0))
#    logitP.fixed.values - vector of fixed values (on the logit scale). Use -10 for p[i] near 0, and 10 for p[i] near 1
#    tauU.alpha, tauU.beta   - parameters for the prior on variance in spline coefficients
#    taueU.alpha, taueU.beta - parameters for the prior on variance in log(U) around fitted spline
#    prior.beta.logitP.mean, prior.beta.logitP.sd   - parameters for the prior on the logit(P) coefficients [the first element is the intercept term]
#                              By default, the remaining covariate coefficients are assigned priors with a mean of 0 and a sd of 10
#    tauP.alpha, tauP.beta   - parameters for the prior on 1/var of residual error in logit(P)'s
#    run.prob  - percentiles of run timing wanted
#    debug  - if TRUE, then this is a test run with very small MCMC chains run to test out the data
#             and JAGS will run and stop, waiting for you to exit and complete the run

# force input vectors to be vectors as needed. Note that m2 is NOT a vector!
time     <- as.vector(time)
n1       <- as.vector(n1)
u2       <- as.vector(u2)
sampfrac <- as.vector(sampfrac)

#  Do some basic error checking
#  1. Check that length of n1, m2, u2, sampfrac, time are consistent with each other.
#  In the non-diagonal case, they don't have to match
if(length(n1)!=nrow(m2)){
   cat("***** ERROR ***** Length of n1 and number of rows of m2 must be equal. They are:",
        length(n1)," ",nrow(u2),"\n")
   return()}
if(!is.numeric(n1)){
   cat("***** ERROR ***** n1 must be numeric. You have:",
        paste(n1,collapse=", "),"\n")
   return()} 
if(any(is.na(n1))){
  cat("***** ERROR ***** All values of n1 must not be missing. You have: ",
        paste(n1,collapse=", "),"\n")
   return()}
if(any(n1 < 0, na.rm=TRUE)){
  cat("***** ERROR ***** All values of n1 must be non-negative. You have: ",
        paste(n1,collapse=", "),"\n")
   return()}
if(stats::var(c(length(u2),length(sampfrac),length(time)))>0){
   cat("***** ERROR ***** Lengths of u2, sampfrac, time must all be equal. They are:",
        length(u2)," ",length(sampfrac)," ",length(time),"\n")
   return()}
if(length(logitP.cov) %% length(u2) != 0){
   cat("***** ERROR ***** Dimension of covariate vector doesn't match length of u2. They are:",
        length(u2)," ",length(logitP.cov)," ",dim(logitP.cov),"\n")
   return()}
#  2. Check that rowsum of m2<= n1
if(any(apply(m2,1,sum, na.rm=TRUE)>n1)){
   cat("***** ERROR ***** m2[i,+] must be <= n1[i]. The arguments are \n n1:",
       paste(n1,collapse=","),"\n m2:",
       paste(m2,collapse=","),"\n")
   return()}
#  3. Elements of bad.m2 and jump.after must belong to time
if(!all(bad.n1 %in% time,na.rm=TRUE)){
   cat("***** ERROR ***** bad.n1 must be elements of strata identifiers. You entered \n bad.n1:",
       paste(bad.n1,collapse=","),"\n Strata identifiers are \n time:",
       paste(time,  collapse=","), "\n")
   return()}
if(!all(bad.m2 %in% time,na.rm=TRUE)){
   cat("***** ERROR ***** bad.m2 must be elements of strata identifiers. You entered \n bad.m2:",
       paste(bad.m2,collapse=","),"\n Strata identifiers are \n time:",
       paste(time,  collapse=","), "\n")
   return()}
if(!all(bad.u2 %in% time,na.rm=TRUE)){
   cat("***** ERROR ***** bad.u2 must be elements of strata identifiers. You entered \n bad.u2:",
       paste(bad.u2,collapse=","),"\n Strata identifiers are \n time:",
       paste(time  ,collapse=","), "\n")
   return()}
if(!all(jump.after %in% time,na.rm=TRUE)){
   cat("***** ERROR ***** jump.after must be elements of strata identifiers. You entered \n jump.after:",
       paste(jump.after,collapse=","),"\n Strata identifiers are \n time:",
       paste(time,      collapse=","), "\n")
   return()}

#  4. check that index of logitP.fixed belong to time
if(!all(logitP.fixed %in% time,na.rm=TRUE)){
   cat("***** ERROR ***** logitP.fixed must be elements of strata identifiers. You entered \n logitP.fixed:",
       paste(logitP.fixed,collapse=","),"\n Strata identifiers are \n time:",
       paste(time        ,collapse=","), "\n")
   return()}
if(length(logitP.fixed)!=length(logitP.fixed.values)){
   cat("***** ERROR ***** Lengths of logitP.fixed and logitP.fixed.values must all be equal. They are:",
        length(logitP.fixed)," ",length(logitP.fixed.values),"\n")
   return()}

# Check that that the prior.beta.logitP.mean and prior.beta.logitP.sd length=number of columns of covariates
logitP.cov <- as.matrix(logitP.cov)
if(!is.vector(prior.beta.logitP.mean) | !is.vector(prior.beta.logitP.sd)){
   stop("prior.beta.logitP.mean and prior.beta.logitP.sd must be vectors")
}
if(!is.numeric(prior.beta.logitP.mean) | !is.numeric(prior.beta.logitP.sd)){
   stop("prior.beta.logitP.mean and prior.beta.logitP.sd must be numeric")
}
if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP.sd) != ncol(logitP.cov)){
   stop("prior.beta.logitP.mean and prior.beta.logitP.sd must be same length as number columns in covariate matrix")
}

# Deprecation of sampling fraction.
if(any(sampfrac != 1)){
  cat("***** ERROR ***** Sampling fraction is deprecated for any values other than 1. DO NOT USE ANYMORE. ")
  return()
}

results.filename <- paste(prefix,"-results.txt",sep="")

stdout <- vector('character')
report <- textConnection('stdout', 'wr', local = TRUE)
sink(report)

cat(paste("Time Stratified Petersen with Non-Diagonal recaptures, error in smoothed U, and log-normal distribution for travel time - ", date()))
cat("\nVersion: ", version)

cat("\n\n", title, "Results \n\n")


cat("*** Raw data *** (padded to match length of u2) \n")
jump.indicator <- rep('   ', length(u2))
jump.indicator[time %in% jump.after]<- '***'
ex.n1 <- c(n1, rep(NA, length(u2)-length(n1)))
ex.m2 <- rbind(m2,matrix(NA, nrow=length(u2)-length(n1), ncol=ncol(m2)))
temp<- data.frame(time=time, n1=ex.n1, m2=ex.m2, u2=u2, logitP.cov=logitP.cov, jump=jump.indicator)
print(temp)
cat("\n\n")
cat("Jump point are after strata: ", jump.after)
if(length(jump.after)==0) cat("none - A single spline is fit")
cat("\nFixed logitP indices are: ", logitP.fixed)
if(length(logitP.fixed)==0) cat("none - NO fixed values")
cat("\nFixed logitP values  are: ", logitP.fixed.values)
if(length(logitP.fixed)==0) cat("none - NO fixed values")


# Obtain the Pooled Petersen estimator prior to fixup of bad.n1, bad.m2, and bad.u2 values
cat("\n\n*** Pooled Petersen Estimate prior to fixing bad n1, m2, or u2 values ***\n\n")
temp.n1 <- n1
temp.m2 <- m2
temp.u2 <- u2

cat("Total n1=", sum(temp.n1,na.rm=TRUE),";  m2=",sum(temp.m2,na.rm=TRUE),";  u2=",sum(temp.u2,na.rm=TRUE),"\n\n")
pp <- SimplePetersen(sum(temp.n1,na.rm=TRUE), sum(temp.m2,na.rm=TRUE), sum(temp.u2,na.rm=TRUE))
cat("Est U(total) ", format(round(pp$U.est),big.mark=","),"  (SE ", format(round(pp$U.se), big.mark=","), ")\n")
cat("Est N(total) ", format(round(pp$N.est),big.mark=","),"  (SE ", format(round(pp$N.se), big.mark=","), ")\n\n\n")

# Obtain the Pooled Petersen estimator after removal of entries with bad.n1, m2, or u2 values
select.rel <- !(time[1:length(n1)] %in% bad.n1 | time[1:length(n1)] %in% bad.m2 )
select.rec <- ! time %in% bad.u2
cat("\n\n*** Pooled Petersen Estimate after removing release and recovery strata flagged as bad ***\n\n")
cat("The following release strata were excluded:",
     if(length(time[!select.rel])>0){time[!select.rel]} else {" NONE"}, "\n")
cat("The following recovery strata were excluded:",
     if(length(time[!select.rec])>0){time[!select.rec]} else {" NONE"}, "\n")

temp.n1 <- n1[select.rel]
temp.m2 <- m2[select.rel, ,drop=FALSE]
temp.u2 <- u2[select.rec]

cat("Total n1=", sum(temp.n1,na.rm=TRUE),";  m2=",sum(temp.m2,na.rm=TRUE),";  u2=",sum(temp.u2, na.rm=TRUE),"\n\n")
pp <- SimplePetersen(sum(temp.n1,na.rm=TRUE), sum(temp.m2,na.rm=TRUE), sum(temp.u2,na.rm=TRUE))
cat("Est U(total) ", format(round(pp$U.est),big.mark=","),"  (SE ", format(round(pp$U.se), big.mark=","), ")\n")
cat("Est N(total) ", format(round(pp$N.est),big.mark=","),"  (SE ", format(round(pp$N.se), big.mark=","), ")\n\n\n")




# Test if pooling can be done
# We only do the release strata that are not flagged as bad and have no missing values

cat("*** Test if pooled Petersen is allowable. [Check if equal recovery from each stratum not flagged and without missing recoveries] ***\n\n")
#browser()
select <- select.rel  & (!is.na(apply(m2,1,sum)))
temp.n1 <- n1[select]
temp.m2 <- m2[select, ,drop=FALSE]
test <- TestIfPool( temp.n1, apply(temp.m2,1,sum, na.rm=TRUE))
cat("(Large Sample) Chi-square test statistic ", test$chi$statistic," has p-value", test$chi$p.value,"\n\n")
temp <- cbind(time[1:length(n1)][select],test$chi$observed, round(test$chi$expected,1), round(test$chi$residuals^2,1))
colnames(temp) <- c('time','n1-m2','m2','E[n1-m2]','E[m2]','X2[n1-m2]','X2[m2]')
print(temp)
cat("\n Be cautious of using this test in cases of small expected values. \n\n")


# Adjust the data for the explicitly bad values or other problems
new.time <- time
new.n1   <- n1
new.m2   <- m2
new.u2   <- u2
new.logitP.cov <- logitP.cov


# Set bad n1 values to 0 (no fish released) and the corresponding rows of m2 to 0 (no subsequent recoveries).
# Similarly, set the releases and subsequent recoveries for bad m2 values to 0.
# But we don't set bad u2 values to 0, as this would imply no catch; these are set to missing (NA).
new.n1[time[1:length(n1)] %in% c(bad.n1,bad.m2) ]  <- 0
new.m2[time[1:length(n1)] %in% c(bad.m2,bad.n1),]  <- 0
new.u2[time %in% bad.u2]                <- NA

# Print out the revised data
cat("\n\n*** Revised data after setting flagged strata to 0 (releases) or NA (recoveries) *** \n")
jump.indicator <- rep('   ', length(u2))
jump.indicator[time %in% jump.after]<- '***'
ex.n1 <- c(new.n1, rep(NA, length(new.u2)-length(new.n1)))
ex.m2 <- rbind(new.m2,matrix(NA, nrow=length(new.u2)-length(new.n1), ncol=ncol(new.m2)))
temp<- data.frame(time=new.time, n1=ex.n1, m2=ex.m2, u2=new.u2, logitP.cov=new.logitP.cov,
       jump.after=jump.indicator)
print(temp)
cat("\n\n")

# Need to expand the m2 matrix to the full Nstrata.rel x Nstrata.cap+1 matrix with
# the [i,j] entry for the number of fish released in stratum i and recaptured in stratum j
# The last column of the expanded m2 matrix is the number of fish released but never recaptured later.

expanded.m2 <- matrix(0, nrow=length(new.n1), ncol=length(new.u2)+1)
for(i in 1:length(new.n1)){
   expanded.m2[i,1:length(new.u2)] <- c(rep(0,i-1),new.m2[i,],rep(0,length(new.u2)))[1:length(new.u2)]
   expanded.m2[i,length(new.u2)+1] <- new.n1[i] - sum(new.m2[i,])
}
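
# Illustrative sketch (hypothetical values): with 2 release strata and 4 capture
# strata, new.n1=c(100,150) and new.m2=rbind(c(10,5), c(12,7)), the loop above
# shifts each release row to its recovery strata and appends the never-recaptured
# count in the last column:
#    expanded.m2[1,] = c(10,  5, 0, 0,  85)   # 100 - (10+5)
#    expanded.m2[2,] = c( 0, 12, 7, 0, 131)   # 150 - (12+7)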

cat("*** Expanded m2 array ***\n\n")
save.max.print <- getOption("max.print")
options(max.print=.Machine$integer.max)
print(expanded.m2)
options(max.print=save.max.print)


# assign the logitP fixed values etc.
new.logitP.fixed <- rep(NA, length(new.u2))
new.logitP.fixed[match(logitP.fixed, time)] <- logitP.fixed.values
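# For example (hypothetical values): with time=9:38, logitP.fixed=c(12,13) and
# logitP.fixed.values=c(-10,-10), new.logitP.fixed is NA everywhere except
# positions 4 and 5 (strata 12 and 13), which are set to -10.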


# Print out information on the prior distributions used
cat("\n\n*** Information on priors *** \n")
cat("   Parameters for prior on tauU (variance in spline coefficients: ", tauU.alpha, tauU.beta,
    " which corresponds to a mean/std dev of 1/var of:",
    round(tauU.alpha/tauU.beta,2),round(sqrt(tauU.alpha/tauU.beta^2),2),"\n")
cat("   Parameters for prior on taueU (variance of log(U) about spline: ",taueU.alpha, taueU.beta,
    " which corresponds to a mean/std dev of 1/var of:",
    round(taueU.alpha/taueU.beta,2),round(sqrt(taueU.alpha/taueU.beta^2),2),"\n")
cat("   Parameters for prior on beta.logitP[1] (intercept) (mean, sd): \n", cbind(round(prior.beta.logitP.mean,3), round(prior.beta.logitP.sd,5)),"\n")
cat("   Parameters for prior on tauP (residual variance of logit(P) after adjusting for covariates: ",tauP.alpha, tauP.beta,
    " which corresponds to a mean/std dev of 1/var of:",
    round(tauP.alpha/tauP.beta,2),round(sqrt(tauP.alpha/tauP.beta^2),2),"\n")
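
# A brief sketch of the moments printed above (assuming, as in the printout, a
# Gamma(alpha, beta) prior on each precision 1/var): the prior mean is alpha/beta
# and the prior sd is sqrt(alpha/beta^2). For example, the default tauU.alpha=1,
# tauU.beta=0.05 gives a prior mean of 20 and sd of 20 for 1/var of the spline
# coefficients.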


cat("\n\nInitial seed for this run is: ",InitialSeed, "\n")

sink()

if (debug2) {
   cat("\nprior to formal call to TimeStratPetersenNonDiagError\n")
   browser()
}

if (debug)
  {results <- TimeStratPetersenNonDiagError(
        title=title, prefix=prefix,
        time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2,
        jump.after=(1:length(u2))[time %in% jump.after],
        logitP.cov=new.logitP.cov, logitP.fixed=new.logitP.fixed,
        n.chains=3, n.iter=10000, n.burnin=5000, n.sims=500,  # set to small values for debugging only
        prior.beta.logitP.mean=prior.beta.logitP.mean, 
        prior.beta.logitP.sd  =prior.beta.logitP.sd,
        tauU.alpha=tauU.alpha, tauU.beta=tauU.beta, taueU.alpha=taueU.alpha, taueU.beta=taueU.beta,
        debug=debug, debug2=debug2, InitialSeed=InitialSeed, save.output.to.files=save.output.to.files)
 }
else # note: R syntax requires the closing } of the if block before the else
  {results <- TimeStratPetersenNonDiagError(
        title=title, prefix=prefix,
        time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2,
        jump.after=(1:length(u2))[time %in% jump.after],
        logitP.cov=new.logitP.cov, logitP.fixed=new.logitP.fixed,
        n.chains=n.chains, n.iter=n.iter, n.burnin=n.burnin, n.sims=n.sims,
        prior.beta.logitP.mean=prior.beta.logitP.mean, 
        prior.beta.logitP.sd  =prior.beta.logitP.sd,
        tauU.alpha=tauU.alpha, tauU.beta=tauU.beta, taueU.alpha=taueU.alpha, taueU.beta=taueU.beta,
        debug=debug, debug2=debug2, InitialSeed=InitialSeed, save.output.to.files=save.output.to.files)
 }

# Now to create the various summary tables of the results

Nstrata.rel <- length(n1)
Nstrata.cap <- ncol(expanded.m2) -1 # don't forget that last column of m2 is number of fish never seen

# A plot of the observed log(U) on the log scale, and the final mean log(U)
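# The "observed" log(U[i]) here is a rough Petersen-style ratio per stratum,
# log( (u2[i]+1) * (n1[i]+2) / (total recoveries from release stratum i + 1) ),
# used only as a plotting reference; strata beyond the release strata get NA.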
plot.df   <- data.frame(time =new.time)
plot.df$logUi <-log( c((new.u2[1:Nstrata.rel]+1)*(new.n1+2)/(apply(expanded.m2[,1:Nstrata.cap],1,sum)+1), rep(NA, length(u2)-Nstrata.rel)))

# extract the fitted U values
results.row.names <- rownames(results$summary)
etaU.row.index    <- grep("etaU", results.row.names)
etaU<- results$summary[etaU.row.index,]
plot.df$logU    =etaU[,"mean"]
plot.df$logUlcl =etaU[,"2.5%"]
plot.df$logUucl =etaU[,"97.5%"]

# extract the spline values
logUne.row.index <- grep("logUne", results.row.names)
logUne<- results$summary[logUne.row.index,"mean"]
plot.df$spline <- results$summary[logUne.row.index,"mean"]

# add limits to the plot to avoid non-monotone secondary axis problems with extreme values
   plot.df$logUi     <- pmax(-10 , pmin(20, plot.df$logUi))
   plot.df$logU      <- pmax(-10 , pmin(20, plot.df$logU ))
   plot.df$logUlcl   <- pmax(-10 , pmin(20, plot.df$logUlcl  ))
   plot.df$logUucl   <- pmax(-10 , pmin(20, plot.df$logUucl  ))
   plot.df$spline    <- pmax(-10 , pmin(20, plot.df$spline))

fit.plot <- ggplot(data=plot.df, aes_(x=~time))+
     ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals for estimated log(U[i])")+
     geom_point(aes_(y=~logUi), color="red", shape=1)+  # open circle
     xlab("Time Index\nOpen/closed circles - initial and final estimates")+
     ylab("log(U[i]) + 95% credible interval")+
     geom_point(aes_(y=~logU), color="black", shape=19)+
     geom_line (aes_(y=~logU), color="black")+
     geom_errorbar(aes_(ymin=~logUlcl, ymax=~logUucl), width=.1)+
     geom_line(aes_(y=~spline),linetype="dashed")+
     scale_x_continuous(breaks=seq(min(plot.df$time, na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+
     scale_y_continuous(sec.axis = sec_axis(~ exp(.), name="U + 95% credible interval",
                      breaks=c(1,10,20,50,
                                 100,200,500,
                                 1000,2000,5000,
                                 10000,20000, 50000,
                                 100000,200000, 500000,
                                 1000000,2000000,5000000,10000000),
                      labels = scales::comma))



if(save.output.to.files)ggsave(plot=fit.plot, filename=paste(prefix,"-fit.pdf",sep=""), height=6, width=10, units="in")
results$plots$fit.plot <- fit.plot


# plot of the logit(p) over time
logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, 
                           logitP.cov=new.logitP.cov, results=results,
                           trunc.logitP=trunc.logitP)
if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in")
results$plots$logitP.plot <- logitP.plot


# Look at autocorrelation function for Ntot
mcmc.sample <- data.frame(parm="Utot", sample=results$sims.matrix[,"Utot"], stringsAsFactors=FALSE)
acf.Utot.plot <- plot_acf(mcmc.sample)
if(save.output.to.files)ggsave(plot=acf.Utot.plot, filename=paste(prefix,"-Utot-acf.pdf",sep=""), height=4, width=6, units="in")
results$plots$acf.Utot.plot <- acf.Utot.plot


# plot the posterior plots
mcmc.sample1 <- data.frame(parm="Utot", sample=results$sims.matrix[,"Utot"], stringsAsFactors=FALSE)
mcmc.sample2 <- data.frame(parm="Ntot", sample=results$sims.matrix[,"Ntot"], stringsAsFactors=FALSE)
mcmc.sample <- rbind(mcmc.sample1, mcmc.sample2)
post.UNtot.plot <- plot_posterior(mcmc.sample)
post.UNtot.plot
if(save.output.to.files)ggsave(plot=post.UNtot.plot, filename=paste(prefix,"-UNtot-posterior.pdf",sep=""),
                               height=ifelse(length(unique(mcmc.sample$parm))<=2,4,6), width=6, units="in")
results$plots$post.UNtot.plot <- post.UNtot.plot


# plot the mean and sd log(travel times) (the muLogTT and the sdLogTT) vs release stratum number
#browser()
results.row.names <- rownames(results$summary)
muLogTT.row.index    <- grep("muLogTT", results.row.names)
muLogTT<- data.frame(results$summary[muLogTT.row.index,])
muLogTT$stratum <- new.time[1:Nstrata.rel]
muLogTT$source  <- "Mean log(travel time)"

results.row.names <- rownames(results$summary)
sdLogTT.row.index    <- grep("sdLogTT", results.row.names)
sdLogTT<- data.frame(results$summary[sdLogTT.row.index,])
sdLogTT$stratum <- new.time[1:Nstrata.rel]
sdLogTT$source  <- "SD log(travel time)"

plotdata <- rbind( muLogTT, sdLogTT)

musdLogTT.plot <- ggplot(data=plotdata, aes_(x=~stratum, y=~mean))+
   ggtitle(title)+
   geom_point()+
   geom_line()+
   geom_errorbar(aes_(ymin=~X2.5., ymax=~X97.5.), width=.1)+
   xlab("Stratum")+ylab("mean/sd log(travel time)")+
   facet_wrap(~source, ncol=1, scales="free_y")
musdLogTT.plot

if(save.output.to.files)ggsave(plot=musdLogTT.plot, filename=paste(prefix,"-musdLogTT.pdf",sep=""),
                               height=6, width=6, units="in")
results$plots$musdLogTTt <- musdLogTT.plot


## save the Bayesian predictive distribution (Bayesian p-value plots)

discrep <-PredictivePosterior.TSPNDE (new.n1, expanded.m2, new.u2,
                                      new.logitP.fixed,
                                      expit(results$sims.list$logitP),
                                      round(results$sims.list$U),
                                      results$sims.list$muLogTT,
                                      results$sims.list$sdLogTT)
#browser()
gof <- PredictivePosteriorPlot.TSPNDE (discrep)
if(save.output.to.files)ggsave(gof[[1]],filename=paste(prefix,"-GOF.pdf",sep=""),  height=12, width=8, units="in", dpi=300 )
results$plots$gof.plot <- gof

# create traceplots of logU, U, and logitP (along with R value) to look for non-convergence
# the plot_trace will return a list of plots (one for each page as needed)
varnames <- names(results$sims.array[1,1,])  # extract the names of the variables 

#browser()
# Trace plots of logitP
trace.plot <- plot_trace(title=title, results=results, parms_to_plot=varnames[grep("^logitP", varnames)])
if(save.output.to.files){
   pdf(file=paste(prefix,"-trace-logitP.pdf",sep=""))
   plyr::l_ply(trace.plot, function(x){plot(x)})
   dev.off()
}
results$plots$trace.logitP.plot <- trace.plot

# now for the traceplots of logU (etaU), Utot, and Ntot
trace.plot <- plot_trace(title=title, results=results, parms_to_plot=varnames[c(grep("Utot",varnames), grep("Ntot",varnames), grep("^etaU", varnames))])
if(save.output.to.files){
   pdf(file=paste(prefix,"-trace-logU.pdf",sep=""))
   plyr::l_ply(trace.plot, function(x){plot(x)})
   dev.off()
}
results$plots$trace.logU.plot <- trace.plot


sink(report, append=TRUE)

# Global summary of results
cat("\n\n*** Summary of MCMC results *** \n\n")
  save.max.print <- getOption("max.print")
  options(max.print=.Machine$integer.max)
  
  print(results, digits.summary=3)#, max=.Machine$integer.max)
  
  options(max.print=save.max.print)
  
  
cat("\n\n*** Alternate DIC computation based on p_D = var(deviance)/2 \n")
results.row.names <- rownames(results$summary)
deviance.row.index<- grep("deviance", results.row.names)
deviance          <- results$summary[deviance.row.index,]
p.D <- deviance["sd"]^2/2
dic <- deviance["mean"]+p.D
cat("    D-bar: ", deviance["mean"],";  var(dev): ", deviance["sd"]^2,
    "; p.D: ", p.D, "; DIC: ", dic)

# Summary of population sizes. Add pretty printing of results.
cat("\n\n\n\n*** Summary of Unmarked Population Size ***\n")
temp<- results$summary[ grep("Utot", rownames(results$summary)),]
old.Rhat <- temp["Rhat"]
temp<- formatC(temp, big.mark=",", format="d")
temp["Rhat"] <- formatC(old.Rhat,digits=2,format="f",flag="#")
print(temp, quote=FALSE)

cat("\n\n*** Summary of Total Population Size *** \n")
temp<- results$summary[ grep("Ntot", rownames(results$summary)),]
old.Rhat <- temp["Rhat"]
temp<- formatC(temp, big.mark=",", format="d")
temp["Rhat"] <- formatC(old.Rhat,digits=2,format="f",flag="#")
print(temp, quote=FALSE)



cat("\n\n\n\n*** Summary of Quantiles of Run Timing *** \n")
cat(    "    This is based on the sample weeks provided and the U[i] values \n")
q <- RunTime(time=time, U=results$sims.list$U, prob=run.prob)
temp <- rbind(apply(q,2,mean), apply(q,2,sd))
rownames(temp) <- c("Mean", "Sd")
print(round(temp,2))

# Add the runtiming to the output object
results$runTime <- temp


cat("\n\n")
cat(paste("*** end of fit *** ", date()))

sink()

# save the report to a file?
if(save.output.to.files)writeLines(stdout, results.filename)
results$report <- stdout

# add some of the raw data to the results object for simplicity in referencing it later
results$data <- list( time=time, n1=n1, m2=m2, u2=u2,
                      jump.after=jump.after,
                      bad.n1=bad.n1, bad.m2=bad.m2, bad.u2=bad.u2,
                      logitP.cov=logitP.cov,
                      version=version, date_run=date(),title=title)

return(results)
} # end of function
