R/TimeStratPetersenDiagErrorWHChinook.R
In BTSPAS: Bayesian Time-Stratified Population Analysis

## 2020-11-07 CJS Allow user to specify prior for beta parameters for covariates on logitP
# 2018-12-06 CJS converted initial plot to ggplot2 format
# 2014-09-01 CJS Converted to JAGS
#                - no model name
#                - C(,20) -> T(,20)
#                - dflat() to dnorm(0, 1E-6)
#                - added u2.Ncopy to improve mixing based on Matt S. suggestion
#                - added u2.Acopy to improve mixing based on Matt S. suggestion
#                - fixed monitoring of *H parameters that are only present in hatch.after or later
#                  JAGS won't monitor these variables unless entries from 1:hatch.after are defined
# 2013-09-04 CJS Add initialization for missing values; removed references to WinBugs
# 2011-05-15 CJS limited etaU to 20 or less
# 2011-01-24 SB  added call to run.windows.openbugs and run.windows.winbugs
# 2013-13-31 CJS updated for JAGS
# 2010-11-25 CJS add output to track progress of burnin and post-burnin phases
# 2010-04-26 CJS fixed problem with initial values for logitP when n1=m2 (+infinite) which crashed stats::lm()
# 2009-12-05 CJS added title to argument list
# 2009-12-01 CJS Added open/win bugs path names to argument list

#' @keywords internal
#' @importFrom stats lm spline sd

TimeStratPetersenDiagErrorWHChinook <-
    function(title, prefix, time, n1, m2, u2.A, u2.N,
             hatch.after=NULL, clip.frac.H=.25,
             logitP.cov=as.matrix(rep(1,length(u2.A))),
             n.chains=3, n.iter=200000, n.burnin=100000, n.sims=2000,
             tauU.alpha=1, tauU.beta=.05, taueU.alpha=1, taueU.beta=.05,
             prior.beta.logitP.mean = c(logit(sum(m2,na.rm=TRUE)/sum(n1,na.rm=TRUE)),rep(0,  ncol(as.matrix(logitP.cov))-1)),
             prior.beta.logitP.sd   = c(stats::sd(logit((m2+.5)/(n1+1)),na.rm=TRUE),        rep(10, ncol(as.matrix(logitP.cov))-1)), 
             tauP.alpha=.001, tauP.beta=.001,
             debug=FALSE, debug2=FALSE, 
             InitialSeed,
             save.output.to.files=TRUE){

set.seed(InitialSeed)  # set prior to initial value computations


#
#  Fit the smoothed time-Stratified Petersen estimator with Diagonal recoveries (i.e. no recoveries
#  outside stratum of release), error in the smoothed U curve, and separating wild vs hatchery stocks
#
#  This routine assumes that the strata are time (e.g. weeks).
#  In each stratum n1 fish are released (with marks). These are ususally
#     captured fish that are marked, transported upstream, and released.
#     These fish are used only to estimate the recapture rate downstream.
#  Of the n1 fish released, m2 fish are recaptured in the same stratum (e.g. week) of release.
#     There is a related function that allows fish to be recaptured in subsequent weeks.
#
#  At the same tine, u2.A (adipose fin clipped) and u2.N (not clipped)
#     other (unmarked) fish are newly captured in stratum i.
#     These EXCLUDE recaptures of marked fish. These are the fish that are "expanded"
#     to estimate the population size of fish in stratum i.
#     All wild fish are NOT ad-clipped.
#     Only a fraction of hatchery fish are ad-clipped. It is assumed that the fraction of ad-clipped
#     hatchery fish is constant over the life of the run.
#
#  Input
#      prefix - prefix for file name for initial plot of U's
#      time  - the stratum number
#      n1    - vector of number of fish released in stratum i
#      m2    - vector of number of fish recovered in stratum i (EXCLUDING recaps)
#      u2.A  - vector of number of adclipped unmarked fish captured in stratum i
#      u2.N  - vector of number of non-clipped unmarked fish captured in stratum i
#      hatch.after - point AFTER which the hatchery fish are released.
#      clip.frac.H - what fraction of hatchery fish are clipped
#      logitP.cov - covariates for logit(P)=X beta.logitP


#  This routine makes a call to the MCMC sampler to fit the model and then gets back the
#  coda files for the posteriour distribution.

      ## Set working directory to current directory (we should allow users to select this)
      working.directory <- getwd()

      ## Define paths for the model, data, and initial value files
      model.file <- file.path(working.directory, "model.txt")
      data.file <- file.path(working.directory,"data.txt")
      init.files <- file.path(working.directory,
                              paste("inits", 1:n.chains,".txt", sep = ""))


      ## Save the Bugs progam to the model.txt file
      ##
      sink(model.file)  # NOTE: NO " allowed in model as this confuses the cat command
      cat("
model {
# Time Stratified Petersen with Diagonal recapture (no spillover in subsequent weeks or marked fish)
#    and allowing for error in the smoothed U curve with separation of wild and hatchery fish
# Each of the wild and hatchery populations are fit using a SINGLE spline curve as this should be flexible
#    enough to capture the individual behaviours

#  Data input:
#      Nstrata - number of strata
#      n1         - number of marked fish released
#      m2         - number of marked fish recaptured
#      u2.A       - number of adclipped unmarked fish captured (must be hatchery fish).
#      u2.N       - number of non-clipped unmarked fish captured (wild + hatchery fish)
#      clip.frac.H- what fraction of hatchery fish are clipped
#      logitP.cov   - covariates for logitP
#      NlogitP.cov  - number of logitP covariates
#      SplineDesign.W- wildfish spline design matrix of size [Nstrata, maxelement of n.b.notflat.W]
#      SplineDesign.H- hatchery spline design matrix of size [Nstrata, maxelement of n.b.notflat.H]
#                   This is set up prior to the call.
#      b.flat.W   - vector of strata indices where the prior for the b's will be flat for wild     fish
#      b.flat.H   - vector of strata indices where the prior for the b's will be flat for hatchery fish
#                 this is normally the first two weeks of each spline segment
#      n.b.flat.W - number of b coefficients that have a flat prior - wild     fish
#      n.b.flat.H - number of b coefficients that have a flat prior - hatchery fish
#      b.notflat.W - vector of strata indices where difference in coefficients is modelled - wild     fish
#      b.notflat.H - vector of strata indices where difference in coefficients is modelled - hatchery fish
#      n.b.notflat.W - number of b coefficients that do not have a flat prior - wild     fish
#      n.b.notflat.H - number of b coefficients that do not have a flat prior - hatchery fish
#      tauU.alpha, tauU.beta   - parameters for prior on tauU
#      taueU.alpha, taueU.beta - parameters for prior on taueU
#      prior.beta.logitP.mean, prior.beta.logitP.sd  - parameters for prior of coefficient of covariates for logitP
#      tauP.alpha, tauP.beta - parameter for prior on tauP (residual variance of logit(P)'s after adjusting for
#                         covariates)
#      clip.frac.H    - what fraction of hatchery fish are clipped (KNOWN in advance)
#
#  Parameters of the model are:
#      p[i]
#       logitP[i]  = logit(p[i]) = logitP.cov*beta.logitP
#         The beta coefficients have a prior that is N(mean= prior.beta.logitP.mean, sd= prior.beta.logitP.sd)
#      U.W[i] - number of unmarked wild     fish passing stratam i in population
#      U.H[i] - number of unmarked hatchery fish passing stratum i in population
#       etaU.W[i]  = log(U.W[i])
#       etaU.H[i]  = log(U.H[i])
#         which comes from spline with parameters bU.W[1... Knots+q] or bU.H[1... knots+q]
#         + error term eU.W[i] or eu.H[i]

   ##### Fit the spline for wildfish - this covers the entire experiment ######
   for(i in 1:Nstrata){
        logUne.W[i] <- inprod(SplineDesign.W[i,1:n.bU.W],bU.W[1:n.bU.W])  # spline design matrix * spline coeff
        etaU.W[i] ~ dnorm(logUne.W[i], taueU)T(,20)              # add random error
        eU.W[i] <- etaU.W[i] - logUne.W[i]
   }
   ##### Fit the spline for hatchery fish - these fish only enter AFTER hatch.after ######
   for(i in (hatch.after+1):Nstrata){
        logUne.H[i] <- inprod(SplineDesign.H[i,1:n.bU.H],bU.H[1:n.bU.H])  # spline design matrix * spline coeff
        etaU.H[i] ~ dnorm(logUne.H[i], taueU)T(,20)              # add random error
        eU.H[i] <- etaU.H[i] - logUne.H[i]
   }

   ##### Model the capture probabilities #####
   for(i in 1:hatch.after){
        mu.logitP[i] <- inprod(logitP.cov[i,1:NlogitP.cov], beta.logitP[1:NlogitP.cov])
        ## logitP[i] ~ dnorm(mu.logitP[i],tauP)

        # use the u2.Ncopy to break the cycle (in OpenBugs) and improve mixing (see Matt S.)
        mu.epsilon[i] <- mu.logitP[i] - log(u2.Ncopy[i] + 1) + etaU.W[i]
        epsilon[i] ~ dnorm(mu.epsilon[i],tauP)

        logitP[i] <-  log(u2.Ncopy[i] + 1) - etaU.W[i] + epsilon[i]   # Matts trick to speed mixing
   }
   for(i in (hatch.after+1):Nstrata){
        mu.logitP[i] <- inprod(logitP.cov[i,1:NlogitP.cov], beta.logitP[1:NlogitP.cov])
        ## logitP[i] ~ dnorm(mu.logitP[i],tauP)

        # use the u2.Ncopy and u2.Acopy to break the cycle (in OpenBugs) and improve mixing (see Matt S.)
        mu.epsilon[i] <- mu.logitP[i] - log(u2.Ncopy[i] + u2.Acopy[i] + 1) + (etaU.W[i] + etaU.H[i])  # Matts tricek to speed mixing
        epsilon[i] ~ dnorm(mu.epsilon[i],tauP)

        logitP[i] <-  log(u2.Ncopy[i] + u2.Acopy[i] + 1) - log(exp(etaU.W[i]) + exp(etaU.H[i])) + epsilon[i]
   }

   ##### Hyperpriors #####
   ## Run size - wild and hatchery fish - flat priors
   for(i in 1:n.b.flat.W){
      bU.W[b.flat.W[i]] ~ dnorm(0, 1E-6)
   }
   for(i in 1:n.b.flat.H){
      bU.H[b.flat.H[i]] ~ dnorm(0, 1E-6)
   }

   ## Run size - priors on the difference for wild and hatchery fish
   for(i in 1:n.b.notflat.W){
      xiU.W[b.notflat.W[i]] <- 2*bU.W[b.notflat.W[i]-1] - bU.W[b.notflat.W[i]-2]
      bU.W [b.notflat.W[i]] ~ dnorm(xiU.W[b.notflat.W[i]],tauU)
   }
   for(i in 1:n.b.notflat.H){
      xiU.H[b.notflat.H[i]] <- 2*bU.H[b.notflat.H[i]-1] - bU.H[b.notflat.H[i]-2]
      bU.H [b.notflat.H[i]] ~ dnorm(xiU.H[b.notflat.H[i]],tauU)
   }

   tauU ~ dgamma(tauU.alpha,tauU.beta)  # Notice reduction from .0005 (in thesis) to .05
   sigmaU <- 1/sqrt(tauU)
   taueU ~ dgamma(taueU.alpha,taueU.beta) # dgamma(100,.05) # Notice reduction from .0005 (in thesis) to .05
   sigmaeU <- 1/sqrt(taueU)

   ## Capture probabilities covariates
   for(i in 1:NlogitP.cov){
      beta.logitP[i] ~ dnorm(prior.beta.logitP.mean[i], 1/prior.beta.logitP.sd[i]^2)  # rest of beta terms are normal 0 and a large variance
   }
   beta.logitP[NlogitP.cov+1] ~ dnorm(0, .01) # dummy so that covariates of length 1 function properly
   tauP ~ dgamma(tauP.alpha,tauP.beta)
   sigmaP <- 1/sqrt(tauP)

   ##### Likelihood contributions #####
   ## Number of marked fish recovered ##
   for(i in 1:Nstrata){
      logit(p[i]) <- logitP[i]       # convert from logit scale
      m2[i] ~ dbin(p[i],n1[i])     # recovery of marked fish
   }

   ## captures of wild (unclipped fish) - these are the only fish available upto (and including) hatch.after
   for(i in 1:hatch.after){
      U.W[i] <- round(exp(etaU.W[i]))       # convert from log scale
      u2.N[i] ~ dbin(p[i],U.W[i])
   }

   ## captures of hatchery (clipped fish) - these can only occur AFTER hatch.after
   for(i in (hatch.after+1):Nstrata){
      U.W[i] <- round(exp(etaU.W[i]))      # convert from log scale
      U.H[i] <- round(exp(etaU.H[i]))      # convert from log scale
      U.clip[i] ~ dbin(clip.frac.H, U.H[i])
      p.temp[i] <- p[i]*clip.frac.H
      u2.A[i] ~ dbin(p.temp[i], U.H[i]) # must be hatchery and clipped
   }

   ## captures of wild+hatchery unclipped fish - these can only occur AFTER hatch.after
   for(i in (hatch.after+1):Nstrata){
      U.noclip[i] <- U.W[i] + U.H[i] - U.clip[i]
      u2.N[i] ~ dbin(p[i], U.noclip[i])
   }

   ##### Derived Parameters #####
   Utot.W <- sum( U.W[1:Nstrata])              # Total number of unmarked fish - wild
   Utot.H <- sum( U.H[(hatch.after+1):Nstrata])# Total number of unmarked fish - hatchery
   Utot   <- Utot.W + Utot.H                   # Grand total number of fish
   
   # Because JAGES does not properly monitory partially defined vectors (see Section 2.5 of the JAGES user manual)
   # we need to add dummy distribution for the parameters of interest prior to the hatchery fish arriving.
   # This is not needed in OpenBugs who returns the subset actually monitored, but we add this to be consistent
   # among the two programs
   for(i in 1:hatch.after){
      U.H[i]      ~ dnorm(0,1)  # These are complete arbitrary and never gets updated
      etaU.H[i]   ~ dnorm(0,1)
      logUne.H[i] ~ dnorm(0,1)
      eU.H[i]     ~ dnorm(0,1)
   }
}  # end of model
", fill=TRUE)
   sink()  # End of saving the Bugs program

      Nstrata <- length(n1)

  # make a copy of u2.N to improve mixing in the MCMC model
  u2.Ncopy <- stats::spline(x=1:Nstrata, y=u2.N, xout=1:Nstrata)$y
  u2.Ncopy <- round(u2.Ncopy) # round to integers

  # similarly make a copy of u2.A to improve mixing in the MCMC model
  # notice that Adipose clips only occur at hatch.after or later
  u2.Acopy <- u2.A * 0
  u2.Acopy[hatch.after:Nstrata] <- stats::spline(x=hatch.after:Nstrata, y=u2.A[hatch.after:Nstrata], xout=hatch.after:Nstrata)$y
  u2.Acopy <- round(u2.Acopy) # round to integers

  datalist <- list("Nstrata", "n1", "m2",
                  "u2.A", "u2.Acopy",
                   "u2.N", "u2.Ncopy", 
                   "hatch.after", "clip.frac.H",
                   "logitP.cov", "NlogitP.cov",
                   "SplineDesign.W",
                   "b.flat.W", "n.b.flat.W", "b.notflat.W", "n.b.notflat.W", "n.bU.W",
                   "SplineDesign.H",
                   "b.flat.H", "n.b.flat.H", "b.notflat.H", "n.b.notflat.H", "n.bU.H",
                   "tauU.alpha", "tauU.beta", "taueU.alpha", "taueU.beta",
                   "prior.beta.logitP.mean", "prior.beta.logitP.sd", 
                   "tauP.alpha", "tauP.beta")

  parameters <- c("logitP", "beta.logitP", "tauP", "sigmaP",
                  "bU.W", "bU.H", "tauU", "sigmaU",
                  "eU.W", "eU.H", "taueU", "sigmaeU",
                   "Utot.W", "Utot.H", "Utot", "logUne.W", "logUne.H",
                   "etaU.W", "etaU.H", "U.W", "U.H")
  if( any(is.na(m2))) {parameters <- c(parameters,"m2")} # monitor in case some bad data where missing values present
  if( any(is.na(u2.A))) {parameters <- c(parameters,"u2.A")}
  if( any(is.na(u2.N))) {parameters <- c(parameters,"u2.N")}


  ## Now to create the initial values, and the data prior to call to the MCMC sampler 

  # Estimate number of wild and hatchery fish based on clip rate
  u2.H <- u2.A/clip.frac.H  # only a portion of the hatchery fish are clipped
  u2.W <- pmax(u2.N - u2.H*(1-clip.frac.H),0) # subtract the questimated number of hatchery fish
  u2.H[is.na(u2.H)] <- 1  # in case of missing values
  u2.W[is.na(u2.W)] <- 1  # in case of missing values

  avg.P <- sum(m2,na.rm=TRUE)/sum(n1, na.rM=TRUE)
  Uguess.W <- pmax((u2.W+1)*(n1+2)/(m2+1), u2.W/avg.P, 1, na.rm=TRUE)  # try and keep Uguess larger than observed values
  Uguess.H <- pmax((u2.H+1)*(n1+2)/(m2+1), u2.H/avg.P, 1, na.rm=TRUE)
  Uguess.H[1:hatch.after] <- 0   # no hatchery fish prior to release from hatchery

  ## create the B-spline design matrix for wild and hatchery fish
  ## The design matrix for hatchery fish will still have rows corresponding to entries PRIOR to
  ##    the hatchery release but these are never used in the winbugs fitting routines
  ## There is a separate (single) spline for hatchery and wild fish with NO breakpoints
  ## The first two coefficient have a flat prior and the rest of the coefficients are modelled using
  ##    differences between the succesive coefficients

  ## Wild fish. This covers the entire experiment.
  SplineDegree <- 3           # Degree of spline between occasions
  knots <- seq(4,Nstrata,4)/(Nstrata+1) # a knot roughly every 4th stratum
  SplineDesign.W <- bs((1:Nstrata)/(Nstrata+1), knots=knots, degree=SplineDegree, intercept=TRUE, Boundary.knots=c(0,1))
  b.flat.W      <- c(1,2)
  b.notflat.W   <- 3:(ncol(SplineDesign.W))
  n.b.flat.W    <- length(b.flat.W)
  n.b.notflat.W <- length(b.notflat.W)
  n.bU.W        <- n.b.flat.W + n.b.notflat.W
  init.bU.W   <- stats::lm(log(Uguess.W+1) ~ SplineDesign.W-1)$coefficients  # initial values for spline coefficients

  ## hatchery fish. Notice they can only enter AFTER hatch.after, The spline design matrix still has rows
  ## of zero for 1:hatch.after to make it easier in Bugs
  SplineDegree <- 3           # Degree of spline between occasions
  knots <- (seq((hatch.after+4),Nstrata-1,4)-hatch.after)/(Nstrata-hatch.after+1) # a knot roughly every 4th stratum
  SplineDesign.H <- bs((1:(Nstrata-hatch.after))/(Nstrata-hatch.after+1), knots=knots, degree=SplineDegree, intercept=TRUE, Boundary.knots=c(0,1))
  b.flat.H      <- c(1,2)
  b.notflat.H   <- 3:(ncol(SplineDesign.H))
  n.b.flat.H    <- length(b.flat.H)
  n.b.notflat.H <- length(b.notflat.H)
  n.bU.H        <- n.b.flat.H + n.b.notflat.H
  init.bU.H   <- stats::lm(log(Uguess.H[(hatch.after+1):Nstrata]+1) ~ SplineDesign.H-1)$coefficients  # initial values for spline coefficients
                                    # patch up the initial rows of the spline design matrix
  SplineDesign.H <- rbind(matrix(0,nrow=hatch.after, ncol=ncol(SplineDesign.H)), SplineDesign.H)

  ## create an initial plot of the fit to the number of unmarked fish
  plot.data <- data.frame(time=time, 
                        logUguess.H = log(Uguess.H+1),
                        logUguess.W = log(Uguess.W+1),
                        spline.H=SplineDesign.H %*% init.bU.H, 
                        spline.W=SplineDesign.W %*% init.bU.W, stringsAsFactors=FALSE)

  init.plot <- ggplot(data=plot.data, aes_(x=~time))+
     ggtitle(title, subtitle="Initial spline fit to estimated log U[i]")+
     geom_point(aes_(y=~logUguess.H), pch="H", color="green")+
     geom_point(aes_(y=~logUguess.W), pch="W", color="blue")+
     geom_line(aes_(y=~spline.H), color="green")+
     geom_line(aes_(y=~spline.W), color="blue")+
     xlab("Stratum")+ylab("log(U[i])")+
     scale_x_continuous(breaks=seq(min(plot.data$time, na.rm=TRUE),max(plot.data$time, na.rm=TRUE),2))

  if(save.output.to.files)ggsave(init.plot, filename=paste(prefix,"-initialU.pdf",sep=""), height=4, width=6, units="in")
  #results$plots$plot.init <- init.plot  # do this after running the MCMC chain (see end of function)


  # get the logitP=logit(P) covariate matrix ready
  logitP.cov <- as.matrix(logitP.cov)
  NlogitP.cov <- ncol(as.matrix(logitP.cov))


  ##  initial values for the parameters of the model
  init.vals <- genInitVals(model="TSPDE-WHchinook",
                               n1=n1,
                               m2=m2,
                               u2=list(W=u2.W,H=u2.H, A=u2.A, N=u2.N),
                               logitP.cov=logitP.cov,
                               SplineDesign=list(W=SplineDesign.W,H=SplineDesign.H),
                               hatch.after=hatch.after,
                               n.chains=n.chains)

  ## Generate data list
  data.list <- list()
  for(i in 1:length(datalist)){
      data.list[[length(data.list)+1]] <-get(datalist[[i]])
  }
  names(data.list) <- as.list(datalist)

  ## Call MCMC sampler
  results <- run.MCMC(modelFile=model.file,
                              dataFile=data.file,
                              dataList=data.list,
                              initFiles=init.files,
                              initVals=init.vals,
                              parameters=parameters,
                              nChains=n.chains,
                              nIter=n.iter,
                              nBurnin=n.burnin,
                              nSims=n.sims,
                              overRelax=FALSE,
                              initialSeed=InitialSeed,
                              working.directory=working.directory,
                              debug=debug)

  results$plots$plot.init <- init.plot  # save initial plot to results object

  return(results)
} # end of function