R/run_SIR.R

Defines functions summary_sir CALC.LNLIKE LNLIKE.BEACHED LNLIKE.GR LNLIKE.Ns PREDICT.IAs LNLIKE.IAs CALC.ANALYTIC.Q LOGISTIC.BISECTION.K TARGET.K COMPUTING.ROI PRED.GROWTH.RATE HUMPBACK.SIR

Documented in CALC.ANALYTIC.Q CALC.LNLIKE COMPUTING.ROI HUMPBACK.SIR LNLIKE.BEACHED LNLIKE.GR LNLIKE.IAs LNLIKE.Ns LOGISTIC.BISECTION.K PRED.GROWTH.RATE PREDICT.IAs summary_sir TARGET.K

# R Script for Bayesian Assessment Model of Humpback Whales - developed from the code/equations
# in Zerbini et al. (2011)
# Adapted by Grant Adams and John Best for Zerbini et al. 2019

#' HUMPBACK SIR controls the sampling from the priors, the bisection and
#' likelihoods and the output functions
#'
#' @param file_name name of a file to identified the files exported by the
#'   function
#' @param n_resamples number of resamples to compute the marginal posterior
#'   distributions
#' @param priors List of priors, usually generated using \link{make_prior_list}.
#'   Default is the default of \code{make_prior_list}. See details.
#' @param catch_multipliers List of catch multipliers, generated using \link{make_multiplier_list}
#'   Can either be estimated or explicitly provided. Default is \code{make_multiplier_list}.
#' @param target.Yr year of the target population estimate for the bisection
#'   method. Default is 2008
#' @param num.haplotypes number of haplotypes to compute minimum viable
#'   population (from Jackson et al., 2006 and IWC, 2007)
#' @param output.Yrs year for outputing the predicted abundance estimates.
#'   Default is 2008, but multiple years can be specified. For example, if
#'   outputs for 2005 and 2008 are needed, output.Yrs = c(2005, 2008)
#' @param abs.abundance R object containing year, estimate of absolute
#'   abundance, and CV (see example)
#' @param rel.abundance R object containing years, estimates of relative
#'   abudnance and CVs (see example)
#' @param rel.abundance.key key to speficy if relative abundance data are used
#'   in the likelihood. Default is TRUE
#' @param count.data R object containing years, estimates of counts and effort.
#'   NOT USED
#' @param count.data.key key to speficy in count data are used. Default is
#'   FALSE. NOT USED
#' @param growth.rate.obs observed growth rate (1st element) and standard error
#'   (2nd element) as in Zerbni et al. (2011). If third element is FALSE, the
#'   growth rate is not included in the likelihood
#' @param growth.rate.Yrs Years for which the growth.rate.obs were computed (as
#'   in Zerbini et al., 2011)
#' @param catch.data R object containing the years and catches (see example)
#' @param control A list of control parameters, usually generated by
#'   \code{sir_control}.
#' @param premodern_catch_multipliers List of historic catch multipliers, generated using \link{make_multiplier_list}
#'   Can either be estimated or explicitly provided. Default is \code{make_multiplier_list}.
#' @param premodern_catch_data R object containing the years maximum and minimum premodern catches
#' @param realized_prior Key to specify if realized prior is to be extracted. Default is FALSE.
#'
#' @return A \code{list} containing posterior samples and metadata
#'
#' Current default prior specification:
#' \code{
#' make_prior_list(r_max = make_prior(runif, 0, 0.118),
#'                 K = make_prior(use = FALSE),
#'                 N_obs = make_prior(runif, 500, 40000),
#'                 add_CV = make_prior(use = FALSE),
#'                 z = make_prior(2.39),
#'                 q_IA = make_prior(use = FALSE),
#'                 q_count = make_prior(use = FALSE)
#'
#'  make_multiplier_list(c_mult_1 = make_prior(1))}
#'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' HUMPBACK.SIR(file_name = "test.N2005",
#'              n_resamples = 100,
#'              priors = make_prior_list(),
#'              catch_multipliers = make_multiplier_list(),
#'              premodern_catch_multipliers = make_multiplier_list(),
#'              Klim = c(1, 500000),
#'              target.Yr = 2005,
#'              num.haplotypes = 0,
#'              tolerance.for.bisection = 0.0001,
#'              output.Yrs = c(2005, 2006),
#'              abs.abundance = Abs.Abundance.2005,
#'              rel.abundance = Rel.Abundance,
#'              rel.abundance.key = TRUE,
#'              count.data = Count.Data,
#'              count.data.key = FALSE,
#'              growth.rate.obs = c(0.074, 0.033, TRUE),
#'              growth.rate.Yrs = c(1995, 1996, 1997, 1998),
#'              catch.data = Catch.data,
#'              premodern_catch_data = NULL,
#'              control = sir_control())
#' }
HUMPBACK.SIR <- function(file_name = "NULL",
                         n_resamples = 1000,
                         priors = make_prior_list(),
                         catch_multipliers = make_multiplier_list(),
                         premodern_catch_multipliers = make_multiplier_list(),
                         target.Yr = 2008,
                         num.haplotypes = 66,
                         output.Yrs = c(2008),
                         abs.abundance = Abs.Abundance,
                         rel.abundance = Rel.Abundance,
                         rel.abundance.key = TRUE,
                         count.data = NULL,
                         count.data.key = FALSE,
                         growth.rate.obs = c(0.074, 0.033, TRUE),
                         growth.rate.Yrs = c(1995, 1996, 1997, 1998),
                         catch.data = Catch.data,
                         premodern_catch_data = NULL,
                         realized_prior = FALSE,
                         control = sir_control()) {
    begin.time <- Sys.time()

    ################################
    # Assigning variables
    ################################
    target.Yr <- target.Yr
    ## Use the first year of the projection is set as the first year in the
    ## catch series
    start_yr <- min(catch.data$Year,
                    ifelse(is.null(premodern_catch_data),  catch.data$Year, premodern_catch_data$Year))
    ## The last year of the projection is set as the last year in the catch or
    ## abundance series, whichever is most recent
    end_yr <- max(tail(catch.data$Year, 1),
                  max(abs.abundance$Year),
                  max(rel.abundance$Year),
                  output.Yrs,
                  ifelse(is.null(premodern_catch_data),  catch.data$Year, premodern_catch_data$Year)) #FIXME: note should be able to handle if premodern_catch_data is NULL
    ## Setting the target year for the bisection method
    bisection.Yrs <- target.Yr-start_yr + 1
    ## Setting the years to project
    projection.Yrs <- end_yr-start_yr + 1
    Year <- seq(start_yr, end_yr, by = 1)

    # Expand the catch time series and fill missing values with 0
    catch.data <- merge(data.frame(Year), catch.data, by="Year", all = TRUE)
    catch.data[is.na(catch.data)] <- 0

    if(!is.null(premodern_catch_data)){
        # Expand and fill with 0 for years with no data
        premodern_catch_data <- merge(data.frame(Year), premodern_catch_data, by="Year", all = TRUE)
        premodern_catch_data[is.na(premodern_catch_data)] <- 0

        # Extract catch series
        premodern_catch_original <- as.matrix(
            premodern_catch_data[,grep("catch", colnames(premodern_catch_data), ignore.case = T)])

        # Get min and max of premodern
        premodern_catch_min <- apply(premodern_catch_original, 1, FUN=min)
        premodern_catch_max <- apply(premodern_catch_original, 1, FUN=max)
        premodern_catch_dif <- premodern_catch_max - premodern_catch_min
    }

    ## Assigning the catch data
    catch_original <- as.matrix(catch.data[, grep("catch", colnames(catch.data),
                                                  ignore.case = T)])
    n_catch_period <- ncol(catch_original)

    # Catch multiplier check
    if(length(catch_multipliers) != n_catch_period){
        stop("Number of catch multipliers (",
             length(catch_multipliers),
             ") does not equal number of catch periods (",
             n_catch_period,
             "), using no multiplier.")
    }

    if(!is.null(premodern_catch_data)){
        if(length(premodern_catch_multipliers) == 0 ){#FIXME: remove if making it flexible for multiple premodern catch multipliers
            print("No premodern catch multipliers supplied, assuming none")
            premodern_catch_multipliers <- make_multiplier_list(c_mult_1 = make_prior(1))
        }

        if(length(premodern_catch_multipliers) > 1){#FIXME: remove if making it flexible for multiple premodern catch multipliers
            print("Multiple premodern catch multipliers supplied, assuming none")
            premodern_catch_multipliers <- make_multiplier_list(c_mult_1 = make_prior(1))
        }
    }

    ## Determining the number of Indices of Abundance available
    num.IA <- max(rel.abundance$Index)

    ## Determining the number of Count Data sets available
    num.Count <- max(count.data$Index)

    ## Computing the value of sigma as in Zerbini et al. 2011
    rel.abundance$Sigma <- sqrt(log(1 + rel.abundance$CV.IA.obs^2))

    ## Computing the value of sigma for the count data as in Zerbini et al. (2011)
    count.data$Sigma <- sqrt(log(1 + count.data$CV.IA.obs^2))

    ## Computing the value of sigma as in Zerbini et al. 2011
    abs.abundance$Sigma <- sqrt(log(1 + abs.abundance$CV.obs^2))

    ## Computing the minimum viable population, if num.haplotypes=0, assumes no MVP
    MVP <- 3 * num.haplotypes

    ## Start the loop
    i <- 0
    ## Keep track of number of draws
    draw <- 1
    Cumulative.Likelihood <- 0

    #Creating output vectors
    #-------------------------------------
    sir_names <- c("r_max", "K", "z", paste0("catch_multiplier_", 1:length(catch_multipliers)) ,
                   paste0("premodern_catch_multiplier_", 1:length(premodern_catch_multipliers)),
                   "sample.N.obs", "add_CV", "sample_premodern_catch", "Nmin", "YearMin",
                   "violate_MVP", paste0("N", target.Yr), paste0("N", output.Yrs),
                   paste0("ROI_IA", unique(rel.abundance$Index)),
                   paste0("q_IA", unique(rel.abundance$Index)),
                   paste0("ROI_Count", unique(count.data$Index)),
                   paste0("q_Count", unique(count.data$Index)),
                   "NLL.IAs", "NLL.Count", "NLL.N", "NLL.GR", "NLL", "Likelihood",
                   "Max_Dep",paste0("status", target.Yr), paste("status", output.Yrs, sep = ""), "draw", "save")

    resamples_output <- matrix(NA, nrow = n_resamples, ncol = length(sir_names))
    resamples_trajectories <- matrix(NA, nrow = n_resamples, ncol = projection.Yrs)
    catch_trajectories <- matrix(NA, nrow = n_resamples, ncol = projection.Yrs)
    colnames(catch_trajectories) =  paste0("Catch_", Year)

    if (control$progress_bar) {
        pb <- txtProgressBar(min = 0, max = n_resamples, style = 3)
    }

    #Initiating the SIR loop
    while (i < n_resamples) {
        #Sampling from Priors
        #-------------------------------
        save <- FALSE #variable to indicate whether a specific draw is kept

        # Sampling for catch_multiplier
        sample_catch_multiplier <- sapply(catch_multipliers, function(x) x$rfn())
        catches <- rep(0, length(Year))
        for(p in 1:length(sample_catch_multiplier)){
            catches <- catches + (catch_original[,p] * sample_catch_multiplier[p]) # Multiply catches by multiplier and add
        }

        # Sample historic catch
        if(!is.null(premodern_catch_data)){
            sample_premodern_catch_multiplier <- sapply(premodern_catch_multipliers, function(x) x$rfn())
            sample_premodern_catch <- priors$premodern_catch_sample$rfn()

            catches <- catches + (premodern_catch_min + premodern_catch_dif * sample_premodern_catch) * sample_premodern_catch_multiplier
        } else{
            sample_premodern_catch_multiplier <- -999
            sample_premodern_catch <- -999
        }

        #Sampling for r_max
        sample.r_max <- priors$r_max$rfn()
        while (sample.r_max > 0.118) {
            sample.r_max <- priors$r_max$rfn()
        }

        ## Sampling from the N.obs prior
        sample.N.obs <- priors$N_obs$rfn()

        ## Prior on additional CV
        if (priors$add_CV$use) {
            sample.add_CV <- priors$add_CV$rfn()
        } else {
            sample.add_CV <- 0
        }

        ## Sample from prior for `z` (usually constant)
        sample.z <- priors$z$rfn()

        ## Sampling from q priors if q.prior is TRUE; priors on q for indices of
        ## abundance
        if (priors$q_IA$use) {
            q.sample.IA <- replicate(num.IA, priors$q_IA$rfn())
        } else {
            ## FIXME: -9999 is probably not a good sentinel value here; NA?
            q.sample.IA <- rep(-9999, length(unique(rel.abundance$Index)))
        }

        ##priors on q for count data
        if (priors$q_count$use) {
            q.sample.Count <- replicate(num.Count, priors$q_count$rfn())
        } else {
            ## FIXME: Sentinel -9999 again
            q.sample.Count <- rep(-9999, length(unique(count.data$Index)))
        }

        K.error <- FALSE
        sample.K <- try(LOGISTIC.BISECTION.K(K.low = control$K_bisect_lim[1],
                                         K.high = control$K_bisect_lim[2],
                                         r_max = sample.r_max,
                                         z = sample.z,
                                         num_Yrs = bisection.Yrs,
                                         start_yr = start_yr,
                                         target.Pop = sample.N.obs,
                                         catches = catches,
                                         MVP = MVP,
                                         tol = control$K_bisect_tol),
                        silent = TRUE)

        ## If population is too variable because of process error, give error and set likelihood to 0
        if(class(sample.K) == "try-error"){
            sample.K = 999
            K.error = TRUE
        }

        #Computing the predicted abundances with the samples from the priors
        #----------------------------------------
        Pred_N <- GENERALIZED_LOGISTIC(r_max = sample.r_max,
                                       K = sample.K,
                                       N1 = sample.K,
                                       z = sample.z,
                                       start_yr = start_yr,
                                       num_Yrs = projection.Yrs,
                                       catches = catches,
                                       MVP = MVP)


        #Computing the predicted ROI for the IAs and Count data, if applicable
        #----------------------------------------
        #For IAs
        if (rel.abundance.key) {
            Pred.ROI.IA <- COMPUTING.ROI(data = rel.abundance,
                                         Pred_N = Pred_N$Pred_N,
                                         start_yr = start_yr)
        } else {
            Pred.ROI.IA <- rep(0, num.IA)
        }

        #For Count Data
        if (count.data.key) {
            Pred.ROI.Count <- COMPUTING.ROI(data = count.data,
                                            Pred_N = Pred_N$Pred_N,
                                            start_yr = start_yr)
        } else {
            Pred.ROI.Count <- rep(0, num.Count)
        }

        #Calculate Analytical Qs if rel.abundance.key is TRUE
        #---------------------------------------------------------
        if (rel.abundance.key) {
            if (!priors$q_IA$use) {
                q.sample.IA <- CALC.ANALYTIC.Q(rel.abundance,
                                               Pred_N$Pred_N,
                                               start_yr,
                                               sample.add_CV,
                                               num.IA)
            } else {
                q.sample.IA <- q.sample.IA
            }
        }

        #browser()

        ## Calculate Analytical Qs if count.data.key is TRUE
        ## (NOT USED YET - AZerbini, Feb 2013)
        if (count.data.key) {
            if (!priors$q_count$use) {
                q.sample.Count <- CALC.ANALYTIC.Q(count.data,
                                                  Pred_N$Pred_N,
                                                  start_yr,
                                                  sample.add_CV,
                                                  num.Count)
            } else {
                q.sample.Count <- q.sample.Count
            }
        }

        if (control$verbose > 3) {
            message("r_max = ", sample.r_max,
                    " N.obs = ", sample.N.obs,
                    " K = ", sample.K,
                    " Pred_N.target = ", Pred_N$Pred_N[bisection.Yrs],
                    " q.IAs = ", q.sample.IA,
                    " q.Count = ", q.sample.Count)
        }

        #Compute the likelihoods
        #--------------------------------
        # (1) relative indices (if rel.abundance.key is TRUE)
        if (rel.abundance.key) {
            lnlike.IAs <- LNLIKE.IAs(rel.abundance,
                                     Pred_N$Pred_N,
                                     start_yr,
                                     q.sample.IA,
                                     sample.add_CV,
                                     TRUE)
        } else {
            lnlike.IAs <- 0
        }

        # (2) count data (if count.data.key is TRUE)
        if (count.data.key) {
            lnlike.Count <- LNLIKE.IAs(count.data,
                                       Pred_N$Pred_N,
                                       start_yr,
                                       q.sample.Count,
                                       sample.add_CV,
                                       log=TRUE)
        } else {
            lnlike.Count <- 0
        }

        # (3) absolute abundance
        lnlike.Ns <- LNLIKE.Ns(abs.abundance,
                               Pred_N$Pred_N,
                               start_yr,
                               sample.add_CV,
                               log=TRUE)

        # (4) growth rate if applicable
        if (growth.rate.obs[3]) {
            Pred.GR <- PRED.GROWTH.RATE(growth.rate.Yrs=growth.rate.Yrs,
                                        Pred_N=Pred_N$Pred_N,
                                        start_yr=start_yr)
            lnlike.GR <- LNLIKE.GR(Obs.GR=growth.rate.obs[1],
                                   Pred.GR=Pred.GR,
                                   GR.SD.Obs=growth.rate.obs[2])
        } else {
            lnlike.GR <- 0
        }

        if (control$verbose > 2) {
            message("lnlike.IAs = ", lnlike.IAs,
                    " lnlike.Count = ", lnlike.Count,
                    " lnlike.Ns = ", lnlike.Ns,
                    " lnlike.GR = ", lnlike.GR)
        }

        ## These use the likelihoods in Zerbini et al. (2011)
        NLL <- lnlike.IAs[[1]] + lnlike.Count[[1]] + lnlike.Ns[[1]] + lnlike.GR[[1]]
        Likelihood <- exp(-NLL)
        if (control$verbose > 1) {
            message("NLL = ", NLL,
                    " Likelihood = ", Likelihood)
        }

        if (Pred_N$Violate_Min_Viable_Pop) {
            Likelihood <- 0
            if (control$verbose > 0) {
                message("MVP violated on draw", draw)
            }
        }

        ## If population was too variable set likelihood to 0
        if (K.error) {
            Likelihood <- 0
            if (control$verbose > 0) {
                message("Population dynamics too variable on draw", draw)
            }
        }



        Cumulative.Likelihood <- Cumulative.Likelihood + Likelihood

        # Trick to just extract realized prior
        if(realized_prior){
            Cumulative.Likelihood <- 2 * control$threshold
        }

        if (!Pred_N$Violate_Min_Viable_Pop) {

            while (Cumulative.Likelihood > control$threshold & i < n_resamples) {
                if (control$verbose > 0) {
                    message("sample = ", i, " draw = ", draw)
                }
                if (control$verbose > 1) {
                    message("draw = ", draw,
                            " Likelihood = ", Likelihood,
                            " Cumulative = ", Cumulative.Likelihood)
                }
                save <- TRUE
                Cumulative.Likelihood <- Cumulative.Likelihood-control$threshold
                resamples_trajectories[i+1,] <- Pred_N$Pred_N
                catch_trajectories[i+1,] <- catches
                resamples_output[i+1,] <- c(sample.r_max,
                                            sample.K,
                                            sample.z,
                                            sample_catch_multiplier,
                                            sample_premodern_catch_multiplier,
                                            sample.N.obs,
                                            sample.add_CV,
                                            sample_premodern_catch,
                                            Pred_N$Min_Pop,
                                            ifelse(length(Pred_N$Min_Yr) == 1, Pred_N$Min_Yr, "Multiple"),
                                            Pred_N$Violate_Min_Viable_Pop,
                                            c(Pred_N$Pred_N[target.Yr - start_yr + 1]),
                                            c(Pred_N$Pred_N[output.Yrs - start_yr + 1]),
                                            Pred.ROI.IA,
                                            q.sample.IA,
                                            Pred.ROI.Count,
                                            q.sample.Count,
                                            lnlike.IAs[[1]],
                                            lnlike.Count[[1]],
                                            lnlike.Ns[[1]],
                                            lnlike.GR[[1]],
                                            NLL,
                                            Likelihood,
                                            Pred_N$Min_Pop / sample.K,
                                            c(Pred_N$Pred_N[target.Yr - start_yr + 1] /
                                                  sample.K),
                                            c(Pred_N$Pred_N[output.Yrs - start_yr + 1] /
                                                  sample.K),
                                            draw,
                                            save)
                i <- i+1
                if (control$progress_bar) {
                    setTxtProgressBar(pb, i)
                }
            }
        }
        draw <- draw+1
    }

    # Save outputs
    resamples_output <- data.frame(resamples_output)
    names(resamples_output) <- sir_names
    if(!is.null(file_name)){
        write.csv(resamples_output,
                  paste0(file_name, "_", "resamples_output.csv"))
    }

    resamples_trajectories <- data.frame(resamples_trajectories)
    names(resamples_trajectories) <- paste0("N_", Year)
    if(!is.null(file_name)){
        write.csv(resamples_trajectories,
                  paste0(file_name, "_", "resamples_trajectories.csv"))
    }

    catch_trajectories <- data.frame(catch_trajectories)
    names(catch_trajectories) <- paste0("Catch_", Year)
    if(!is.null(file_name)){
        write.csv(catch_trajectories,
                  paste0(file_name, "_", "catch_trajectories.csv"))
    }

    resamples.per.samples <- draw / n_resamples
    if(resamples.per.samples < 3){
        warning("Number of resamples per sample is ",
                round(resamples.per.samples, 1),
                ", use higher threshold value.")
    } else if (resamples.per.samples > 20) {
        warning("Number of resamples per sample is ",
                round(resamples.per.samples, 1),
                ", use lower threshold value.")
    }

    end.time <- Sys.time()
    if (control$verbose > 0) {
        message("Time to Compute = ", (end.time-begin.time))
    }

    return_list <- list(call = call,
                        file_name = file_name,
                        Date.Time = Sys.time(),
                        Time.to.compute.in.minutes = paste((end.time-begin.time) / 60),
                        threshold = control$threshold,
                        Ratio.Resamples.per.Sample = paste("1 resample",
                                                           ":",
                                                           resamples.per.samples,
                                                           "samples"),
                        resamples_output = resamples_output,
                        resamples_trajectories = resamples_trajectories,
                        catch_trajectories = catch_trajectories,
                        inputs = list(draws = draw,
                                      n_resamples = n_resamples,
                                      prior_r_max = priors$r_max,
                                      catch_multipliers = catch_multipliers,
                                      priors_N_obs = priors$N_obs,
                                      target.Yr = target.Yr,
                                      start_yr = start_yr,
                                      MVP = paste("num.haplotypes = ",
                                                  num.haplotypes,
                                                  "MVP = ",
                                                  3 * num.haplotypes),
                                      tolerance = control$K_bisect_tol,
                                      output.Years = output.Yrs,
                                      abs.abundance = abs.abundance,
                                      catch.data = catch.data,
                                      realized_prior = realized_prior))
    if(rel.abundance.key){ return_list$inputs$rel.abundance = rel.abundance}
    if(count.data.key){ return_list$inputs$count.data = count.data}

    class(return_list) <- "SIR" # Defines class for object

    return(return_list)
}


#' PREDICTED GROWTH RATE
#'
#' \code{PRED.GROWTH.RATE} computes the predicted growth rate if such
#' information is available from an independent estimate rather than being
#' estimated from data. Growth rate is calculated as: $$r_{t_0 - t_{fin}}^{pred}
#' = \frac{ \sum_{t = t_0} ^{t_{fin - 1}} ln \left( \frac{N_{t+1}^{pred}} {
#' N_t^{pred}} \right) } { t_{fin} - t_0 } = \frac{ ln \left( N_{fin}^{pred}
#' \right) - ln \left( N_{0}^{pred} \right)} { t_{fin} - t_0 }$$ where
#' $N^{pred}$ is the model predicted population size, in numbers, at time $t$ or
#' $t+1$ in years, $t_0$ is the start year of the equation (1995 in Zerbini et
#' al. 2011), and $t_{fin}$ is the last year of the equation (1998 in Zerbini et
#' al. 2011).
#'
#' @param growth.rate.Yrs The years to be used for growth rate computation. 1995 - 1998 are used in Zerbini et al. 2011.
#' @param Pred_N Time series of predicted abundance, in numbers, from \code{\link{GENERALIZED_LOGISTIC}}.
#' @param start_yr The first year of the projection (assumed to be the first year in the catch series).
#'
#' @return A numeric scalar representing predicted growth rate.
#'
#' @examples
#' growth.rate.Yrs  <-  c(1995:1998)
#' Pred_N <- c(1000, 1500, 1500, 2000)
#' start_yr  <-  1995
#' PRED.GROWTH.RATE(growth.rate.Yrs, Pred_N, start_yr=start_yr)
PRED.GROWTH.RATE <- function(growth.rate.Yrs, Pred_N, start_yr = start_yr) {
    ## Computing the growth rate years
    GR.Yrs <- growth.rate.Yrs - start_yr + 1
    Pred_N.GR <- Pred_N[GR.Yrs]

    ## FIXME Just return this line?
    Pred.GR <- (log(Pred_N.GR[length(Pred_N.GR)]) -
                    log(Pred_N.GR[1])) / (length(Pred_N.GR) - 1)

    Pred.GR
}

#' Computes the predicted rate of increase for a set of specified years for
#' comparison with trends estimated separately with any of the indices of
#' abundance or count data
#'
#' @param data Count data or relative abundance index to use
#' @param Pred_N Number of individuals predicted
#' @param start_yr Initial year
#'
#' @return Vector of rates of increase, one per index
#' @export
#'
COMPUTING.ROI <- function(data = data, Pred_N = Pred_N, start_yr = NULL) {
    num.indices <- max(data$Index)
    Pred.ROI <- rep(NA, num.indices)

    for (i in 1:num.indices) {
        index.ini.year <- (head(subset(data, Index == i)$Year, 1) - start_yr)
        index.final.year <- (tail(subset(data, Index == i)$Year, 1) - start_yr)
        elapsed.years <- index.final.year - index.ini.year

        Pred.ROI[i] <- exp((log(Pred_N[index.final.year]) -
                                log(Pred_N[index.ini.year])) /
                               (elapsed.years)) - 1
    }
    Pred.ROI
}

#' Calculate a target K for the bisection method
#'
#' @param r_max The maximum net recruitment rate ($r_{max}$).
#' @param K Pre-expoitation population size in numbers or biomass
#'   (depending on input).
#' @param N1 Population size in numbers or biomass at year 1 (generally
#'   assumed to be K).
#' @param z Generalized logistic shape parameter, determines population
#'   size where productivity is masimum (assumed to be 2.39 by the ISC
#'   SC).
#' @param num_Yrs The number of projection years. Set as the last year
#'   in the catchor abundance series whichever is most recent, minus the
#'   start year.
#' @param start_yr First year of the projection (assumed to be the first
#'   year in the catch series).
#' @param target.Pop Target population size.
#' @param catches Catch time series. Cannot include NAs,
#' @param MVP Minimum Viable Population Size; `4 * num.haplotypes`
#'
#' @return Vector of differences between predicted population and target
#'   population.
#' @export
#'
#' @examples
#' TARGET.K(r_max, K, N1, z, start_yr=start_yr, num_Yrs=bisection.Yrs,
#'          target.Pop=target.Pop, catches=catches, MVP=MVP)
TARGET.K <- function(r_max, K, N1, z,
                     num_Yrs, start_yr,
                     target.Pop, catches,
                     MVP = 0) {

    Pred_N <- GENERALIZED_LOGISTIC(r_max = r_max,
                                   K = K,
                                   N1 = K,
                                   z = z,
                                   start_yr = start_yr,
                                   num_Yrs = num_Yrs,
                                   catches = catches,
                                   MVP = MVP)
    Pred_N$Pred_N[num_Yrs] - target.Pop
}

#' LOGISTIC BISECTION
#'
#' Method of Butterworth and Punt (1995) where the prior distribution of the
#' current absolute abundance $N_{2005}$ and maximum net recruitment rate
#' \code{r_max} are sampled and then used to determine the unique value of the
#' population abundance $N$ in \code{start_yr} (assumed to correspond to
#' carrying capacity $K$). Requires \code{\link{TARGET.K}} and subsequent
#' dependencies.
#'
#' @param K.low Lower bound for $K$ when preforming the bisection method of Punt
#'   and Butterworth (1995). Default is 1.
#' @param K.high Upper bound for $K$ when preforming the bisection method of
#'   Punt and Butterworth (1995). Default is 500,000.
#' @param r_max The maximum net recruitment rate ($r_{max}$).
#' @param z The parameter that determines the population size where productivity
#'   is maximum (assumed to be 2.39 by the IWC SC).
#' @param num_Yrs The number of projection years. Set as the last year in the
#'   catch or abundance series, whichever is most recent, minus the
#'   \code{start_yr}.
#' @param start_yr The first year of the projection (assumed to be the first
#'   year in the catch series).
#' @param target.Pop A sample of the prior on population abundance $N$, in
#'   numbers, set as \code{sample.N.obs} sampled from \code{priors$N.obs}
#' @param catches The time series of catch in numbers or biomass. Currently does
#'   not handle NAs and zeros will have to input a priori for years in which
#'   there were no catches.
#' @param MVP The minimum viable population size in numbers or biomass. Computed
#'   as 3 * \code{\link{num.haplotypes}} to compute minimum viable population
#'   (from Jackson et al., 2006 and IWC, 2007).
#' @param tol The desired accuracy (convergence tolerance) of
#'   \code{\link{stats::uniroot}}.
#'
#' @return A numeric scalar of an estimate of  carrying capacity $K$.
#'
#' @examples
#' LOGISTIC.BISECTION.K(K.low = 1, K.high = 100000, r_max = r_max, z = z,
#'                      num_Yrs = bisection.Yrs, start_yr = start_yr,
#'                      target.Pop = target.Pop, catches = catches, MVP = MVP,
#'                      tol = 0.001)
LOGISTIC.BISECTION.K <- function(K.low,
                                 K.high,
                                 r_max,
                                 z,
                                 num_Yrs,
                                 start_yr,
                                 target.Pop,
                                 catches,
                                 MVP,
                                 tol = 0.001) {
    Kmin <- uniroot(TARGET.K,
                    tol = tol,
                    c(K.low,
                      K.high),
                    r_max = r_max,
                    z = z,
                    num_Yrs = num_Yrs,
                    start_yr = start_yr,
                    target.Pop = target.Pop,
                    catches = catches,
                    MVP = MVP)
    Kmin$root
}

#' Compute analytic estimates of q, the scaling parameter between indices and
#' absolute population size
#'
#' @param rel.abundance Relative abundance index
#' @param add_CV Coefficient of variation
#' @param Pred_N Predicted population
#' @param start_yr Initial year
#' @param num.IA Index of abundance
#'
#' @return A numeric estimator for $q$.
#' @export
#'
CALC.ANALYTIC.Q <- function(rel.abundance, Pred_N, start_yr,
                            add_CV = 0, num.IA) {
    ## Vector to store the q values
    analytic.Q <- rep(NA, num.IA)

    for (i in 1:num.IA) {
        ## Subseting across each index of abundance
        IA <- rel.abundance[rel.abundance$Index == i,]
        ## Years for which IAs are available
        IA.yrs <- IA$Year-start_yr + 1
        ## Computing the value of sigma as in Zerbini et al. 2011
        IA$Sigma <- sqrt(log(1 + IA$CV.IA.obs^2))
        ## Numerator of the analytic q estimator (Zerbini et al., 2011 - eq. (3))
        qNumerator <- sum((log(IA$IA.obs / Pred_N[IA.yrs])) /
                              (IA$Sigma * IA$Sigma + add_CV * add_CV))
        ## Denominator of the analytic q estimator (Zerbini et al., 2011 - eq. (3))
        qDenominator <- sum(1 / (IA$Sigma * IA$Sigma))
        ## Estimate of q
        analytic.Q[i] <- exp(qNumerator / qDenominator)
    }
    analytic.Q
}

#' Compute the log-likelihood of indices of abundance
#'
#' @param rel.abundance Relative abundance
#' @param Pred_N Predicted population size
#' @param start_yr Initial year
#' @param q.values Scaling parameter
#' @param add.CV Coefficient of variation
#' @param log Boolean, return log likelihood (default TRUE) or
#'   likelihood.
#'
#' @return List of likelihood based on Zerbini et al. (2011) eq. 5 or using `dnorm`
#' @export
#'
LNLIKE.IAs <- function(rel.abundance, Pred_N, start_yr,
                       q.values, add.CV, log = TRUE) {
    loglike.IA1 <- 0
    IA.yrs <- rel.abundance$Year-start_yr + 1
    loglike.IA1 <- -sum(
        dlnorm( # NOTE: can be changed to dlnorm_zerb
            x = rel.abundance$IA.obs,
            meanlog = log( q.values[rel.abundance$Index] * Pred_N[IA.yrs] ),
            sdlog = rel.abundance$Sigma + add.CV,
            log))

    loglike.IA1
}


#' Predict indices of abundance
#'
#' @param SIR SIR object
#'
#' @return List of predicted indices based on Zerbini et al. (2011) eq. 5 or using `dnorm`
#' @export
#'
PREDICT.IAs <- function(rel.abundance, Pred_N, start_yr,
                        q.values, add.CV, log = TRUE) {
    loglike.IA1 <- 0
    IA.yrs <- rel.abundance$Year-start_yr + 1
    loglike.IA1 <- -sum(
        rlnorm( # NOTE: can be changed to dlnorm_zerb
            x = rel.abundance$IA.obs,
            meanlog = log( q.values[rel.abundance$Index] * Pred_N[IA.yrs] ),
            sdlog = rel.abundance$Sigma + add.CV,
            log))

    loglike.IA1
}

#' LOG LIKELIHOOD OF ABSOLUTE ABUNDANCE
#'
#' This function computes two estimates of the log-likelihood of the estimated
#' absolute abundance using the equation from Zerbini et al. 2011 (eq. 4) and a
#' lognormal distribution from \code{\link{CALC.LNLIKE}}.
#'
#' @param Obs.N Observed absoluted abundance in numbers as a data.frame
#'   containing year, estimate of absolute abundance, and standard deviation
#' @param Pred_N Predicted absolute abundance in numbers from
#'   \code{\link{GENERALIZED_LOGISTIC}}.
#' @param start_yr The first year of the projection (assumed to be the first
#'   year in the catch series).
#' @param add_CV Additional CV to add to variance of lognormal distribution
#'   sampled from \code{priors$add_CV}.
#' @param log Return the log of the likelihood (TRUE/FALSE)
#'
#' @return A list of two numeric scalars of estimates of log-likelihood.
#'
#' @examples
#' Obs.N  <-  data.frame(Year = 2005, Sigma = 5, Obs.N = 1000)
#' Pred_N  <-  1234
#' start_yr  <-  2005
#' LNLIKE.Ns(Obs.N, Pred_N, start_yr, add_cv = 0, log=TRUE)
LNLIKE.Ns <- function(Obs.N, Pred_N, start_yr, add_cv, log = TRUE) {
    N.yrs <- Obs.N$Year-start_yr+1
    nll_n <- -sum(
        dlnorm( # NOTE: can be changed to dlnorm_zerb
            x = Obs.N$N.obs,
            meanlog = log( Pred_N[N.yrs] ),
            sdlog = Obs.N$Sigma + add_cv,
            log))  ## Years for which Ns are available
    nll_n
}

#' Calculate the log-likelihood of the growth rate
#'
#' Calculates the log-likelihood of the estimated growth rate given the observed
#' growth rate and the standard deviation of the observed growth rate.
#'
#' @param Obs.GR Observed growth rate
#' @param Pred.GR Predicted growth rate
#' @param GR.SD.Obs Standard error of the observed growth rate
#'
#' @return A \code{list} containing \code{loglike.GR1} and \code{loglike.GR2}
#'
#' @examples
#' LNLIKE.GR(0.1, 0.1, 0.1)
LNLIKE.GR <- function(Obs.GR, Pred.GR, GR.SD.Obs, log = T) {
    ## This can be converted the likelihood from Zerbini et al. 2011 (eq. 6)
    -sum( dnorm( x = Obs.GR , mean = Pred.GR , sd = GR.SD.Obs, log = log ) )
}

#' Calculate the log-likelihood of the beach whale data
#'
#' @param beached_data Data on whale strandingsas a data.frame
#'   containing year, estimate of absolute abundance, and standard deviation
#' @param Pred_N Predicted absolute abundance in numbers from
#'   \code{\link{GENERALIZED_LOGISTIC}}.
#' @param start_yr The first year of the projection (assumed to be the first
#'   year in the catch series).
#' @param add_CV Additional CV to add to variance of lognormal distribution
#'   sampled from \code{priors$add_CV}.
#' @param q_anthro is the proportion of the population that will be killed each year from anthropogenic mortality
#' @param d_anthro is the detection probability of carcasses on the beach (including the probability of washing up on shore)
#' @param p_anthro is the proportion of the range of the species covered by monitoring
#' @param log Return the log of the likelihood (TRUE/FALSE)
#'
#' @return the negative log-likelihood
LNLIKE.BEACHED <- function(beached_data,
                           Pred_N,
                           start_yr,
                           q_anthro,
                           d_anthro,
                           p_anthro,
                           log=TRUE){

    N.yrs <- beached_data$Year-start_yr+1
    nll_n <- -sum(
        dpois( # NOTE: can be changed to dlnorm_zerb
            x = beached_data$N.obs,
            lambda = q_anthro * d_anthro * p_anthro * Pred_N[N.yrs],
            log))  ## Years for which Ns are available
    nll_n
}

#' Function to calculate the log-likelihood using a lognormal distribution
#'
#' @param Obs.N Time series of observed abundance
#' @param Pred_N Time series of estimated abundance
#' @param CV coefficient of variation
#' @param log whether to export as log-likelihood
#'
#' @return returns a scalar of the likelihood
#'
#' @examples
#' Obs.N <- 2000
#' Pred_N <- 2340
#' CV <- 4
#' CALC.LNLIKE(Obs.N, Pred_N, CV)
CALC.LNLIKE <- function(Obs.N, Pred_N, CV, log = FALSE) {
    sum(dnorm(x = log(Obs.N), mean = log(Pred_N), sd = CV, log = log))
}

#' OUTPUT FUNCTION
#'
#' Function that provides a summary of SIR outputs including: mean, median, 95%
#' credible interval, 90% predicitive interval, max, and sample size.
#'
#' @param x A data.frame of mcmc samples.
#' @param object Name of the model object as specified by the user.
#' @param file_name name of a file to identified the files exported by the
#'   function. If NULL, will not save .csv file.
#'
#' @return Returns a data.frame with summary of SIR outputs
#'
#' @examples
#' x  <-  rnorm(1000, 5, 7)
#' y  <-  rnorm(1000, 6, 9)
#' df <- data.frame(x = x, y = y)
#' summary_sir( df , object = "example_summary")
summary_sir <- function(x, object = "USERDEFINED", file_name = "NULL") {

    # Change name if not supplied
    if(object == "USERDEFINED"){
        if(TRUE %in% grepl(pattern = "N_", names(x), ignore.case = FALSE)){object == "trajectory_summary"}
        if(TRUE %in% grepl(pattern = "r_max", names(x), ignore.case = FALSE)){object == "parameter_summary"}
    }

    row_names <- c("mean", "median",
                   "2.5%PI", "97.5%PI",
                   "5%PI", "95%PI",
                   "min", "max", "n")

    output_summary <- matrix(nrow = length(row_names), ncol = dim(x)[2])
    output_summary[1, ] <- sapply(x, mean)
    output_summary[2:6, ] <- sapply(x, quantile, probs= c(0.5, 0.025, 0.975, 0.05, 0.95))
    output_summary[7, ] <- sapply(x, min)
    output_summary[8, ] <- sapply(x, max)
    output_summary[9, ] <- sapply(x, length)

    output_summary <- data.frame(output_summary)
    names(output_summary) <- names(x)
    row.names(output_summary) <- row_names
    noquote(format(output_summary, digits = 3, scientific = FALSE))

    if(!is.null(file_name)){
        write.csv(output_summary,
                  paste0(file_name, "_", object, ".csv"))
    }

    list(object = object, date=Sys.time(), output_summary = output_summary)
}
antarctic-humpback-2019-assessment/HumpbackSIR documentation built on Nov. 6, 2023, 6:07 p.m.