R/checkLenAge.R

Defines functions checkLenAge

Documented in checkLenAge

#' Calculate von Bertalanffy Growth Parameters
#'
#' Calaculate the von Bertalanffy growth parameters and compare them to
#' parameters estimated when outliers are removed from the data set.
#' Outliers are determined using standard deviations
#' (i.e., \code{sdFactor} input parameter).
#'
#' @inheritParams cleanPacFIN
#' @param Par A list of five initial or externally estimated parameter values for the
#' von Bertalanffy growth function. List entries should be named as follows:
#' "K", "Linf", "L0", "CV0", "CV1".
#' If they aren't named, then it is imperative that they are in the previous order.
#' Each list element, i.e., vector, can have multiple values specific to each sex type
#' present in your data. These vector elements must be named and the length of all
#' vectors must be the same. A typical example for a list element would be
#' \code{K = c(F = 0.10, M = 0.20, U = 0.15)} for female, male, and unsexed fish.
#' If each vector within the list is only made up of a single element, it doesn't need
#' to be named and each sex type present in the data will use the same value.
#' @param len_col Name of the length column to use for comparisons (default = "lengthcm").
#' @param age_col Name of the age column to use for comparisons (default = "age").
#' @param sex_col Name of the sex column to use for comparisons (default = "SEX").
#' If the value is \code{NULL}, then all aged fish are included in a single analysis rather
#' than separating data by available sex information.
#' @param mult A multiplier to convert lengths (default value 1). This multiplier is used
#' to scale the input parameters if the length column is a different unit other than cm.
#' In reality one really shouldn't need to use this because you can make a column of
#' lengths in the correct units and supply that column name to \code{len_col}.
#' @param keepAll Option to change inconsistent lengths to a value of \code{NA},
#' which is only done if \code{keepAll = FALSE}. The default is to not change any values.
#' @param sdFactor Option to control the threshold of the estimated low and high bound
#' that is compared to when throwing out data. Default value is 4 standard deviations
#' away from the mean prediction.
#' @param Optim A logical value specifying whether or not to internally solve
#' for the parameter estimates or use \code{Par} as input parameter to the growth function
#' for predictions.
#' @param precision An integer providing the precision for the predicted lengths or the number
#' of digits the results will have after passing them through
#' \code{\link{round}(x, digits = precision)}.
#' @inheritParams cleanPacFIN
#' @param dir A file path to a directory where you want information saved to
#' a file, or multiple files. If \code{NULL}, which is the default, then
#' nothing is saved.
#'
#' @return Returns a combined dataset in PacFIN format.
#'
#' @export
#' @author Chantel R. Wetzel, Vlada Gertseva, James Thorson

checkLenAge <- function(Pdata,
                        Par = list(
                          K = 0.13,
                          Linf = 55,
                          L0 = 15,
                          CV0 = 0.10,
                          CV1 = 0.10
                        ),
                        len_col = "lengthcm",
                        age_col = "Age",
                        sex_col = "SEX",
                        mult = 1,
                        keepAll = TRUE,
                        sdFactor = 4,
                        Optim = TRUE,
                        precision = 1,
                        verbose = TRUE,
                        dir = NULL) {
  #### Initialize the three new columns
  Pdata$Lhat_pred <- NA
  Pdata$Lhat_low <- NA
  Pdata$Lhat_high <- NA

  #### Check sex-specific information
  if (is.null(sex_col)) {
    sex_col <- "ignore"
    Pdata[, sex_col] <- "A"
  }
  sex_vec <- unique(Pdata[, sex_col])

  ####  Check inputs
  if (!is.null(names(Par))) {
    new <- Par[c("K", "Linf", "L0", "CV0", "CV1")]
    stopifnot("Par not found" = length(new) == length(Par))
    Par <- new
    rm(new)
  }
  if (length(Par[[1]]) > 1) {
    if (stats::var(unlist(lapply(Par, length))) != 0) {
      stop(
        "Must be one named entry in each vector element of the list Par\n",
        "for each unique sex type in the data, e.g.,\n",
        paste(sex_vec, collapse = ", ")
      )
    }
    if (!all(sex_vec %in% names(Par[[1]]))) {
      stop(
        "The data contains the following values for sexes,\n",
        paste(sex_vec, collapse = ", "), "\n",
        " which must match names of the parameter vectors in the list Par, e.g.,\n",
        paste(unique(unlist(lapply(Par, names))), collapse = ", ")
      )
    }
    # Ensure order of parameters within each vector are ordered by sex
    Par <- lapply(Par, "[", sex_vec)
  }
  stopifnot("length column not found" = len_col %in% colnames(Pdata))
  stopifnot("age column not found" = age_col %in% colnames(Pdata))

  #### For each sex
  for (s in seq_along(sex_vec)) {
    use_data <- !is.na(Pdata[, len_col]) &
      !is.na(Pdata[, age_col]) &
      Pdata[, sex_col] %in% sex_vec[s]

    if (length(Par[[1]]) > 1) {
      pars_in <- c(
        Par[[1]][sex_vec[s]],
        Par[[2]][sex_vec[s]] * mult,
        Par[[3]][sex_vec[s]] * mult,
        Par[[4]][sex_vec[s]],
        Par[[5]][sex_vec[s]]
      )
    } else {
      pars_in <- c(
        Par[[1]][1],
        Par[[2]][1] * mult,
        Par[[3]][1] * mult,
        Par[[4]][1],
        Par[[5]][1]
      )
    }

    if (Optim) {
      ests <- stats::optim(
        fn = nwfscSurvey::fit_vbgrowth,
        par = log(pars_in),
        hessian = FALSE,
        Ages = Pdata[use_data, age_col],
        Lengths = Pdata[use_data, len_col]
      )$par
    } else {
      ests <- pars_in
    }

    Pred <- nwfscSurvey::fit_vbgrowth(
      Par = ests,
      ReturnType = "Pred",
      sdFactor = sdFactor,
      Ages = Pdata[use_data, age_col],
      Lengths = Pdata[use_data, len_col]
    )

    Pdata[use_data, c("Lhat_low", "Lhat_pred", "Lhat_high")] <- round(Pred, precision)
  }

  if (!keepAll) {
    remove <- which(
      Pdata[, len_col] > Pdata[, "Lhat_high"] |
        Pdata[, len_col] < Pdata[, "Lhat_low"]
    )
    all <- Pdata
    Pdata[remove, len_col] <- NA
  }

  #### Summary information that is saved to the disk
  if (!is.null(dir)) {
    dir <- normalizePath(dir)
    dir.create(dir, showWarnings = FALSE)

    # Estimate pars again b/c some data may be removed if !keepAll
    tempdata <- Pdata[
      !is.na(Pdata[, len_col]) &
        !is.na(Pdata[, age_col]),
    ]
    estsall <- data.frame(
      Sex = sex_vec,
      do.call("rbind", tapply(
        seq(NROW(tempdata)), tempdata[, sex_col],
        function(x) {
          exp(stats::optim(
            fn = nwfscSurvey::fit_vbgrowth,
            par = log(pars_in),
            hessian = FALSE,
            Ages = tempdata[x, age_col],
            Lengths = tempdata[x, len_col]
          )$par)
        }
      ))
    )
    colnames(estsall)[-1] <- names(Par)
    estsall <- estsall[, c("Sex", "L0", "Linf", "K", "CV0", "CV1")]
    colnames(estsall) <- c("Sex", "$L_0$", "$L_{Inf}$", "$K$", "$CV_{young}$", "$CV_{old}$")
    x <- knitr::kable(estsall,
      format = "latex",
      label = "PacFIN_vonBpars", escape = FALSE, booktabs = TRUE,
      caption = paste0(
        "Estimates of von Bertalanffy growth parameters in terms of ",
        "length at age-0 ($L_0$), rather than age at which growth is zero, and fit to ",
        "fishery-dependent data provided by \\gls{pacfin}. Estimates ",
        ifelse(all(estsall[, "Sex"] == "A"), "", "are sex-specific (row) and "),
        "include $L_0$, length at maximum age ($L_{Inf}$), growth rate ($K$), and ",
        "coefficients of variation at young ($CV_{young}$) and old ages ($CV_{old}$).",
        ifelse(keepAll, " Data used to fit the model included outliers.", "")
      )
    )
    writeLines(x, file.path(dir, "PacFIN_vonBpars.tex"))
    utils::write.table(estsall,
      file = file.path(dir, "PacFIN_vonBpars.csv"),
      sep = ",", row.names = FALSE
    )
    tempdata <- tempdata[, c(len_col, age_col, sex_col)]
    colnames(tempdata) <- c("Length_cm", "Age", "Sex")
    pars <- unlist(estsall[1, c(3, 4, 2, 5, 6), drop = TRUE])
    names(pars) <- NULL

    latage <- nwfscSurvey::PlotVarLengthAtAge.fn(
      dat = tempdata, parStart = pars,
      dir = dir, main = "PacFIN", ageBin = 1,
      bySex = !all(estsall[, "Sex"] == "A"),
      estVB = TRUE, legX = "bottomleft", dopng = TRUE
    )
  }

  Pdata <- Pdata[, !grepl("ignore", colnames(Pdata))]
  return(Pdata)
}
nwfsc-assess/PacFIN.Utilities documentation built on July 4, 2025, 9:06 a.m.