r4ss: R Code for Stock Synthesis

Documented in SSMethod.TA1.8

#' Apply Francis composition weighting method TA1.8
#'
#' Uses method TA1.8 (described in Appendix A of Francis 2011) to do
#' stage-2 weighting of composition data from a Stock Synthesis model.
#' Outputs a multiplier, *w* (with bootstrap 95% confidence interval),
#' so that *N2y* = *w* x *N1y*, where *N1y* and
#' *N2y* are the stage-1 and stage-2
#' multinomial sample sizes for the data set in year y.  Optionally
#' makes a plot of observed (with confidence limits, based on *N1y*)
#' and expected mean lengths (or ages).
#' \cr\cr
#' CAUTIONARY/EXPLANATORY NOTE.
#' The large number of options available in SS makes it very
#' difficult to be sure that what this function does is
#' appropriate for all combinations of options.  The following
#' notes might help anyone wanting to check or correct the code.
#' \enumerate{
#'   \item The code first takes the appropriate database (lendbase, sizedbase,
#'         agedbase, or condbase) and removes unneeded rows.
#'   \item The remaining rows of the database are grouped into individual
#'         comps (indexed by vector indx) and relevant statistics (e.g.,
#'         observed and expected mean length or age), and ancillary data,
#'         are calculated for each comp (these are stored in pldat - one row
#'         per comp).
#'         If the data are to be plotted, the comps are grouped, with each
#'         group corresponding to a panel in the plot, and groups are indexed
#'         by plindx.
#'   \item A single multiplier is calculated to apply to all the comps.
#' }
#'
#' @param fit Stock Synthesis output as read by r4SS function SS_output
#' @param type either 'len' (for length composition data), 'size' (for
#' generalized size composition data), 'age' (for age composition data),
#' or 'con' (for conditional age at length data)
#' @param fleet vector of one or more fleet numbers whose data are to
#' be analysed simultaneously (the output N multiplier applies
#' to all fleets combined)
#' @template fleetnames
#' @param part vector of one or more partition values; analysis is restricted
#' to composition data with one of these partition values.
#' Default is to include all partition values (0, 1, 2).
#' @param label.part Include labels indicating which partitions are included?
#' @param sexes vector of one or more values for Sexes; analysis is
#' restricted to composition data with one of these
#' Sexes values.  Ignored if type=='con'.
#' @param label.sex Include labels indicating which sexes are included?
#' @param seas string indicating how to treat data from multiple seasons
#' 'comb' - combine seasonal data for each year and plot against Yr
#' 'sep' - treat seasons separately, plotting against Yr.S
#' If is.null(seas) it is assumed that there is only one season in
#' the selected data (a warning is output if this is not true) and
#' option 'comb' is used.
#' @param method a vector of one or more size-frequency method numbers
#' (ignored unless type = 'size').
#' If !is.null(method), analysis is restricted to size-frequency
#' methods in this vector.  NB comps are separated by method
#' @param plotit if TRUE, make an illustrative plot like one or more
#' panels of Fig. 4 in Francis (2011).
#' @param printit if TRUE, print results to R console.
#' @param datonly if TRUE, don't show the model expectations
#' @param plotadj if TRUE, plot the confidence intervals associated with
#' the adjusted sample sizes (TRUE by default unless datonly = TRUE)
#' @param maxpanel maximum number of panels within a plot
#' @param set.pars Set the graphical parameters such as mar and mfrow.
#' Can be set to FALSE in order to add plots form multiple calls to
#' this function as separate panels in one larger figure.
#' @param add add to existing plot
#' @author R.I.C Chris Francis, Andre E. Punt, Ian G. Taylor
#' @export
#' @family tuning functions
#' @references Francis, R.I.C.C. (2011). Data weighting in statistical
#' fisheries stock assessment models. Canadian Journal of
#' Fisheries and Aquatic Sciences 68: 1124-1138.
#' @examples
#' \dontrun{
#' Nfleet <- length(myreplist[["FleetNames"]])
#' for (Ifleet in 1:Nfleet) {
#'   SSMethod.TA1.8(myreplist, "len", fleet = Ifleet, maxpanel = maxpanel)
#' }
#' for (Ifleet in 1:Nfleet) {
#'   SSMethod.TA1.8(myreplist, "age", fleet = Ifleet, maxpanel = maxpanel)
#' }
#' for (Ifleet in 1:Nfleet) {
#'   SSMethod.TA1.8(myreplist, "size", fleet = Ifleet, maxpanel = maxpanel)
#' }
#' for (Ifleet in 1:Nfleet) {
#'   SSMethod.TA1.8(myreplist, "con", fleet = Ifleet, maxpanel = maxpanel)
#' }
#' for (Ifleet in 1:Nfleet) {
#'   SSMethod.Cond.TA1.8(myreplist, fleet = Ifleet, maxpanel = maxpanel)
#' }
#' }
#'
SSMethod.TA1.8 <-
  function(
    fit,
    type,
    fleet,
    part = 0:2,
    sexes = 0:3,
    seas = NULL,
    method = NULL,
    plotit = TRUE,
    printit = FALSE,
    datonly = FALSE,
    plotadj = !datonly,
    maxpanel = 1000,
    fleetnames = NULL,
    label.part = TRUE,
    label.sex = TRUE,
    set.pars = TRUE,
    add = FALSE
  ) {
    # Check the type is correct and the sexes is correct
    is.in <- function(x, y) !is.na(match(x, y))
    if (!is.in(type, c("age", "len", "size", "con"))) {
      stop('Illegal value for type (should be "age", "len", "size", or "con")')
    } else {
      if (sum(!is.in(sexes, c(0:3))) > 0) {
        stop("Unrecognised value for sexes")
      }
    }

    # replace default fleetnames with user input if requested
    if (is.null(fleetnames)) {
      # use fleetnames in the model
      fleetnames <- fit[["FleetNames"]]
    } else {
      # if custom names input, check length
      if (length(fleetnames) != fit[["nfleets"]]) {
        stop(
          "fleetnames needs to be NULL or have length = nfleets = ",
          fit[["nfleets"]]
        )
      }
    }
    # Select the type of datbase
    dbase <- fit[[paste(type, "dbase", sep = "")]]
    # sel is vector of row indices selected for the plot/calculations
    # select row indices matching fleet and partition
    sel <- is.in(dbase[["Fleet"]], fleet) & is.in(dbase[["Part"]], part)
    # select row indices matching Sexes column
    if (type != "con") {
      # change column nanme on earlier SS versions to match change from
      # Pick_sex to Sexes in 3.30.12 (July 2018)
      names(dbase)[names(dbase) == "Pick_sex"] <- "Sexes"
      sel <- sel & is.in(dbase$"Sexes", sexes)
    }
    # for generalized size frequency comps, select chosen size method
    if (type == "size" & !is.null(method)) {
      sel <- sel & is.in(dbase[["method"]], method)
    }
    # if there are no rows selected, return empty result
    if (sum(sel) == 0) {
      return()
    }
    # subset comp database for selected rows
    dbase <- dbase[sel, ]
    if (is.null(seas)) {
      seas <- "comb"
      if (length(unique(dbase[["Seas"]])) > 1) {
        message("Combining data from multiple seasons")
      }
    }
    # if generalized size comp is used, check for mix of units
    if (type == "size") {
      if (length(unique(dbase[["units"]])) > 1) {
        warning("Mix of units being compared:", unique(dbase[["units"]]))
      }
    }
    # create label for partitions
    partitions <- sort(unique(dbase[["Part"]])) # values are 0, 1, or 2
    partition.labels <- c("whole", "discarded", "retained")[partitions + 1]
    partition.labels <- paste0(
      "(",
      paste(partition.labels, collapse = "&"),
      " catch)"
    )
    # indx is string combining fleet, year, and potentially conditional bin
    indx <- paste(
      dbase[["Fleet"]],
      dbase[["Yr"]],
      if (type == "con") {
        dbase$"Lbin_lo"
      } else {
        ""
      },
      if (seas == "sep") dbase[["Seas"]] else ""
    )
    # if subsetting by sex, add Sexes value to the indx strings
    sex.flag <- type != "con" &
      max(tapply(
        dbase$"Sexes",
        dbase[["Fleet"]],
        function(x) length(unique(x))
      )) >
        1
    if (sex.flag) {
      indx <- paste(indx, dbase$"Sexes")
    }
    # if subsetting by generalized size-method, add that value to indx strings
    method.flag <- type == "size" && length(unique(dbase[["method"]])) > 1
    if (method.flag) {
      indx <- paste(indx, dbase[["method"]])
    }
    # unique strings in indx vector
    uindx <- unique(indx)
    # test for length 1 results
    if (length(uindx) == 1) {
      # presumably the method is meaningless of there's only 1 point,
      # but it's good to be able to have the function play through
      message("Only one point to plot")
      return()
    }

    # create empty data.frame to store information on each observation
    pldat <- matrix(
      0,
      length(uindx),
      10,
      dimnames = list(
        uindx,
        c(
          "Obsmn",
          "Obslo",
          "Obshi",
          "semn",
          "Expmn",
          "Std.res",
          "ObsloAdj",
          "ObshiAdj",
          "Fleet",
          "Yr"
        )
      )
    )
    # add columns of zeros to fill with values necessary for subsetting
    if (type == "con") {
      pldat <- cbind(pldat, Lbin = 0)
    }
    if (sex.flag) {
      pldat <- cbind(pldat, sexes = 0)
    }
    if (type == "size") {
      pldat <- cbind(pldat, method = 0)
      # vector to store units (which are strings and don't fit in pldat matrix)
      plunits <- rep(NA, nrow(pldat))
    }

    # Find the weighting factor for this combination of factors
    for (i in seq_along(uindx)) {
      # each row of pldat is an individual comp
      subdbase <- dbase[indx == uindx[i], ]
      xvar <- subdbase[["Bin"]]
      # observed mean
      pldat[i, "Obsmn"] <- sum(subdbase[["Obs"]] * xvar) /
        sum(subdbase[["Obs"]])
      # expected mean
      pldat[i, "Expmn"] <- sum(subdbase[["Exp"]] * xvar) /
        sum(subdbase[["Exp"]])

      # use adjusted input sample size for Francis or MI weighting options
      Nsamp <- subdbase[["Nsamp_adj"]]
      if (
        "Nsamp_DM" %in% names(subdbase) || any(is.na(subdbase[["Nsamp_DM"]]))
      ) {
        # dirichlet multinomial newer format
        Nsamp <- subdbase[["Nsamp_DM"]]
      }

      # standard error of the mean
      pldat[i, "semn"] <- sqrt(
        (sum(subdbase[["Exp"]] * xvar^2) /
          sum(subdbase[["Exp"]]) -
          pldat[i, "Expmn"]^2) /
          mean(Nsamp)
      )
      # calculate confidence intervals and other stuff
      pldat[i, "Obslo"] <- pldat[i, "Obsmn"] - 2 * pldat[i, "semn"]
      pldat[i, "Obshi"] <- pldat[i, "Obsmn"] + 2 * pldat[i, "semn"]
      pldat[i, "Std.res"] <- (pldat[i, "Obsmn"] - pldat[i, "Expmn"]) /
        pldat[i, "semn"]
      pldat[i, "Fleet"] <- mean(subdbase[["Fleet"]])
      pldat[i, "Yr"] <- mean(
        if (seas == "comb") subdbase[["Yr"]] else subdbase[["Yr.S"]]
      )
      if (type == "con") {
        pldat[i, "Lbin"] <- mean(subdbase$"Lbin_lo")
      }
      if (sex.flag) {
        pldat[i, "sexes"] <- mean(subdbase$"Sexes")
      }
      if (type == "size") {
        pldat[i, "method"] <- mean(subdbase[["method"]])
        plunits[i] <- subdbase[["units"]][1] # units of size comps
      }
    }
    Nmult <- 1 / var(pldat[, "Std.res"], na.rm = TRUE)

    # Find the adjusted confidence intervals
    for (i in seq_along(uindx)) {
      pldat[i, "ObsloAdj"] <- pldat[i, "Obsmn"] -
        2 * pldat[i, "semn"] / sqrt(Nmult)
      pldat[i, "ObshiAdj"] <- pldat[i, "Obsmn"] +
        2 * pldat[i, "semn"] / sqrt(Nmult)
    }

    Nfleet <- length(unique(pldat[, "Fleet"]))
    # make plot if requested
    if (plotit) {
      plindx <- if (type == "con") {
        paste(pldat[, "Fleet"], pldat[, "Yr"])
      } else {
        pldat[, "Fleet"]
      }
      if (sex.flag) {
        plindx <- paste(plindx, pldat[, "sexes"])
      }
      if (method.flag) {
        plindx <- paste(plindx, pldat[, "method"])
      }
      uplindx <- unique(plindx)

      # Select number of panels
      Npanel <- length(uplindx)
      ## Ian T. 9/25/14: changing from having at least 4 panels to no minimum
      # NpanelSet <- max(4,min(length(uplindx),maxpanel))
      NpanelSet <- min(length(uplindx), maxpanel)
      Nr <- ceiling(sqrt(NpanelSet))
      Nc <- ceiling(NpanelSet / Nr)
      if (set.pars) {
        # save current graphical parameters
        par_current <- par()
        # set new parameters
        par(
          mfrow = c(Nr, Nc),
          mar = c(2, 2, 1, 1) + 0.1,
          mgp = c(0, 0.5, 0),
          oma = c(1.2, 1.2, 0, 0),
          las = 1
        )
        par(cex = 1)
      }
      for (i in 1:Npanel) {
        # loop over panels
        subpldat <- pldat[plindx == uplindx[i], , drop = FALSE]
        x <- subpldat[, ifelse(type == "con", "Lbin", "Yr")]
        # make empty plot (unless adding to existing plot)
        if (!add) {
          # calculate ylim, including removing Inf values
          plot(
            x,
            subpldat[, "Obsmn"],
            pch = "-",
            xlim = if (length(x) > 1) range(x) else c(x - 0.5, x + 0.5),
            ylim = range(
              subpldat[, c("Obslo", "Obshi", "ObsloAdj", "ObshiAdj", "Expmn")],
              finite = TRUE,
              na.rm = TRUE
            ),
            xlab = "",
            ylab = ""
          )
        }
        segments(
          x,
          subpldat[, "Obslo"],
          x,
          subpldat[, "Obshi"],
          lwd = 3,
          lend = 3
        )
        if (plotadj) {
          arrows(
            x,
            subpldat[, "ObsloAdj"],
            x,
            subpldat[, "ObshiAdj"],
            lwd = 1,
            length = 0.04,
            angle = 90,
            code = 3
          )
        }
        points(x, subpldat[, "Obsmn"], pch = 21, bg = "grey80")
        ord <- order(x)
        if (!datonly) {
          if (length(x) > 1) {
            lines(x[ord], subpldat[ord, "Expmn"], lwd = 3, col = 4)
          } else {
            lines(
              c(x - 0.5, x + 0.5),
              rep(subpldat[, "Expmn"], 2),
              lwd = 3,
              col = 4
            )
          }
        }
        # Lines
        fl <- fleetnames[subpldat[1, "Fleet"]]
        yr <- paste(subpldat[1, "Yr"])
        lab <- if (type == "con") ifelse(Nfleet > 1, paste(yr, fl), yr) else fl
        if (sex.flag & label.sex) {
          lab <- paste(lab, ifelse(subpldat[1, "sexes"] == 0, "comb", "sex"))
        }
        if (method.flag) {
          lab <- paste(lab, "meth", subpldat[1, "method"])
        }
        if (label.part) {
          lab <- paste(lab, partition.labels)
        }
        if (!add) {
          mtext(lab, side = 3, at = mean(x))
        }
      }
      # define y-axis label
      ylab <- "Mean age" # default as age unless replaced below
      if (type == "len") {
        ylab <- "Mean length (cm)"
      }
      if (type == "size") {
        # probably more efficient ways to sort out these labels,
        # but lots of if-statements make logic easier to follow
        units <- unique(plunits[plindx %in% uplindx])
        if (length(units) == 1) {
          # not sure if this will always be true or not
          if (units %in% c("kg", "lb")) {
            ylab <- paste0("Mean weight (", units, ")")
          }
          if (units %in% c("cm", "in")) {
            ylab <- paste0("Mean length (", units, ")")
          }
        } else {
          # just in case it's possible to have multiple units in one panel
          ylab <- paste0("Mean value (", paste(units, collapse = " or "), ")")
        }
      }
      if (!add) {
        mtext(ylab, side = 2, las = 0, outer = TRUE)
        mtext(ifelse(type == "con", "Length", "Year"), side = 1, outer = TRUE)
      }
      # restore previous graphics parameters (if changed to begin with
      if (set.pars) {
        par(
          mfrow = par_current[["mfrow"]],
          mar = par_current[["mar"]],
          mgp = par_current[["mgp"]],
          oma = par_current[["oma"]],
          las = par_current[["las"]]
        )
      }
    }
    if (!datonly) {
      tmp <- matrix(
        sample(pldat[, "Std.res"], 1000 * nrow(pldat), replace = TRUE),
        nrow(pldat)
      )
      confint <- as.vector(quantile(
        apply(tmp, 2, function(x) 1 / var(x, na.rm = TRUE)),
        c(0.025, 0.975),
        na.rm = TRUE
      ))
      Output <- c(w = Nmult, lo = confint[1], hi = confint[2])
      Outs <- paste(
        "Francis Weights - ",
        type,
        ": ",
        fleetnames[fleet],
        ": ",
        round(Nmult, 4),
        " (",
        round(confint[1], 4),
        "-",
        round(confint[2], 4),
        ")",
        sep = ""
      )
      if (printit) {
        print(Outs)
      }
      return(Output)
    }
  }