# ******************************************************************************
# Created: 02-Aug-2018
# Author: J. DuBois
# Contact: jason.dubois@wildlife.ca.gov
# Purpose: This file contains functions or methods to create age-length key or
# to get age frequency distribution.
# Source:
# ******************************************************************************
#' Make an age-length key with lengths as rows and ages as columns.
#'
#' @description \code{MakeALKey} creates an age-length key according
#' to length frequency per age. Resulting frequency converted to
#' proportion for application to entire length dataset.
#'
#' @param data A dataframe with length and age fields (columns).
#' @param len Field name in data containing numeric lengths.
#' @param age Field name in data containing nummeric ages.
#' @param lenBreaks How lengths are binned. See \code{\link{cut}}
#' for more information. (Supplied to \code{breaks} in \code{cut}.)
#' @param breakLabs NULL (default), currently not used.
#'
#' @return Age-length key (length[rows] x age[columns]) with proportion
#' or 0 in each cell. Return object is of class "table" with
#' \code{dimnames(<ALK>)} providing \code{Bin} and \code{Age} list items.
#' @export
#'
#' @note In the example below, some length bins contain no aged sample.
#' One can adjust the "by" argument in \code{seq} as a possible fix.
#' Or one needs to age fish within those length bins.
#'
#' @examples
#' # check range(trammel_catch[["FL"]])
#' len_breaks <- seq(from = 50, to = 220, by = 5)
#' MakeALKey(data = trammel_catch, len = FL, age = Age, lenBreaks = len_breaks)
MakeALKey <- function(data, len, age, lenBreaks, breakLabs = NULL) {
# get length and age columns
l <- as.character(substitute(len))
a <- as.character(substitute(age))
b <- CreateLenBins(len = data[[l]], lenBreaks = lenBreaks, numericBin = TRUE)
alk <- prop.table(table(b, data[[a]], dnn = c("Bin", "Age")), margin = 1)
alk
}
# end MakeALKey
#' Assign age to each fish based on extant age-length data.
#'
#' @description \code{AgeEach} inspired by \code{FSA::alkIndivAge}. Here
#' \code{AgeEach} output is more tailored for use within this package.
#' \code{AgeEach} handles age assignment at the length bin level, so
#' instance of an age-length key is not required.
#'
#' @param data A dataframe with length and age fields (columns).
#' @param len Field name in data containing numeric lengths.
#' @param age Field name in data containing nummeric ages.
#' @param lenBreaks lenBreaks How lengths are binned. See \code{\link{cut}}
#' for more information. (Supplied to \code{breaks} in \code{cut}.)
#' @param sex (default NULL) Currently not used.
#'
#' @note Print method for convenience of output.
#'
#' @return A list with age & bin diagnostics along with
#' a numeric vector of ages, with length = \code{nrow(data)}.
#' @export
#'
#' @examples
#' # check range(trammel_catch[["FL"]])
#' len_breaks <- seq(from = 50, to = 220, by = 5)
#' AgeEach(data = trammel_catch, len = FL, age = Age, lenBreaks = len_breaks)
AgeEach <- function(data, len, age, lenBreaks, sex = NULL) {
# get length and age columns
l <- as.character(substitute(len))
a <- as.character(substitute(age))
# create length bins
b <- CreateLenBins(len = data[[l]], lenBreaks = lenBreaks)
# to check ages in each length bin
age_split <- split(data[[a]], f = b, drop = FALSE)
bin_check <- vapply(age_split, FUN = CheckBin, FUN.VALUE = character(1L))
# only report values NOT NA
bin_check <- bin_check[!is.na(bin_check)]
# order does not reconcile with order in lengths in data need to figure out
# order [order(b)] below does not work
# unlist(lapply(age_split, FUN = CheckAge), use.names = FALSE)[order(b)]
a <- unsplit(value = lapply(age_split, FUN = CheckAge), f = b, drop = FALSE)
out <- list(
BinDiagnostics = list(
BinCheck = bin_check,
CountNA = sum(is.na(a)),
CountTotal = length(a)
),
Ages = a
)
class(out) <- "AgeEach"
out
}
# end AgeEach
#' @describeIn AgeEach \code{print} method (See notes).
#' @param x Object of class \code{AgeEach}.
#' @param ... Currently not used.
#' @export
print.AgeEach <- function(x, ...) {
# get diagnostics
bd <- x[["BinDiagnostics"]]
out <- sprintf(
fmt = "Number aged = %s, with %s NA.", # \n Bin Check:\n %s",
bd[["CountTotal"]] - bd[["CountNA"]],
bd[["CountNA"]] #,
# bd[["BinCheck"]]
)
# print(out)
cat(out, "\n", "\n", "Bins below empty or all NA:\n", sep = "")
print(noquote(bd[["BinCheck"]]))
}
# end print.AgeEach
#' @keywords internal
#' @rdname spopmodel-internals
CheckAge <- function(age) {
# TODO: test this function
# TODO: finalize what to do with length(age) == 0
# TODO: how to treat bins with no ages ??
# TODO: nail down whole & sum_rem (still issues with double or integer values)
# TODO: nail down replace = T or replace = F
# TODO: age some input verification
# TODO: set ouput to integer ??
# TODO: add set seed component here or in AgeEach
# age vector result of splitting age field on length bins so length(age) = 0
# means no lengths (& thus ages) are in that specific bin return numeric(0)
# various scenarios of incoming age vector -----------------------------------
# age ==> (1) numeric(0) OR (2) length = 1 OR (3) length > 1
#
# (4) all numbers (i.e., no NA values) OR ...
# (5) all NA OR ...
# (6) mixture of numbers & NA values
#
# if (1) return numeric(0) [length bin is empty]
# if (2) if !NA see (4) else see (5)
# if (3) see (4) OR (5) OR (6)
#
# if (4) use assigned age irrespective of length(age)
# if (5) cannot do much; advise re-binning or if possible ageing
# some fish in bin [x, y)
# if (6) length(age) must be > 1 so go through process using
# known ages (& proportions) to "age the NAs"
#
# processing scenario (6) **********************
#
# freq <- frequency (as proportion) of age variable
# get column names of `freq` (these are the ages as character vector)
#
# (a) if length(freq) = 1 then set all NA
# values to column names of `freq` as there is only one age
# value for this length bin
#
# (b) if number NA values = 1 then
# a <- sample(column names of `freq`, size = 1, prop = `freq`)
# age[which is na] <- as.numeric(a)
# return age
#
# (c) set `freq` * sum(all NA values of age)
# perform integer division ...
# whole <- `freq` * sum(all NA values of age) %/% 1
#
# get remainder using modulus
# remainder <- `freq` * sum(all NA values of age) %% 1
#
# if all of whole = 0 then
# a <- sample(column names of `freq`,
# size = sum(remainder),
# prop = `freq`)
# age[which is na] <- as.numeric(a)
#
# the reality with this scenario is that sum(remainder) should
# really be 1 if all of whole = 0 (I believe)
#
# else
# create a character vector of ages using names of `whole` &
# values of `whole`
# new_ages <- rep(dimnames(whole)[[1]], times = whole)
#
# then sample from these new ages
# new_ages <- sample(new_ages, size = length(new_ages), replace = T)
#
# recalculate number remaining
# remainder <- remainder - length(new_ages)
#
# sample again & combine with `new_ages` vector
# new_ages <- c(new_ages, sample(new_ages, size = sum_rem,
# replace = TRUE))
# age[which is NA] <- as.numeric(new_ages)
# return age
#
# code here ------------------------------------------------------------------
# age is essentially empty
if (length(age) == 0) return(numeric(length = 0L))
# check for NAs within age vector
is_na <- is.na(age)
# check point: irrespective of length(age), we note if all values are aged OR
# all values are NA; essentially not much we need to or can do here except
# return age --- we may issue a warning about return NA values, but we can
# handle this in AgeEach()
if (all(is_na) || all(!is_na)) return(age)
# check point: we know at this point length(age) must be > 1 AND age is a
# mixture of NA & numeric values
# get proportion per age ignoring (for now) NA values
freq <- prop.table(table(age, dnn = NULL))
# get names of `freq` --- essentially age values as character
char_ages <- dimnames(freq)[[1]]
# if there is only one unique age in `age` then we have no other choice but to
# assign all NA values to this age (i.e., no need for random sampling)
if (length(char_ages) == 1) {
age[is_na] <- as.numeric(char_ages)
return(age)
}
# check point: we know at this stage we have > 1 unique age, now we need to
# check the number of NA values; here we check if `age` contains 1 NA value
num_not_aged <- sum(is_na)
if (num_not_aged == 1) {
a <- sample(char_ages, size = 1, prob = freq)
age[is_na] <- as.numeric(a)
return(age)
}
# check point: at this point we have > 1 unique age & > 1 NA value
# essentially like using an al-key multiplying the count by proportions
age_assign <- freq * num_not_aged
# integer division to get complete age assignments
whole <- age_assign %/% 1
# modulo to get remainder (to be handled later)
sum_rem <- sum(age_assign %% 1)
# sum_rem <- ceiling(sum_rem)
# the logic is that if all whole = 0 then sum_rem should equal the number not
# aged thus we can just use num_not_aged as our size
if (all(whole == 0)) {
a <- sample(char_ages, size = num_not_aged, replace = TRUE, prob = freq)
age[is_na] <- as.numeric(a)
# print("all whole = 0")
return(age)
}
# a needs "re"-sampling here before proceeding in case sum_rem != 0
a <- rep(char_ages, times = whole)
a <- sample(a, size = length(a), replace = TRUE)
# in this case length(a) should equal number not aged
if (sum_rem == 0) {
# a <- sample(a, size = length(a), replace = TRUE)
age[is_na] <- as.numeric(a)
# print("sum rem = 0")
return(age)
}
# NEED TO TEST LOGIC BELOW
# check point: here all(whole == 0) is FALSE AND sum_rem > 0
# not sure if recalculation is necessary
# recalculate number remaining
# sum_rem <- length(a) - sum_rem # WRONG
# if (sum_rem > length(a)) {
# sum_rem <- sum_rem - length(a) # WRONG
# }
# sum_rem <- max(sum_rem, length(a)) # WRONG
# this *should be* the proper recalculation for sum_rem
sum_rem <- num_not_aged - length(a) # CORRECT
# print calls for debugging (uncomment as needed)
# print(a)
# print(sum_rem)
# combine 'a' with samples on the remainder
a <- c(a, sample(char_ages, size = sum_rem, replace = TRUE, prob = freq))
age[is_na] <- as.numeric(a)
# print(a)
# print(age_assign %% 1)
# print("made it to the end")
age
}
# end CheckAge
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.