#' @title Proportions-at-age from an age-length key
#'
#' @description Computes the proportions-at-age (with standard errors) in a larger sample based on an age-length-key created from a subsample of ages through a two-stage random sampling design. Follows the methods in Quinn and Deriso (1999).
#'
#' @details The age-length key in \code{key} must have length intervals as rows and ages as columns. The row names of \code{key} (i.e., \code{rownames(key)}) must contain the minimum values of each length interval (e.g., if an interval is 100-109 then the corresponding row name must be 100). The column names of \code{key} (i.e., \code{colnames(key)}) must contain the age values (e.g., the columns can NOT be named with \dQuote{age.1}, for example).
#'
#' The length intervals in the rows of \code{key} must contain all of the length intervals present in the larger sample. Thus, the length of \code{len.n} must, at least, equal the number of rows in \code{key}. If this constraint is not met, then the function will stop with an error message.
#'
#' The values in \code{lenA.n} are equal to what the row sums of \code{key} would have been before \code{key} was converted to a row proportions table. Thus, the length of \code{lenA.n} must also be equal to the number of rows in \code{key}. If this constraint is not met, then the function will stop with an error message.
#'
#' @param key A numeric matrix that contains the age-length key. See details.
#' @param lenA.n A numeric vector of sample sizes for each length interval in the \emph{aged sample}.
#' @param len.n A numeric vector of sample sizes for each length interval in the \emph{complete sample} (i.e., all fish regardless of whether they were aged or not).
#'
#' @return A data.frame with as many rows as ages (columns) present in \code{key} and the following three variables:
#' \itemize{
#' \item age The ages.
#' \item prop The proportion of fish at each age.
#' \item se The SE for the proportion of fish at each age.
#' }
#'
#' @section Testing: The results from this function perfectly match the results in Table 8.4 (left) of Quinn and Deriso (1999) using \code{\link[FSAdata]{SnapperHG2}} from \pkg{FSAdata}. The results also perfectly match the results from using \code{\link[fishmethods]{alkprop}} in \pkg{fishmethods}.
#'
#' @author Derek H. Ogle, \email{DerekOgle51@gmail.com}
#'
#' @section IFAR Chapter: 5-Age-Length Key.
#'
#' @seealso See \code{\link{alkIndivAge}} and related functions for a completely different methodology. See \code{\link[fishmethods]{alkprop}} from \pkg{fishmethods} for the exact same methodology but with a different format for the inputs.
#'
#' @references Ogle, D.H. 2016. \href{https://fishr-core-team.github.io/fishR/pages/books.html#introductory-fisheries-analyses-with-r}{Introductory Fisheries Analyses with R}. Chapman & Hall/CRC, Boca Raton, FL.
#'
#' Lai, H.-L. 1987. Optimum allocation for estimating age composition using age-length key. Fishery Bulletin, 85:179-185.
#'
#' Lai, H.-L. 1993. Optimum sampling design for using the age-length key to estimate age composition of a fish population. Fishery Bulletin, 92:382-388.
#'
#' Quinn, T. J. and R. B. Deriso. 1999. Quantitative Fish Dynamics. Oxford University Press, New York, New York. 542 pages.
#'
#' @keywords manip
#'
#' @examples
#' ## Example -- Even breaks for length categories
#' WR1 <- WR79
#' # add length intervals (width=5)
#' WR1$LCat <- lencat(WR1$len,w=5)
#' # get number of fish in each length interval in the entire sample
#' len.n <- xtabs(~LCat,data=WR1)
#' # isolate aged sample and get number in each length interval
#' WR1.age <- subset(WR1, !is.na(age))
#' lenA.n <- xtabs(~LCat,data=WR1.age)
#' # create age-length key
#' raw <- xtabs(~LCat+age,data=WR1.age)
#' ( WR1.key <- prop.table(raw, margin=1) )
#'
#' # use age-length key to estimate age distribution of all fish
#' alkAgeDist(WR1.key,lenA.n,len.n)
#'
#' @export
alkAgeDist <- function(key,lenA.n,len.n) {
## Some checks
key <- iCheckALK(key)
L <- nrow(key)
if (length(lenA.n)!=L)
STOP("'lenA.n' and the 'key' have different numbers of length intervals.")
if (length(len.n)!=L)
STOP("'len.n' and the 'key' have different numbers of length intervals.")
## total number of fish sampled
N <- sum(len.n)
## proportion of total fish sampled by length interval
l_i <- len.n/N
## Create a matrix of proportions-at-age with corresponding SE and CV.
tmp <- t(apply(key,2,iALKAgeProp,l_i=l_i,n_i=lenA.n,N=N))
res <- data.frame(as.numeric(rownames(tmp)),tmp)
names(res) <- c("age","prop","se")
row.names(res) <- NULL
## return the result
res
}
## ===========================================================
## An internal function that allows the use of apply()
## in alkAgeDist() rather than using a for loop. This computes
## the proportion at each age (p_j) using 8.14a and the SE
## (sqrt of var.p_j) of each proportion using 8.14b
## (note that only a single sum was used here) of Quinn and
## Deriso (1999).
## ===========================================================
iALKAgeProp <- function(p_jgi,l_i,n_i,N) {
p_ij <- l_i*p_jgi
p_j <- sum(p_ij,na.rm=TRUE)
var_p_ij <- ((l_i^2)*p_jgi*(1-p_jgi))/(n_i-1) + (l_i*((p_jgi-p_j)^2))/N
var.p_j <- sum(var_p_ij,na.rm=TRUE)
c(p_j,sqrt(var.p_j))
}
#' @title Mean Values-at-age from an age-length key
#'
#' @description Computes the mean value-at-age in a larger sample based on an age-length-key created from a subsample of ages through a two-stage random sampling design. The mean values could be mean length-, weight-, or fecundity-at-age, for example. The methods of Bettoli and Miranda (2001) or Quinn and Deriso (1999) are used. A standard deviation is computed for the Bettoli and Miranda (2001) method and standard error for the Quinn and Deriso (1999) method. See the testing section notes.
#'
#' @details The age-length key \code{key} must have length intervals as rows and ages as columns. The row names of \code{key} (i.e., \code{rownames(key)}) must contain the minimum values of each length interval (e.g., if an interval is 100-109, then the corresponding row name must be 100). The column names of \code{key} (i.e., \code{colnames(key)}) must contain the age values (e.g., the columns can NOT be named with \dQuote{age.1}, for example).
#'
#' The length intervals in the rows of \code{key} must contain all of the length intervals present in the larger sample. Thus, the length of \code{len.n} must, at least, equal the number of rows in \code{key}. If this constraint is not met, then the function will stop with an error message.
#'
#' Note that the function will stop with an error if the formula in \code{formula} does not meet the specific criteria outlined in the parameter list above.
#'
#' @param key A numeric matrix that contains the age-length key. See details.
#' @param formula A formula of the form \code{var~lencat+age} where \code{var} generically represents the variable to be summarized (e.g., length, weight, fecundity), \code{lencat} generically represents the variable that contains the length intervals, and \code{age} generically represents the variable that contains the assigned ages.
#' @param data A data.frame that minimally contains the length intervals, assessed ages, and the variable to be summarized (i.e., this should be the aged sample) as given in \code{formula}.
#' @param len.n A vector of sample sizes for each length interval in the \emph{complete sample} (i.e., all fish regardless of whether they were aged or not).
#' @param method A string that indicates which method of calculation should be used. See details.
#'
#' @return A data.frame with as many rows as ages (columns) present in \code{key} and the following three variables:
#' \itemize{
#' \item age The ages.
#' \item mean The mean value at each age.
#' \item sd,se The SD if \code{method="BettoliMiranda"} or SE of the mean if \code{method="QuinnDeriso"} for the value at each age.
#' }
#'
#' @section Testing: The results of these functions have not yet been rigorously tested. The Bettoli and Miranda (2001) results appear, at least, approximately correct when compared to the results from \code{\link{alkIndivAge}}. The Quinn and Deriso (1999) results appear at least approximately correct for the mean values, but do not appear to be correct for the SE values. Thus, a note is returned with the Quinn and Deriso (1999) results that the SE should not be trusted.
#'
#' @author Derek H. Ogle, \email{DerekOgle51@gmail.com}
#'
#' @section IFAR Chapter: 5-Age-Length Key.
#'
#' @seealso See \code{\link{alkIndivAge}} and related functions for a completely different methodology. See \code{\link{alkAgeDist}} for a related method of determining the proportion of fish at each age. See the \pkg{ALKr} package.
#'
#' @references Ogle, D.H. 2016. \href{https://fishr-core-team.github.io/fishR/pages/books.html#introductory-fisheries-analyses-with-r}{Introductory Fisheries Analyses with R}. Chapman & Hall/CRC, Boca Raton, FL.
#'
#' Bettoli, P. W. and Miranda, L. E. 2001. A cautionary note about estimating mean length at age with subsampled data. North American Journal of Fisheries Management, 21:425-428.
#'
#' Quinn, T. J. and R. B. Deriso. 1999. Quantitative Fish Dynamics. Oxford University Press, New York, New York. 542 pages
#'
#' @keywords manip
#'
#' @examples
#' ## Example -- Even breaks for length categories
#' WR1 <- WR79
#' # add length intervals (width=5)
#' WR1$LCat <- lencat(WR1$len,w=5)
#' # get number of fish in each length interval in the entire sample
#' len.n <- xtabs(~LCat,data=WR1)
#' # isolate aged sample
#' WR1.age <- subset(WR1, !is.na(age))
#' # create age-length key
#' raw <- xtabs(~LCat+age,data=WR1.age)
#' ( WR1.key <- prop.table(raw, margin=1) )
#'
#' ## use age-length key to estimate mean length-at-age of all fish
#' # Bettoli-Miranda method
#' alkMeanVar(WR1.key,len~LCat+age,WR1.age,len.n)
#'
#' # Quinn-Deriso method
#' alkMeanVar(WR1.key,len~LCat+age,WR1.age,len.n,method="QuinnDeriso")
#'
#' @export
alkMeanVar <- function(key,formula,data,len.n,
method=c("BettoliMiranda","QuinnDeriso")) {
## Some checks
method <- match.arg(method)
key <- iCheckALK(key)
tmp <- iHndlFormula(formula,data,expNumR=1,expNumE=2)
if (!tmp$metExpNumR)
STOP("'formula' must have a LHS with only one variable.")
if (!tmp$Rclass %in% c("numeric","integer"))
STOP("LHS of 'formula' must be numeric.")
if (!tmp$metExpNumE)
STOP("'formula' must have two and only two variables on the RHS.")
if (length(len.n)!=nrow(key))
STOP("'len.n' and the 'key' have different numbers of length intervals.")
## Main calculations (in internal functions)
switch(method,
BettoliMiranda= { res=iALKMean.BM(key,formula,data,N_i=len.n) },
QuinnDeriso= { res=iALKMean.QD(key,formula,data,N_i=len.n) }
)
res
}
##############################################################
## Internal function for Bettoli-Miranda method
## equations 3 and 4b
##############################################################
iALKMean.BM <- function(key,formula,data,N_i) {
## Compute means by length and age (suppress warnings)
suppressWarnings(mn_ij <- sumTable(formula,data,FUN=mean))
## See if key has a row that sums to zero, remove that row from
## the key, N_i, and mn_ij
key.goodrows <- which(rowSums(key,na.rm=TRUE)!=0)
if (length(key.goodrows)>0) {
key <- key[key.goodrows,]
N_i <- N_i[key.goodrows]
mn_ij <- mn_ij[key.goodrows,]
}
## Get overall number by length and age (can set check.margin=FALSE
## because already checked the dimensions ... see ?sweep)
N_ij <- sweep(key,MARGIN=1,FUN="*",STATS=N_i,check.margin=FALSE)
## Get overall number by age
N_j <- colSums(N_ij)
## Number of ages prsent
A <- ncol(key)
## Prepare result vectors
mn_j <- var_j <- numeric(A)
## Loop through the ages
for (j in 1:A) {
## These are the formulas in Bettoli and Miranda (2001)
mn_j[j] <- sum(N_ij[,j]*mn_ij[,j],na.rm=TRUE)/N_j[j]
var_j[j] <- sum(N_ij[,j]*((mn_ij[,j]-mn_j[j])^2),na.rm=TRUE)/(N_j[j]-1)
## These are the equivalent formulas in IFAR (would need to
## define N <- sum(N_i) and p_ij <- N_ij/N before loop.)
#mn_j[j] <- sum(p_ij[,j]*mn_ij[,j],na.rm=TRUE)/N_j[j]*N
#var_j[j] <- sum(p_ij[,j]*((mn_ij[,j]-mn_j[j])^2),na.rm=TRUE)/(N_j[j]-1)*N
}
res <- data.frame(age=as.numeric(colnames(key)),mean=mn_j,sd=sqrt(var_j))
rownames(res) <- NULL
## return the result
res
}
##############################################################
## Internal function for Quinn-Deriso method
## equations 8.15a and 8.15b
##############################################################
iALKMean.QD <- function(key,formula,data,N_i) {
## Compute ns, means, and variances by length and age (suppress warnings)
suppressWarnings(n_ij <- sumTable(formula,data,FUN=length))
suppressWarnings(mn_ij <- sumTable(formula,data,FUN=mean))
# Not sure from Q&D if this should be divided by sqrt(n) or not
suppressWarnings(var_ij <- sumTable(formula,data,FUN=stats::var)/
sqrt(sumTable(formula,data,FUN=length)))
## See if key has a row that sums to zero, remove that row from
## the key, N_i, n_ij, mn_ij, and var_ij
key.goodrows <- which(rowSums(key,na.rm=TRUE)!=0)
if (length(key.goodrows)>0) {
key <- key[key.goodrows,]
N_i <- N_i[key.goodrows]
n_ij <- n_ij[key.goodrows,]
mn_ij <- mn_ij[key.goodrows,]
var_ij <- var_ij[key.goodrows,]
}
## Get the number in aged sample in each length
n_i <- colSums(n_ij,na.rm=TRUE)
## total number of fish sampled
N <- sum(N_i)
## proportion of total fish sampled by length interval
l_i <- N_i/N
## Number of ages prsent
A <- ncol(key)
## Prepare result vectors
mn_j <- var_j <- numeric(A)
## Loop through ages
for (j in 1:A) {
# top of page 305 from Quinn and Deriso (1999)
p_jgi <- key[,j]
# equation 8.14a from Quinn and Deriso (1999) ... correct from ALKAgeDist
p_ij <- l_i*p_jgi
p_j <- sum(p_ij)
# RHS equivalency of 8.14b and 8.14c from Quinn and Deriso (1999) ... correct from ALKAgeDist
var_p_ij <- ((l_i^2)*p_jgi*(1-p_jgi))/(n_i[j]-1) + (l_i*((p_jgi-p_j)^2))/N
# equation 8.15a from Quinn and Deriso (1999)
mn_j[j] <- sum(p_ij*mn_ij[,j],na.rm=TRUE)/p_j
# 8.15b from Quinn and Deriso (1999)
var_j[j] <- sum((p_ij^2)*var_ij[,j] + ((mn_ij[,j]-mn_j[j])^2)*var_p_ij,na.rm=TRUE)/(p_j^2)
}
## Put it all together and return the result
res <- data.frame(age=as.numeric(colnames(key)),mean=mn_j,se=sqrt(var_j))
rownames(res) <- NULL
message("The 'se' values should not be trusted!")
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.